mem-cache: Proactive Instruction Fetch Implementation

Ferdman, M., Kaynak, C., & Falsafi, B. (2011, December).
Proactive instruction fetch. In Proceedings of the 44th Annual IEEE/ACM
International Symposium on Microarchitecture (pp. 152-162). ACM.

Change-Id: I38c3ab30a94ab279f03e3d5936ce8ed118310c0e
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/16968
Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Maintainer: Nikos Nikoleris <nikos.nikoleris@arm.com>
diff --git a/src/mem/cache/prefetch/Prefetcher.py b/src/mem/cache/prefetch/Prefetcher.py
index cb0a1c3..404a442 100644
--- a/src/mem/cache/prefetch/Prefetcher.py
+++ b/src/mem/cache/prefetch/Prefetcher.py
@@ -43,6 +43,7 @@
 from m5.params import *
 from m5.proxy import *
 
+from m5.objects.BaseCPU import BaseCPU
 from m5.objects.ClockedObject import ClockedObject
 from m5.objects.IndexingPolicies import *
 from m5.objects.ReplacementPolicies import *
@@ -444,3 +445,42 @@
         "Number of entries of the Region Miss Order Buffer")
     reconstruction_entries = Param.Unsigned(256,
         "Number of reconstruction entries")
+
+class HWPProbeEventRetiredInsts(HWPProbeEvent):
+    def register(self):
+        if self.obj:
+            for name in self.names:
+                self.prefetcher.getCCObject().addEventProbeRetiredInsts(
+                    self.obj.getCCObject(), name)
+
+class PIFPrefetcher(QueuedPrefetcher):
+    type = 'PIFPrefetcher'
+    cxx_class = 'PIFPrefetcher'
+    cxx_header = "mem/cache/prefetch/pif.hh"
+    cxx_exports = [
+        PyBindMethod("addEventProbeRetiredInsts"),
+    ]
+
+    prec_spatial_region_bits = Param.Unsigned(2,
+        "Number of preceding addresses in the spatial region")
+    succ_spatial_region_bits = Param.Unsigned(8,
+        "Number of subsequent addresses in the spatial region")
+    compactor_entries = Param.Unsigned(2, "Entries in the temp. compactor")
+    stream_address_buffer_entries = Param.Unsigned(7, "Entries in the SAB")
+    history_buffer_size = Param.Unsigned(16, "Entries in the history buffer")
+
+    index_entries = Param.MemorySize("64",
+        "Number of entries in the index")
+    index_assoc = Param.Unsigned(64,
+        "Associativity of the index")
+    index_indexing_policy = Param.BaseIndexingPolicy(
+        SetAssociative(entry_size = 1, assoc = Parent.index_assoc,
+        size = Parent.index_entries),
+        "Indexing policy of the index")
+    index_replacement_policy = Param.BaseReplacementPolicy(LRURP(),
+        "Replacement policy of the index")
+
+    def listenFromProbeRetiredInstructions(self, simObj):
+        if not isinstance(simObj, BaseCPU):
+            raise TypeError("argument must be of BaseCPU type")
+        self.addEvent(HWPProbeEventRetiredInsts(self, simObj,"RetiredInstsPC"))
diff --git a/src/mem/cache/prefetch/SConscript b/src/mem/cache/prefetch/SConscript
index 5b65338..0dae74e 100644
--- a/src/mem/cache/prefetch/SConscript
+++ b/src/mem/cache/prefetch/SConscript
@@ -38,6 +38,7 @@
 Source('delta_correlating_prediction_tables.cc')
 Source('irregular_stream_buffer.cc')
 Source('indirect_memory.cc')
+Source('pif.cc')
 Source('queued.cc')
 Source('sbooe.cc')
 Source('signature_path.cc')
diff --git a/src/mem/cache/prefetch/pif.cc b/src/mem/cache/prefetch/pif.cc
new file mode 100644
index 0000000..04064d4
--- /dev/null
+++ b/src/mem/cache/prefetch/pif.cc
@@ -0,0 +1,259 @@
+/**
+ * Copyright (c) 2019 Metempsy Technology Consulting
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Ivan Pizarro
+ */
+
+#include "mem/cache/prefetch/pif.hh"
+
+#include <cmath>
+#include <utility>
+
+#include "debug/HWPrefetch.hh"
+#include "mem/cache/prefetch/associative_set_impl.hh"
+#include "params/PIFPrefetcher.hh"
+
+PIFPrefetcher::PIFPrefetcher(const PIFPrefetcherParams *p)
+    : QueuedPrefetcher(p),
+      precSize(p->prec_spatial_region_bits),
+      succSize(p->succ_spatial_region_bits),
+      maxCompactorEntries(p->compactor_entries),
+      maxStreamAddressBufferEntries(p->stream_address_buffer_entries),
+      historyBuffer(p->history_buffer_size),
+      historyBufferTail(0),
+      index(p->index_assoc, p->index_entries, p->index_indexing_policy,
+            p->index_replacement_policy),
+      streamAddressBuffer(), listenersPC()
+{
+}
+
+PIFPrefetcher::CompactorEntry::CompactorEntry(Addr addr,
+    unsigned int prec_size, unsigned int succ_size)
+{
+    trigger = addr;
+    prec.resize(prec_size, false);
+    succ.resize(succ_size, false);
+}
+
+unsigned int
+PIFPrefetcher::CompactorEntry::distanceFromTrigger(Addr target,
+        unsigned int log_blk_size) const {
+    const Addr target_blk = target >> log_blk_size;
+    const Addr trigger_blk = trigger >> log_blk_size;
+
+    return std::abs(target_blk - trigger_blk);
+}
+
+bool
+PIFPrefetcher::CompactorEntry::inSameSpatialRegion(Addr pc,
+        unsigned int log_blk_size, bool update)
+{
+    unsigned int blk_distance = distanceFromTrigger(pc, log_blk_size);
+
+    bool hit = (pc > trigger) ?
+        (succ.size() >= blk_distance) : (prec.size() >= blk_distance);
+    if (hit && update) {
+        if (pc > trigger) {
+            succ[blk_distance - 1] = true;
+        } else if (pc < trigger) {
+            prec[blk_distance - 1] = true;
+        }
+    }
+    return hit;
+}
+
+bool
+PIFPrefetcher::CompactorEntry::hasAddress(Addr target,
+                                          unsigned int log_blk_size) const
+{
+    unsigned int blk_distance = distanceFromTrigger(target, log_blk_size);
+    bool hit = false;
+    if (target > trigger) {
+        hit = blk_distance <= succ.size() && succ[blk_distance - 1];
+    } else if (target < trigger) {
+        hit = blk_distance <= prec.size() && succ[blk_distance - 1];
+    } else {
+        hit = true;
+    }
+    return hit;
+}
+
+void
+PIFPrefetcher::CompactorEntry::getPredictedAddresses(unsigned int log_blk_size,
+    std::vector<AddrPriority> &addresses) const
+{
+    // Calculate the addresses of the instruction blocks that are encoded
+    // by the bit vector and issue prefetch requests for these addresses.
+    // Predictions are made by traversing the bit vector from left to right
+    // as this typically predicts the accesses in the order they will be
+    // issued in the core.
+    const Addr trigger_blk = trigger >> log_blk_size;
+    for (int i = prec.size()-1; i >= 0; i--) {
+        // Address from the preceding blocks to issue a prefetch
+        if (prec[i]) {
+            const Addr prec_addr = (trigger_blk - (i+1)) << log_blk_size;
+            addresses.push_back(AddrPriority(prec_addr, 0));
+        }
+    }
+    for (int i = 0; i < succ.size(); i++) {
+        // Address from the succeding blocks to issue a prefetch
+        if (succ[i]) {
+            const Addr succ_addr = (trigger_blk + (i+1)) << log_blk_size;
+            addresses.push_back(AddrPriority(succ_addr, 0));
+        }
+    }
+}
+
+void
+PIFPrefetcher::notifyRetiredInst(const Addr pc)
+{
+    // First access to the prefetcher
+    if (temporalCompactor.size() == 0) {
+        spatialCompactor = CompactorEntry(pc, precSize, succSize);
+    } else {
+        // If the PC of the instruction retired is in the same spatial region
+        // than the last trigger address, update the bit vectors based on the
+        // distance between them
+        if (spatialCompactor.inSameSpatialRegion(pc, lBlkSize, true)) {
+        // If the PC of the instruction retired is outside the latest spatial
+        // region, check if it matches in any of the regions in the temporal
+        // compactor and update it to the MRU position
+        } else {
+            bool is_in_temporal_compactor = false;
+
+            // Check if the PC is in the temporal compactor
+            for (auto it = temporalCompactor.begin();
+                    it != temporalCompactor.end(); it++)
+            {
+                if (it->inSameSpatialRegion(pc, lBlkSize, false)) {
+                    spatialCompactor = (*it);
+                    temporalCompactor.erase(it);
+                    is_in_temporal_compactor = true;
+                    break;
+                }
+            }
+
+            if (temporalCompactor.size() == maxCompactorEntries) {
+                temporalCompactor.pop_front(); // Discard the LRU entry
+            }
+
+            temporalCompactor.push_back(spatialCompactor);
+
+            // If the compactor entry is neither the spatial or can't be
+            // found in the temporal compactor, reset the spatial compactor
+            // updating the trigger address and resetting the vector bits
+            if (!is_in_temporal_compactor) {
+                // Insert the spatial entry into the history buffer and update
+                // the 'index' table to point to the new entry
+                historyBuffer[historyBufferTail] = spatialCompactor;
+
+                IndexEntry *idx_entry =
+                    index.findEntry(spatialCompactor.trigger, false);
+                if (idx_entry != nullptr) {
+                    index.accessEntry(idx_entry);
+                } else {
+                    idx_entry = index.findVictim(spatialCompactor.trigger);
+                    assert(idx_entry != nullptr);
+                    index.insertEntry(spatialCompactor.trigger, false,
+                                      idx_entry);
+                }
+                idx_entry->historyIndex = historyBufferTail;
+
+                historyBufferTail++;
+                if (historyBufferTail == historyBuffer.size()) {
+                    historyBufferTail = 0;
+                }
+
+                // Reset the spatial compactor fields with the new address
+                spatialCompactor = CompactorEntry(pc, precSize, succSize);
+            }
+        }
+    }
+}
+
+void
+PIFPrefetcher::calculatePrefetch(const PrefetchInfo &pfi,
+    std::vector<AddrPriority> &addresses)
+{
+    const Addr addr = pfi.getAddr();
+
+    // First check if the access has been prefetched, this is done by
+    // comparing the access against the active Stream Address Buffers
+    for (auto &sabEntry : streamAddressBuffer) {
+        if (sabEntry->hasAddress(addr, lBlkSize)) {
+            // Advance to the next entry (first check if we have reached the
+            // end of the history buffer)
+            if (sabEntry == &(historyBuffer[historyBuffer.size() - 1])) {
+                sabEntry = &(historyBuffer[0]);
+            } else {
+                sabEntry++;
+            }
+            sabEntry->getPredictedAddresses(lBlkSize, addresses);
+            // We are done
+            return;
+        }
+    }
+
+    // Check if a valid entry in the 'index' table is found and allocate a new
+    // active prediction stream
+    IndexEntry *idx_entry = index.findEntry(addr, /* unused */ false);
+
+    if (idx_entry != nullptr) {
+        index.accessEntry(idx_entry);
+        // Trigger address from the 'index' table and index to the history
+        // buffer
+        const unsigned int hb_entry = idx_entry->historyIndex;
+        CompactorEntry *entry = &historyBuffer[hb_entry];
+
+        // Track the block in the Stream Address Buffer
+        if (streamAddressBuffer.size() == maxStreamAddressBufferEntries) {
+            streamAddressBuffer.pop_front();
+        }
+        streamAddressBuffer.push_back(entry);
+
+        entry->getPredictedAddresses(lBlkSize, addresses);
+    }
+}
+
+void
+PIFPrefetcher::PrefetchListenerPC::notify(const Addr& pc)
+{
+    parent.notifyRetiredInst(pc);
+}
+
+void
+PIFPrefetcher::addEventProbeRetiredInsts(SimObject *obj, const char *name)
+{
+    ProbeManager *pm(obj->getProbeManager());
+    listenersPC.push_back(new PrefetchListenerPC(*this, pm, name));
+}
+
+PIFPrefetcher*
+PIFPrefetcherParams::create()
+{
+    return new PIFPrefetcher(this);
+}
diff --git a/src/mem/cache/prefetch/pif.hh b/src/mem/cache/prefetch/pif.hh
new file mode 100644
index 0000000..6516b2c
--- /dev/null
+++ b/src/mem/cache/prefetch/pif.hh
@@ -0,0 +1,191 @@
+/**
+ * Copyright (c) 2019 Metempsy Technology Consulting
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Ivan Pizarro
+ */
+
+/** Implementation of the 'Proactive Instruction Fetch' prefetcher
+ *  Reference:
+ *    Ferdman, M., Kaynak, C., & Falsafi, B. (2011, December).
+ *    Proactive instruction fetch.
+ *    In Proceedings of the 44th Annual IEEE/ACM International Symposium
+ *    on Microarchitecture (pp. 152-162). ACM.
+ */
+
+#ifndef __MEM_CACHE_PREFETCH_PIF_HH__
+#define __MEM_CACHE_PREFETCH_PIF_HH__
+
+#include <deque>
+#include <vector>
+
+#include "mem/cache/prefetch/associative_set.hh"
+#include "mem/cache/prefetch/queued.hh"
+
+struct PIFPrefetcherParams;
+
+class PIFPrefetcher : public QueuedPrefetcher
+{
+    private:
+        /** Number of preceding and subsequent spatial addresses to compact */
+        const unsigned int precSize;
+        const unsigned int succSize;
+        /** Number of entries used for the temporal compactor */
+        const unsigned int maxCompactorEntries;
+        /** Max number of entries to be used in the Stream Address Buffer */
+        const unsigned int maxStreamAddressBufferEntries;
+
+        /**
+         * The compactor tracks retired instructions addresses, leveraging the
+         * spatial and temporal locality among instructions for compaction. It
+         *comprises the spatial and temporal compaction mechanisms.
+         *
+         * Taking advantage of the spatial locality across instruction blocks,
+         * the spatial compactor combines instruction-block addresses that fall
+         * within a 'spatial region', a group of adjacent instruction blocks.
+         * When an instruction outside the current spatial region retires, the
+         * existing spatial region is sent to the temporal compactor.
+         *
+         * The temporal compactor tracks a small number of the
+         * most-recently-observed spatial region records.
+         */
+        struct CompactorEntry {
+            Addr trigger;
+            std::vector<bool> prec;
+            std::vector<bool> succ;
+            CompactorEntry() {}
+            CompactorEntry(Addr, unsigned int, unsigned int);
+
+            /**
+             * Checks if a given address is in the same defined spatial region
+             * as the compactor entry.
+             * @param addr Address to check if it's inside the spatial region
+             * @param log_blk_distance log_2(block size of the cache)
+             * @param update if true, set the corresponding succ/prec entry
+             * @return TRUE if they are in the same spatial region, FALSE
+             *   otherwise
+             */
+            bool inSameSpatialRegion(Addr addr, unsigned int log_blk_size,
+                                     bool update);
+            /**
+             * Checks if the provided address is contained in this spatial
+             * region and if its corresponding bit vector entry is set
+             * @param target address to check
+             * @param log_blk_distance log_2(block size of the cache)
+             * @return TRUE if target has its bit set
+             */
+            bool hasAddress(Addr target, unsigned int log_blk_size) const;
+
+            /**
+             * Fills the provided vector with the predicted addresses using the
+             * recorded bit vectors of the entry
+             * @param log_blk_distance log_2(block size of the cache)
+             * @param addresses reference to a vector to add the generated
+             * addresses
+             */
+            void getPredictedAddresses(unsigned int log_blk_size,
+                    std::vector<AddrPriority> &addresses) const;
+          private:
+            /**
+             * Computes the distance, in cache blocks, from an address to the
+             * trigger of the entry.
+             * @param addr address to compute the distance from the trigger
+             * @param log_blk_distance log_2(block size of the cache)
+             * @result distance in cache blocks from the address to the trigger
+             */
+            unsigned int distanceFromTrigger(Addr addr,
+                                             unsigned int log_blk_size) const;
+        };
+
+        CompactorEntry spatialCompactor;
+        std::deque<CompactorEntry> temporalCompactor;
+
+        /**
+         * History buffer is a circular buffer that stores the sequence of
+         * retired instructions in FIFO order.
+         */
+        std::vector<CompactorEntry> historyBuffer;
+        unsigned int historyBufferTail;
+
+        struct IndexEntry : public TaggedEntry
+        {
+            unsigned int historyIndex;
+        };
+        /**
+         * The index table is a small cache-like structure that facilitates
+         * fast search of the history buffer.
+         */
+        AssociativeSet<IndexEntry> index;
+
+        /**
+         * A Stream Address Buffer (SAB) tracks a window of consecutive
+         * spatial regions. The SAB mantains a pointer to the sequence in the
+         * history buffer, initiallly set to the pointer taken from the index
+         * table
+         */
+        std::deque<CompactorEntry*> streamAddressBuffer;
+
+        /**
+         * Updates the prefetcher structures upon an instruction retired
+         * @param pc PC of the instruction being retired
+         */
+        void notifyRetiredInst(const Addr pc);
+
+        /**
+         * Probe Listener to handle probe events from the CPU
+         */
+        class PrefetchListenerPC : public ProbeListenerArgBase<Addr>
+        {
+          public:
+            PrefetchListenerPC(PIFPrefetcher &_parent, ProbeManager *pm,
+                             const std::string &name)
+                : ProbeListenerArgBase(pm, name),
+                  parent(_parent) {}
+            void notify(const Addr& pc) override;
+          protected:
+            PIFPrefetcher &parent;
+        };
+
+        /** Array of probe listeners */
+        std::vector<PrefetchListenerPC *> listenersPC;
+
+
+    public:
+        PIFPrefetcher(const PIFPrefetcherParams *p);
+        ~PIFPrefetcher() {}
+
+        void calculatePrefetch(const PrefetchInfo &pfi,
+                               std::vector<AddrPriority> &addresses);
+
+        /**
+         * Add a SimObject and a probe name to monitor the retired instructions
+         * @param obj The SimObject pointer to listen from
+         * @param name The probe name
+         */
+        void addEventProbeRetiredInsts(SimObject *obj, const char *name);
+};
+
+#endif // __MEM_CACHE_PREFETCH_PIF_HH__