mem-cache: Proactive Instruction Fetch Implementation
Ferdman, M., Kaynak, C., & Falsafi, B. (2011, December).
Proactive instruction fetch. In Proceedings of the 44th Annual IEEE/ACM
International Symposium on Microarchitecture (pp. 152-162). ACM.
Change-Id: I38c3ab30a94ab279f03e3d5936ce8ed118310c0e
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/16968
Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Maintainer: Nikos Nikoleris <nikos.nikoleris@arm.com>
diff --git a/src/mem/cache/prefetch/Prefetcher.py b/src/mem/cache/prefetch/Prefetcher.py
index cb0a1c3..404a442 100644
--- a/src/mem/cache/prefetch/Prefetcher.py
+++ b/src/mem/cache/prefetch/Prefetcher.py
@@ -43,6 +43,7 @@
from m5.params import *
from m5.proxy import *
+from m5.objects.BaseCPU import BaseCPU
from m5.objects.ClockedObject import ClockedObject
from m5.objects.IndexingPolicies import *
from m5.objects.ReplacementPolicies import *
@@ -444,3 +445,42 @@
"Number of entries of the Region Miss Order Buffer")
reconstruction_entries = Param.Unsigned(256,
"Number of reconstruction entries")
+
+class HWPProbeEventRetiredInsts(HWPProbeEvent):
+ def register(self):
+ if self.obj:
+ for name in self.names:
+ self.prefetcher.getCCObject().addEventProbeRetiredInsts(
+ self.obj.getCCObject(), name)
+
+class PIFPrefetcher(QueuedPrefetcher):
+ type = 'PIFPrefetcher'
+ cxx_class = 'PIFPrefetcher'
+ cxx_header = "mem/cache/prefetch/pif.hh"
+ cxx_exports = [
+ PyBindMethod("addEventProbeRetiredInsts"),
+ ]
+
+ prec_spatial_region_bits = Param.Unsigned(2,
+ "Number of preceding addresses in the spatial region")
+ succ_spatial_region_bits = Param.Unsigned(8,
+ "Number of subsequent addresses in the spatial region")
+ compactor_entries = Param.Unsigned(2, "Entries in the temp. compactor")
+ stream_address_buffer_entries = Param.Unsigned(7, "Entries in the SAB")
+ history_buffer_size = Param.Unsigned(16, "Entries in the history buffer")
+
+ index_entries = Param.MemorySize("64",
+ "Number of entries in the index")
+ index_assoc = Param.Unsigned(64,
+ "Associativity of the index")
+ index_indexing_policy = Param.BaseIndexingPolicy(
+ SetAssociative(entry_size = 1, assoc = Parent.index_assoc,
+ size = Parent.index_entries),
+ "Indexing policy of the index")
+ index_replacement_policy = Param.BaseReplacementPolicy(LRURP(),
+ "Replacement policy of the index")
+
+ def listenFromProbeRetiredInstructions(self, simObj):
+ if not isinstance(simObj, BaseCPU):
+ raise TypeError("argument must be of BaseCPU type")
+ self.addEvent(HWPProbeEventRetiredInsts(self, simObj,"RetiredInstsPC"))
diff --git a/src/mem/cache/prefetch/SConscript b/src/mem/cache/prefetch/SConscript
index 5b65338..0dae74e 100644
--- a/src/mem/cache/prefetch/SConscript
+++ b/src/mem/cache/prefetch/SConscript
@@ -38,6 +38,7 @@
Source('delta_correlating_prediction_tables.cc')
Source('irregular_stream_buffer.cc')
Source('indirect_memory.cc')
+Source('pif.cc')
Source('queued.cc')
Source('sbooe.cc')
Source('signature_path.cc')
diff --git a/src/mem/cache/prefetch/pif.cc b/src/mem/cache/prefetch/pif.cc
new file mode 100644
index 0000000..04064d4
--- /dev/null
+++ b/src/mem/cache/prefetch/pif.cc
@@ -0,0 +1,259 @@
+/**
+ * Copyright (c) 2019 Metempsy Technology Consulting
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Ivan Pizarro
+ */
+
+#include "mem/cache/prefetch/pif.hh"
+
+#include <cmath>
+#include <utility>
+
+#include "debug/HWPrefetch.hh"
+#include "mem/cache/prefetch/associative_set_impl.hh"
+#include "params/PIFPrefetcher.hh"
+
+PIFPrefetcher::PIFPrefetcher(const PIFPrefetcherParams *p)
+ : QueuedPrefetcher(p),
+ precSize(p->prec_spatial_region_bits),
+ succSize(p->succ_spatial_region_bits),
+ maxCompactorEntries(p->compactor_entries),
+ maxStreamAddressBufferEntries(p->stream_address_buffer_entries),
+ historyBuffer(p->history_buffer_size),
+ historyBufferTail(0),
+ index(p->index_assoc, p->index_entries, p->index_indexing_policy,
+ p->index_replacement_policy),
+ streamAddressBuffer(), listenersPC()
+{
+}
+
+PIFPrefetcher::CompactorEntry::CompactorEntry(Addr addr,
+ unsigned int prec_size, unsigned int succ_size)
+{
+ trigger = addr;
+ prec.resize(prec_size, false);
+ succ.resize(succ_size, false);
+}
+
+unsigned int
+PIFPrefetcher::CompactorEntry::distanceFromTrigger(Addr target,
+ unsigned int log_blk_size) const {
+ const Addr target_blk = target >> log_blk_size;
+ const Addr trigger_blk = trigger >> log_blk_size;
+
+ return std::abs(target_blk - trigger_blk);
+}
+
+bool
+PIFPrefetcher::CompactorEntry::inSameSpatialRegion(Addr pc,
+ unsigned int log_blk_size, bool update)
+{
+ unsigned int blk_distance = distanceFromTrigger(pc, log_blk_size);
+
+ bool hit = (pc > trigger) ?
+ (succ.size() >= blk_distance) : (prec.size() >= blk_distance);
+ if (hit && update) {
+ if (pc > trigger) {
+ succ[blk_distance - 1] = true;
+ } else if (pc < trigger) {
+ prec[blk_distance - 1] = true;
+ }
+ }
+ return hit;
+}
+
+bool
+PIFPrefetcher::CompactorEntry::hasAddress(Addr target,
+ unsigned int log_blk_size) const
+{
+ unsigned int blk_distance = distanceFromTrigger(target, log_blk_size);
+ bool hit = false;
+ if (target > trigger) {
+ hit = blk_distance <= succ.size() && succ[blk_distance - 1];
+ } else if (target < trigger) {
+ hit = blk_distance <= prec.size() && succ[blk_distance - 1];
+ } else {
+ hit = true;
+ }
+ return hit;
+}
+
+void
+PIFPrefetcher::CompactorEntry::getPredictedAddresses(unsigned int log_blk_size,
+ std::vector<AddrPriority> &addresses) const
+{
+ // Calculate the addresses of the instruction blocks that are encoded
+ // by the bit vector and issue prefetch requests for these addresses.
+ // Predictions are made by traversing the bit vector from left to right
+ // as this typically predicts the accesses in the order they will be
+ // issued in the core.
+ const Addr trigger_blk = trigger >> log_blk_size;
+ for (int i = prec.size()-1; i >= 0; i--) {
+ // Address from the preceding blocks to issue a prefetch
+ if (prec[i]) {
+ const Addr prec_addr = (trigger_blk - (i+1)) << log_blk_size;
+ addresses.push_back(AddrPriority(prec_addr, 0));
+ }
+ }
+ for (int i = 0; i < succ.size(); i++) {
+ // Address from the succeding blocks to issue a prefetch
+ if (succ[i]) {
+ const Addr succ_addr = (trigger_blk + (i+1)) << log_blk_size;
+ addresses.push_back(AddrPriority(succ_addr, 0));
+ }
+ }
+}
+
+void
+PIFPrefetcher::notifyRetiredInst(const Addr pc)
+{
+ // First access to the prefetcher
+ if (temporalCompactor.size() == 0) {
+ spatialCompactor = CompactorEntry(pc, precSize, succSize);
+ } else {
+ // If the PC of the instruction retired is in the same spatial region
+ // than the last trigger address, update the bit vectors based on the
+ // distance between them
+ if (spatialCompactor.inSameSpatialRegion(pc, lBlkSize, true)) {
+ // If the PC of the instruction retired is outside the latest spatial
+ // region, check if it matches in any of the regions in the temporal
+ // compactor and update it to the MRU position
+ } else {
+ bool is_in_temporal_compactor = false;
+
+ // Check if the PC is in the temporal compactor
+ for (auto it = temporalCompactor.begin();
+ it != temporalCompactor.end(); it++)
+ {
+ if (it->inSameSpatialRegion(pc, lBlkSize, false)) {
+ spatialCompactor = (*it);
+ temporalCompactor.erase(it);
+ is_in_temporal_compactor = true;
+ break;
+ }
+ }
+
+ if (temporalCompactor.size() == maxCompactorEntries) {
+ temporalCompactor.pop_front(); // Discard the LRU entry
+ }
+
+ temporalCompactor.push_back(spatialCompactor);
+
+ // If the compactor entry is neither the spatial or can't be
+ // found in the temporal compactor, reset the spatial compactor
+ // updating the trigger address and resetting the vector bits
+ if (!is_in_temporal_compactor) {
+ // Insert the spatial entry into the history buffer and update
+ // the 'index' table to point to the new entry
+ historyBuffer[historyBufferTail] = spatialCompactor;
+
+ IndexEntry *idx_entry =
+ index.findEntry(spatialCompactor.trigger, false);
+ if (idx_entry != nullptr) {
+ index.accessEntry(idx_entry);
+ } else {
+ idx_entry = index.findVictim(spatialCompactor.trigger);
+ assert(idx_entry != nullptr);
+ index.insertEntry(spatialCompactor.trigger, false,
+ idx_entry);
+ }
+ idx_entry->historyIndex = historyBufferTail;
+
+ historyBufferTail++;
+ if (historyBufferTail == historyBuffer.size()) {
+ historyBufferTail = 0;
+ }
+
+ // Reset the spatial compactor fields with the new address
+ spatialCompactor = CompactorEntry(pc, precSize, succSize);
+ }
+ }
+ }
+}
+
+void
+PIFPrefetcher::calculatePrefetch(const PrefetchInfo &pfi,
+ std::vector<AddrPriority> &addresses)
+{
+ const Addr addr = pfi.getAddr();
+
+ // First check if the access has been prefetched, this is done by
+ // comparing the access against the active Stream Address Buffers
+ for (auto &sabEntry : streamAddressBuffer) {
+ if (sabEntry->hasAddress(addr, lBlkSize)) {
+ // Advance to the next entry (first check if we have reached the
+ // end of the history buffer)
+ if (sabEntry == &(historyBuffer[historyBuffer.size() - 1])) {
+ sabEntry = &(historyBuffer[0]);
+ } else {
+ sabEntry++;
+ }
+ sabEntry->getPredictedAddresses(lBlkSize, addresses);
+ // We are done
+ return;
+ }
+ }
+
+ // Check if a valid entry in the 'index' table is found and allocate a new
+ // active prediction stream
+ IndexEntry *idx_entry = index.findEntry(addr, /* unused */ false);
+
+ if (idx_entry != nullptr) {
+ index.accessEntry(idx_entry);
+ // Trigger address from the 'index' table and index to the history
+ // buffer
+ const unsigned int hb_entry = idx_entry->historyIndex;
+ CompactorEntry *entry = &historyBuffer[hb_entry];
+
+ // Track the block in the Stream Address Buffer
+ if (streamAddressBuffer.size() == maxStreamAddressBufferEntries) {
+ streamAddressBuffer.pop_front();
+ }
+ streamAddressBuffer.push_back(entry);
+
+ entry->getPredictedAddresses(lBlkSize, addresses);
+ }
+}
+
+void
+PIFPrefetcher::PrefetchListenerPC::notify(const Addr& pc)
+{
+ parent.notifyRetiredInst(pc);
+}
+
+void
+PIFPrefetcher::addEventProbeRetiredInsts(SimObject *obj, const char *name)
+{
+ ProbeManager *pm(obj->getProbeManager());
+ listenersPC.push_back(new PrefetchListenerPC(*this, pm, name));
+}
+
+PIFPrefetcher*
+PIFPrefetcherParams::create()
+{
+ return new PIFPrefetcher(this);
+}
diff --git a/src/mem/cache/prefetch/pif.hh b/src/mem/cache/prefetch/pif.hh
new file mode 100644
index 0000000..6516b2c
--- /dev/null
+++ b/src/mem/cache/prefetch/pif.hh
@@ -0,0 +1,191 @@
+/**
+ * Copyright (c) 2019 Metempsy Technology Consulting
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Ivan Pizarro
+ */
+
+/** Implementation of the 'Proactive Instruction Fetch' prefetcher
+ * Reference:
+ * Ferdman, M., Kaynak, C., & Falsafi, B. (2011, December).
+ * Proactive instruction fetch.
+ * In Proceedings of the 44th Annual IEEE/ACM International Symposium
+ * on Microarchitecture (pp. 152-162). ACM.
+ */
+
+#ifndef __MEM_CACHE_PREFETCH_PIF_HH__
+#define __MEM_CACHE_PREFETCH_PIF_HH__
+
+#include <deque>
+#include <vector>
+
+#include "mem/cache/prefetch/associative_set.hh"
+#include "mem/cache/prefetch/queued.hh"
+
+struct PIFPrefetcherParams;
+
+class PIFPrefetcher : public QueuedPrefetcher
+{
+ private:
+ /** Number of preceding and subsequent spatial addresses to compact */
+ const unsigned int precSize;
+ const unsigned int succSize;
+ /** Number of entries used for the temporal compactor */
+ const unsigned int maxCompactorEntries;
+ /** Max number of entries to be used in the Stream Address Buffer */
+ const unsigned int maxStreamAddressBufferEntries;
+
+ /**
+ * The compactor tracks retired instructions addresses, leveraging the
+ * spatial and temporal locality among instructions for compaction. It
+ *comprises the spatial and temporal compaction mechanisms.
+ *
+ * Taking advantage of the spatial locality across instruction blocks,
+ * the spatial compactor combines instruction-block addresses that fall
+ * within a 'spatial region', a group of adjacent instruction blocks.
+ * When an instruction outside the current spatial region retires, the
+ * existing spatial region is sent to the temporal compactor.
+ *
+ * The temporal compactor tracks a small number of the
+ * most-recently-observed spatial region records.
+ */
+ struct CompactorEntry {
+ Addr trigger;
+ std::vector<bool> prec;
+ std::vector<bool> succ;
+ CompactorEntry() {}
+ CompactorEntry(Addr, unsigned int, unsigned int);
+
+ /**
+ * Checks if a given address is in the same defined spatial region
+ * as the compactor entry.
+ * @param addr Address to check if it's inside the spatial region
+ * @param log_blk_distance log_2(block size of the cache)
+ * @param update if true, set the corresponding succ/prec entry
+ * @return TRUE if they are in the same spatial region, FALSE
+ * otherwise
+ */
+ bool inSameSpatialRegion(Addr addr, unsigned int log_blk_size,
+ bool update);
+ /**
+ * Checks if the provided address is contained in this spatial
+ * region and if its corresponding bit vector entry is set
+ * @param target address to check
+ * @param log_blk_distance log_2(block size of the cache)
+ * @return TRUE if target has its bit set
+ */
+ bool hasAddress(Addr target, unsigned int log_blk_size) const;
+
+ /**
+ * Fills the provided vector with the predicted addresses using the
+ * recorded bit vectors of the entry
+ * @param log_blk_distance log_2(block size of the cache)
+ * @param addresses reference to a vector to add the generated
+ * addresses
+ */
+ void getPredictedAddresses(unsigned int log_blk_size,
+ std::vector<AddrPriority> &addresses) const;
+ private:
+ /**
+ * Computes the distance, in cache blocks, from an address to the
+ * trigger of the entry.
+ * @param addr address to compute the distance from the trigger
+ * @param log_blk_distance log_2(block size of the cache)
+ * @result distance in cache blocks from the address to the trigger
+ */
+ unsigned int distanceFromTrigger(Addr addr,
+ unsigned int log_blk_size) const;
+ };
+
+ CompactorEntry spatialCompactor;
+ std::deque<CompactorEntry> temporalCompactor;
+
+ /**
+ * History buffer is a circular buffer that stores the sequence of
+ * retired instructions in FIFO order.
+ */
+ std::vector<CompactorEntry> historyBuffer;
+ unsigned int historyBufferTail;
+
+ struct IndexEntry : public TaggedEntry
+ {
+ unsigned int historyIndex;
+ };
+ /**
+ * The index table is a small cache-like structure that facilitates
+ * fast search of the history buffer.
+ */
+ AssociativeSet<IndexEntry> index;
+
+ /**
+ * A Stream Address Buffer (SAB) tracks a window of consecutive
+ * spatial regions. The SAB mantains a pointer to the sequence in the
+ * history buffer, initiallly set to the pointer taken from the index
+ * table
+ */
+ std::deque<CompactorEntry*> streamAddressBuffer;
+
+ /**
+ * Updates the prefetcher structures upon an instruction retired
+ * @param pc PC of the instruction being retired
+ */
+ void notifyRetiredInst(const Addr pc);
+
+ /**
+ * Probe Listener to handle probe events from the CPU
+ */
+ class PrefetchListenerPC : public ProbeListenerArgBase<Addr>
+ {
+ public:
+ PrefetchListenerPC(PIFPrefetcher &_parent, ProbeManager *pm,
+ const std::string &name)
+ : ProbeListenerArgBase(pm, name),
+ parent(_parent) {}
+ void notify(const Addr& pc) override;
+ protected:
+ PIFPrefetcher &parent;
+ };
+
+ /** Array of probe listeners */
+ std::vector<PrefetchListenerPC *> listenersPC;
+
+
+ public:
+ PIFPrefetcher(const PIFPrefetcherParams *p);
+ ~PIFPrefetcher() {}
+
+ void calculatePrefetch(const PrefetchInfo &pfi,
+ std::vector<AddrPriority> &addresses);
+
+ /**
+ * Add a SimObject and a probe name to monitor the retired instructions
+ * @param obj The SimObject pointer to listen from
+ * @param name The probe name
+ */
+ void addEventProbeRetiredInsts(SimObject *obj, const char *name);
+};
+
+#endif // __MEM_CACHE_PREFETCH_PIF_HH__