mem-cache: Access Map Pattern Matching Prefetcher

Implementation of the Access Map Pattern Matching prefetcher
Based in the description of the following paper:
  Access map pattern matching for high performance data cache prefetch.
  Ishii, Y., Inaba, M., & Hiraki, K. (2011).
  Journal of Instruction-Level Parallelism, 13, 1-24.

Change-Id: I0d4b7f7afc2ab4938bdd8755bfed26e26a28530c
Reviewed-on: https://gem5-review.googlesource.com/c/15096
Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
Maintainer: Nikos Nikoleris <nikos.nikoleris@arm.com>
diff --git a/src/mem/cache/prefetch/Prefetcher.py b/src/mem/cache/prefetch/Prefetcher.py
index a868a25..3d9c665 100644
--- a/src/mem/cache/prefetch/Prefetcher.py
+++ b/src/mem/cache/prefetch/Prefetcher.py
@@ -179,3 +179,37 @@
         "Minimum confidence to issue prefetches")
     lookahead_confidence_threshold = Param.Float(0.75,
         "Minimum confidence to continue exploring lookahead entries")
+
+class AccessMapPatternMatchingPrefetcher(QueuedPrefetcher):
+    type = 'AccessMapPatternMatchingPrefetcher'
+    cxx_class = 'AccessMapPatternMatchingPrefetcher'
+    cxx_header = "mem/cache/prefetch/access_map_pattern_matching.hh"
+
+    start_degree = Param.Unsigned(4,
+        "Initial degree (Maximum number of prefetches generated")
+    hot_zone_size = Param.MemorySize("2kB", "Memory covered by a hot zone")
+    access_map_table_entries = Param.MemorySize("256",
+        "Number of entries in the access map table")
+    access_map_table_assoc = Param.Unsigned(8,
+        "Associativity of the access map table")
+    access_map_table_indexing_policy = Param.BaseIndexingPolicy(
+        SetAssociative(entry_size = 1, assoc = Parent.access_map_table_assoc,
+        size = Parent.access_map_table_entries),
+        "Indexing policy of the access map table")
+    access_map_table_replacement_policy = Param.BaseReplacementPolicy(LRURP(),
+        "Replacement policy of the access map table")
+    high_coverage_threshold = Param.Float(0.25,
+        "A prefetch coverage factor bigger than this is considered high")
+    low_coverage_threshold = Param.Float(0.125,
+        "A prefetch coverage factor smaller than this is considered low")
+    high_accuracy_threshold = Param.Float(0.5,
+        "A prefetch accuracy factor bigger than this is considered high")
+    low_accuracy_threshold = Param.Float(0.25,
+        "A prefetch accuracy factor smaller than this is considered low")
+    high_cache_hit_threshold = Param.Float(0.875,
+        "A cache hit ratio bigger than this is considered high")
+    low_cache_hit_threshold = Param.Float(0.75,
+        "A cache hit ratio smaller than this is considered low")
+    epoch_cycles = Param.Cycles(256000, "Cycles in an epoch period")
+    offchip_memory_latency = Param.Latency("30ns",
+        "Memory latency used to compute the required memory bandwidth")
diff --git a/src/mem/cache/prefetch/SConscript b/src/mem/cache/prefetch/SConscript
index ccbc2e3..b461586 100644
--- a/src/mem/cache/prefetch/SConscript
+++ b/src/mem/cache/prefetch/SConscript
@@ -32,6 +32,7 @@
 
 SimObject('Prefetcher.py')
 
+Source('access_map_pattern_matching.cc')
 Source('base.cc')
 Source('queued.cc')
 Source('signature_path.cc')
diff --git a/src/mem/cache/prefetch/access_map_pattern_matching.cc b/src/mem/cache/prefetch/access_map_pattern_matching.cc
new file mode 100644
index 0000000..0f46eff
--- /dev/null
+++ b/src/mem/cache/prefetch/access_map_pattern_matching.cc
@@ -0,0 +1,252 @@
+/**
+ * Copyright (c) 2018 Metempsy Technology Consulting
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Javier Bueno
+ */
+
+#include "mem/cache/prefetch/access_map_pattern_matching.hh"
+
+#include "debug/HWPrefetch.hh"
+#include "mem/cache/prefetch/associative_set_impl.hh"
+#include "params/AccessMapPatternMatchingPrefetcher.hh"
+
+AccessMapPatternMatchingPrefetcher::AccessMapPatternMatchingPrefetcher(
+    const AccessMapPatternMatchingPrefetcherParams *p)
+    : QueuedPrefetcher(p),
+      startDegree(p->start_degree), hotZoneSize(p->hot_zone_size),
+      highCoverageThreshold(p->high_coverage_threshold),
+      lowCoverageThreshold(p->low_coverage_threshold),
+      highAccuracyThreshold(p->high_accuracy_threshold),
+      lowAccuracyThreshold(p->low_accuracy_threshold),
+      highCacheHitThreshold(p->high_cache_hit_threshold),
+      lowCacheHitThreshold(p->low_cache_hit_threshold),
+      epochCycles(p->epoch_cycles),
+      offChipMemoryLatency(p->offchip_memory_latency),
+      accessMapTable(p->access_map_table_assoc, p->access_map_table_entries,
+                     p->access_map_table_indexing_policy,
+                     p->access_map_table_replacement_policy,
+                     AccessMapEntry(hotZoneSize / blkSize)),
+      numGoodPrefetches(0), numTotalPrefetches(0), numRawCacheMisses(0),
+      numRawCacheHits(0), degree(startDegree), usefulDegree(startDegree),
+      epochEvent([this]{ processEpochEvent(); }, name())
+{
+    fatal_if(!isPowerOf2(hotZoneSize),
+        "the hot zone size must be a power of 2");
+    if (!epochEvent.scheduled()) {
+        schedule(epochEvent, clockEdge(epochCycles));
+    }
+}
+
+void
+AccessMapPatternMatchingPrefetcher::processEpochEvent()
+{
+    schedule(epochEvent, clockEdge(epochCycles));
+    double prefetch_accuracy =
+        ((double) numGoodPrefetches) / ((double) numTotalPrefetches);
+    double prefetch_coverage =
+        ((double) numGoodPrefetches) / ((double) numRawCacheMisses);
+    double cache_hit_ratio = ((double) numRawCacheHits) /
+        ((double) (numRawCacheHits + numRawCacheMisses));
+    double num_requests = (double) (numRawCacheMisses - numGoodPrefetches +
+        numTotalPrefetches);
+    double memory_bandwidth = num_requests * offChipMemoryLatency /
+        clockEdge(epochCycles);
+
+    if (prefetch_coverage > highCoverageThreshold &&
+        (prefetch_accuracy > highAccuracyThreshold ||
+        cache_hit_ratio < lowCacheHitThreshold)) {
+        usefulDegree += 1;
+    } else if ((prefetch_coverage < lowCoverageThreshold &&
+               (prefetch_accuracy < lowAccuracyThreshold ||
+                cache_hit_ratio > highCacheHitThreshold)) ||
+               (prefetch_accuracy < lowAccuracyThreshold &&
+                cache_hit_ratio > highCacheHitThreshold)) {
+        usefulDegree -= 1;
+    }
+    degree = std::min((unsigned) memory_bandwidth, usefulDegree);
+    // reset epoch stats
+    numGoodPrefetches = 0.0;
+    numTotalPrefetches = 0.0;
+    numRawCacheMisses = 0.0;
+    numRawCacheHits = 0.0;
+}
+
+AccessMapPatternMatchingPrefetcher::AccessMapEntry *
+AccessMapPatternMatchingPrefetcher::getAccessMapEntry(Addr am_addr,
+                bool is_secure)
+{
+    AccessMapEntry *am_entry = accessMapTable.findEntry(am_addr, is_secure);
+    if (am_entry != nullptr) {
+        accessMapTable.accessEntry(am_entry);
+    } else {
+        am_entry = accessMapTable.findVictim(am_addr);
+        assert(am_entry != nullptr);
+
+        accessMapTable.insertEntry(am_addr, is_secure, am_entry);
+    }
+    return am_entry;
+}
+
+void
+AccessMapPatternMatchingPrefetcher::setEntryState(AccessMapEntry &entry,
+    Addr block, enum AccessMapState state)
+{
+    enum AccessMapState old = entry.states[block];
+    entry.states[block] = state;
+
+    //do not update stats when initializing
+    if (state == AM_INIT) return;
+
+    switch (old) {
+        case AM_INIT:
+            if (state == AM_PREFETCH) {
+                numTotalPrefetches += 1;
+            } else if (state == AM_ACCESS) {
+                numRawCacheMisses += 1;
+            }
+            break;
+        case AM_PREFETCH:
+            if (state == AM_ACCESS) {
+                numGoodPrefetches += 1;
+                numRawCacheMisses += 1;
+            }
+            break;
+        case AM_ACCESS:
+            if (state == AM_ACCESS) {
+                numRawCacheHits += 1;
+            }
+            break;
+        default:
+            panic("Impossible path\n");
+            break;
+    }
+}
+
+void
+AccessMapPatternMatchingPrefetcher::calculatePrefetch(const PrefetchInfo &pfi,
+    std::vector<AddrPriority> &addresses)
+{
+    assert(addresses.empty());
+    bool is_secure = pfi.isSecure();
+    Addr am_addr = pfi.getAddr() / hotZoneSize;
+    Addr current_block = (pfi.getAddr() % hotZoneSize) / blkSize;
+    uint64_t lines_per_zone = hotZoneSize / blkSize;
+
+    // Get the entries of the curent block (am_addr), the previous, and the
+    // following ones
+    AccessMapEntry *am_entry_curr = getAccessMapEntry(am_addr, is_secure);
+    AccessMapEntry *am_entry_prev = (am_addr > 0) ?
+        getAccessMapEntry(am_addr-1, is_secure) : nullptr;
+    AccessMapEntry *am_entry_next = (am_addr < (MaxAddr/hotZoneSize)) ?
+        getAccessMapEntry(am_addr+1, is_secure) : nullptr;
+    assert(am_entry_curr != am_entry_prev);
+    assert(am_entry_curr != am_entry_next);
+    assert(am_entry_prev != am_entry_next);
+    assert(am_entry_curr != nullptr);
+
+    //Mark the current access as Accessed
+    setEntryState(*am_entry_curr, current_block, AM_ACCESS);
+
+    /**
+     * Create a contiguous copy of the 3 entries states.
+     * With this, we avoid doing boundaries checking in the loop that looks
+     * for prefetch candidates, mark out of range positions with AM_INVALID
+     */
+    std::vector<AccessMapState> states(3 * lines_per_zone);
+    for (unsigned idx = 0; idx < lines_per_zone; idx += 1) {
+        states[idx] =
+            am_entry_prev != nullptr ? am_entry_prev->states[idx] : AM_INVALID;
+        states[idx + lines_per_zone] = am_entry_curr->states[idx];
+        states[idx + 2 * lines_per_zone] =
+            am_entry_next != nullptr ? am_entry_next->states[idx] : AM_INVALID;
+    }
+
+    /**
+     * am_entry_prev->states => states[               0 ..   lines_per_zone-1]
+     * am_entry_curr->states => states[  lines_per_zone .. 2*lines_per_zone-1]
+     * am_entry_next->states => states[2*lines_per_zone .. 3*lines_per_zone-1]
+     */
+
+    // index of the current_block in the new vector
+    Addr states_current_block = current_block + lines_per_zone;
+    // consider strides 1..lines_per_zone/2
+    for (int stride = 1; stride < lines_per_zone/2; stride += 1) {
+        // Test accessed positive strides
+        if (checkCandidate(states, states_current_block, stride)) {
+            // candidate found, current_block - stride
+            Addr pf_addr;
+            if (stride > current_block) {
+                // The index (current_block - stride) falls in the range of
+                // the previous zone (am_entry_prev), adjust the address
+                // accordingly
+                Addr blk = states_current_block - stride;
+                pf_addr = (am_addr - 1) * hotZoneSize + blk * blkSize;
+                setEntryState(*am_entry_prev, blk, AM_PREFETCH);
+            } else {
+                // The index (current_block - stride) falls within
+                // am_entry_curr
+                Addr blk = current_block - stride;
+                pf_addr = am_addr * hotZoneSize + blk * blkSize;
+                setEntryState(*am_entry_curr, blk, AM_PREFETCH);
+            }
+            addresses.push_back(AddrPriority(pf_addr, 0));
+            if (addresses.size() == degree) {
+                break;
+            }
+        }
+
+        // Test accessed negative strides
+        if (checkCandidate(states, states_current_block, -stride)) {
+            // candidate found, current_block + stride
+            Addr pf_addr;
+            if (current_block + stride >= lines_per_zone) {
+                // The index (current_block + stride) falls in the range of
+                // the next zone (am_entry_next), adjust the address
+                // accordingly
+                Addr blk = (states_current_block + stride) % lines_per_zone;
+                pf_addr = (am_addr + 1) * hotZoneSize + blk * blkSize;
+                setEntryState(*am_entry_next, blk, AM_PREFETCH);
+            } else {
+                // The index (current_block + stride) falls within
+                // am_entry_curr
+                Addr blk = current_block + stride;
+                pf_addr = am_addr * hotZoneSize + blk * blkSize;
+                setEntryState(*am_entry_curr, blk, AM_PREFETCH);
+            }
+            addresses.push_back(AddrPriority(pf_addr, 0));
+            if (addresses.size() == degree) {
+                break;
+            }
+        }
+    }
+}
+
+AccessMapPatternMatchingPrefetcher*
+AccessMapPatternMatchingPrefetcherParams::create()
+{
+    return new AccessMapPatternMatchingPrefetcher(this);
+}
diff --git a/src/mem/cache/prefetch/access_map_pattern_matching.hh b/src/mem/cache/prefetch/access_map_pattern_matching.hh
new file mode 100644
index 0000000..33a20a2
--- /dev/null
+++ b/src/mem/cache/prefetch/access_map_pattern_matching.hh
@@ -0,0 +1,182 @@
+/**
+ * Copyright (c) 2018 Metempsy Technology Consulting
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Javier Bueno
+ */
+
+ /**
+  * Implementation of the Access Map Pattern Matching Prefetcher
+  *
+  * References:
+  *     Access map pattern matching for high performance data cache prefetch.
+  *     Ishii, Y., Inaba, M., & Hiraki, K. (2011).
+  *     Journal of Instruction-Level Parallelism, 13, 1-24.
+  */
+
+#ifndef __MEM_CACHE_PREFETCH_ACCESS_MAP_PATTERN_MATCHING_HH__
+#define __MEM_CACHE_PREFETCH_ACCESS_MAP_PATTERN_MATCHING_HH__
+
+#include "mem/cache/base.hh"
+#include "mem/cache/prefetch/associative_set.hh"
+#include "mem/cache/prefetch/queued.hh"
+#include "mem/packet.hh"
+
+struct AccessMapPatternMatchingPrefetcherParams;
+
+class AccessMapPatternMatchingPrefetcher : public QueuedPrefetcher
+{
+    /** Maximum number of prefetch generated */
+    const unsigned startDegree;
+    /** Amount of memory covered by a hot zone */
+    const uint64_t hotZoneSize;
+    /** A prefetch coverage factor bigger than this is considered high */
+    const double highCoverageThreshold;
+    /** A prefetch coverage factor smaller than this is considered low */
+    const double lowCoverageThreshold;
+    /** A prefetch accuracy factor bigger than this is considered high */
+    const double highAccuracyThreshold;
+    /** A prefetch accuracy factor smaller than this is considered low */
+    const double lowAccuracyThreshold;
+    /** A cache hit ratio bigger than this is considered high */
+    const double highCacheHitThreshold;
+    /** A cache hit ratio smaller than this is considered low */
+    const double lowCacheHitThreshold;
+    /** Cycles in an epoch period */
+    const Cycles epochCycles;
+    /** Off chip memory latency to use for the epoch bandwidth calculation */
+    const Tick offChipMemoryLatency;
+
+    /** Data type representing the state of a cacheline in the access map */
+    enum AccessMapState
+    {
+        AM_INIT,
+        AM_PREFETCH,
+        AM_ACCESS,
+        AM_INVALID
+    };
+
+    /** AccessMapEntry data type */
+    struct AccessMapEntry : public TaggedEntry
+    {
+        /** vector containing the state of the cachelines in this zone */
+        std::vector<AccessMapState> states;
+
+        AccessMapEntry(size_t num_entries) : states(num_entries, AM_INIT)
+        {}
+
+        /** Reset the entries to their initial values */
+        void reset() override
+        {
+            for (auto &entry : states) {
+                entry = AM_INIT;
+            }
+        }
+    };
+    /** Access map table */
+    AssociativeSet<AccessMapEntry> accessMapTable;
+
+    /**
+     * Number of good prefetches
+     * - State transitions from PREFETCH to ACCESS
+     */
+    uint64_t numGoodPrefetches;
+    /**
+     * Number of prefetches issued
+     * - State transitions from INIT to PREFETCH
+     */
+    uint64_t numTotalPrefetches;
+    /**
+     * Number of raw cache misses
+     * - State transitions from INIT or PREFETCH to ACCESS
+     */
+    uint64_t numRawCacheMisses;
+    /**
+     * Number of raw cache hits
+     * - State transitions from ACCESS to ACCESS
+     */
+    uint64_t numRawCacheHits;
+    /** Current degree */
+    unsigned degree;
+    /** Current useful degree */
+    unsigned usefulDegree;
+
+    /**
+     * Given a target cacheline, this function checks if the cachelines
+     * that follow the provided stride have been accessed. If so, the line
+     * is considered a good candidate.
+     * @param states vector containing the states of three contiguous hot zones
+     * @param current target block (cacheline)
+     * @param stride access stride to obtain the reference cachelines
+     * @return true if current is a prefetch candidate
+     */
+    inline bool checkCandidate(std::vector<AccessMapState> const &states,
+                        Addr current, int stride) const
+    {
+        enum AccessMapState tgt   = states[current - stride];
+        enum AccessMapState s     = states[current + stride];
+        enum AccessMapState s2    = states[current + 2 * stride];
+        enum AccessMapState s2_p1 = states[current + 2 * stride + 1];
+        return (tgt != AM_INVALID &&
+                ((s == AM_ACCESS && s2 == AM_ACCESS) ||
+                (s == AM_ACCESS && s2_p1 == AM_ACCESS)));
+    }
+
+    /**
+     * Obtain an AccessMapEntry  from the AccessMapTable, if the entry is not
+     * found a new one is initialized and inserted.
+     * @param am_addr address of the hot zone
+     * @param is_secure whether the address belongs to the secure memory area
+     * @return the corresponding entry
+     */
+    AccessMapEntry *getAccessMapEntry(Addr am_addr, bool is_secure);
+
+    /**
+     * Updates the state of a block within an AccessMapEntry, also updates
+     * the prefetcher metrics.
+     * @param entry AccessMapEntry to update
+     * @param block cacheline within the hot zone
+     * @param state new state
+     */
+    void setEntryState(AccessMapEntry &entry, Addr block,
+        enum AccessMapState state);
+
+    /**
+     * This event constitues the epoch of the statistics that keep track of
+     * the prefetcher accuracy, when this event triggers, the prefetcher degree
+     * is adjusted and the statistics counters are reset.
+     */
+    void processEpochEvent();
+    EventFunctionWrapper epochEvent;
+
+  public:
+    AccessMapPatternMatchingPrefetcher(
+        const AccessMapPatternMatchingPrefetcherParams* p);
+    ~AccessMapPatternMatchingPrefetcher() {}
+    void calculatePrefetch(const PrefetchInfo &pfi,
+                           std::vector<AddrPriority> &addresses) override;
+};
+#endif//__MEM_CACHE_PREFETCH_ACCESS_MAP_PATTERN_MATCHING_HH__