mem-cache: Move access latency calculation to Cache

Access latency was not being calculated properly, as it was
always assuming that for hits reads take as long as writes,
and that parallel accesses would produce the same latency
for read and write misses.

By moving the calculation to the Cache we can use the write/
read information, reduce latency variables duplication and
remove Cache dependency from Tags.

The tag lookup latency is still calculated by the Tags.

Change-Id: I71bc68fb5c3515b372c3bf002d61b6f048a45540
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/13697
Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
Maintainer: Nikos Nikoleris <nikos.nikoleris@arm.com>
diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc
index 4ca8152..980aa91 100644
--- a/src/mem/cache/base.cc
+++ b/src/mem/cache/base.cc
@@ -95,6 +95,7 @@
       forwardLatency(p->tag_latency),
       fillLatency(p->data_latency),
       responseLatency(p->response_latency),
+      sequentialAccess(p->sequential_access),
       numTarget(p->tgts_per_mshr),
       forwardSnoops(true),
       clusivity(p->clusivity),
@@ -225,8 +226,8 @@
         // In this case we are considering request_time that takes
         // into account the delay of the xbar, if any, and just
         // lat, neglecting responseLatency, modelling hit latency
-        // just as lookupLatency or or the value of lat overriden
-        // by access(), that calls accessBlock() function.
+        // just as the value of lat overriden by access(), which calls
+        // the calculateAccessLatency() function.
         cpuSidePort.schedTimingResp(pkt, request_time, true);
     } else {
         DPRINTF(Cache, "%s satisfied %s, no response needed\n", __func__,
@@ -342,15 +343,13 @@
     // the delay provided by the crossbar
     Tick forward_time = clockEdge(forwardLatency) + pkt->headerDelay;
 
-    // We use lookupLatency here because it is used to specify the latency
-    // to access.
-    Cycles lat = lookupLatency;
+    Cycles lat;
     CacheBlk *blk = nullptr;
     bool satisfied = false;
     {
         PacketList writebacks;
         // Note that lat is passed by reference here. The function
-        // access() calls accessBlock() which can modify lat value.
+        // access() will set the lat value.
         satisfied = access(pkt, blk, lat, writebacks);
 
         // copy writebacks to write buffer here to ensure they logically
@@ -360,8 +359,7 @@
 
     // Here we charge the headerDelay that takes into account the latencies
     // of the bus, if the packet comes from it.
-    // The latency charged it is just lat that is the value of lookupLatency
-    // modified by access() function, or if not just lookupLatency.
+    // The latency charged is just the value set by the access() function.
     // In case of a hit we are neglecting response latency.
     // In case of a miss we are neglecting forward latency.
     Tick request_time = clockEdge(lat) + pkt->headerDelay;
@@ -886,6 +884,31 @@
 // Access path: requests coming in from the CPU side
 //
 /////////////////////////////////////////////////////
+Cycles
+BaseCache::calculateAccessLatency(const CacheBlk* blk,
+                                  const Cycles lookup_lat) const
+{
+    Cycles lat(lookup_lat);
+
+    if (blk != nullptr) {
+        // First access tags, then data
+        if (sequentialAccess) {
+            lat += dataLatency;
+        // Latency is dictated by the slowest of tag and data latencies
+        } else {
+            lat = std::max(lookup_lat, dataLatency);
+        }
+
+        // Check if the block to be accessed is available. If not, apply the
+        // access latency on top of block->whenReady.
+        if (blk->whenReady > curTick() &&
+            ticksToCycles(blk->whenReady - curTick()) > lat) {
+            lat += ticksToCycles(blk->whenReady - curTick());
+        }
+    }
+
+    return lat;
+}
 
 bool
 BaseCache::access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat,
@@ -898,9 +921,12 @@
                   "Should never see a write in a read-only cache %s\n",
                   name());
 
-    // Here lat is the value passed as parameter to accessBlock() function
-    // that can modify its value.
-    blk = tags->accessBlock(pkt->getAddr(), pkt->isSecure(), lat);
+    // Access block in the tags
+    Cycles tag_latency(0);
+    blk = tags->accessBlock(pkt->getAddr(), pkt->isSecure(), tag_latency);
+
+    // Calculate access latency
+    lat = calculateAccessLatency(blk, tag_latency);
 
     DPRINTF(Cache, "%s for %s %s\n", __func__, pkt->print(),
             blk ? "hit " + blk->print() : "miss");
diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh
index ad5ff3b..240bf21 100644
--- a/src/mem/cache/base.hh
+++ b/src/mem/cache/base.hh
@@ -419,6 +419,17 @@
     Addr regenerateBlkAddr(CacheBlk* blk);
 
     /**
+     * Calculate access latency in ticks given a tag lookup latency, and
+     * whether access was a hit or miss.
+     *
+     * @param blk The cache block that was accessed.
+     * @param lookup_lat Latency of the respective tag lookup.
+     * @return The number of ticks that pass due to a block access.
+     */
+    Cycles calculateAccessLatency(const CacheBlk* blk,
+                                  const Cycles lookup_lat) const;
+
+    /**
      * Does all the processing necessary to perform the provided request.
      * @param pkt The memory request to perform.
      * @param blk The cache block to be updated.
@@ -805,6 +816,11 @@
      */
     const Cycles responseLatency;
 
+    /**
+     * Whether tags and data are accessed sequentially.
+     */
+    const bool sequentialAccess;
+
     /** The number of targets for each MSHR. */
     const int numTarget;
 
diff --git a/src/mem/cache/tags/Tags.py b/src/mem/cache/tags/Tags.py
index 8e30289..b34779e 100644
--- a/src/mem/cache/tags/Tags.py
+++ b/src/mem/cache/tags/Tags.py
@@ -54,10 +54,6 @@
     tag_latency = Param.Cycles(Parent.tag_latency,
                                "The tag lookup latency for this cache")
 
-    # Get the RAM access latency from the parent (cache)
-    data_latency = Param.Cycles(Parent.data_latency,
-                               "The data access latency for this cache")
-
     # Get the warmup percentage from the parent (cache)
     warmup_percentage = Param.Percent(Parent.warmup_percentage,
         "Percentage of tags to be touched to warm up the cache")
diff --git a/src/mem/cache/tags/base.cc b/src/mem/cache/tags/base.cc
index c6a9a82..5fbbdc1 100644
--- a/src/mem/cache/tags/base.cc
+++ b/src/mem/cache/tags/base.cc
@@ -61,11 +61,7 @@
 
 BaseTags::BaseTags(const Params *p)
     : ClockedObject(p), blkSize(p->block_size), blkMask(blkSize - 1),
-      size(p->size),
-      lookupLatency(p->tag_latency),
-      accessLatency(p->sequential_access ?
-                    p->tag_latency + p->data_latency :
-                    std::max(p->tag_latency, p->data_latency)),
+      size(p->size), lookupLatency(p->tag_latency),
       cache(nullptr), indexingPolicy(p->indexing_policy),
       warmupBound((p->warmup_percentage/100.0) * (p->size / p->block_size)),
       warmedUp(false), numBlocks(p->size / p->block_size),
diff --git a/src/mem/cache/tags/base.hh b/src/mem/cache/tags/base.hh
index a7a35ff..273abf5 100644
--- a/src/mem/cache/tags/base.hh
+++ b/src/mem/cache/tags/base.hh
@@ -79,12 +79,7 @@
     const unsigned size;
     /** The tag lookup latency of the cache. */
     const Cycles lookupLatency;
-    /**
-     * The total access latency of the cache. This latency
-     * is different depending on the cache access mode
-     * (parallel or sequential)
-     */
-    const Cycles accessLatency;
+
     /** Pointer to the parent cache. */
     BaseCache *cache;
 
@@ -293,6 +288,17 @@
     virtual CacheBlk* findVictim(Addr addr, const bool is_secure,
                                  std::vector<CacheBlk*>& evict_blks) const = 0;
 
+    /**
+     * Access block and update replacement data. May not succeed, in which case
+     * nullptr is returned. This has all the implications of a cache access and
+     * should only be used as such. Returns the tag lookup latency as a side
+     * effect.
+     *
+     * @param addr The address to find.
+     * @param is_secure True if the target memory space is secure.
+     * @param lat The latency of the tag lookup.
+     * @return Pointer to the cache block if found.
+     */
     virtual CacheBlk* accessBlock(Addr addr, bool is_secure, Cycles &lat) = 0;
 
     /**
diff --git a/src/mem/cache/tags/base_set_assoc.hh b/src/mem/cache/tags/base_set_assoc.hh
index 58aceb0..bc98afa 100644
--- a/src/mem/cache/tags/base_set_assoc.hh
+++ b/src/mem/cache/tags/base_set_assoc.hh
@@ -115,12 +115,13 @@
 
     /**
      * Access block and update replacement data. May not succeed, in which case
-     * nullptr is returned. This has all the implications of a cache
-     * access and should only be used as such. Returns the access latency as a
-     * side effect.
+     * nullptr is returned. This has all the implications of a cache access and
+     * should only be used as such. Returns the tag lookup latency as a side
+     * effect.
+     *
      * @param addr The address to find.
      * @param is_secure True if the target memory space is secure.
-     * @param lat The access latency.
+     * @param lat The latency of the tag lookup.
      * @return Pointer to the cache block if found.
      */
     CacheBlk* accessBlock(Addr addr, bool is_secure, Cycles &lat) override
@@ -139,28 +140,18 @@
             dataAccesses += allocAssoc;
         }
 
+        // If a cache hit
         if (blk != nullptr) {
-            // If a cache hit
-            lat = accessLatency;
-            // Check if the block to be accessed is available. If not,
-            // apply the accessLatency on top of block->whenReady.
-            if (blk->whenReady > curTick() &&
-                cache->ticksToCycles(blk->whenReady - curTick()) >
-                accessLatency) {
-                lat = cache->ticksToCycles(blk->whenReady - curTick()) +
-                accessLatency;
-            }
-
             // Update number of references to accessed block
             blk->refCount++;
 
             // Update replacement data of accessed block
             replacementPolicy->touch(blk->replacementData);
-        } else {
-            // If a cache miss
-            lat = lookupLatency;
         }
 
+        // The tag lookup latency is the same for a hit or a miss
+        lat = lookupLatency;
+
         return blk;
     }
 
diff --git a/src/mem/cache/tags/fa_lru.cc b/src/mem/cache/tags/fa_lru.cc
index 2c92a94..9648468 100644
--- a/src/mem/cache/tags/fa_lru.cc
+++ b/src/mem/cache/tags/fa_lru.cc
@@ -153,30 +153,22 @@
     CachesMask mask = 0;
     FALRUBlk* blk = static_cast<FALRUBlk*>(findBlock(addr, is_secure));
 
+    // If a cache hit
     if (blk && blk->isValid()) {
-        // If a cache hit
-        lat = accessLatency;
-        // Check if the block to be accessed is available. If not,
-        // apply the accessLatency on top of block->whenReady.
-        if (blk->whenReady > curTick() &&
-            cache->ticksToCycles(blk->whenReady - curTick()) >
-            accessLatency) {
-            lat = cache->ticksToCycles(blk->whenReady - curTick()) +
-            accessLatency;
-        }
         mask = blk->inCachesMask;
 
         moveToHead(blk);
-    } else {
-        // If a cache miss
-        lat = lookupLatency;
     }
+
     if (in_caches_mask) {
         *in_caches_mask = mask;
     }
 
     cacheTracking.recordAccess(blk);
 
+    // The tag lookup latency is the same for a hit or a miss
+    lat = lookupLatency;
+
     return blk;
 }
 
diff --git a/src/mem/cache/tags/fa_lru.hh b/src/mem/cache/tags/fa_lru.hh
index 6ea4c8d..1de6de4 100644
--- a/src/mem/cache/tags/fa_lru.hh
+++ b/src/mem/cache/tags/fa_lru.hh
@@ -180,10 +180,11 @@
      * Access block and update replacement data.  May not succeed, in which
      * case nullptr pointer is returned.  This has all the implications of a
      * cache access and should only be used as such.
-     * Returns the access latency and inCachesMask flags as a side effect.
+     * Returns tag lookup latency and the inCachesMask flags as a side effect.
+     *
      * @param addr The address to look for.
      * @param is_secure True if the target memory space is secure.
-     * @param lat The latency of the access.
+     * @param lat The latency of the tag lookup.
      * @param in_cache_mask Mask indicating the caches in which the blk fits.
      * @return Pointer to the cache block.
      */
diff --git a/src/mem/cache/tags/sector_tags.cc b/src/mem/cache/tags/sector_tags.cc
index 24751c9..02649cc 100644
--- a/src/mem/cache/tags/sector_tags.cc
+++ b/src/mem/cache/tags/sector_tags.cc
@@ -149,18 +149,8 @@
         dataAccesses += allocAssoc*numBlocksPerSector;
     }
 
+    // If a cache hit
     if (blk != nullptr) {
-        // If a cache hit
-        lat = accessLatency;
-        // Check if the block to be accessed is available. If not,
-        // apply the accessLatency on top of block->whenReady.
-        if (blk->whenReady > curTick() &&
-            cache->ticksToCycles(blk->whenReady - curTick()) >
-            accessLatency) {
-            lat = cache->ticksToCycles(blk->whenReady - curTick()) +
-            accessLatency;
-        }
-
         // Update number of references to accessed block
         blk->refCount++;
 
@@ -171,11 +161,11 @@
         // Update replacement data of accessed block, which is shared with
         // the whole sector it belongs to
         replacementPolicy->touch(sector_blk->replacementData);
-    } else {
-        // If a cache miss
-        lat = lookupLatency;
     }
 
+    // The tag lookup latency is the same for a hit or a miss
+    lat = lookupLatency;
+
     return blk;
 }
 
diff --git a/src/mem/cache/tags/sector_tags.hh b/src/mem/cache/tags/sector_tags.hh
index 1149ba1..f9d47f3 100644
--- a/src/mem/cache/tags/sector_tags.hh
+++ b/src/mem/cache/tags/sector_tags.hh
@@ -118,12 +118,12 @@
     /**
      * Access block and update replacement data. May not succeed, in which
      * case nullptr is returned. This has all the implications of a cache
-     * access and should only be used as such. Returns the access latency
+     * access and should only be used as such. Returns the tag lookup latency
      * as a side effect.
      *
      * @param addr The address to find.
      * @param is_secure True if the target memory space is secure.
-     * @param lat The access latency.
+     * @param lat The latency of the tag lookup.
      * @return Pointer to the cache block if found.
      */
     CacheBlk* accessBlock(Addr addr, bool is_secure, Cycles &lat) override;