mem-cache: Add compression and decompression calls

Add a compressor to the base cache class and compress within
block allocation and decompress on writebacks.

This change does not implement data expansion (fat writes) yet,
nor does it add the compression latency to the block write time.

Change-Id: Ie36db65f7487c9b05ec4aedebc2c7651b4cb4821
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/11410
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
Maintainer: Nikos Nikoleris <nikos.nikoleris@arm.com>
diff --git a/src/mem/cache/Cache.py b/src/mem/cache/Cache.py
index b2f4784..7a28136 100644
--- a/src/mem/cache/Cache.py
+++ b/src/mem/cache/Cache.py
@@ -44,11 +44,11 @@
 from m5.SimObject import SimObject
 
 from m5.objects.ClockedObject import ClockedObject
+from m5.objects.Compressors import BaseCacheCompressor
 from m5.objects.Prefetcher import BasePrefetcher
 from m5.objects.ReplacementPolicies import *
 from m5.objects.Tags import *
 
-
 # Enum for cache clusivity, currently mostly inclusive or mostly
 # exclusive.
 class Clusivity(Enum): vals = ['mostly_incl', 'mostly_excl']
@@ -105,6 +105,8 @@
     replacement_policy = Param.BaseReplacementPolicy(LRURP(),
         "Replacement policy")
 
+    compressor = Param.BaseCacheCompressor(NULL, "Cache compressor.")
+
     sequential_access = Param.Bool(False,
         "Whether to access tags and data sequentially")
 
diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc
index 36968a1..e2149db 100644
--- a/src/mem/cache/base.cc
+++ b/src/mem/cache/base.cc
@@ -54,6 +54,7 @@
 #include "debug/CachePort.hh"
 #include "debug/CacheRepl.hh"
 #include "debug/CacheVerbose.hh"
+#include "mem/cache/compressors/base.hh"
 #include "mem/cache/mshr.hh"
 #include "mem/cache/prefetch/base.hh"
 #include "mem/cache/queue_entry.hh"
@@ -83,6 +84,7 @@
       mshrQueue("MSHRs", p->mshrs, 0, p->demand_mshr_reserve), // see below
       writeBuffer("write buffer", p->write_buffers, p->mshrs), // see below
       tags(p->tags),
+      compressor(p->compressor),
       prefetcher(p->prefetcher),
       writeAllocator(p->write_allocator),
       writebackClean(p->writeback_clean),
@@ -1034,7 +1036,16 @@
             }
 
             blk->status |= BlkReadable;
+        } else {
+            if (compressor) {
+                // This is an overwrite to an existing block, therefore we need
+                // to check for data expansion (i.e., block was compressed with
+                // a smaller size, and now it doesn't fit the entry anymore).
+                // If that is the case we might need to evict blocks.
+                // @todo Update compression data
+            }
         }
+
         // only mark the block dirty if we got a writeback command,
         // and leave it as is for a clean writeback
         if (pkt->cmd == MemCmd::WritebackDirty) {
@@ -1114,6 +1125,10 @@
 
                 blk->status |= BlkReadable;
             }
+        } else {
+            if (compressor) {
+                // @todo Update compression data
+            }
         }
 
         // at this point either this is a writeback or a write-through
@@ -1151,6 +1166,12 @@
         // Calculate access latency based on the need to access the data array
         if (pkt->isRead() || pkt->isWrite()) {
             lat = calculateAccessLatency(blk, pkt->headerDelay, tag_latency);
+
+            // When a block is compressed, it must first be decompressed
+            // before being read. This adds to the access latency.
+            if (compressor && pkt->isRead()) {
+                lat += compressor->getDecompressionLatency(blk);
+            }
         } else {
             lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency);
         }
@@ -1319,8 +1340,22 @@
     // Get secure bit
     const bool is_secure = pkt->isSecure();
 
-    // @todo Compress and get compression related data
+    // Block size and compression related access latency. Only relevant if
+    // using a compressor, otherwise there is no extra delay, and the block
+    // is fully sized
     std::size_t blk_size_bits = blkSize*8;
+    Cycles compression_lat = Cycles(0);
+    Cycles decompression_lat = Cycles(0);
+
+    // If a compressor is being used, it is called to compress data before
+    // insertion. Although in gem5 the data is stored uncompressed, even if a
+    // compressor is used, the compression/decompression methods are called to
+    // calculate the amount of extra cycles needed to read or write compressed
+    // blocks.
+    if (compressor) {
+        compressor->compress(pkt->getConstPtr<uint64_t>(), compression_lat,
+                             decompression_lat, blk_size_bits);
+    }
 
     // Find replacement victim
     std::vector<CacheBlk*> evict_blks;
@@ -1377,6 +1412,13 @@
         replacements++;
     }
 
+    // If using a compressor, set compression data. This must be done before
+    // block insertion, as compressed tags use this information.
+    if (compressor) {
+        compressor->setSizeBits(victim, blk_size_bits);
+        compressor->setDecompressionLatency(victim, decompression_lat);
+    }
+
     // Insert new block at victimized entry
     tags->insertBlock(pkt, victim);
 
@@ -1443,6 +1485,12 @@
     pkt->allocate();
     pkt->setDataFromBlock(blk->data, blkSize);
 
+    // When a block is compressed, it must first be decompressed before being
+    // sent for writeback.
+    if (compressor) {
+        pkt->payloadDelay = compressor->getDecompressionLatency(blk);
+    }
+
     return pkt;
 }
 
@@ -1482,6 +1530,12 @@
     pkt->allocate();
     pkt->setDataFromBlock(blk->data, blkSize);
 
+    // When a block is compressed, it must first be decompressed before being
+    // sent for writeback.
+    if (compressor) {
+        pkt->payloadDelay = compressor->getDecompressionLatency(blk);
+    }
+
     return pkt;
 }
 
diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh
index b995a6e..02b9e2d 100644
--- a/src/mem/cache/base.hh
+++ b/src/mem/cache/base.hh
@@ -64,6 +64,7 @@
 #include "debug/CachePort.hh"
 #include "enums/Clusivity.hh"
 #include "mem/cache/cache_blk.hh"
+#include "mem/cache/compressors/base.hh"
 #include "mem/cache/mshr_queue.hh"
 #include "mem/cache/tags/base.hh"
 #include "mem/cache/write_queue.hh"
@@ -324,6 +325,9 @@
     /** Tag and data Storage */
     BaseTags *tags;
 
+    /** Compression method being used. */
+    BaseCacheCompressor* compressor;
+
     /** Prefetcher */
     BasePrefetcher *prefetcher;
 
@@ -1070,6 +1074,15 @@
 
         Addr blk_addr = pkt->getBlockAddr(blkSize);
 
+        // If using compression, on evictions the block is decompressed and
+        // the operation's latency is added to the payload delay. Consume
+        // that payload delay here, meaning that the data is always stored
+        // uncompressed in the writebuffer
+        if (compressor) {
+            time += pkt->payloadDelay;
+            pkt->payloadDelay = 0;
+        }
+
         WriteQueueEntry *wq_entry =
             writeBuffer.findMatch(blk_addr, pkt->isSecure());
         if (wq_entry && !wq_entry->inService) {
diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc
index 4643e1d..a601a7b 100644
--- a/src/mem/cache/cache.cc
+++ b/src/mem/cache/cache.cc
@@ -1129,6 +1129,12 @@
             if (pkt->hasData())
                 pkt->setDataFromBlock(blk->data, blkSize);
         }
+
+        // When a block is compressed, it must first be decompressed before
+        // being read, and this increases the snoop delay.
+        if (compressor && pkt->isRead()) {
+            snoop_delay += compressor->getDecompressionLatency(blk);
+        }
     }
 
     if (!respond && is_deferred) {
diff --git a/src/mem/cache/compressors/base.cc b/src/mem/cache/compressors/base.cc
index 1ba2677..40244e2 100644
--- a/src/mem/cache/compressors/base.cc
+++ b/src/mem/cache/compressors/base.cc
@@ -111,13 +111,16 @@
 }
 
 Cycles
-BaseCacheCompressor::getDecompressionLatency(const CacheBlk* blk)
+BaseCacheCompressor::getDecompressionLatency(const CacheBlk* blk) const
 {
     const CompressionBlk* comp_blk = static_cast<const CompressionBlk*>(blk);
 
     // If block is compressed, return its decompression latency
     if (comp_blk && comp_blk->isCompressed()){
-        return comp_blk->getDecompressionLatency();
+        const Cycles decomp_lat = comp_blk->getDecompressionLatency();
+        DPRINTF(CacheComp, "Decompressing block: %s (%d cycles)\n",
+                comp_blk->print(), decomp_lat);
+        return decomp_lat;
     }
 
     // Block is not compressed, so there is no decompression latency
diff --git a/src/mem/cache/compressors/base.hh b/src/mem/cache/compressors/base.hh
index a19a072..f457ecd 100644
--- a/src/mem/cache/compressors/base.hh
+++ b/src/mem/cache/compressors/base.hh
@@ -132,7 +132,7 @@
      *
      * @param blk The compressed block.
      */
-    static Cycles getDecompressionLatency(const CacheBlk* blk);
+    Cycles getDecompressionLatency(const CacheBlk* blk) const;
 
     /**
      * Set the decompression latency of compressed block.
diff --git a/src/mem/cache/tags/compressed_tags.cc b/src/mem/cache/tags/compressed_tags.cc
index cc799df..46043be 100644
--- a/src/mem/cache/tags/compressed_tags.cc
+++ b/src/mem/cache/tags/compressed_tags.cc
@@ -37,6 +37,7 @@
 
 #include "mem/cache/replacement_policies/base.hh"
 #include "mem/cache/tags/indexing_policies/base.hh"
+#include "mem/packet.hh"
 #include "params/CompressedTags.hh"
 
 CompressedTags::CompressedTags(const Params *p)
@@ -93,6 +94,17 @@
 }
 
 void
+CompressedTags::insertBlock(const PacketPtr pkt, CacheBlk *blk)
+{
+    // Insert block
+    SectorTags::insertBlock(pkt, blk);
+
+    // @todo Blocks are currently stored uncompressed; compress when possible
+    CompressionBlk* compression_blk = static_cast<CompressionBlk*>(blk);
+    compression_blk->setUncompressed();
+}
+
+void
 CompressedTags::forEachBlk(std::function<void(CacheBlk &)> visitor)
 {
     for (CompressionBlk& blk : blks) {
diff --git a/src/mem/cache/tags/compressed_tags.hh b/src/mem/cache/tags/compressed_tags.hh
index 0bf96b5..303bc79 100644
--- a/src/mem/cache/tags/compressed_tags.hh
+++ b/src/mem/cache/tags/compressed_tags.hh
@@ -97,6 +97,14 @@
     void tagsInit() override;
 
     /**
+     * Insert the new block into the cache and update replacement data.
+     *
+     * @param pkt Packet holding the address to update
+     * @param blk The block to update.
+     */
+    void insertBlock(const PacketPtr pkt, CacheBlk *blk) override;
+
+    /**
      * Visit each sub-block in the tags and apply a visitor.
      *
      * The visitor should be a std::function that takes a cache block.