mem-cache: Handle data expansion

When a block in compressed form is overwritten, it may change
its size. If the new compressed size is bigger, and the total
size becomes bigger than the block size, one or more blocks
will have to be evicted. This is called data expansion, or
fat writes.

This change assumes that a first level cache cannot have a
compressor, since otherwise data expansion would also have to
be handled for atomic operations and writes. As such, data
expansions should only be seen on writebacks. As writebacks
are forwarded to the next level when they fail, there should
be no data expansions when servicing misses either.

This patch adds the functionality to handle data expansions
by evicting the co-allocated blocks to make room for an
expanded block.

Change-Id: I0bd77bf6446bfae336889940b2f75d6f0c87e533
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/12087
Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
Maintainer: Nikos Nikoleris <nikos.nikoleris@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc
index e2149db..8d7d193 100644
--- a/src/mem/cache/base.cc
+++ b/src/mem/cache/base.cc
@@ -51,6 +51,7 @@
 #include "base/compiler.hh"
 #include "base/logging.hh"
 #include "debug/Cache.hh"
+#include "debug/CacheComp.hh"
 #include "debug/CachePort.hh"
 #include "debug/CacheRepl.hh"
 #include "debug/CacheVerbose.hh"
@@ -58,6 +59,7 @@
 #include "mem/cache/mshr.hh"
 #include "mem/cache/prefetch/base.hh"
 #include "mem/cache/queue_entry.hh"
+#include "mem/cache/tags/super_blk.hh"
 #include "params/BaseCache.hh"
 #include "params/WriteAllocator.hh"
 #include "sim/core.hh"
@@ -796,6 +798,105 @@
     return nullptr;
 }
 
+bool
+BaseCache::updateCompressionData(CacheBlk *blk, const uint64_t* data,
+                                 PacketList &writebacks)
+{
+    // tempBlock does not exist in the tags, so don't do anything for it.
+    if (blk == tempBlock) {
+        return true;
+    }
+
+    // Get superblock of the given block
+    CompressionBlk* compression_blk = static_cast<CompressionBlk*>(blk);
+    const SuperBlk* superblock = static_cast<const SuperBlk*>(
+        compression_blk->getSectorBlock());
+
+    // The compressor is called to compress the updated data, so that the
+    // block's compression metadata can be updated.
+    std::size_t compression_size = 0;
+    Cycles compression_lat = Cycles(0);
+    Cycles decompression_lat = Cycles(0);
+    compressor->compress(data, compression_lat, decompression_lat,
+                         compression_size);
+
+    // If the block's compressed size increased, it may not be co-allocatable
+    // anymore. If so, some blocks might need to be evicted to make room for
+    // the bigger block
+
+    // Get previous compressed size (only used by the debug message below)
+    const std::size_t M5_VAR_USED prev_size = compression_blk->getSizeBits();
+
+    // Check if new data is co-allocatable (blk's own state is ignored)
+    const bool is_co_allocatable = superblock->isCompressed(compression_blk) &&
+        superblock->canCoAllocate(compression_size);
+
+    // If block was compressed, possibly co-allocated with other blocks, and
+    // cannot be co-allocated anymore, one or more blocks must be evicted to
+    // make room for the expanded block. As of now we decide to evict the co-
+    // allocated blocks to make room for the expansion, but other approaches
+    // that take the replacement data of the superblock into account may
+    // generate better results
+    std::vector<CacheBlk*> evict_blks;
+    const bool was_compressed = compression_blk->isCompressed();
+    if (was_compressed && !is_co_allocatable) {
+        // Get all co-allocated blocks
+        for (const auto& sub_blk : superblock->blks) {
+            if (sub_blk->isValid() && (compression_blk != sub_blk)) {
+                // Check for transient state allocations. If any of the
+                // entries listed for eviction has a transient state, the
+                // allocation fails
+                const Addr repl_addr = regenerateBlkAddr(sub_blk);
+                const MSHR *repl_mshr =
+                    mshrQueue.findMatch(repl_addr, sub_blk->isSecure());
+                if (repl_mshr) {
+                    DPRINTF(CacheRepl, "Aborting data expansion of %s due " \
+                            "to replacement of block in transient state: %s\n",
+                            compression_blk->print(), sub_blk->print());
+                    // Too hard to replace block with transient state, so it
+                    // cannot be evicted. Mark the update as failed and expect
+                    // the caller to evict this block. Since this is called
+                    // only when writebacks arrive, and packets do not contain
+                    // compressed data, there is no need to decompress
+                    compression_blk->setSizeBits(blkSize * 8);
+                    compression_blk->setDecompressionLatency(Cycles(0));
+                    compression_blk->setUncompressed();
+                    return false;
+                }
+
+                evict_blks.push_back(sub_blk);
+            }
+        }
+
+        // Update the number of data expansions (aborted ones return above)
+        dataExpansions++;
+
+        DPRINTF(CacheComp, "Data expansion: expanding [%s] from %d to %d bits"
+                "\n", blk->print(), prev_size, compression_size);
+    }
+
+    // We always store compressed blocks when possible
+    if (is_co_allocatable) {
+        compression_blk->setCompressed();
+    } else {
+        compression_blk->setUncompressed();
+    }
+    compression_blk->setSizeBits(compression_size);
+    compression_blk->setDecompressionLatency(decompression_lat);
+
+    // Evict the co-allocated blocks selected above
+    for (const auto& evict_blk : evict_blks) {
+        if (evict_blk->isValid()) {
+            if (evict_blk->wasPrefetched()) {
+                unusedPrefetches++;
+            }
+            evictBlock(evict_blk, writebacks);
+        }
+    }
+
+    return true;
+}
+
 void
 BaseCache::satisfyRequest(PacketPtr pkt, CacheBlk *blk, bool, bool)
 {
@@ -1036,13 +1137,23 @@
             }
 
             blk->status |= BlkReadable;
-        } else {
-            if (compressor) {
-                // This is an overwrite to an existing block, therefore we need
-                // to check for data expansion (i.e., block was compressed with
-                // a smaller size, and now it doesn't fit the entry anymore).
-                // If that is the case we might need to evict blocks.
-                // @todo Update compression data
+        } else if (compressor) {
+            // This is an overwrite to an existing block, therefore we need
+            // to check for data expansion (i.e., block was compressed with
+            // a smaller size, and now it doesn't fit the entry anymore).
+            // If that is the case we might need to evict blocks.
+            if (!updateCompressionData(blk, pkt->getConstPtr<uint64_t>(),
+                writebacks)) {
+                // This is a failed data expansion (write), which happened
+                // after finding the replacement entries and accessing the
+                // block's data. There were no replaceable entries available
+                // to make room for the expanded block, and since it does not
+                // fit anymore and it has been properly updated to contain
+                // the new data, forward it to the next level
+                lat = calculateAccessLatency(blk, pkt->headerDelay,
+                                             tag_latency);
+                invalidateBlock(blk);
+                return false;
             }
         }
 
@@ -1125,9 +1236,23 @@
 
                 blk->status |= BlkReadable;
             }
-        } else {
-            if (compressor) {
-                // @todo Update compression data
+        } else if (compressor) {
+            // This is an overwrite to an existing block, therefore we need
+            // to check for data expansion (i.e., block was compressed with
+            // a smaller size, and now it doesn't fit the entry anymore).
+            // If that is the case we might need to evict blocks.
+            if (!updateCompressionData(blk, pkt->getConstPtr<uint64_t>(),
+                writebacks)) {
+                // This is a failed data expansion (write), which happened
+                // after finding the replacement entries and accessing the
+                // block's data. There were no replaceable entries available
+                // to make room for the expanded block, and since it does not
+                // fit anymore and it has been properly updated to contain
+                // the new data, forward it to the next level
+                lat = calculateAccessLatency(blk, pkt->headerDelay,
+                                             tag_latency);
+                invalidateBlock(blk);
+                return false;
             }
         }
 
@@ -1155,8 +1280,7 @@
         blk->setWhenReady(clockEdge(fillLatency) + pkt->headerDelay +
             std::max(cyclesToTicks(tag_latency), (uint64_t)pkt->payloadDelay));
 
-        // if this a write-through packet it will be sent to cache
-        // below
+        // If this is a write-through packet it will be sent to cache below
         return !pkt->writeThrough();
     } else if (blk && (pkt->needsWritable() ? blk->isWritable() :
                        blk->isReadable())) {
@@ -2365,6 +2489,12 @@
         .name(name() + ".replacements")
         .desc("number of replacements")
         ;
+
+    dataExpansions
+        .name(name() + ".data_expansions")
+        .desc("number of data expansions")
+        .flags(nozero | nonan)
+        ;
 }
 
 void
diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh
index 02b9e2d..362381b 100644
--- a/src/mem/cache/base.hh
+++ b/src/mem/cache/base.hh
@@ -660,6 +660,33 @@
     EventFunctionWrapper writebackTempBlockAtomicEvent;
 
     /**
+     * When a block is overwritten, its compression information must be
+     * updated, and it may need to be recompressed. If the compression size
+     * changes, the block may either become smaller, in which case there is no
+     * side effect, or bigger (data expansion; fat write), in which case the
+     * block might not fit in its current location anymore. If that happens,
+     * there are usually two options to be taken:
+     *
+     * - The co-allocated blocks must be evicted to make room for this block.
+     *   Simpler, but ignores replacement data.
+     * - The block itself is moved elsewhere (used in policies where the CF
+     *   determines the location of the block).
+     *
+     * This implementation uses the first approach.
+     *
+     * Notice that this is only called for writebacks, which means that L1
+     * caches (which see regular Writes), do not support compression.
+     * @sa CompressedTags
+     *
+     * @param blk The block to be overwritten.
+     * @param data A pointer to the data to be compressed (blk's new data).
+     * @param writebacks List for any writebacks that need to be performed.
+     * @return Whether operation is successful or not.
+     */
+    bool updateCompressionData(CacheBlk *blk, const uint64_t* data,
+                               PacketList &writebacks);
+
+    /**
      * Perform any necessary updates to the block and perform any data
      * exchange between the packet and the block. The flags of the
      * packet are also set accordingly.
@@ -1016,6 +1043,9 @@
     /** Number of replacements of valid blocks. */
     Stats::Scalar replacements;
 
+    /** Number of data expansions. */
+    Stats::Scalar dataExpansions;
+
     /**
      * @}
      */
diff --git a/src/mem/cache/tags/compressed_tags.cc b/src/mem/cache/tags/compressed_tags.cc
index 94b9293..0896a1b 100644
--- a/src/mem/cache/tags/compressed_tags.cc
+++ b/src/mem/cache/tags/compressed_tags.cc
@@ -63,6 +63,10 @@
         // Locate next cache superblock
         SuperBlk* superblock = &superBlks[superblock_index];
 
+        // Superblocks must be aware of the block size due to their co-
+        // allocation conditions
+        superblock->setBlkSize(blkSize);
+
         // Link block to indexing policy
         indexingPolicy->setEntry(superblock, superblock_index);
 
@@ -96,17 +100,6 @@
     }
 }
 
-bool
-CompressedTags::canCoAllocate(const SuperBlk* superblock,
-                              const std::size_t compressed_size) const
-{
-    // Simple co-allocation function: at most numBlocksPerSector blocks that
-    // compress at least to (100/numBlocksPerSector)% of their original size
-    // can share a superblock
-    return superblock->isCompressed() &&
-           (compressed_size <= (blkSize * 8) / numBlocksPerSector);
-}
-
 CacheBlk*
 CompressedTags::findVictim(Addr addr, const bool is_secure,
                            const std::size_t compressed_size,
@@ -127,7 +120,8 @@
         if ((tag == superblock->getTag()) && superblock->isValid() &&
             (is_secure == superblock->isSecure()) &&
             !superblock->blks[offset]->isValid() &&
-            canCoAllocate(superblock, compressed_size))
+            superblock->isCompressed() &&
+            superblock->canCoAllocate(compressed_size))
         {
             victim_superblock = superblock;
             is_co_allocation = true;
@@ -171,12 +165,23 @@
 void
 CompressedTags::insertBlock(const PacketPtr pkt, CacheBlk *blk)
 {
+    // We check if block can co-allocate before inserting, because this check
+    // assumes the block is still invalid
+    CompressionBlk* compression_blk = static_cast<CompressionBlk*>(blk);
+    const SuperBlk* superblock = static_cast<const SuperBlk*>(
+        compression_blk->getSectorBlock());
+    const bool is_co_allocatable = superblock->isCompressed() &&
+        superblock->canCoAllocate(compression_blk->getSizeBits());
+
     // Insert block
     SectorTags::insertBlock(pkt, blk);
 
-    // @todo We always store compressed blocks when possible
-    CompressionBlk* compression_blk = static_cast<CompressionBlk*>(blk);
-    compression_blk->setUncompressed();
+    // We always store compressed blocks when possible
+    if (is_co_allocatable) {
+        compression_blk->setCompressed();
+    } else {
+        compression_blk->setUncompressed();
+    }
 }
 
 void
diff --git a/src/mem/cache/tags/compressed_tags.hh b/src/mem/cache/tags/compressed_tags.hh
index f9321b9..2c2a699 100644
--- a/src/mem/cache/tags/compressed_tags.hh
+++ b/src/mem/cache/tags/compressed_tags.hh
@@ -98,16 +98,6 @@
     void tagsInit() override;
 
     /**
-     * Checks whether a superblock can co-allocate given compressed data block.
-     *
-     * @param superblock Superblock to check.
-     * @param compressed_size Size, in bits, of new block to allocate.
-     * @return True if block can be co-allocated in superblock.
-     */
-    bool canCoAllocate(const SuperBlk* superblock,
-                       const std::size_t compressed_size) const;
-
-    /**
      * Find replacement victim based on address. Checks if data can be co-
      * allocated before choosing blocks to be evicted.
      *
diff --git a/src/mem/cache/tags/super_blk.cc b/src/mem/cache/tags/super_blk.cc
index 530a2c0..527e8cb 100644
--- a/src/mem/cache/tags/super_blk.cc
+++ b/src/mem/cache/tags/super_blk.cc
@@ -94,10 +94,10 @@
 }
 
 bool
-SuperBlk::isCompressed() const
+SuperBlk::isCompressed(const CompressionBlk* ignored_blk) const
 {
     for (const auto& blk : blks) {
-        if (blk->isValid()) {
+        if (blk->isValid() && (blk != ignored_blk)) {
             return static_cast<CompressionBlk*>(blk)->isCompressed();
         }
     }
@@ -105,3 +105,19 @@
     // An invalid block is seen as compressed
     return true;
 }
+
+bool
+SuperBlk::canCoAllocate(const std::size_t compressed_size) const
+{
+    // Simple co-allocation function: at most blks.size() blocks that each
+    // compress to at most (100/blks.size())% of their original size (blkSize
+    // is in bytes, compressed_size in bits) can share a superblock
+    return (compressed_size <= (blkSize * 8) / blks.size());
+}
+
+void
+SuperBlk::setBlkSize(const std::size_t blk_size)
+{
+    assert(blkSize == 0); // Must not have been set before (starts at 0)
+    blkSize = blk_size;
+}
diff --git a/src/mem/cache/tags/super_blk.hh b/src/mem/cache/tags/super_blk.hh
index bf35c69..0fe2825 100644
--- a/src/mem/cache/tags/super_blk.hh
+++ b/src/mem/cache/tags/super_blk.hh
@@ -126,8 +126,12 @@
  */
 class SuperBlk : public SectorBlk
 {
+  protected:
+    /** Block size, in bytes. */
+    std::size_t blkSize;
+
   public:
-    SuperBlk() : SectorBlk() {}
+    SuperBlk() : SectorBlk(), blkSize(0) {}
     SuperBlk(const SuperBlk&) = delete;
     SuperBlk& operator=(const SuperBlk&) = delete;
     ~SuperBlk() {};
@@ -136,9 +140,25 @@
      * Returns whether the superblock contains compressed blocks or not. By
      * default, if not blocks are valid, the superblock is compressible.
      *
+     * @param ignored_blk If provided don't consider the given block.
      * @return The compressibility state of the superblock.
      */
-    bool isCompressed() const;
+    bool isCompressed(const CompressionBlk* ignored_blk = nullptr) const;
+
+    /**
+     * Checks whether a superblock can co-allocate given compressed data block.
+     *
+     * @param compressed_size Size, in bits, of new block to allocate.
+     * @return True if block can be co-allocated in superblock.
+     */
+    bool canCoAllocate(const std::size_t compressed_size) const;
+
+    /**
+     * Set block size. Should be called only once, when initializing blocks.
+     *
+     * @param blk_size The uncompressed block size.
+     */
+    void setBlkSize(const std::size_t blk_size);
 };
 
 #endif //__MEM_CACHE_TAGS_SUPER_BLK_HH__