mem: Determine if an MSHR does a whole-line write

This patch adds support for determining 1) whether the targets in an
MSHR are only writes and 2) whether these writes effectively amount to
a whole-line write. It adds the necessary functions in the MSHR to
allow for write coalescing in the cache.

Change-Id: I2c9a9a83d2d9b506a491ba5b0b9ac1054bdb31b4
Reviewed-on: https://gem5-review.googlesource.com/c/12904
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Maintainer: Nikos Nikoleris <nikos.nikoleris@arm.com>
diff --git a/src/mem/cache/mshr.cc b/src/mem/cache/mshr.cc
index ccaec7b..5b93029 100644
--- a/src/mem/cache/mshr.cc
+++ b/src/mem/cache/mshr.cc
@@ -40,6 +40,7 @@
  *
  * Authors: Erik Hallnor
  *          Dave Greene
+ *          Nikos Nikoleris
  */
 
 /**
@@ -63,7 +64,7 @@
 MSHR::MSHR() : downstreamPending(false),
                pendingModified(false),
                postInvalidate(false), postDowngrade(false),
-               isForward(false)
+               wasWholeLineWrite(false), isForward(false)
 {
 }
 
@@ -95,6 +96,8 @@
 
         if (source != Target::FromPrefetcher) {
             hasFromCache = hasFromCache || pkt->fromCache();
+
+            updateWriteFlags(pkt);
         }
     }
 }
@@ -257,16 +260,19 @@
     order = _order;
     assert(target);
     isForward = false;
+    wasWholeLineWrite = false;
     _isUncacheable = target->req->isUncacheable();
     inService = false;
     downstreamPending = false;
-    assert(targets.isReset());
+
+    targets.init(blkAddr, blkSize);
+    deferredTargets.init(blkAddr, blkSize);
+
     // Don't know of a case where we would allocate a new MSHR for a
     // snoop (mem-side request), so set source according to request here
     Target::Source source = (target->cmd == MemCmd::HardPFReq) ?
         Target::FromPrefetcher : Target::FromCPU;
     targets.add(target, when_ready, _order, source, true, alloc_on_fill);
-    assert(deferredTargets.isReset());
 }
 
 
@@ -294,6 +300,10 @@
         // level where it's going to get a response
         targets.clearDownstreamPending();
     }
+    // if the line is not considered a whole-line write when sent
+    // downstream, make sure it is also not considered a whole-line
+    // write when receiving the response, and vice versa
+    wasWholeLineWrite = isWholeLineWrite();
 }
 
 
@@ -480,6 +490,7 @@
 MSHR::extractServiceableTargets(PacketPtr pkt)
 {
     TargetList ready_targets;
+    ready_targets.init(blkAddr, blkSize);
     // If the downstream MSHR got an invalidation request then we only
     // service the first of the FromCPU targets and any other
     // non-FromCPU target. This way the remaining FromCPU targets
diff --git a/src/mem/cache/mshr.hh b/src/mem/cache/mshr.hh
index 218de924..56b81b6 100644
--- a/src/mem/cache/mshr.hh
+++ b/src/mem/cache/mshr.hh
@@ -38,6 +38,7 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Authors: Erik Hallnor
+ *          Nikos Nikoleris
  */
 
 /**
@@ -52,11 +53,13 @@
 #include <iosfwd>
 #include <list>
 #include <string>
+#include <vector>
 
 #include "base/printable.hh"
 #include "base/types.hh"
 #include "mem/cache/queue_entry.hh"
 #include "mem/packet.hh"
+#include "mem/request.hh"
 #include "sim/core.hh"
 
 class BaseCache;
@@ -115,6 +118,9 @@
 
   public:
 
+    /** Track if we sent this as a whole line write or not */
+    bool wasWholeLineWrite;
+
     /** True if the entry is just a simple forward from an upper level */
     bool isForward;
 
@@ -187,7 +193,24 @@
         void updateFlags(PacketPtr pkt, Target::Source source,
                          bool alloc_on_fill);
 
+        /**
+         * Reset state
+         *
+         * @param blk_addr Address of the cache block
+         * @param blk_size Size of the cache block
+         */
+        void init(Addr blk_addr, Addr blk_size) {
+            blkAddr = blk_addr;
+            blkSize = blk_size;
+            writesBitmap.resize(blk_size);
+
+            resetFlags();
+        }
+
         void resetFlags() {
+            onlyWrites = true;
+            std::fill(writesBitmap.begin(), writesBitmap.end(), false);
+
             needsWritable = false;
             hasUpgrade = false;
             allocOnFill = false;
@@ -203,12 +226,44 @@
         void populateFlags();
 
         /**
+         * Add the specified packet to the TargetList. This function
+         * stores information related to the added packet and updates
+         * the flags accordingly.
+         *
+         * @param pkt Packet considered for adding
+         */
+        void updateWriteFlags(PacketPtr pkt) {
+             const Request::FlagsType noMergeFlags =
+                 Request::UNCACHEABLE |
+                 Request::STRICT_ORDER | Request::MMAPPED_IPR |
+                 Request::PRIVILEGED | Request::LLSC |
+                 Request::MEM_SWAP | Request::MEM_SWAP_COND |
+                 Request::SECURE;
+
+             // if we have already seen writes for the full block stop
+             // here, this might be a full line write followed by
+             // other compatible requests (e.g., reads)
+             if (!isWholeLineWrite()) {
+                 bool can_merge_write = pkt->isWrite() &&
+                     ((pkt->req->getFlags() & noMergeFlags) == 0);
+                 onlyWrites &= can_merge_write;
+                 if (onlyWrites) {
+                     auto offset = pkt->getOffset(blkSize);
+                     auto begin = writesBitmap.begin() + offset;
+                     std::fill(begin, begin + pkt->getSize(), true);
+                 }
+             }
+         }
+
+        /**
          * Tests if the flags of this TargetList have their default
          * values.
+         *
+         * @return True if the TargetList is reset, false otherwise.
          */
         bool isReset() const {
             return !needsWritable && !hasUpgrade && !allocOnFill &&
-                !hasFromCache;
+                !hasFromCache && onlyWrites;
         }
 
         /**
@@ -224,8 +279,7 @@
          * @param alloc_on_fill Whether it should allocate on a fill
          */
         void add(PacketPtr pkt, Tick readyTime, Counter order,
-                 Target::Source source, bool markPending,
-                 bool alloc_on_fill);
+                 Target::Source source, bool markPending, bool alloc_on_fill);
 
         /**
          * Convert upgrades to the equivalent request if the cache line they
@@ -238,6 +292,39 @@
         bool trySatisfyFunctional(PacketPtr pkt);
         void print(std::ostream &os, int verbosity,
                    const std::string &prefix) const;
+
+        /**
+         * Check if this list contains only compatible writes, and if they
+         * span the entire cache line. This is used as part of the
+         * miss-packet creation. Note that new requests may arrive after a
+         * miss-packet has been created, and for the fill we therefore use
+         * the wasWholeLineWrite field.
+         */
+        bool isWholeLineWrite() const
+        {
+            return onlyWrites &&
+                std::all_of(writesBitmap.begin(),
+                            writesBitmap.end(), [](bool i) { return i; });
+        }
+
+      private:
+        /** Address of the cache block for this list of targets. */
+        Addr blkAddr;
+
+        /** Size of the cache block. */
+        Addr blkSize;
+
+        /** Are we only dealing with writes. */
+        bool onlyWrites;
+
+        // NOTE: std::vector<bool> might not satisfy the
+        // ForwardIterator requirement and therefore cannot be used
+        // for writesBitmap.
+        /**
+         * Track which bytes are written by requests in this target
+         * list.
+         */
+        std::vector<char> writesBitmap;
     };
 
     /** A list of MSHRs. */
@@ -315,6 +402,16 @@
     TargetList deferredTargets;
 
   public:
+    /**
+     * Check if this MSHR contains only compatible writes, and if they
+     * span the entire cache line. This is used as part of the
+     * miss-packet creation. Note that new requests may arrive after a
+     * miss-packet has been created, and for the fill we therefore use
+     * the wasWholeLineWrite field.
+     */
+    bool isWholeLineWrite() const {
+        return targets.isWholeLineWrite();
+    }
 
     /**
      * Allocate a miss to this MSHR.