mem-cache: Remove writebacks packet list

Previously, all atomic writebacks concerned a single block;
therefore, when a block was evicted, no other block would be
pending eviction. With sector tags (and compression),
however, a single replacement can generate many evictions.

This can cause problems, since a writeback that evicts a block
may evict blocks in the lower cache. If one of these conflicts
with one of the blocks pending eviction in the higher level, the
snoop must inform the lower level of it. Since atomic mode does
not have a write buffer, this kind of conflict wouldn't be noticed.

Therefore, instead of evicting multiple blocks at once, we
do it one by one.

Change-Id: I2fc2f9eb0f26248ddf91adbe987d158f5a2e592b
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/18209
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
Maintainer: Nikos Nikoleris <nikos.nikoleris@arm.com>
diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc
index 8d7d193..8929343 100644
--- a/src/mem/cache/base.cc
+++ b/src/mem/cache/base.cc
@@ -342,20 +342,11 @@
     // the delay provided by the crossbar
     Tick forward_time = clockEdge(forwardLatency) + pkt->headerDelay;
 
+    // Note that lat is passed by reference here. The function
+    // access() will set the lat value.
     Cycles lat;
     CacheBlk *blk = nullptr;
-    bool satisfied = false;
-    {
-        PacketList writebacks;
-        // Note that lat is passed by reference here. The function
-        // access() will set the lat value.
-        satisfied = access(pkt, blk, lat, writebacks);
-
-        // After the evicted blocks are selected, they must be forwarded
-        // to the write buffer to ensure they logically precede anything
-        // happening below
-        doWritebacks(writebacks, clockEdge(lat + forwardLatency));
-    }
+    bool satisfied = access(pkt, blk, lat);
 
     // Here we charge the headerDelay that takes into account the latencies
     // of the bus, if the packet comes from it.
@@ -457,8 +448,6 @@
             miss_latency;
     }
 
-    PacketList writebacks;
-
     bool is_fill = !mshr->isForward &&
         (pkt->isRead() || pkt->cmd == MemCmd::UpgradeResp ||
          mshr->wasWholeLineWrite);
@@ -475,7 +464,7 @@
 
         const bool allocate = (writeAllocator && mshr->wasWholeLineWrite) ?
             writeAllocator->allocate() : mshr->allocOnFill();
-        blk = handleFill(pkt, blk, writebacks, allocate);
+        blk = handleFill(pkt, blk, allocate);
         assert(blk != nullptr);
         ppFill->notify(pkt);
     }
@@ -531,13 +520,9 @@
 
     // if we used temp block, check to see if its valid and then clear it out
     if (blk == tempBlock && tempBlock->isValid()) {
-        evictBlock(blk, writebacks);
+        evictBlock(blk, clockEdge(forwardLatency) + pkt->headerDelay);
     }
 
-    const Tick forward_time = clockEdge(forwardLatency) + pkt->headerDelay;
-    // copy writebacks to write buffer
-    doWritebacks(writebacks, forward_time);
-
     DPRINTF(CacheVerbose, "%s: Leaving with %s\n", __func__, pkt->print());
     delete pkt;
 }
@@ -555,8 +540,7 @@
     Cycles lat = lookupLatency;
 
     CacheBlk *blk = nullptr;
-    PacketList writebacks;
-    bool satisfied = access(pkt, blk, lat, writebacks);
+    bool satisfied = access(pkt, blk, lat);
 
     if (pkt->isClean() && blk && blk->isDirty()) {
         // A cache clean opearation is looking for a dirty
@@ -566,17 +550,12 @@
         DPRINTF(CacheVerbose, "%s: packet %s found block: %s\n",
                 __func__, pkt->print(), blk->print());
         PacketPtr wb_pkt = writecleanBlk(blk, pkt->req->getDest(), pkt->id);
-        writebacks.push_back(wb_pkt);
         pkt->setSatisfied();
+        doWritebacksAtomic(wb_pkt);
     }
 
-    // handle writebacks resulting from the access here to ensure they
-    // logically precede anything happening below
-    doWritebacksAtomic(writebacks);
-    assert(writebacks.empty());
-
     if (!satisfied) {
-        lat += handleAtomicReqMiss(pkt, blk, writebacks);
+        lat += handleAtomicReqMiss(pkt, blk);
     }
 
     // Note that we don't invoke the prefetcher at all in atomic mode.
@@ -590,9 +569,6 @@
     // immediately rather than calling requestMemSideBus() as we do
     // there).
 
-    // do any writebacks resulting from the response handling
-    doWritebacksAtomic(writebacks);
-
     // if we used temp block, check to see if its valid and if so
     // clear it out, but only do so after the call to recvAtomic is
     // finished so that any downstream observers (such as a snoop
@@ -800,7 +776,7 @@
 
 bool
 BaseCache::updateCompressionData(CacheBlk *blk, const uint64_t* data,
-                                 PacketList &writebacks)
+    uint32_t delay, Cycles tag_latency)
 {
     // tempBlock does not exist in the tags, so don't do anything for it.
     if (blk == tempBlock) {
@@ -890,7 +866,8 @@
             if (evict_blk->wasPrefetched()) {
                 unusedPrefetches++;
             }
-            evictBlock(evict_blk, writebacks);
+            Cycles lat = calculateAccessLatency(evict_blk, delay, tag_latency);
+            evictBlock(evict_blk, clockEdge(lat + forwardLatency));
         }
     }
 
@@ -1024,8 +1001,7 @@
 }
 
 bool
-BaseCache::access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat,
-                  PacketList &writebacks)
+BaseCache::access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat)
 {
     // sanity check
     assert(pkt->isRequest());
@@ -1124,7 +1100,7 @@
 
         if (!blk) {
             // need to do a replacement
-            blk = allocateBlock(pkt, writebacks);
+            blk = allocateBlock(pkt, tag_latency);
             if (!blk) {
                 // no replaceable block available: give up, fwd to next level.
                 incMissCount(pkt);
@@ -1143,7 +1119,7 @@
             // a smaller size, and now it doesn't fit the entry anymore).
             // If that is the case we might need to evict blocks.
             if (!updateCompressionData(blk, pkt->getConstPtr<uint64_t>(),
-                writebacks)) {
+                pkt->headerDelay, tag_latency)) {
                 // This is a failed data expansion (write), which happened
                 // after finding the replacement entries and accessing the
                 // block's data. There were no replaceable entries available
@@ -1219,7 +1195,7 @@
                 return false;
             } else {
                 // a writeback that misses needs to allocate a new block
-                blk = allocateBlock(pkt, writebacks);
+                blk = allocateBlock(pkt, tag_latency);
                 if (!blk) {
                     // no replaceable block available: give up, fwd to
                     // next level.
@@ -1242,7 +1218,7 @@
             // a smaller size, and now it doesn't fit the entry anymore).
             // If that is the case we might need to evict blocks.
             if (!updateCompressionData(blk, pkt->getConstPtr<uint64_t>(),
-                writebacks)) {
+                pkt->headerDelay, tag_latency)) {
                 // This is a failed data expansion (write), which happened
                 // after finding the replacement entries and accessing the
                 // block's data. There were no replaceable entries available
@@ -1335,8 +1311,7 @@
 }
 
 CacheBlk*
-BaseCache::handleFill(PacketPtr pkt, CacheBlk *blk, PacketList &writebacks,
-                      bool allocate)
+BaseCache::handleFill(PacketPtr pkt, CacheBlk *blk, bool allocate)
 {
     assert(pkt->isResponse());
     Addr addr = pkt->getAddr();
@@ -1353,9 +1328,12 @@
         // better have read new data...
         assert(pkt->hasData() || pkt->cmd == MemCmd::InvalidateResp);
 
-        // need to do a replacement if allocating, otherwise we stick
-        // with the temporary storage
-        blk = allocate ? allocateBlock(pkt, writebacks) : nullptr;
+        // Need to do a replacement if allocating, otherwise we stick
+        // with the temporary storage. The tag lookup has already been
+        // done to decide the eviction victims, so it is set to 0 here.
+        // The eviction itself, however, is delayed until the new data
+        // for the block that is requesting the replacement arrives.
+        blk = allocate ? allocateBlock(pkt, Cycles(0)) : nullptr;
 
         if (!blk) {
             // No replaceable block or a mostly exclusive
@@ -1456,7 +1434,7 @@
 }
 
 CacheBlk*
-BaseCache::allocateBlock(const PacketPtr pkt, PacketList &writebacks)
+BaseCache::allocateBlock(const PacketPtr pkt, Cycles tag_latency)
 {
     // Get address
     const Addr addr = pkt->getAddr();
@@ -1529,7 +1507,9 @@
                     unusedPrefetches++;
                 }
 
-                evictBlock(blk, writebacks);
+                Cycles lat =
+                    calculateAccessLatency(blk, pkt->headerDelay, tag_latency);
+                evictBlock(blk, clockEdge(lat + forwardLatency));
             }
         }
 
@@ -1562,11 +1542,15 @@
 }
 
 void
-BaseCache::evictBlock(CacheBlk *blk, PacketList &writebacks)
+BaseCache::evictBlock(CacheBlk *blk, Tick forward_timing)
 {
     PacketPtr pkt = evictBlock(blk);
     if (pkt) {
-        writebacks.push_back(pkt);
+        if (system->isTimingMode()) {
+            doWritebacks(pkt, forward_timing);
+        } else {
+            doWritebacksAtomic(pkt);
+        }
     }
 }
 
@@ -1835,9 +1819,7 @@
                     __func__, pkt->print(), blk->print());
             PacketPtr wb_pkt = writecleanBlk(blk, pkt->req->getDest(),
                                              pkt->id);
-            PacketList writebacks;
-            writebacks.push_back(wb_pkt);
-            doWritebacks(writebacks, 0);
+            doWritebacks(wb_pkt, 0);
         }
 
         return false;
diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh
index 362381b..bf190a5 100644
--- a/src/mem/cache/base.hh
+++ b/src/mem/cache/base.hh
@@ -454,11 +454,9 @@
      * @param pkt The memory request to perform.
      * @param blk The cache block to be updated.
      * @param lat The latency of the access.
-     * @param writebacks List for any writebacks that need to be performed.
      * @return Boolean indicating whether the request was satisfied.
      */
-    virtual bool access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat,
-                        PacketList &writebacks);
+    virtual bool access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat);
 
     /*
      * Handle a timing request that hit in the cache
@@ -551,11 +549,9 @@
      *
      * @param pkt The packet with the requests
      * @param blk The referenced block
-     * @param writebacks A list with packets for any performed writebacks
      * @return Cycles for handling the request
      */
-    virtual Cycles handleAtomicReqMiss(PacketPtr pkt, CacheBlk *&blk,
-                                       PacketList &writebacks) = 0;
+    virtual Cycles handleAtomicReqMiss(PacketPtr pkt, CacheBlk *&blk) = 0;
 
     /**
      * Performs the access specified by the request.
@@ -595,13 +591,18 @@
 
     /**
      * Insert writebacks into the write buffer
+     *
+     * @param pkt The writeback packet.
+     * @param forward_time Tick to which the writeback should be scheduled.
      */
-    virtual void doWritebacks(PacketList& writebacks, Tick forward_time) = 0;
+    virtual void doWritebacks(PacketPtr pkt, Tick forward_time) = 0;
 
     /**
-     * Send writebacks down the memory hierarchy in atomic mode
+     * Send writebacks down the memory hierarchy in atomic mode.
+     *
+     * @param pkt The writeback packet.
      */
-    virtual void doWritebacksAtomic(PacketList& writebacks) = 0;
+    virtual void doWritebacksAtomic(PacketPtr pkt) = 0;
 
     /**
      * Create an appropriate downstream bus request packet.
@@ -647,8 +648,7 @@
      */
     void writebackTempBlockAtomic() {
         assert(tempBlockWriteback != nullptr);
-        PacketList writebacks{tempBlockWriteback};
-        doWritebacksAtomic(writebacks);
+        doWritebacksAtomic(tempBlockWriteback);
         tempBlockWriteback = nullptr;
     }
 
@@ -680,11 +680,12 @@
      *
      * @param blk The block to be overwriten.
      * @param data A pointer to the data to be compressed (blk's new data).
-     * @param writebacks List for any writebacks that need to be performed.
+     * @param delay The delay until the packet's metadata is present.
+     * @param tag_latency Latency to access the tags of the replacement victim.
      * @return Whether operation is successful or not.
      */
     bool updateCompressionData(CacheBlk *blk, const uint64_t* data,
-                               PacketList &writebacks);
+        uint32_t delay, Cycles tag_latency);
 
     /**
      * Perform any necessary updates to the block and perform any data
@@ -717,34 +718,27 @@
      * Populates a cache block and handles all outstanding requests for the
      * satisfied fill request. This version takes two memory requests. One
      * contains the fill data, the other is an optional target to satisfy.
-     * Note that the reason we return a list of writebacks rather than
-     * inserting them directly in the write buffer is that this function
-     * is called by both atomic and timing-mode accesses, and in atomic
-     * mode we don't mess with the write buffer (we just perform the
-     * writebacks atomically once the original request is complete).
      *
      * @param pkt The memory request with the fill data.
      * @param blk The cache block if it already exists.
-     * @param writebacks List for any writebacks that need to be performed.
      * @param allocate Whether to allocate a block or use the temp block
      * @return Pointer to the new cache block.
      */
-    CacheBlk *handleFill(PacketPtr pkt, CacheBlk *blk,
-                         PacketList &writebacks, bool allocate);
+    CacheBlk *handleFill(PacketPtr pkt, CacheBlk *blk, bool allocate);
 
     /**
-     * Allocate a new block and perform any necessary writebacks
-     *
-     * Find a victim block and if necessary prepare writebacks for any
-     * existing data. May return nullptr if there are no replaceable
-     * blocks. If a replaceable block is found, it inserts the new block in
-     * its place. The new block, however, is not set as valid yet.
+     * Allocate a new block for the packet's data. The victim block might be
+     * valid, and thus the necessary writebacks are done. May return nullptr
+     * if there are no replaceable blocks. If a replaceable block is found,
+     * it inserts the new block in its place. The new block, however, is not
+     * set as valid yet.
      *
      * @param pkt Packet holding the address to update
-     * @param writebacks A list of writeback packets for the evicted blocks
+     * @param tag_latency Latency to access the tags of the replacement victim.
      * @return the allocated block
      */
-    CacheBlk *allocateBlock(const PacketPtr pkt, PacketList &writebacks);
+    CacheBlk *allocateBlock(const PacketPtr pkt, Cycles tag_latency);
+
     /**
      * Evict a cache block.
      *
@@ -761,9 +755,10 @@
      * Performs a writeback if necesssary and invalidates the block
      *
      * @param blk Block to invalidate
-     * @param writebacks Return a list of packets with writebacks
+     * @param forward_time Tick to which the writeback should be scheduled if
+     *                     in timing mode.
      */
-    void evictBlock(CacheBlk *blk, PacketList &writebacks);
+    void evictBlock(CacheBlk *blk, Tick forward_time);
 
     /**
      * Invalidate a cache block.
diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc
index a601a7b..494a998 100644
--- a/src/mem/cache/cache.cc
+++ b/src/mem/cache/cache.cc
@@ -161,8 +161,7 @@
 /////////////////////////////////////////////////////
 
 bool
-Cache::access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat,
-              PacketList &writebacks)
+Cache::access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat)
 {
 
     if (pkt->req->isUncacheable()) {
@@ -174,97 +173,90 @@
 
         DPRINTF(Cache, "%s for %s\n", __func__, pkt->print());
 
+        // lookupLatency is the latency in case the request is uncacheable.
+        lat = lookupLatency;
+
         // flush and invalidate any existing block
         CacheBlk *old_blk(tags->findBlock(pkt->getAddr(), pkt->isSecure()));
         if (old_blk && old_blk->isValid()) {
-            BaseCache::evictBlock(old_blk, writebacks);
+            BaseCache::evictBlock(old_blk, clockEdge(lat + forwardLatency));
         }
 
         blk = nullptr;
-        // lookupLatency is the latency in case the request is uncacheable.
-        lat = lookupLatency;
         return false;
     }
 
-    return BaseCache::access(pkt, blk, lat, writebacks);
+    return BaseCache::access(pkt, blk, lat);
 }
 
 void
-Cache::doWritebacks(PacketList& writebacks, Tick forward_time)
+Cache::doWritebacks(PacketPtr pkt, Tick forward_time)
 {
-    while (!writebacks.empty()) {
-        PacketPtr wbPkt = writebacks.front();
-        // We use forwardLatency here because we are copying writebacks to
-        // write buffer.
+    // We use forwardLatency here because we are copying writebacks to
+    // write buffer.
 
-        // Call isCachedAbove for Writebacks, CleanEvicts and
-        // WriteCleans to discover if the block is cached above.
-        if (isCachedAbove(wbPkt)) {
-            if (wbPkt->cmd == MemCmd::CleanEvict) {
-                // Delete CleanEvict because cached copies exist above. The
-                // packet destructor will delete the request object because
-                // this is a non-snoop request packet which does not require a
-                // response.
-                delete wbPkt;
-            } else if (wbPkt->cmd == MemCmd::WritebackClean) {
-                // clean writeback, do not send since the block is
-                // still cached above
-                assert(writebackClean);
-                delete wbPkt;
-            } else {
-                assert(wbPkt->cmd == MemCmd::WritebackDirty ||
-                       wbPkt->cmd == MemCmd::WriteClean);
-                // Set BLOCK_CACHED flag in Writeback and send below, so that
-                // the Writeback does not reset the bit corresponding to this
-                // address in the snoop filter below.
-                wbPkt->setBlockCached();
-                allocateWriteBuffer(wbPkt, forward_time);
-            }
+    // Call isCachedAbove for Writebacks, CleanEvicts and
+    // WriteCleans to discover if the block is cached above.
+    if (isCachedAbove(pkt)) {
+        if (pkt->cmd == MemCmd::CleanEvict) {
+            // Delete CleanEvict because cached copies exist above. The
+            // packet destructor will delete the request object because
+            // this is a non-snoop request packet which does not require a
+            // response.
+            delete pkt;
+        } else if (pkt->cmd == MemCmd::WritebackClean) {
+            // clean writeback, do not send since the block is
+            // still cached above
+            assert(writebackClean);
+            delete pkt;
         } else {
-            // If the block is not cached above, send packet below. Both
-            // CleanEvict and Writeback with BLOCK_CACHED flag cleared will
-            // reset the bit corresponding to this address in the snoop filter
-            // below.
-            allocateWriteBuffer(wbPkt, forward_time);
+            assert(pkt->cmd == MemCmd::WritebackDirty ||
+                   pkt->cmd == MemCmd::WriteClean);
+            // Set BLOCK_CACHED flag in Writeback and send below, so that
+            // the Writeback does not reset the bit corresponding to this
+            // address in the snoop filter below.
+            pkt->setBlockCached();
+            allocateWriteBuffer(pkt, forward_time);
         }
-        writebacks.pop_front();
+    } else {
+        // If the block is not cached above, send packet below. Both
+        // CleanEvict and Writeback with BLOCK_CACHED flag cleared will
+        // reset the bit corresponding to this address in the snoop filter
+        // below.
+        allocateWriteBuffer(pkt, forward_time);
     }
 }
 
 void
-Cache::doWritebacksAtomic(PacketList& writebacks)
+Cache::doWritebacksAtomic(PacketPtr pkt)
 {
-    while (!writebacks.empty()) {
-        PacketPtr wbPkt = writebacks.front();
-        // Call isCachedAbove for both Writebacks and CleanEvicts. If
-        // isCachedAbove returns true we set BLOCK_CACHED flag in Writebacks
-        // and discard CleanEvicts.
-        if (isCachedAbove(wbPkt, false)) {
-            if (wbPkt->cmd == MemCmd::WritebackDirty ||
-                wbPkt->cmd == MemCmd::WriteClean) {
-                // Set BLOCK_CACHED flag in Writeback and send below,
-                // so that the Writeback does not reset the bit
-                // corresponding to this address in the snoop filter
-                // below. We can discard CleanEvicts because cached
-                // copies exist above. Atomic mode isCachedAbove
-                // modifies packet to set BLOCK_CACHED flag
-                memSidePort.sendAtomic(wbPkt);
-            }
-        } else {
-            // If the block is not cached above, send packet below. Both
-            // CleanEvict and Writeback with BLOCK_CACHED flag cleared will
-            // reset the bit corresponding to this address in the snoop filter
-            // below.
-            memSidePort.sendAtomic(wbPkt);
+    // Call isCachedAbove for both Writebacks and CleanEvicts. If
+    // isCachedAbove returns true we set BLOCK_CACHED flag in Writebacks
+    // and discard CleanEvicts.
+    if (isCachedAbove(pkt, false)) {
+        if (pkt->cmd == MemCmd::WritebackDirty ||
+            pkt->cmd == MemCmd::WriteClean) {
+            // Set BLOCK_CACHED flag in Writeback and send below,
+            // so that the Writeback does not reset the bit
+            // corresponding to this address in the snoop filter
+            // below. We can discard CleanEvicts because cached
+            // copies exist above. Atomic mode isCachedAbove
+            // modifies packet to set BLOCK_CACHED flag
+            memSidePort.sendAtomic(pkt);
         }
-        writebacks.pop_front();
-        // In case of CleanEvicts, the packet destructor will delete the
-        // request object because this is a non-snoop request packet which
-        // does not require a response.
-        delete wbPkt;
+    } else {
+        // If the block is not cached above, send packet below. Both
+        // CleanEvict and Writeback with BLOCK_CACHED flag cleared will
+        // reset the bit corresponding to this address in the snoop filter
+        // below.
+        memSidePort.sendAtomic(pkt);
     }
-}
 
+    // In case of CleanEvicts, the packet destructor will delete the
+    // request object because this is a non-snoop request packet which
+    // does not require a response.
+    delete pkt;
+}
 
 void
 Cache::recvTimingSnoopResp(PacketPtr pkt)
@@ -562,8 +554,7 @@
 
 
 Cycles
-Cache::handleAtomicReqMiss(PacketPtr pkt, CacheBlk *&blk,
-                           PacketList &writebacks)
+Cache::handleAtomicReqMiss(PacketPtr pkt, CacheBlk *&blk)
 {
     // deal with the packets that go through the write path of
     // the cache, i.e. any evictions and writes
@@ -625,7 +616,7 @@
                 // the write to a whole line
                 const bool allocate = allocOnFill(pkt->cmd) &&
                     (!writeAllocator || writeAllocator->allocate());
-                blk = handleFill(bus_pkt, blk, writebacks, allocate);
+                blk = handleFill(bus_pkt, blk, allocate);
                 assert(blk != NULL);
                 is_invalidate = false;
                 satisfyRequest(pkt, blk);
@@ -633,8 +624,7 @@
                        bus_pkt->cmd == MemCmd::UpgradeResp) {
                 // we're updating cache state to allow us to
                 // satisfy the upstream request from the cache
-                blk = handleFill(bus_pkt, blk, writebacks,
-                                 allocOnFill(pkt->cmd));
+                blk = handleFill(bus_pkt, blk, allocOnFill(pkt->cmd));
                 satisfyRequest(pkt, blk);
                 maintainClusivity(pkt->fromCache(), blk);
             } else {
@@ -1020,17 +1010,15 @@
             DPRINTF(CacheVerbose, "%s: packet (snoop) %s found block: %s\n",
                     __func__, pkt->print(), blk->print());
             PacketPtr wb_pkt = writecleanBlk(blk, pkt->req->getDest(), pkt->id);
-            PacketList writebacks;
-            writebacks.push_back(wb_pkt);
 
             if (is_timing) {
                 // anything that is merely forwarded pays for the forward
                 // latency and the delay provided by the crossbar
                 Tick forward_time = clockEdge(forwardLatency) +
                     pkt->headerDelay;
-                doWritebacks(writebacks, forward_time);
+                doWritebacks(wb_pkt, forward_time);
             } else {
-                doWritebacksAtomic(writebacks);
+                doWritebacksAtomic(wb_pkt);
             }
             pkt->setSatisfied();
         }
diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index 33c5a24..d1b876e 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -87,8 +87,7 @@
      */
     void promoteWholeLineWrites(PacketPtr pkt);
 
-    bool access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat,
-                PacketList &writebacks) override;
+    bool access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat) override;
 
     void handleTimingReqHit(PacketPtr pkt, CacheBlk *blk,
                             Tick request_time) override;
@@ -99,9 +98,9 @@
 
     void recvTimingReq(PacketPtr pkt) override;
 
-    void doWritebacks(PacketList& writebacks, Tick forward_time) override;
+    void doWritebacks(PacketPtr pkt, Tick forward_time) override;
 
-    void doWritebacksAtomic(PacketList& writebacks) override;
+    void doWritebacksAtomic(PacketPtr pkt) override;
 
     void serviceMSHRTargets(MSHR *mshr, const PacketPtr pkt,
                             CacheBlk *blk) override;
@@ -110,8 +109,7 @@
 
     void recvTimingSnoopResp(PacketPtr pkt) override;
 
-    Cycles handleAtomicReqMiss(PacketPtr pkt, CacheBlk *&blk,
-                               PacketList &writebacks) override;
+    Cycles handleAtomicReqMiss(PacketPtr pkt, CacheBlk *&blk) override;
 
     Tick recvAtomic(PacketPtr pkt) override;
 
diff --git a/src/mem/cache/noncoherent_cache.cc b/src/mem/cache/noncoherent_cache.cc
index 9a2a1db..5ad75ee 100644
--- a/src/mem/cache/noncoherent_cache.cc
+++ b/src/mem/cache/noncoherent_cache.cc
@@ -80,10 +80,9 @@
 }
 
 bool
-NoncoherentCache::access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat,
-                         PacketList &writebacks)
+NoncoherentCache::access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat)
 {
-    bool success = BaseCache::access(pkt, blk, lat, writebacks);
+    bool success = BaseCache::access(pkt, blk, lat);
 
     if (pkt->isWriteback() || pkt->cmd == MemCmd::WriteClean) {
         assert(blk && blk->isValid());
@@ -98,24 +97,16 @@
 }
 
 void
-NoncoherentCache::doWritebacks(PacketList& writebacks, Tick forward_time)
+NoncoherentCache::doWritebacks(PacketPtr pkt, Tick forward_time)
 {
-    while (!writebacks.empty()) {
-        PacketPtr wb_pkt = writebacks.front();
-        allocateWriteBuffer(wb_pkt, forward_time);
-        writebacks.pop_front();
-    }
+    allocateWriteBuffer(pkt, forward_time);
 }
 
 void
-NoncoherentCache::doWritebacksAtomic(PacketList& writebacks)
+NoncoherentCache::doWritebacksAtomic(PacketPtr pkt)
 {
-    while (!writebacks.empty()) {
-        PacketPtr wb_pkt = writebacks.front();
-        memSidePort.sendAtomic(wb_pkt);
-        writebacks.pop_front();
-        delete wb_pkt;
-    }
+    memSidePort.sendAtomic(pkt);
+    delete pkt;
 }
 
 void
@@ -171,8 +162,7 @@
 
 
 Cycles
-NoncoherentCache::handleAtomicReqMiss(PacketPtr pkt, CacheBlk *&blk,
-                                      PacketList &writebacks)
+NoncoherentCache::handleAtomicReqMiss(PacketPtr pkt, CacheBlk *&blk)
 {
     PacketPtr bus_pkt = createMissPacket(pkt, blk, true,
                                          pkt->isWholeLineWrite(blkSize));
@@ -197,7 +187,7 @@
         // afterall it is a read response
         DPRINTF(Cache, "Block for addr %#llx being updated in Cache\n",
                 bus_pkt->getAddr());
-        blk = handleFill(bus_pkt, blk, writebacks, allocOnFill(bus_pkt->cmd));
+        blk = handleFill(bus_pkt, blk, allocOnFill(bus_pkt->cmd));
         assert(blk);
     }
     satisfyRequest(pkt, blk);
diff --git a/src/mem/cache/noncoherent_cache.hh b/src/mem/cache/noncoherent_cache.hh
index 3da87d9..d909746 100644
--- a/src/mem/cache/noncoherent_cache.hh
+++ b/src/mem/cache/noncoherent_cache.hh
@@ -71,8 +71,7 @@
 class NoncoherentCache : public BaseCache
 {
   protected:
-    bool access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat,
-                PacketList &writebacks) override;
+    bool access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat) override;
 
     void handleTimingReqMiss(PacketPtr pkt, CacheBlk *blk,
                              Tick forward_time,
@@ -80,10 +79,10 @@
 
     void recvTimingReq(PacketPtr pkt) override;
 
-    void doWritebacks(PacketList& writebacks,
+    void doWritebacks(PacketPtr pkt,
                       Tick forward_time) override;
 
-    void doWritebacksAtomic(PacketList& writebacks) override;
+    void doWritebacksAtomic(PacketPtr pkt) override;
 
     void serviceMSHRTargets(MSHR *mshr, const PacketPtr pkt,
                             CacheBlk *blk) override;
@@ -98,8 +97,7 @@
         panic("Unexpected timing snoop response %s", pkt->print());
     }
 
-    Cycles handleAtomicReqMiss(PacketPtr pkt, CacheBlk *&blk,
-                               PacketList &writebacks) override;
+    Cycles handleAtomicReqMiss(PacketPtr pkt, CacheBlk *&blk) override;
 
     Tick recvAtomic(PacketPtr pkt) override;