mem-ruby: Prevent response stalls on MOESI_CMP_directory

When a message triggers a transition whose actions allocate TBEs, the
generated code automatically checks the TBETable size before executing
any action. If the table is full, the transition returns
TransitionResult_ResourceStall and no further messages from that buffer
are handled until the next cycle.

This behavior may lead to deadlocks in the MOESI_CMP_directory protocol,
since events triggered by the response queue may allocate TBEs (e.g.,
L2 replacements triggered by writeback data arriving on the response
queue). If the table is full, the response queue stalls, preventing the
other responses that would free TBEs from being handled.

This patch fixes the issue by handling WRITEBACK_DIRTY_DATA,
WRITEBACK_CLEAN_DATA, and WRITEBACK_CLEAN_ACK messages as requests and
WB_ACK, WB_ACK_DATA, and WB_NACK as responses. All controllers are
updated to work with the new types. With this change, responses are
always handled first in all controllers, and no response triggers a
TBE allocation.
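
To make the failure mode concrete, the sketch below is a minimal,
self-contained C++ model of the scenario; it is not gem5/SLICC code, and
the TbeTable/Msg types, the message names, and the one-entry table size
are all illustrative assumptions. A writeback-data message at the head
of a shared queue needs a TBE, the table is full, and the ack behind it
that would free a TBE is never reached; moving writeback data onto the
request path breaks the cycle.

    // Illustrative model only; not gem5 code. Shows why mixing
    // TBE-allocating writebacks and TBE-freeing acks in one queue can
    // deadlock, and why splitting them avoids it.
    #include <cassert>
    #include <deque>
    #include <iostream>
    #include <set>
    #include <string>

    // Bounded "TBE table": transitions that allocate need a free slot.
    struct TbeTable {
        std::set<int> entries;
        size_t capacity;
        explicit TbeTable(size_t cap) : capacity(cap) {}
        bool full() const { return entries.size() >= capacity; }
        void allocate(int addr) { assert(!full()); entries.insert(addr); }
        void deallocate(int addr) { entries.erase(addr); }
    };

    struct Msg {
        std::string type;  // "WB_DATA" allocates a TBE, "WB_ACK" frees one
        int addr;
    };

    // Handle the message at the head of a queue. Returns false on a
    // resource stall: the message needs a TBE but the table is full, so
    // the whole queue is blocked (mirroring
    // TransitionResult_ResourceStall in the generated code).
    bool handle(std::deque<Msg> &q, TbeTable &tbes) {
        if (q.empty()) return true;
        const Msg &m = q.front();
        if (m.type == "WB_DATA") {          // e.g. triggers an L2 replacement
            if (tbes.full()) return false;  // stall; queue makes no progress
            tbes.allocate(m.addr);
        } else if (m.type == "WB_ACK") {
            tbes.deallocate(m.addr);        // frees a TBE
        }
        q.pop_front();
        return true;
    }

    int main() {
        std::cout << std::boolalpha;

        // Before: acks and writeback data share the response queue.
        TbeTable tbes(1);
        tbes.allocate(100);  // table already full
        std::deque<Msg> responses = {{"WB_DATA", 200}, {"WB_ACK", 100}};
        // The WB_DATA at the head stalls forever: the WB_ACK behind it,
        // which would free the TBE it needs, is never handled.
        std::cout << "shared queue progresses: "
                  << handle(responses, tbes) << "\n";   // false

        // After: writeback data travels as a request, acks as responses.
        TbeTable tbes2(1);
        tbes2.allocate(100);
        std::deque<Msg> reqQ = {{"WB_DATA", 200}};
        std::deque<Msg> respQ = {{"WB_ACK", 100}};
        handle(respQ, tbes2);  // the ack frees the TBE first
        std::cout << "split queues progress: "
                  << handle(reqQ, tbes2) << "\n";       // true
    }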

Change-Id: I377c0ec4f06d528e9f0541daf3dcc621184f2524
Signed-off-by: Tiago Muck <tiago.muck@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/18408
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matthew Poremba <matthew.poremba@amd.com>
Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
Reviewed-by: John Alsop <johnathan.alsop@amd.com>
Maintainer: Jason Lowe-Power <jason@lowepower.com>
diff --git a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm
index 0d48e21..f151267 100644
--- a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm
+++ b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2019 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 1999-2013 Mark D. Hill and David A. Wood
  * All rights reserved.
  *
@@ -289,7 +301,35 @@
     }
   }
 
-  // Nothing from the request network
+  // Response Network
+  in_port(responseToL1Cache_in, ResponseMsg, responseToL1Cache) {
+    if (responseToL1Cache_in.isReady(clockEdge())) {
+      peek(responseToL1Cache_in, ResponseMsg, block_on="addr") {
+        if (in_msg.Type == CoherenceResponseType:ACK) {
+          trigger(Event:Ack, in_msg.addr,
+                  getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
+        } else if (in_msg.Type == CoherenceResponseType:DATA) {
+          trigger(Event:Data, in_msg.addr,
+                  getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
+        } else if (in_msg.Type == CoherenceResponseType:DATA_EXCLUSIVE) {
+          trigger(Event:Exclusive_Data, in_msg.addr,
+                  getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
+        } else if (in_msg.Type == CoherenceResponseType:WB_ACK) {
+          trigger(Event:Writeback_Ack, in_msg.addr,
+                  getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
+        } else if (in_msg.Type == CoherenceResponseType:WB_ACK_DATA) {
+          trigger(Event:Writeback_Ack_Data, in_msg.addr,
+                  getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
+        } else if (in_msg.Type == CoherenceResponseType:WB_NACK) {
+          trigger(Event:Writeback_Nack, in_msg.addr,
+                  getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
+        } else {
+          error("Unexpected message");
+        }
+      }
+    }
+  }
+
 
   // Request Network
   in_port(requestNetwork_in, RequestMsg, requestToL1Cache) {
@@ -312,15 +352,6 @@
         } else if (in_msg.Type == CoherenceRequestType:DMA_READ) {
           trigger(Event:Fwd_DMA, in_msg.addr,
                   getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
-        } else if (in_msg.Type == CoherenceRequestType:WB_ACK) {
-          trigger(Event:Writeback_Ack, in_msg.addr,
-                  getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
-        } else if (in_msg.Type == CoherenceRequestType:WB_ACK_DATA) {
-          trigger(Event:Writeback_Ack_Data, in_msg.addr,
-                  getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
-        } else if (in_msg.Type == CoherenceRequestType:WB_NACK) {
-          trigger(Event:Writeback_Nack, in_msg.addr,
-                  getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
         } else if (in_msg.Type == CoherenceRequestType:INV) {
           trigger(Event:Inv, in_msg.addr,
                   getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
@@ -331,27 +362,6 @@
     }
   }
 
-  // Response Network
-  in_port(responseToL1Cache_in, ResponseMsg, responseToL1Cache) {
-    if (responseToL1Cache_in.isReady(clockEdge())) {
-      peek(responseToL1Cache_in, ResponseMsg, block_on="addr") {
-        if (in_msg.Type == CoherenceResponseType:ACK) {
-          trigger(Event:Ack, in_msg.addr,
-                  getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
-        } else if (in_msg.Type == CoherenceResponseType:DATA) {
-          trigger(Event:Data, in_msg.addr,
-                  getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
-        } else if (in_msg.Type == CoherenceResponseType:DATA_EXCLUSIVE) {
-          trigger(Event:Exclusive_Data, in_msg.addr,
-                  getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
-        } else {
-          error("Unexpected message");
-        }
-      }
-    }
-  }
-
-  // Nothing from the unblock network
   // Mandatory Queue betweens Node's CPU and it's L1 caches
   in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...") {
     if (mandatoryQueue_in.isReady(clockEdge())) {
@@ -822,18 +832,17 @@
 
   // L2 will usually request data for a writeback
   action(qq_sendWBDataFromTBEToL2, "\q", desc="Send data from TBE to L2") {
-    enqueue(responseNetwork_out, ResponseMsg, request_latency) {
+    enqueue(requestNetwork_out, RequestMsg, request_latency) {
       assert(is_valid(tbe));
       out_msg.addr := address;
-      out_msg.Sender := machineID;
-      out_msg.SenderMachine := MachineType:L1Cache;
+      out_msg.Requestor := machineID;
+      out_msg.RequestorMachine := MachineType:L1Cache;
       out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
               l2_select_low_bit, l2_select_num_bits, intToID(0)));
-      out_msg.Dirty := tbe.Dirty;
       if (tbe.Dirty) {
-        out_msg.Type := CoherenceResponseType:WRITEBACK_DIRTY_DATA;
+        out_msg.Type := CoherenceRequestType:WRITEBACK_DIRTY_DATA;
       } else {
-        out_msg.Type := CoherenceResponseType:WRITEBACK_CLEAN_DATA;
+        out_msg.Type := CoherenceRequestType:WRITEBACK_CLEAN_DATA;
       }
       out_msg.DataBlk := tbe.DataBlk;
       out_msg.MessageSize := MessageSizeType:Writeback_Data;
@@ -1281,38 +1290,38 @@
   transition({SI, OI, MI}, Writeback_Ack_Data, I) {
     qq_sendWBDataFromTBEToL2;  // always send data
     s_deallocateTBE;
-    l_popForwardQueue;
+    n_popResponseQueue;
   }
 
   transition({SI, OI, MI}, Writeback_Ack, I) {
     g_sendUnblock;
     s_deallocateTBE;
-    l_popForwardQueue;
+    n_popResponseQueue;
   }
 
   transition({MI, OI}, Writeback_Nack, OI) {
     // FIXME: This might cause deadlock by re-using the writeback
     // channel, we should handle this case differently.
     dd_issuePUTO;
-    l_popForwardQueue;
+    n_popResponseQueue;
   }
 
   // Transitions from II
   transition(II, {Writeback_Ack, Writeback_Ack_Data}, I) {
     g_sendUnblock;
     s_deallocateTBE;
-    l_popForwardQueue;
+    n_popResponseQueue;
   }
 
   // transition({II, SI}, Writeback_Nack, I) {
   transition(II, Writeback_Nack, I) {
     s_deallocateTBE;
-    l_popForwardQueue;
+    n_popResponseQueue;
   }
 
   transition(SI, Writeback_Nack) {
     dd_issuePUTS;
-    l_popForwardQueue;
+    n_popResponseQueue;
   }
 
   transition(II, Inv) {
diff --git a/src/mem/protocol/MOESI_CMP_directory-L2cache.sm b/src/mem/protocol/MOESI_CMP_directory-L2cache.sm
index 6252219..379e609 100644
--- a/src/mem/protocol/MOESI_CMP_directory-L2cache.sm
+++ b/src/mem/protocol/MOESI_CMP_directory-L2cache.sm
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2019 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 1999-2013 Mark D. Hill and David A. Wood
  * All rights reserved.
  *
@@ -620,30 +632,12 @@
         } else if (in_msg.Type == CoherenceResponseType:UNBLOCK_EXCLUSIVE) {
           trigger(Event:Exclusive_Unblock, in_msg.addr,
                   getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
-        } else if (in_msg.Type == CoherenceResponseType:WRITEBACK_DIRTY_DATA) {
-          Entry cache_entry := getCacheEntry(in_msg.addr);
-          if (is_invalid(cache_entry) &&
-                   L2cache.cacheAvail(in_msg.addr) == false) {
-            trigger(Event:L2_Replacement, L2cache.cacheProbe(in_msg.addr),
-                    getCacheEntry(L2cache.cacheProbe(in_msg.addr)),
-                    TBEs[L2cache.cacheProbe(in_msg.addr)]);
-          }
-          else {
-            trigger(Event:L1_WBDIRTYDATA, in_msg.addr,
-                    cache_entry, TBEs[in_msg.addr]);
-          }
-        } else if (in_msg.Type == CoherenceResponseType:WRITEBACK_CLEAN_DATA) {
-          Entry cache_entry := getCacheEntry(in_msg.addr);
-          if (is_invalid(cache_entry) &&
-                   L2cache.cacheAvail(in_msg.addr) == false) {
-            trigger(Event:L2_Replacement, L2cache.cacheProbe(in_msg.addr),
-                    getCacheEntry(L2cache.cacheProbe(in_msg.addr)),
-                    TBEs[L2cache.cacheProbe(in_msg.addr)]);
-          }
-          else {
-            trigger(Event:L1_WBCLEANDATA, in_msg.addr,
-                    cache_entry, TBEs[in_msg.addr]);
-          }
+        } else if (in_msg.Type == CoherenceResponseType:WB_ACK) {
+          trigger(Event:Writeback_Ack, in_msg.addr,
+                  getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
+        } else if (in_msg.Type == CoherenceResponseType:WB_NACK) {
+          trigger(Event:Writeback_Nack, in_msg.addr,
+                  getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
         } else if (in_msg.Type == CoherenceResponseType:DMA_ACK) {
           trigger(Event:DmaAck, in_msg.addr,
                   getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
@@ -676,12 +670,6 @@
         } else if (in_msg.Type == CoherenceRequestType:INV) {
           trigger(Event:Inv, in_msg.addr,
                   getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
-        } else if (in_msg.Type == CoherenceRequestType:WB_ACK) {
-          trigger(Event:Writeback_Ack, in_msg.addr,
-                  getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
-        } else if (in_msg.Type == CoherenceRequestType:WB_NACK) {
-          trigger(Event:Writeback_Nack, in_msg.addr,
-                  getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
         } else {
           error("Unexpected message");
         }
@@ -715,6 +703,30 @@
             trigger(Event:L1_PUTS, in_msg.addr,
                     cache_entry, TBEs[in_msg.addr]);
           }
+        } else if (in_msg.Type == CoherenceRequestType:WRITEBACK_DIRTY_DATA) {
+          Entry cache_entry := getCacheEntry(in_msg.addr);
+          if (is_invalid(cache_entry) &&
+                   L2cache.cacheAvail(in_msg.addr) == false) {
+            trigger(Event:L2_Replacement, L2cache.cacheProbe(in_msg.addr),
+                    getCacheEntry(L2cache.cacheProbe(in_msg.addr)),
+                    TBEs[L2cache.cacheProbe(in_msg.addr)]);
+          }
+          else {
+            trigger(Event:L1_WBDIRTYDATA, in_msg.addr,
+                    cache_entry, TBEs[in_msg.addr]);
+          }
+        } else if (in_msg.Type == CoherenceRequestType:WRITEBACK_CLEAN_DATA) {
+          Entry cache_entry := getCacheEntry(in_msg.addr);
+          if (is_invalid(cache_entry) &&
+                   L2cache.cacheAvail(in_msg.addr) == false) {
+            trigger(Event:L2_Replacement, L2cache.cacheProbe(in_msg.addr),
+                    getCacheEntry(L2cache.cacheProbe(in_msg.addr)),
+                    TBEs[L2cache.cacheProbe(in_msg.addr)]);
+          }
+          else {
+            trigger(Event:L1_WBCLEANDATA, in_msg.addr,
+                    cache_entry, TBEs[in_msg.addr]);
+          }
         } else {
           error("Unexpected message");
         }
@@ -1191,9 +1203,15 @@
     }
   }
 
-  action(gg_clearOwnerFromL1Response, "g\g", desc="Clear sharer from L1 response queue") {
-    peek(responseNetwork_in, ResponseMsg) {
-      removeOwnerFromDir(cache_entry, in_msg.addr, in_msg.Sender);
+  action(gg_clearSharerFromL1Request, "clsl1r", desc="Clear sharer from L1 request queue") {
+    peek(L1requestNetwork_in, RequestMsg) {
+      removeSharerFromDir(cache_entry, in_msg.addr, in_msg.Requestor);
+    }
+  }
+
+  action(gg_clearOwnerFromL1Request, "clol1r", desc="Clear owner from L1 request queue") {
+    peek(L1requestNetwork_in, RequestMsg) {
+      removeOwnerFromDir(cache_entry, in_msg.addr, in_msg.Requestor);
     }
   }
 
@@ -1330,12 +1348,11 @@
 
   action(l_writebackAckNeedData, "l", desc="Send writeback ack to L1 requesting data") {
     peek(L1requestNetwork_in, RequestMsg) {
-      enqueue( localRequestNetwork_out, RequestMsg, response_latency ) {
+      enqueue( responseNetwork_out, ResponseMsg, response_latency ) {
         out_msg.addr := in_msg.addr;
-        // out_msg.Type := CoherenceResponseType:WRITEBACK_SEND_DATA;
-        out_msg.Type := CoherenceRequestType:WB_ACK_DATA;
-        out_msg.Requestor := machineID;
-        out_msg.RequestorMachine := MachineType:L2Cache;
+        out_msg.Type := CoherenceResponseType:WB_ACK_DATA;
+        out_msg.Sender := machineID;
+        out_msg.SenderMachine := MachineType:L2Cache;
         out_msg.Destination.add(in_msg.Requestor);
         out_msg.MessageSize := MessageSizeType:Writeback_Control;
       }
@@ -1344,12 +1361,11 @@
 
   action(l_writebackAckDropData, "\l", desc="Send writeback ack to L1 indicating to drop data") {
     peek(L1requestNetwork_in, RequestMsg) {
-      enqueue( localRequestNetwork_out, RequestMsg, response_latency ) {
+      enqueue( responseNetwork_out, ResponseMsg, response_latency ) {
         out_msg.addr := in_msg.addr;
-        // out_msg.Type := CoherenceResponseType:WRITEBACK_ACK;
-        out_msg.Type := CoherenceRequestType:WB_ACK;
-        out_msg.Requestor := machineID;
-        out_msg.RequestorMachine := MachineType:L2Cache;
+        out_msg.Type := CoherenceResponseType:WB_ACK;
+        out_msg.Sender := machineID;
+        out_msg.SenderMachine := MachineType:L2Cache;
         out_msg.Destination.add(in_msg.Requestor);
         out_msg.MessageSize := MessageSizeType:Writeback_Control;
       }
@@ -1358,11 +1374,11 @@
 
   action(ll_writebackNack, "\ll", desc="Send writeback nack to L1") {
     peek(L1requestNetwork_in, RequestMsg) {
-      enqueue( localRequestNetwork_out, RequestMsg, response_latency ) {
+      enqueue( responseNetwork_out, ResponseMsg, response_latency ) {
         out_msg.addr := in_msg.addr;
-        out_msg.Type := CoherenceRequestType:WB_NACK;
-        out_msg.Requestor := machineID;
-        out_msg.RequestorMachine := MachineType:L2Cache;
+        out_msg.Type := CoherenceResponseType:WB_NACK;
+        out_msg.Sender := machineID;
+        out_msg.SenderMachine := MachineType:L2Cache;
         out_msg.Destination.add(in_msg.Requestor);
         out_msg.MessageSize := MessageSizeType:Writeback_Control;
       }
@@ -1429,19 +1445,18 @@
 
 
   action( qq_sendDataFromTBEToMemory, "qq", desc="Send data from TBE to directory") {
-    enqueue(responseNetwork_out, ResponseMsg, response_latency) {
+    enqueue(globalRequestNetwork_out, RequestMsg, response_latency) {
       assert(is_valid(tbe));
       out_msg.addr := address;
-      out_msg.Sender := machineID;
-      out_msg.SenderMachine := MachineType:L2Cache;
+      out_msg.Requestor := machineID;
+      out_msg.RequestorMachine := MachineType:L2Cache;
       out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
-      out_msg.Dirty := tbe.Dirty;
       if (tbe.Dirty) {
-        out_msg.Type := CoherenceResponseType:WRITEBACK_DIRTY_DATA;
+        out_msg.Type := CoherenceRequestType:WRITEBACK_DIRTY_DATA;
         out_msg.DataBlk := tbe.DataBlk;
         out_msg.MessageSize := MessageSizeType:Writeback_Data;
       } else {
-         out_msg.Type := CoherenceResponseType:WRITEBACK_CLEAN_ACK;
+         out_msg.Type := CoherenceRequestType:WRITEBACK_CLEAN_ACK;
         // NOTE: in a real system this would not send data.  We send
         // data here only so we can check it at the memory
          out_msg.DataBlk := tbe.DataBlk;
@@ -1492,15 +1507,23 @@
   }
 
 
-  action(u_writeDataToCache, "u", desc="Write data to cache") {
-    peek(responseNetwork_in, ResponseMsg) {
+  action(u_writeCleanDataToCache, "wCd", desc="Write clean data to cache") {
+    peek(L1requestNetwork_in, RequestMsg) {
       assert(is_valid(cache_entry));
       cache_entry.DataBlk := in_msg.DataBlk;
       DPRINTF(RubySlicc, "Address: %#x, Data Block: %s\n",
             address, cache_entry.DataBlk);
-      if ((cache_entry.Dirty == false) && in_msg.Dirty) {
-        cache_entry.Dirty := in_msg.Dirty;
-      }
+      assert(cache_entry.Dirty == false);
+    }
+  }
+
+  action(u_writeDirtyDataToCache, "wDd", desc="Write dirty data to cache") {
+    peek(L1requestNetwork_in, RequestMsg) {
+      assert(is_valid(cache_entry));
+      cache_entry.DataBlk := in_msg.DataBlk;
+      DPRINTF(RubySlicc, "Address: %#x, Data Block: %s\n",
+            address, cache_entry.DataBlk);
+      cache_entry.Dirty := true;
     }
   }
 
@@ -1538,13 +1561,20 @@
     localDirectory.deallocate(address);
   }
 
-  action(zz_recycleRequestQueue, "\zz", desc="Send the head of the mandatory queue to the back of the queue.") {
+  action(zz_recycleGlobalRequestQueue, "\zglb", desc="Send the head of the mandatory queue to the back of the queue.") {
     peek(requestNetwork_in, RequestMsg) {
       APPEND_TRANSITION_COMMENT(in_msg.Requestor);
     }
     requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
   }
 
+  action(zz_recycleL1RequestQueue, "\zl1", desc="Send the head of the mandatory queue to the back of the queue.") {
+    peek(L1requestNetwork_in, RequestMsg) {
+      APPEND_TRANSITION_COMMENT(in_msg.Requestor);
+    }
+    L1requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
+  }
+
   action(zz_recycleResponseQueue, "\z\z", desc="Send the head of the mandatory queue to the back of the queue.") {
     peek(responseNetwork_in, ResponseMsg) {
       APPEND_TRANSITION_COMMENT(in_msg.Sender);
@@ -1586,23 +1616,23 @@
   }
 
   transition({IFGX, IFGS, ISFGS, IFGXX, IFLXO, OFGX, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, ILXW, OW, SW, OXW, OLSXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS,  IGS, IGM, IGMLS, IGMO, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, L2_Replacement) {
-    zz_recycleResponseQueue;
+    zz_recycleL1RequestQueue;
   }
 
   transition({IFGX, IFGS, ISFGS, IFGXX, IFLXO, OFGX, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, IGS, IGM, MM, SS, OO, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {Fwd_GETX, Fwd_GETS, Fwd_DMA}) {
-    zz_recycleRequestQueue;
+    zz_recycleGlobalRequestQueue;
   }
 
   transition({OGMIO, IGMIO, IGMO}, Fwd_DMA) {
-    zz_recycleRequestQueue;
+    zz_recycleGlobalRequestQueue;
   }
 
   transition({IFGX, IFGS, ISFGS, IFGXX, IFLXO, OFGX, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, MM, SS, OO, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {Inv}) {
-    zz_recycleRequestQueue;
+    zz_recycleGlobalRequestQueue;
   }
 
   transition({IGM, IGS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {Own_GETX}) {
-    zz_recycleRequestQueue;
+    zz_recycleGlobalRequestQueue;
   }
 
   // must happened because we forwarded GETX to local exclusive trying to do wb
@@ -2643,8 +2673,8 @@
     gg_clearLocalSharers;
     vv_allocateL2CacheBlock;
     y_copyDirToCacheAndRemove;
-    u_writeDataToCache;
-    n_popResponseQueue;
+    u_writeDirtyDataToCache;
+    o_popL1RequestQueue;
     wa_wakeUpDependents;
   }
 
@@ -2653,8 +2683,8 @@
     gg_clearLocalSharers;
     vv_allocateL2CacheBlock;
     y_copyDirToCacheAndRemove;
-    u_writeDataToCache;
-    n_popResponseQueue;
+    u_writeCleanDataToCache;
+    o_popL1RequestQueue;
     wa_wakeUpDependents;
   }
 
@@ -2667,67 +2697,93 @@
   transition(ILSW, L1_WBCLEANDATA, SLS) {
     vv_allocateL2CacheBlock;
     y_copyDirToCacheAndRemove;
-    u_writeDataToCache;
-    gg_clearSharerFromL1Response;
-    n_popResponseQueue;
+    u_writeCleanDataToCache;
+    gg_clearSharerFromL1Request;
+    o_popL1RequestQueue;
     wa_wakeUpDependents;
   }
 
   transition(IW, L1_WBCLEANDATA, S) {
     vv_allocateL2CacheBlock;
     y_copyDirToCacheAndRemove;
-    u_writeDataToCache;
-    gg_clearSharerFromL1Response;
-    n_popResponseQueue;
+    u_writeCleanDataToCache;
+    gg_clearSharerFromL1Request;
+    o_popL1RequestQueue;
     wa_wakeUpDependents;
   }
 
   // Owner can have dirty data
-  transition(ILOW, {L1_WBCLEANDATA, L1_WBDIRTYDATA}, O) {
+  transition(ILOW, L1_WBDIRTYDATA, O) {
     vv_allocateL2CacheBlock;
     y_copyDirToCacheAndRemove;
-    gg_clearOwnerFromL1Response;
-    u_writeDataToCache;
-    n_popResponseQueue;
+    gg_clearOwnerFromL1Request;
+    u_writeDirtyDataToCache;
+    o_popL1RequestQueue;
+    wa_wakeUpDependents;
+  }
+
+  transition(ILOW, L1_WBCLEANDATA, O) {
+    vv_allocateL2CacheBlock;
+    y_copyDirToCacheAndRemove;
+    gg_clearOwnerFromL1Request;
+    u_writeCleanDataToCache;
+    o_popL1RequestQueue;
     wa_wakeUpDependents;
   }
 
   transition(ILOXW, L1_WBDIRTYDATA, M) {
     vv_allocateL2CacheBlock;
     y_copyDirToCacheAndRemove;
-    gg_clearOwnerFromL1Response;
-    u_writeDataToCache;
-    n_popResponseQueue;
+    gg_clearOwnerFromL1Request;
+    u_writeDirtyDataToCache;
+    o_popL1RequestQueue;
     wa_wakeUpDependents;
   }
 
   transition(ILOXW, L1_WBCLEANDATA, M) {
     vv_allocateL2CacheBlock;
     y_copyDirToCacheAndRemove;
-    gg_clearOwnerFromL1Response;
-    u_writeDataToCache;
-    n_popResponseQueue;
+    gg_clearOwnerFromL1Request;
+    u_writeCleanDataToCache;
+    o_popL1RequestQueue;
     wa_wakeUpDependents;
   }
 
-  transition(ILOSW, {L1_WBCLEANDATA, L1_WBDIRTYDATA}, OLS) {
+  transition(ILOSW, L1_WBDIRTYDATA, OLS) {
     vv_allocateL2CacheBlock;
     y_copyDirToCacheAndRemove;
-    gg_clearOwnerFromL1Response;
-    u_writeDataToCache;
-    n_popResponseQueue;
+    gg_clearOwnerFromL1Request;
+    u_writeDirtyDataToCache;
+    o_popL1RequestQueue;
     wa_wakeUpDependents;
   }
 
-  transition(ILOSXW, {L1_WBCLEANDATA, L1_WBDIRTYDATA}, OLSX) {
+  transition(ILOSW, L1_WBCLEANDATA, OLS) {
     vv_allocateL2CacheBlock;
     y_copyDirToCacheAndRemove;
-    gg_clearOwnerFromL1Response;
-    u_writeDataToCache;
-    n_popResponseQueue;
+    gg_clearOwnerFromL1Request;
+    u_writeCleanDataToCache;
+    o_popL1RequestQueue;
     wa_wakeUpDependents;
   }
 
+  transition(ILOSXW, L1_WBDIRTYDATA, OLSX) {
+    vv_allocateL2CacheBlock;
+    y_copyDirToCacheAndRemove;
+    gg_clearOwnerFromL1Request;
+    u_writeDirtyDataToCache;
+    o_popL1RequestQueue;
+    wa_wakeUpDependents;
+  }
+
+  transition(ILOSXW, L1_WBCLEANDATA, OLSX) {
+    vv_allocateL2CacheBlock;
+    y_copyDirToCacheAndRemove;
+    gg_clearOwnerFromL1Request;
+    u_writeCleanDataToCache;
+    o_popL1RequestQueue;
+    wa_wakeUpDependents;
+  }
 
   transition(SLSW, {Unblock}, SLS) {
     gg_clearSharerFromL1Response;
@@ -2838,39 +2894,39 @@
   transition({MI, OI}, Writeback_Ack, I) {
     qq_sendDataFromTBEToMemory;
     s_deallocateTBE;
-    m_popRequestQueue;
+    n_popResponseQueue;
     wa_wakeUpDependents;
   }
 
   transition(MII, Writeback_Nack, I) {
     s_deallocateTBE;
-    m_popRequestQueue;
+    n_popResponseQueue;
     wa_wakeUpDependents;
   }
 
   transition(OI, Writeback_Nack) {
     b_issuePUTO;
-    m_popRequestQueue;
+    n_popResponseQueue;
   }
 
   transition(OLSI, Writeback_Ack, ILS) {
     qq_sendDataFromTBEToMemory;
     s_deallocateTBE;
-    m_popRequestQueue;
+    n_popResponseQueue;
     wa_wakeUpDependents;
   }
 
   transition(MII, Writeback_Ack, I) {
     f_sendUnblock;
     s_deallocateTBE;
-    m_popRequestQueue;
+    n_popResponseQueue;
     wa_wakeUpDependents;
   }
 
   transition(ILSI, Writeback_Ack, ILS) {
     f_sendUnblock;
     s_deallocateTBE;
-    m_popRequestQueue;
+    n_popResponseQueue;
     wa_wakeUpDependents;
   }
 }
diff --git a/src/mem/protocol/MOESI_CMP_directory-dir.sm b/src/mem/protocol/MOESI_CMP_directory-dir.sm
index 9b73a2b..04e2888 100644
--- a/src/mem/protocol/MOESI_CMP_directory-dir.sm
+++ b/src/mem/protocol/MOESI_CMP_directory-dir.sm
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2019 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 1999-2013 Mark D. Hill and David A. Wood
  * All rights reserved.
  *
@@ -243,12 +255,6 @@
         } else if (in_msg.Type == CoherenceResponseType:UNBLOCK_EXCLUSIVE) {
           trigger(Event:Exclusive_Unblock, in_msg.addr,
                   TBEs[in_msg.addr]);
-        } else if (in_msg.Type == CoherenceResponseType:WRITEBACK_DIRTY_DATA) {
-          trigger(Event:Dirty_Writeback, in_msg.addr,
-                  TBEs[in_msg.addr]);
-        } else if (in_msg.Type == CoherenceResponseType:WRITEBACK_CLEAN_ACK) {
-          trigger(Event:Clean_Writeback, in_msg.addr,
-                  TBEs[in_msg.addr]);
         } else if (in_msg.Type == CoherenceResponseType:DATA_EXCLUSIVE) {
           trigger(Event:Data, in_msg.addr,
                   TBEs[in_msg.addr]);
@@ -275,6 +281,12 @@
           trigger(Event:PUTO, in_msg.addr, TBEs[in_msg.addr]);
         } else if (in_msg.Type == CoherenceRequestType:PUTO_SHARERS) {
           trigger(Event:PUTO_SHARERS, in_msg.addr, TBEs[in_msg.addr]);
+        } else if (in_msg.Type == CoherenceRequestType:WRITEBACK_DIRTY_DATA) {
+          trigger(Event:Dirty_Writeback, in_msg.addr,
+                  TBEs[in_msg.addr]);
+        } else if (in_msg.Type == CoherenceRequestType:WRITEBACK_CLEAN_ACK) {
+          trigger(Event:Clean_Writeback, in_msg.addr,
+                  TBEs[in_msg.addr]);
         } else if (in_msg.Type == CoherenceRequestType:DMA_READ) {
           trigger(Event:DMA_READ, makeLineAddress(in_msg.addr),
                   TBEs[makeLineAddress(in_msg.addr)]);
@@ -308,11 +320,11 @@
 
   action(a_sendWriteBackAck, "a", desc="Send writeback ack to requestor") {
     peek(requestQueue_in, RequestMsg) {
-      enqueue(forwardNetwork_out, RequestMsg, directory_latency) {
+      enqueue(responseNetwork_out, ResponseMsg, directory_latency) {
         out_msg.addr := address;
-        out_msg.Type := CoherenceRequestType:WB_ACK;
-        out_msg.Requestor := in_msg.Requestor;
-        out_msg.RequestorMachine := MachineType:Directory;
+        out_msg.Type := CoherenceResponseType:WB_ACK;
+        out_msg.Sender := in_msg.Requestor;
+        out_msg.SenderMachine := MachineType:Directory;
         out_msg.Destination.add(in_msg.Requestor);
         out_msg.MessageSize := MessageSizeType:Writeback_Control;
       }
@@ -321,11 +333,11 @@
 
   action(b_sendWriteBackNack, "b", desc="Send writeback nack to requestor") {
     peek(requestQueue_in, RequestMsg) {
-      enqueue(forwardNetwork_out, RequestMsg, directory_latency) {
+      enqueue(responseNetwork_out, ResponseMsg, directory_latency) {
         out_msg.addr := address;
-        out_msg.Type := CoherenceRequestType:WB_NACK;
-        out_msg.Requestor := in_msg.Requestor;
-        out_msg.RequestorMachine := MachineType:Directory;
+        out_msg.Type := CoherenceResponseType:WB_NACK;
+        out_msg.Sender := in_msg.Requestor;
+        out_msg.SenderMachine := MachineType:Directory;
         out_msg.Destination.add(in_msg.Requestor);
         out_msg.MessageSize := MessageSizeType:Writeback_Control;
       }
@@ -472,13 +484,13 @@
     }
   }
 
-  action(qw_queueMemoryWBRequest, "qw", desc="Queue off-chip writeback request") {
-    peek(unblockNetwork_in, ResponseMsg) {
+  action(qw_queueMemoryWBFromCacheRequest, "qw", desc="Queue off-chip writeback request") {
+    peek(requestQueue_in, RequestMsg) {
       if (is_valid(tbe)) {
         queueMemoryWrite(tbe.Requestor, address, to_memory_controller_latency,
                          in_msg.DataBlk);
       } else {
-        queueMemoryWrite(in_msg.Sender, address, to_memory_controller_latency,
+        queueMemoryWrite(in_msg.Requestor, address, to_memory_controller_latency,
                          in_msg.DataBlk);
       }
     }
@@ -495,7 +507,7 @@
     }
   }
 
-  action(qw_queueMemoryWBRequest2, "/qw", desc="Queue off-chip writeback request") {
+  action(qw_queueMemoryWBFromDMARequest, "/qw", desc="Queue off-chip writeback request") {
     peek(requestQueue_in, RequestMsg) {
       queueMemoryWrite(in_msg.Requestor, address, to_memory_controller_latency,
                        in_msg.DataBlk);
@@ -567,7 +579,7 @@
   }
 
   transition(I, DMA_WRITE, XI_U) {
-    qw_queueMemoryWBRequest2;
+    qw_queueMemoryWBFromDMARequest;
     a_sendDMAAck;  // ack count may be zero
     i_popIncomingRequestQueue;
   }
@@ -597,7 +609,7 @@
   }
 
   transition(S, DMA_WRITE, XI_U) {
-    qw_queueMemoryWBRequest2;
+    qw_queueMemoryWBFromDMARequest;
     a_sendDMAAck;  // ack count may be zero
     g_sendInvalidations;  // the DMA will collect invalidations
     i_popIncomingRequestQueue;
@@ -768,47 +780,47 @@
   transition(MI, Dirty_Writeback, I) {
     c_clearOwner;
     cc_clearSharers;
-    qw_queueMemoryWBRequest;
-    j_popIncomingUnblockQueue;
+    qw_queueMemoryWBFromCacheRequest;
+    i_popIncomingRequestQueue;
   }
 
   transition(MIS, Dirty_Writeback, S) {
     c_moveOwnerToSharer;
-    qw_queueMemoryWBRequest;
-    j_popIncomingUnblockQueue;
+    qw_queueMemoryWBFromCacheRequest;
+    i_popIncomingRequestQueue;
   }
 
   transition(MIS, Clean_Writeback, S) {
     c_moveOwnerToSharer;
-    j_popIncomingUnblockQueue;
+    i_popIncomingRequestQueue;
   }
 
   transition(OS, Dirty_Writeback, S) {
     c_clearOwner;
-    qw_queueMemoryWBRequest;
-    j_popIncomingUnblockQueue;
+    qw_queueMemoryWBFromCacheRequest;
+    i_popIncomingRequestQueue;
   }
 
   transition(OSS, Dirty_Writeback, S) {
     c_moveOwnerToSharer;
-    qw_queueMemoryWBRequest;
-    j_popIncomingUnblockQueue;
+    qw_queueMemoryWBFromCacheRequest;
+    i_popIncomingRequestQueue;
   }
 
   transition(OSS, Clean_Writeback, S) {
     c_moveOwnerToSharer;
-    j_popIncomingUnblockQueue;
+    i_popIncomingRequestQueue;
   }
 
   transition(MI, Clean_Writeback, I) {
     c_clearOwner;
     cc_clearSharers;
-    j_popIncomingUnblockQueue;
+    i_popIncomingRequestQueue;
   }
 
   transition(OS, Clean_Writeback, S) {
     c_clearOwner;
-    j_popIncomingUnblockQueue;
+    i_popIncomingRequestQueue;
   }
 
   transition({MI, MIS}, Unblock, M) {
diff --git a/src/mem/protocol/MOESI_CMP_directory-dma.sm b/src/mem/protocol/MOESI_CMP_directory-dma.sm
index f3f9167..16dc32a 100644
--- a/src/mem/protocol/MOESI_CMP_directory-dma.sm
+++ b/src/mem/protocol/MOESI_CMP_directory-dma.sm
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2019 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2009-2013 Mark D. Hill and David A. Wood
  * Copyright (c) 2010-2011 Advanced Micro Devices, Inc.
  * All rights reserved.
@@ -106,22 +118,6 @@
   out_port(respToDirectory_out, ResponseMsg, respToDir, desc="...");
   out_port(triggerQueue_out, TriggerMsg, triggerQueue, desc="...");
 
-  in_port(dmaRequestQueue_in, SequencerMsg, mandatoryQueue, desc="...") {
-    if (dmaRequestQueue_in.isReady(clockEdge())) {
-      peek(dmaRequestQueue_in, SequencerMsg) {
-        if (in_msg.Type == SequencerRequestType:LD ) {
-          trigger(Event:ReadRequest, in_msg.LineAddress,
-                  TBEs[in_msg.LineAddress]);
-        } else if (in_msg.Type == SequencerRequestType:ST) {
-          trigger(Event:WriteRequest, in_msg.LineAddress,
-                  TBEs[in_msg.LineAddress]);
-        } else {
-          error("Invalid request type");
-        }
-      }
-    }
-  }
-
   in_port(dmaResponseQueue_in, ResponseMsg, responseFromDir, desc="...") {
     if (dmaResponseQueue_in.isReady(clockEdge())) {
       peek( dmaResponseQueue_in, ResponseMsg) {
@@ -155,6 +151,22 @@
     }
   }
 
+  in_port(dmaRequestQueue_in, SequencerMsg, mandatoryQueue, desc="...") {
+    if (dmaRequestQueue_in.isReady(clockEdge())) {
+      peek(dmaRequestQueue_in, SequencerMsg) {
+        if (in_msg.Type == SequencerRequestType:LD ) {
+          trigger(Event:ReadRequest, in_msg.LineAddress,
+                  TBEs[in_msg.LineAddress]);
+        } else if (in_msg.Type == SequencerRequestType:ST) {
+          trigger(Event:WriteRequest, in_msg.LineAddress,
+                  TBEs[in_msg.LineAddress]);
+        } else {
+          error("Invalid request type");
+        }
+      }
+    }
+  }
+
   action(s_sendReadRequest, "s", desc="Send a DMA read request to memory") {
     peek(dmaRequestQueue_in, SequencerMsg) {
       enqueue(reqToDirectory_out, RequestMsg, request_latency) {
diff --git a/src/mem/protocol/MOESI_CMP_directory-msg.sm b/src/mem/protocol/MOESI_CMP_directory-msg.sm
index 5f6f826..7dc5822 100644
--- a/src/mem/protocol/MOESI_CMP_directory-msg.sm
+++ b/src/mem/protocol/MOESI_CMP_directory-msg.sm
@@ -1,5 +1,16 @@
-
 /*
+ * Copyright (c) 2019 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 1999-2005 Mark D. Hill and David A. Wood
  * All rights reserved.
  *
@@ -40,11 +51,10 @@
   PUTO,      desc="Put Owned";
   PUTO_SHARERS,      desc="Put Owned, but sharers exist so don't remove from sharers list";
   PUTS,      desc="Put Shared";
-  WB_ACK,    desc="Writeback ack";
-  WB_ACK_DATA,    desc="Writeback ack";
-  WB_NACK,   desc="Writeback neg. ack";
   INV,       desc="Invalidation";
-  
+  WRITEBACK_CLEAN_DATA,   desc="Clean writeback (contains data)";
+  WRITEBACK_CLEAN_ACK,    desc="Clean writeback (contains no data)";
+  WRITEBACK_DIRTY_DATA,   desc="Dirty writeback (contains data)";
   DMA_READ,  desc="DMA Read";
   DMA_WRITE, desc="DMA Write";
 }
@@ -56,9 +66,9 @@
   DATA_EXCLUSIVE,    desc="Data, no processor has a copy";
   UNBLOCK,           desc="Unblock";
   UNBLOCK_EXCLUSIVE, desc="Unblock, we're in E/M";
-  WRITEBACK_CLEAN_DATA,   desc="Clean writeback (contains data)";
-  WRITEBACK_CLEAN_ACK,   desc="Clean writeback (contains no data)";
-  WRITEBACK_DIRTY_DATA,   desc="Dirty writeback (contains data)";
+  WB_ACK,            desc="Writeback ack";
+  WB_ACK_DATA,       desc="Writeback ack";
+  WB_NACK,           desc="Writeback neg. ack";
   DMA_ACK,           desc="Ack that a DMA write completed";
 }
 
@@ -100,7 +110,9 @@
   bool functionalRead(Packet *pkt) {
     // Read only those messages that contain the data
     if (Type == CoherenceRequestType:DMA_READ ||
-        Type == CoherenceRequestType:DMA_WRITE) {
+        Type == CoherenceRequestType:DMA_WRITE ||
+        Type == CoherenceRequestType:WRITEBACK_CLEAN_DATA ||
+        Type == CoherenceRequestType:WRITEBACK_DIRTY_DATA) {
         return testAndRead(addr, DataBlk, pkt);
     }
     return false;
@@ -127,9 +139,7 @@
   bool functionalRead(Packet *pkt) {
     // Read only those messages that contain the data
     if (Type == CoherenceResponseType:DATA ||
-        Type == CoherenceResponseType:DATA_EXCLUSIVE ||
-        Type == CoherenceResponseType:WRITEBACK_CLEAN_DATA ||
-        Type == CoherenceResponseType:WRITEBACK_DIRTY_DATA) {
+        Type == CoherenceResponseType:DATA_EXCLUSIVE) {
         return testAndRead(addr, DataBlk, pkt);
     }
     return false;