cpu: Add support for CMOs in the cpu models

Cache maintenance operations (CMOs) go through the write channel of
the cpu. This change makes sure that the cpu does not try to fill
such packets with data, since these requests carry none.
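
The check the cpu models rely on is a simple flag-mask test. The
sketch below is a standalone illustration, not gem5's headers: the
flag encodings are made-up placeholders and needsStoreData is an
illustrative helper; only the composition of STORE_NO_DATA mirrors
this patch:

    #include <cassert>
    #include <cstdint>

    using FlagsType = uint64_t;

    // Placeholder encodings; gem5 defines the real values elsewhere.
    const FlagsType CACHE_BLOCK_ZERO = 0x1;
    const FlagsType CLEAN            = 0x2;
    const FlagsType INVALIDATE       = 0x4;

    // As in the patch: any of these means the write carries no data.
    const FlagsType STORE_NO_DATA = CACHE_BLOCK_ZERO | CLEAN | INVALIDATE;

    bool needsStoreData(FlagsType flags)
    {
        return !(flags & STORE_NO_DATA);
    }

    int main()
    {
        assert(!needsStoreData(CLEAN));            // CMO: no data
        assert(!needsStoreData(CACHE_BLOCK_ZERO)); // zeroing: zeros implied
        assert(needsStoreData(0));                 // ordinary store
        return 0;
    }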

Change-Id: Ic83205bb1cda7967636d88f15adcb475eb38d158
Reviewed-by: Stephan Diestelhorst <stephan.diestelhorst@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/5055
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
Reviewed-by: Jason Lowe-Power <jason@lowepower.com>
diff --git a/src/cpu/checker/cpu.cc b/src/cpu/checker/cpu.cc
index b22fb2a..48fcb20 100644
--- a/src/cpu/checker/cpu.cc
+++ b/src/cpu/checker/cpu.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011,2013 ARM Limited
+ * Copyright (c) 2011,2013,2017 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -310,7 +310,7 @@
    // If the request is to ZERO a cache block, there is no data to check
    // against, but it's all zero. We need something to compare to, so use a
    // const set of zeros.
-   if (flags & Request::CACHE_BLOCK_ZERO) {
+   if (flags & Request::STORE_NO_DATA) {
        assert(!data);
        assert(sizeof(zero_data) <= fullSize);
        data = zero_data;
diff --git a/src/cpu/minor/lsq.cc b/src/cpu/minor/lsq.cc
index b7d5360..cb0611b 100644
--- a/src/cpu/minor/lsq.cc
+++ b/src/cpu/minor/lsq.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2014 ARM Limited
+ * Copyright (c) 2013-2014,2017 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -679,8 +679,12 @@
     while (ret == NoAddrRangeCoverage && i != slots.rend()) {
         LSQRequestPtr slot = *i;
 
+        /* Cache maintenance instructions go down via the store path
+         * but they carry no data and they shouldn't be considered for
+         * forwarding */
         if (slot->packet &&
-            slot->inst->id.threadId == request->inst->id.threadId) {
+            slot->inst->id.threadId == request->inst->id.threadId &&
+            !slot->packet->req->isCacheMaintenance()) {
             AddrRangeCoverage coverage = slot->containsAddrRangeOf(request);
 
             if (coverage != NoAddrRangeCoverage) {
@@ -1492,7 +1496,7 @@
         /* request_data becomes the property of a ...DataRequest (see below)
          *  and destroyed by its destructor */
         request_data = new uint8_t[size];
-        if (flags & Request::CACHE_BLOCK_ZERO) {
+        if (flags & Request::STORE_NO_DATA) {
-            /* For cache zeroing, just use zeroed data */
+            /* For cache zeroing and CMOs, just use zeroed data */
             std::memset(request_data, 0, size);
         } else {
@@ -1562,10 +1566,13 @@
     if (sender_state)
         ret->pushSenderState(sender_state);
 
-    if (isLoad)
+    if (isLoad) {
         ret->allocate();
-    else
+    } else if (!request.isCacheMaintenance()) {
+        // CMOs are treated as stores but they carry no data. All
+        // other stores need to attach their data to the packet.
         ret->dataDynamic(data);
+    }
 
     return ret;
 }
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index b8e8955..a2813b3 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2014 ARM Limited
+ * Copyright (c) 2012-2014,2017 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -650,10 +650,14 @@
 
         store_size = storeQueue[store_idx].size;
 
-        if (store_size == 0)
+        if (!store_size || storeQueue[store_idx].inst->strictlyOrdered() ||
+            (storeQueue[store_idx].req &&
+             storeQueue[store_idx].req->isCacheMaintenance())) {
+            // Cache maintenance instructions go down via the store
+            // path but they carry no data and they shouldn't be
+            // considered for forwarding
             continue;
-        else if (storeQueue[store_idx].inst->strictlyOrdered())
-            continue;
+        }
 
         assert(storeQueue[store_idx].inst->effAddrValid());
 
@@ -894,9 +898,9 @@
     storeQueue[store_idx].sreqHigh = sreqHigh;
     unsigned size = req->getSize();
     storeQueue[store_idx].size = size;
-    storeQueue[store_idx].isAllZeros = req->getFlags() & Request::CACHE_BLOCK_ZERO;
-    assert(size <= sizeof(storeQueue[store_idx].data) ||
-            (req->getFlags() & Request::CACHE_BLOCK_ZERO));
+    bool store_no_data = req->getFlags() & Request::STORE_NO_DATA;
+    storeQueue[store_idx].isAllZeros = store_no_data;
+    assert(size <= sizeof(storeQueue[store_idx].data) || store_no_data);
 
     // Split stores can only occur in ISAs with unaligned memory accesses.  If
     // a store request has been split, sreqLow and sreqHigh will be non-null.
@@ -904,7 +908,8 @@
         storeQueue[store_idx].isSplit = true;
     }
 
-    if (!(req->getFlags() & Request::CACHE_BLOCK_ZERO))
+    if (!(req->getFlags() & Request::CACHE_BLOCK_ZERO) &&
+        !req->isCacheMaintenance())
         memcpy(storeQueue[store_idx].data, data, size);
 
     // This function only writes the data to the store queue, so no fault
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index eea7615..f3596b6 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -430,7 +430,7 @@
 
     if (data == NULL) {
         assert(size <= 64);
-        assert(flags & Request::CACHE_BLOCK_ZERO);
+        assert(flags & Request::STORE_NO_DATA);
-        // This must be a cache block cleaning request
+        // This must be a cache block zeroing or maintenance request
         data = zero_array;
     }
@@ -462,14 +462,11 @@
 
         // Now do the access.
         if (fault == NoFault) {
-            MemCmd cmd = MemCmd::WriteReq; // default
             bool do_access = true;  // flag to suppress cache access
 
             if (req->isLLSC()) {
-                cmd = MemCmd::StoreCondReq;
                 do_access = TheISA::handleLockedWrite(thread, req, dcachePort.cacheBlockMask);
             } else if (req->isSwap()) {
-                cmd = MemCmd::SwapReq;
                 if (req->isCondSwap()) {
                     assert(res);
                     req->setExtraData(*res);
@@ -477,7 +474,9 @@
             }
 
             if (do_access && !req->getFlags().isSet(Request::NO_ACCESS)) {
-                Packet pkt = Packet(req, cmd);
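+                // makeWriteCmd derives the matching command from the
+                // request: WriteReq, StoreCondReq, SwapReq, or a CMO.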
+                Packet pkt(req, Packet::makeWriteCmd(req));
                 pkt.dataStatic(data);
 
                 if (req->isMmappedIpr()) {
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index c38f210..961e319 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -510,7 +510,7 @@
     BaseTLB::Mode mode = BaseTLB::Write;
 
     if (data == NULL) {
-        assert(flags & Request::CACHE_BLOCK_ZERO);
+        assert(flags & Request::STORE_NO_DATA);
-        // This must be a cache block cleaning request
+        // This must be a cache block zeroing or maintenance request
         memset(newData, 0, size);
     } else {
diff --git a/src/mem/request.hh b/src/mem/request.hh
index 2586935..5cb08ca 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -202,6 +202,9 @@
          */
         STICKY_FLAGS = INST_FETCH
     };
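+    /** Flag mask for write requests that carry no store data. */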
+    static const FlagsType STORE_NO_DATA = CACHE_BLOCK_ZERO |
+        CLEAN | INVALIDATE;
 
     /** Master Ids that are statically allocated
      * @{*/