arch-arm: Add the PRFM PST instruction for Arm

Currently PRFM supports only PLD, but PST (prefetch for store) is
also important for latency hiding. This change also fixes a bug in the
disassembler so that the prfop is displayed correctly.

Change-Id: I9144e7233900aa2d555e1c1a6a2c2e41d837aa13
Signed-off-by: Yuetsu Kodama <yuetsu.kodama@riken.jp>
Reviewed-on: https://gem5-review.googlesource.com/c/13675
Reviewed-by: Jason Lowe-Power <jason@lowepower.com>
Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
diff --git a/src/arch/arm/insts/mem64.cc b/src/arch/arm/insts/mem64.cc
index fa8fdf0..660e56e 100644
--- a/src/arch/arm/insts/mem64.cc
+++ b/src/arch/arm/insts/mem64.cc
@@ -64,7 +64,11 @@
 Memory64::startDisassembly(std::ostream &os) const
 {
     printMnemonic(os, "", false);
-    printIntReg(os, dest);
+    if (isDataPrefetch()||isInstPrefetch()){
+        printPFflags(os, dest);
+    }else{
+        printIntReg(os, dest);
+    }
     ccprintf(os, ", [");
     printIntReg(os, base);
 }
diff --git a/src/arch/arm/insts/static_inst.cc b/src/arch/arm/insts/static_inst.cc
index bd6f115..f245cd4 100644
--- a/src/arch/arm/insts/static_inst.cc
+++ b/src/arch/arm/insts/static_inst.cc
@@ -324,6 +324,16 @@
     }
 }
 
+void ArmStaticInst::printPFflags(std::ostream &os, int flag) const
+{   // Writes flag to os as the concatenation <prfop><target><policy>.
+    const char *flagtoprfop[]= { "PLD", "PLI", "PST", "Reserved"};  // bits 4:3
+    const char *flagtotarget[] = { "L1", "L2", "L3", "Reserved"};  // bits 2:1
+    const char *flagtopolicy[] = { "KEEP", "STRM"};  // bit 0
+
+    ccprintf(os, "%s%s%s", flagtoprfop[(flag>>3)&3],
+             flagtotarget[(flag>>1)&3], flagtopolicy[flag&1]);
+}
+
 void
 ArmStaticInst::printFloatReg(std::ostream &os, RegIndex reg_idx) const
 {
diff --git a/src/arch/arm/isa/insts/ldr64.isa b/src/arch/arm/isa/insts/ldr64.isa
index 7c17726..54e50d7 100644
--- a/src/arch/arm/isa/insts/ldr64.isa
+++ b/src/arch/arm/isa/insts/ldr64.isa
@@ -74,6 +74,10 @@
             elif self.flavor == "iprefetch":
                 self.memFlags.append("Request::PREFETCH")
                 self.instFlags = ['IsInstPrefetch']
+            elif self.flavor == "mprefetch":
+                self.memFlags.append("((((dest>>3)&3)==2)? \
+                     (Request::PF_EXCLUSIVE):(Request::PREFETCH))")
+                self.instFlags = ['IsDataPrefetch']
             if self.micro:
                 self.instFlags.append("IsMicroop")
 
@@ -176,7 +180,7 @@
             self.buildEACode()
 
             # Code that actually handles the access
-            if self.flavor in ("dprefetch", "iprefetch"):
+            if self.flavor in ("dprefetch", "iprefetch", "mprefetch"):
                 accCode = 'uint64_t temp M5_VAR_USED = Mem%s;'
             elif self.flavor == "fp":
                 if self.size in (1, 2, 4):
@@ -365,10 +369,11 @@
     buildLoads64("ldr", "LDRSFP64", 4, False, flavor="fp")
     buildLoads64("ldr", "LDRDFP64", 8, False, flavor="fp")
 
-    LoadImm64("prfm", "PRFM64_IMM", 8, flavor="dprefetch").emit()
-    LoadReg64("prfm", "PRFM64_REG", 8, flavor="dprefetch").emit()
-    LoadLit64("prfm", "PRFM64_LIT", 8, literal=True, flavor="dprefetch").emit()
-    LoadImm64("prfum", "PRFUM64_IMM", 8, flavor="dprefetch").emit()
+    LoadImm64("prfm", "PRFM64_IMM", 8, flavor="mprefetch").emit()
+    LoadReg64("prfm", "PRFM64_REG", 8, flavor="mprefetch").emit()
+    LoadLit64("prfm", "PRFM64_LIT", 8, literal=True,
+              flavor="mprefetch").emit()
+    LoadImm64("prfum", "PRFUM64_IMM", 8, flavor="mprefetch").emit()
 
     LoadImm64("ldurb", "LDURB64_IMM", 1, False).emit()
     LoadImm64("ldursb", "LDURSBW64_IMM", 1, True).emit()
diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc
index ed23ffd..7bb0e0f 100644
--- a/src/mem/cache/base.cc
+++ b/src/mem/cache/base.cc
@@ -1663,7 +1663,7 @@
 
 // should writebacks be included here?  prior code was inconsistent...
 #define SUM_NON_DEMAND(s) \
-    (s[MemCmd::SoftPFReq] + s[MemCmd::HardPFReq])
+    (s[MemCmd::SoftPFReq] + s[MemCmd::HardPFReq] + s[MemCmd::SoftPFExReq])
 
     demandHits
         .name(name() + ".demand_hits")
diff --git a/src/mem/packet.cc b/src/mem/packet.cc
index 866bc90..4369e16 100644
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@@ -105,6 +105,9 @@
     /* SoftPFReq */
     { SET4(IsRead, IsRequest, IsSWPrefetch, NeedsResponse),
             SoftPFResp, "SoftPFReq" },
+    /* SoftPFExReq */
+    { SET6(IsRead, NeedsWritable, IsInvalidate, IsRequest,
+           IsSWPrefetch, NeedsResponse), SoftPFResp, "SoftPFExReq" },
     /* HardPFReq */
     { SET5(IsRead, IsRequest, IsHWPrefetch, NeedsResponse, FromCache),
             HardPFResp, "HardPFReq" },
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index f0b7c2f..c59db36 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -94,6 +94,7 @@
         WriteClean,            // writes dirty data below without evicting
         CleanEvict,
         SoftPFReq,
+        SoftPFExReq,
         HardPFReq,
         SoftPFResp,
         HardPFResp,
@@ -859,6 +860,8 @@
     {
         if (req->isLLSC())
             return MemCmd::LoadLockedReq;
+        else if (req->isPrefetchEx())
+            return MemCmd::SoftPFExReq;
         else if (req->isPrefetch())
             return MemCmd::SoftPFReq;
         else
diff --git a/src/mem/request.hh b/src/mem/request.hh
index 3df29aa..2a53c21 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -860,7 +860,9 @@
     bool isUncacheable() const { return _flags.isSet(UNCACHEABLE); }
     bool isStrictlyOrdered() const { return _flags.isSet(STRICT_ORDER); }
     bool isInstFetch() const { return _flags.isSet(INST_FETCH); }
-    bool isPrefetch() const { return _flags.isSet(PREFETCH); }
+    bool isPrefetch() const { return (_flags.isSet(PREFETCH) ||
+                                      _flags.isSet(PF_EXCLUSIVE)); }
+    bool isPrefetchEx() const { return _flags.isSet(PF_EXCLUSIVE); }
     bool isLLSC() const { return _flags.isSet(LLSC); }
     bool isPriv() const { return _flags.isSet(PRIVILEGED); }
     bool isLockedRMW() const { return _flags.isSet(LOCKED_RMW); }