arch-x86: ignore non-temporal hint for movntps/movntpd SSE insts

Make the implementation of movntps/movntpd consistent with the other
non-temporal instructions: the non-temporal hint is ignored, and these
instructions are implemented as cacheable instructions.

This change adds a warning to let the user know about this workaround.
It also adds the address check for the second part of the move.

Change-Id: I811652b24cf39ca2f5c5d4c9e9e417f69190b55c
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/20408
Reviewed-by: Jason Lowe-Power <jason@lowepower.com>
Reviewed-by: Gabe Black <gabeblack@google.com>
Maintainer: Jason Lowe-Power <jason@lowepower.com>
Tested-by: kokoro <noreply+kokoro@google.com>
diff --git a/src/arch/x86/isa/decoder/two_byte_opcodes.isa b/src/arch/x86/isa/decoder/two_byte_opcodes.isa
index 348bff3..dfb748e 100644
--- a/src/arch/x86/isa/decoder/two_byte_opcodes.isa
+++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa
@@ -383,6 +383,8 @@
                     0x0: MOVAPS(Vq,Wq);
                     0x1: MOVAPS(Wq,Vq);
                     0x2: CVTPI2PS(Vq,Qq);
+                    //Non-temporal hint is ignored since we don't have
+                    //proper support for it in the memory system.
                     0x3: MOVNTPS(Mq,Vq);
                     0x4: CVTTPS2PI(Pq,Wq);
                     0x5: CVTPS2PI(Pq,Wq);
@@ -401,6 +403,8 @@
                     0x0: MOVAPD(Vo,Wo);
                     0x1: MOVAPD(Wo,Vo);
                     0x2: CVTPI2PD(Vo,Qq);
+                    //Non-temporal hint is ignored since we don't have
+                    //proper support for it in the memory system.
                     0x3: MOVNTPD(Mq,Vq);
                     0x4: CVTTPD2PI(Pq,Wo);
                     0x5: CVTPD2PI(Pq,Wo);
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move_non_temporal.py b/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move_non_temporal.py
index a6e392f..06dba59 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move_non_temporal.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move_non_temporal.py
@@ -40,29 +40,37 @@
 microcode = '''
 # movntps is basically the same as movaps, excepting the caching hint and
 # ordering constraints
+# We are ignoring the non-temporal hint.
 def macroop MOVNTPS_M_XMM {
-    # Check low address.
-    stfp xmmh, seg, sib, "DISPLACEMENT + 8", dataSize=8, uncacheable=True
-    stfp xmml, seg, sib, disp, dataSize=8, uncacheable=True
+    warn_once "MOVNTPS: Ignoring non-temporal hint, modeling as cacheable!"
+    cda seg, sib, "DISPLACEMENT + 8", dataSize=8
+    stfp xmmh, seg, sib, "DISPLACEMENT + 8", dataSize=8
+    stfp xmml, seg, sib, disp, dataSize=8
 };
 
 def macroop MOVNTPS_P_XMM {
+    warn_once "MOVNTPS_P: Ignoring non-temporal hint, modeling as cacheable!"
     rdip t7
-    # Check low address.
-    stfp xmmh, seg, riprel, "DISPLACEMENT + 8", dataSize=8, uncacheable=True
-    stfp xmml, seg, riprel, disp, dataSize=8, uncacheable=True
+    cda seg, riprel, "DISPLACEMENT + 8", dataSize=8
+    stfp xmmh, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+    stfp xmml, seg, riprel, disp, dataSize=8
 };
 
 # movntpd is basically the same as movapd, excepting the caching hint and
 # ordering constraints
+# We are ignoring the non-temporal hint.
 def macroop MOVNTPD_M_XMM {
-    stfp xmml, seg, sib, "DISPLACEMENT", dataSize=8, uncacheable=True
-    stfp xmmh, seg, sib, "DISPLACEMENT + 8", dataSize=8, uncacheable=True
+    warn_once "MOVNTPD: Ignoring non-temporal hint, modeling as cacheable!"
+    cda seg, sib, "DISPLACEMENT + 8", dataSize=8
+    stfp xmml, seg, sib, "DISPLACEMENT", dataSize=8
+    stfp xmmh, seg, sib, "DISPLACEMENT + 8", dataSize=8
 };
 
 def macroop MOVNTPD_P_XMM {
+    warn_once "MOVNTPD_P: Ignoring non-temporal hint, modeling as cacheable!"
     rdip t7
-    stfp xmml, seg, riprel, "DISPLACEMENT", dataSize=8, uncacheable=True
-    stfp xmmh, seg, riprel, "DISPLACEMENT + 8", dataSize=8, uncacheable=True
+    cda seg, riprel, "DISPLACEMENT + 8", dataSize=8
+    stfp xmml, seg, riprel, "DISPLACEMENT", dataSize=8
+    stfp xmmh, seg, riprel, "DISPLACEMENT + 8", dataSize=8
 };
 '''