arch-arm: Fix big endian support in {Load,Store}Double64

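{Load,Store}Double64 did not handle all big-endian cases. For paired
32-bit accesses the two halves of the 64-bit memory word were assigned
to the registers in little-endian order even when the access was
big-endian, and for paired 64-bit accesses the individual elements
were not byte-swapped at all. Add the missing big-endian data
conversions to correct both cases.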

Change-Id: I8840613f94446e6042276779d1f02350ab57987f
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/8145
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
diff --git a/src/arch/arm/isa/insts/ldr64.isa b/src/arch/arm/isa/insts/ldr64.isa
index e035e1d..8c966e4 100644
--- a/src/arch/arm/isa/insts/ldr64.isa
+++ b/src/arch/arm/isa/insts/ldr64.isa
@@ -228,23 +228,31 @@
                 if self.size == 4:
                     accCode = '''
                         uint64_t data = cSwap(Mem_ud, isBigEndian64(xc->tcBase()));
-                        AA64FpDestP0_uw = (uint32_t)data;
+                        AA64FpDestP0_uw = isBigEndian64(xc->tcBase())
+                                            ? (data >> 32)
+                                            : (uint32_t)data;
                         AA64FpDestP1_uw = 0;
                         AA64FpDestP2_uw = 0;
                         AA64FpDestP3_uw = 0;
-                        AA64FpDest2P0_uw = (data >> 32);
+                        AA64FpDest2P0_uw = isBigEndian64(xc->tcBase())
+                                            ? (uint32_t)data
+                                            : (data >> 32);
                         AA64FpDest2P1_uw = 0;
                         AA64FpDest2P2_uw = 0;
                         AA64FpDest2P3_uw = 0;
                     '''
                 elif self.size == 8:
                     accCode = '''
-                        AA64FpDestP0_uw = (uint32_t)Mem_tud[0];
-                        AA64FpDestP1_uw = (uint32_t)(Mem_tud[0] >> 32);
+                        uint64_t data_a = cSwap(Mem_tud[0],
+                                                isBigEndian64(xc->tcBase()));
+                        uint64_t data_b = cSwap(Mem_tud[1],
+                                                isBigEndian64(xc->tcBase()));
+                        AA64FpDestP0_uw = (uint32_t)data_a;
+                        AA64FpDestP1_uw = (uint32_t)(data_a >> 32);
                         AA64FpDestP2_uw = 0;
                         AA64FpDestP3_uw = 0;
-                        AA64FpDest2P0_uw = (uint32_t)Mem_tud[1];
-                        AA64FpDest2P1_uw = (uint32_t)(Mem_tud[1] >> 32);
+                        AA64FpDest2P0_uw = (uint32_t)data_b;
+                        AA64FpDest2P1_uw = (uint32_t)(data_b >> 32);
                         AA64FpDest2P2_uw = 0;
                         AA64FpDest2P3_uw = 0;
                     '''
@@ -254,26 +262,38 @@
                         accCode = '''
                             uint64_t data = cSwap(Mem_ud,
                                                   isBigEndian64(xc->tcBase()));
-                            XDest = sext<32>((uint32_t)data);
-                            XDest2 = sext<32>(data >> 32);
+                            XDest = isBigEndian64(xc->tcBase())
+                                    ? sext<32>(data >> 32)
+                                    : sext<32>((uint32_t)data);
+                            XDest2 = isBigEndian64(xc->tcBase())
+                                     ? sext<32>((uint32_t)data)
+                                     : sext<32>(data >> 32);
                         '''
                     elif self.size == 8:
                         accCode = '''
-                            XDest = Mem_tud[0];
-                            XDest2 = Mem_tud[1];
+                            XDest = cSwap(Mem_tud[0],
+                                          isBigEndian64(xc->tcBase()));
+                            XDest2 = cSwap(Mem_tud[1],
+                                           isBigEndian64(xc->tcBase()));
                         '''
                 else:
                     if self.size == 4:
                         accCode = '''
                             uint64_t data = cSwap(Mem_ud,
                                                   isBigEndian64(xc->tcBase()));
-                            XDest = (uint32_t)data;
-                            XDest2 = data >> 32;
+                            XDest = isBigEndian64(xc->tcBase())
+                                    ? (data >> 32)
+                                    : (uint32_t)data;
+                            XDest2 = isBigEndian64(xc->tcBase())
+                                    ? (uint32_t)data
+                                    : (data >> 32);
                         '''
                     elif self.size == 8:
                         accCode = '''
-                            XDest = Mem_tud[0];
-                            XDest2 = Mem_tud[1];
+                            XDest = cSwap(Mem_tud[0],
+                                          isBigEndian64(xc->tcBase()));
+                            XDest2 = cSwap(Mem_tud[1],
+                                           isBigEndian64(xc->tcBase()));
                         '''
             self.codeBlobs["memacc_code"] = accCode
 
diff --git a/src/arch/arm/isa/insts/str64.isa b/src/arch/arm/isa/insts/str64.isa
index 324d1fc..0c55b4b 100644
--- a/src/arch/arm/isa/insts/str64.isa
+++ b/src/arch/arm/isa/insts/str64.isa
@@ -212,14 +212,18 @@
             if self.flavor == "fp":
                 accCode = '''
                     uint64_t data = AA64FpDest2P0_uw;
-                    data = (data << 32) | AA64FpDestP0_uw;
+                    data = isBigEndian64(xc->tcBase())
+                            ? ((uint64_t(AA64FpDestP0_uw) << 32) | data)
+                            : ((data << 32) | AA64FpDestP0_uw);
                     Mem_ud = cSwap(data, isBigEndian64(xc->tcBase()));
                 '''
             else:
                 if self.size == 4:
                     accCode = '''
                         uint64_t data = XDest2_uw;
-                        data = (data << 32) | XDest_uw;
+                        data = isBigEndian64(xc->tcBase())
+                                ? ((uint64_t(XDest_uw) << 32) | data)
+                                : ((data << 32) | XDest_uw);
                         Mem_ud = cSwap(data, isBigEndian64(xc->tcBase()));
                     '''
                 elif self.size == 8:
@@ -227,8 +231,8 @@
                         // This temporary needs to be here so that the parser
                         // will correctly identify this instruction as a store.
                         std::array<uint64_t, 2> temp;
-                        temp[0] = XDest_ud;
-                        temp[1] = XDest2_ud;
+                        temp[0] = cSwap(XDest_ud,isBigEndian64(xc->tcBase()));
+                        temp[1] = cSwap(XDest2_ud,isBigEndian64(xc->tcBase()));
                         Mem_tud = temp;
                     '''
             self.codeBlobs["memacc_code"] = accCode