arch-arm: Add support for SVE load/store structures

Change-Id: I4d9cde18dfc3d478eacc156de6a4a9721eb9e2ff
Signed-off-by: Giacomo Gabrielli <giacomo.gabrielli@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/13524
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
diff --git a/src/arch/arm/insts/sve_macromem.hh b/src/arch/arm/insts/sve_macromem.hh
index b365dcb..8613181 100644
--- a/src/arch/arm/insts/sve_macromem.hh
+++ b/src/arch/arm/insts/sve_macromem.hh
@@ -45,6 +45,295 @@
 
 namespace ArmISA {
 
+template <typename Element,  // element type (uint8_t .. uint64_t)
+         template <typename> class MicroopLdMemType,  // load micro-op
+         template <typename> class MicroopDeIntrlvType>  // de-interleave uop
+class SveLdStructSS : public PredMacroOp  // SVE struct load, reg offset
+{
+  protected:
+    IntRegIndex dest;  // first destination vector (others wrap mod 32)
+    IntRegIndex gp;  // predicate register
+    IntRegIndex base;  // base address register
+    IntRegIndex offset;  // offset register
+    uint8_t numregs;  // number of vector registers in the structure
+
+  public:
+    SveLdStructSS(const char* mnem, ExtMachInst machInst, OpClass __opClass,
+            IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
+            IntRegIndex _offset, uint8_t _numregs)
+        : PredMacroOp(mnem, machInst, __opClass),
+          dest(_dest), gp(_gp), base(_base), offset(_offset), numregs(_numregs)
+    {
+        numMicroops = numregs * 2;  // one load + one de-interleave per reg
+
+        microOps = new StaticInstPtr[numMicroops];
+        // First half: load micro-ops targeting the INTRLVREG0 + i temporaries.
+        for (int i = 0; i < numregs; ++i) {
+            microOps[i] = new MicroopLdMemType<Element>(
+                    mnem, machInst, static_cast<IntRegIndex>(INTRLVREG0 + i),
+                    _gp, _base, _offset, _numregs, i);
+        }
+        for (int i = 0; i < numregs; ++i) {  // second half: de-interleave
+            microOps[i + numregs] = new MicroopDeIntrlvType<Element>(
+                    mnem, machInst, static_cast<IntRegIndex>((_dest + i) % 32),
+                    _numregs, i, this);
+        }
+
+        microOps[0]->setFirstMicroop();
+        microOps[numMicroops - 1]->setLastMicroop();
+        // Every micro-op except the last one is marked delayed-commit.
+        for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
+            (*uop)->setDelayedCommit();
+        }
+    }
+    // Panics if called: execute() is not expected to run on the macro-op.
+    Fault
+    execute(ExecContext *, Trace::InstRecord *) const
+    {
+        panic("Execute method called when it shouldn't!");
+        return NoFault;
+    }
+    // Disassembly: mnemonic, {reg list}, predicate/z, [base, offset].
+    std::string
+    generateDisassembly(Addr pc, const SymbolTable *symtab) const
+    {
+        std::stringstream ss;
+        printMnemonic(ss, "", false);
+        ccprintf(ss, "{");
+        for (int i = 0; i < numregs; ++i) {
+            printVecReg(ss, (dest + i) % 32, true);
+            if (i < numregs - 1)
+                ccprintf(ss, ", ");
+        }
+        ccprintf(ss, "}, ");
+        printVecPredReg(ss, gp);
+        ccprintf(ss, "/z, [");
+        printIntReg(ss, base);
+        ccprintf(ss, ", ");
+        printIntReg(ss, offset);
+        ccprintf(ss, "]");
+        return ss.str();
+    }
+};
+
+template <typename Element,  // element type (uint8_t .. uint64_t)
+         template <typename> class MicroopStMemType,  // store micro-op
+         template <typename> class MicroopIntrlvType>  // interleave micro-op
+class SveStStructSS : public PredMacroOp  // SVE struct store, reg offset
+{
+  protected:
+    IntRegIndex dest;  // first vector register of the list (wraps mod 32)
+    IntRegIndex gp;  // predicate register
+    IntRegIndex base;  // base address register
+    IntRegIndex offset;  // offset register
+    uint8_t numregs;  // number of vector registers in the structure
+
+  public:
+    SveStStructSS(const char* mnem, ExtMachInst machInst, OpClass __opClass,
+            IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
+            IntRegIndex _offset, uint8_t _numregs)
+        : PredMacroOp(mnem, machInst, __opClass),
+          dest(_dest), gp(_gp), base(_base), offset(_offset), numregs(_numregs)
+    {
+        numMicroops = numregs * 2;  // one interleave + one store per reg
+
+        microOps = new StaticInstPtr[numMicroops];
+        // First half: interleave micro-ops targeting INTRLVREG0 + i.
+        for (int i = 0; i < numregs; ++i) {
+            microOps[i] = new MicroopIntrlvType<Element>(
+                    mnem, machInst, static_cast<IntRegIndex>(INTRLVREG0 + i),
+                    _dest, _numregs, i, this);
+        }
+        // Second half: store micro-ops operating on INTRLVREG0 + i.
+        for (int i = 0; i < numregs; ++i) {
+            microOps[i + numregs] = new MicroopStMemType<Element>(
+                    mnem, machInst, static_cast<IntRegIndex>(INTRLVREG0 + i),
+                    _gp, _base, _offset, _numregs, i);
+        }
+
+        microOps[0]->setFirstMicroop();
+        microOps[numMicroops - 1]->setLastMicroop();
+        // Every micro-op except the last one is marked delayed-commit.
+        for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
+            (*uop)->setDelayedCommit();
+        }
+    }
+    // Panics if called: execute() is not expected to run on the macro-op.
+    Fault
+    execute(ExecContext *, Trace::InstRecord *) const
+    {
+        panic("Execute method called when it shouldn't!");
+        return NoFault;
+    }
+    // Disassembly: mnemonic, {reg list}, predicate, [base, offset].
+    std::string
+    generateDisassembly(Addr pc, const SymbolTable *symtab) const
+    {
+        std::stringstream ss;
+        printMnemonic(ss, "", false);
+        ccprintf(ss, "{");
+        for (int i = 0; i < numregs; ++i) {
+            printVecReg(ss, (dest + i) % 32, true);
+            if (i < numregs - 1)
+                ccprintf(ss, ", ");
+        }
+        ccprintf(ss, "}, ");
+        printVecPredReg(ss, gp);
+        ccprintf(ss, ", [");
+        printIntReg(ss, base);
+        ccprintf(ss, ", ");
+        printIntReg(ss, offset);
+        ccprintf(ss, "]");
+        return ss.str();
+    }
+};
+
+
+template <typename Element,  // element type (uint8_t .. uint64_t)
+         template <typename> class MicroopLdMemType,  // load micro-op
+         template <typename> class MicroopDeIntrlvType>  // de-interleave uop
+class SveLdStructSI : public PredMacroOp  // SVE struct load, imm offset
+{
+  protected:
+    IntRegIndex dest;  // first destination vector (others wrap mod 32)
+    IntRegIndex gp;  // predicate register
+    IntRegIndex base;  // base address register
+    int64_t imm;  // immediate offset (disassembled with "MUL VL")
+    uint8_t numregs;  // number of vector registers in the structure
+
+  public:
+    SveLdStructSI(const char* mnem, ExtMachInst machInst, OpClass __opClass,
+            IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
+            int64_t _imm, uint8_t _numregs)
+        : PredMacroOp(mnem, machInst, __opClass),
+          dest(_dest), gp(_gp), base(_base), imm(_imm), numregs(_numregs)
+    {
+        numMicroops = numregs * 2;  // one load + one de-interleave per reg
+
+        microOps = new StaticInstPtr[numMicroops];
+        // First half: load micro-ops targeting the INTRLVREG0 + i temporaries.
+        for (int i = 0; i < numregs; ++i) {
+            microOps[i] = new MicroopLdMemType<Element>(
+                    mnem, machInst, static_cast<IntRegIndex>(INTRLVREG0 + i),
+                    _gp, _base, _imm, _numregs, i);
+        }
+        for (int i = 0; i < numregs; ++i) {  // second half: de-interleave
+            microOps[i + numregs] = new MicroopDeIntrlvType<Element>(
+                    mnem, machInst, static_cast<IntRegIndex>((_dest + i) % 32),
+                    _numregs, i, this);
+        }
+
+        microOps[0]->setFirstMicroop();
+        microOps[numMicroops - 1]->setLastMicroop();
+        // Every micro-op except the last one is marked delayed-commit.
+        for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
+            (*uop)->setDelayedCommit();
+        }
+    }
+    // Panics if called: execute() is not expected to run on the macro-op.
+    Fault
+    execute(ExecContext *, Trace::InstRecord *) const
+    {
+        panic("Execute method called when it shouldn't!");
+        return NoFault;
+    }
+    // Disassembly: mnemonic, {reg list}, predicate/z, [base{, #imm, MUL VL}].
+    std::string
+    generateDisassembly(Addr pc, const SymbolTable *symtab) const
+    {
+        std::stringstream ss;
+        printMnemonic(ss, "", false);
+        ccprintf(ss, "{");
+        for (int i = 0; i < numregs; ++i) {
+            printVecReg(ss, (dest + i) % 32, true);
+            if (i < numregs - 1)
+                ccprintf(ss, ", ");
+        }
+        ccprintf(ss, "}, ");
+        printVecPredReg(ss, gp);
+        ccprintf(ss, "/z, [");
+        printIntReg(ss, base);
+        if (imm != 0) {  // a zero immediate is omitted from the output
+            ccprintf(ss, ", #%d, MUL VL", imm);
+        }
+        ccprintf(ss, "]");
+        return ss.str();
+    }
+};
+
+template <typename Element,  // element type (uint8_t .. uint64_t)
+         template <typename> class MicroopStMemType,  // store micro-op
+         template <typename> class MicroopIntrlvType>  // interleave micro-op
+class SveStStructSI : public PredMacroOp  // SVE struct store, imm offset
+{
+  protected:
+    IntRegIndex dest;  // first vector register of the list (wraps mod 32)
+    IntRegIndex gp;  // predicate register
+    IntRegIndex base;  // base address register
+    int64_t imm;  // immediate offset (disassembled with "MUL VL")
+    uint8_t numregs;  // number of vector registers in the structure
+
+  public:
+    SveStStructSI(const char* mnem, ExtMachInst machInst, OpClass __opClass,
+            IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
+            int64_t _imm, uint8_t _numregs)
+        : PredMacroOp(mnem, machInst, __opClass),
+          dest(_dest), gp(_gp), base(_base), imm(_imm), numregs(_numregs)
+    {
+        numMicroops = numregs * 2;  // one interleave + one store per reg
+
+        microOps = new StaticInstPtr[numMicroops];
+        // First half: interleave micro-ops targeting INTRLVREG0 + i.
+        for (int i = 0; i < numregs; ++i) {
+            microOps[i] = new MicroopIntrlvType<Element>(
+                    mnem, machInst, static_cast<IntRegIndex>(INTRLVREG0 + i),
+                    _dest, _numregs, i, this);
+        }
+        // Second half: store micro-ops operating on INTRLVREG0 + i.
+        for (int i = 0; i < numregs; ++i) {
+            microOps[i + numregs] = new MicroopStMemType<Element>(
+                    mnem, machInst, static_cast<IntRegIndex>(INTRLVREG0 + i),
+                    _gp, _base, _imm, _numregs, i);
+        }
+
+        microOps[0]->setFirstMicroop();
+        microOps[numMicroops - 1]->setLastMicroop();
+        // Every micro-op except the last one is marked delayed-commit.
+        for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
+            (*uop)->setDelayedCommit();
+        }
+    }
+    // Panics if called: execute() is not expected to run on the macro-op.
+    Fault
+    execute(ExecContext *, Trace::InstRecord *) const
+    {
+        panic("Execute method called when it shouldn't!");
+        return NoFault;
+    }
+    // Disassembly: mnemonic, {reg list}, predicate, [base{, #imm, MUL VL}].
+    std::string
+    generateDisassembly(Addr pc, const SymbolTable *symtab) const
+    {
+        std::stringstream ss;
+        printMnemonic(ss, "", false);
+        ccprintf(ss, "{");
+        for (int i = 0; i < numregs; ++i) {
+            printVecReg(ss, (dest + i) % 32, true);
+            if (i < numregs - 1)
+                ccprintf(ss, ", ");
+        }
+        ccprintf(ss, "}, ");
+        printVecPredReg(ss, gp);
+        ccprintf(ss, ", [");
+        printIntReg(ss, base);
+        if (imm != 0) {  // a zero immediate is omitted from the output
+            ccprintf(ss, ", #%d, MUL VL", imm);
+        }
+        ccprintf(ss, "]");
+        return ss.str();
+    }
+};
+
 template <typename RegElemType, typename MemElemType,
           template <typename, typename> class MicroopType,
           template <typename> class FirstFaultWritebackMicroopType>
diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa b/src/arch/arm/isa/formats/sve_2nd_level.isa
index 69d80e2..def1781 100644
--- a/src/arch/arm/isa/formats/sve_2nd_level.isa
+++ b/src/arch/arm/isa/formats/sve_2nd_level.isa
@@ -3123,6 +3123,18 @@
     StaticInstPtr
     decodeSveLoadStructsSS(ExtMachInst machInst)
     {
+        IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+        IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+        IntRegIndex rm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16);
+        IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10);
+        uint8_t msz = bits(machInst, 24, 23);  // element size selector
+        uint8_t num = bits(machInst, 22, 21);
+        // num holds (register count - 1); rm == 31 or num == 0 is rejected.
+        if (rm != 0x1f && num != 0) {
+            num++;  // convert to the actual register count (2..4)
+            return decodeSveStructLoadSSInsts(msz, machInst,
+                    zt, pg, rn, rm, num);
+        }
         return new Unknown64(machInst);
     }  // decodeSveLoadStructsSS
 
@@ -3135,6 +3147,19 @@
     StaticInstPtr
     decodeSveLoadStructsSI(ExtMachInst machInst)
     {
+        IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+        IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+        int64_t imm = sext<4>(bits(machInst, 19, 16));  // signed 4-bit field
+        IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10);
+        uint8_t msz = bits(machInst, 24, 23);  // element size selector
+        uint8_t num = bits(machInst, 22, 21);
+        // num holds (register count - 1); num == 0 is rejected.
+        if (num != 0) {
+            num++;  // convert to the actual register count (2..4)
+            imm *= num;  // scale the immediate by the register count
+            return decodeSveStructLoadSIInsts(msz, machInst,
+                    zt, pg, rn, imm, num);
+        }
         return new Unknown64(machInst);
     }  // decodeSveLoadStructsSI
 
@@ -3331,12 +3356,37 @@
     StaticInstPtr
     decodeSveStoreStructsSS(ExtMachInst machInst)
     {
+        IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+        IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+        IntRegIndex rm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16);
+        IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10);
+        uint8_t msz = bits(machInst, 24, 23);  // element size selector
+        uint8_t num = bits(machInst, 22, 21);
+        // num holds (register count - 1); rm == 31 or num == 0 is rejected.
+        if (rm != 0x1f && num != 0) {
+            num++;  // convert to the actual register count (2..4)
+            return decodeSveStructStoreSSInsts(msz, machInst,
+                    zt, pg, rn, rm, num);
+        }
         return new Unknown64(machInst);
     }  // decodeSveStoreStructsSS
 
     StaticInstPtr
     decodeSveStoreStructsSI(ExtMachInst machInst)
     {
+        IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+        IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+        int64_t imm = sext<4>(bits(machInst, 19, 16));  // signed 4-bit field
+        IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10);
+        uint8_t msz = bits(machInst, 24, 23);  // element size selector
+        uint8_t num = bits(machInst, 22, 21);
+        // num holds (register count - 1); num == 0 is rejected.
+        if (num != 0) {
+            num++;  // convert to the actual register count (2..4)
+            imm *= num;  // scale the immediate by the register count
+            return decodeSveStructStoreSIInsts(msz, machInst,
+                    zt, pg, rn, imm, num);
+        }
         return new Unknown64(machInst);
     }  // decodeSveStoreStructsSI
 
diff --git a/src/arch/arm/isa/insts/sve_mem.isa b/src/arch/arm/isa/insts/sve_mem.isa
index e776deb..32a078d 100644
--- a/src/arch/arm/isa/insts/sve_mem.isa
+++ b/src/arch/arm/isa/insts/sve_mem.isa
@@ -204,6 +204,238 @@
 
 output decoder {{
 
+    // Creates the LD{2,3,4}{B,H,W,D} (scalar plus immediate) macro-op.
+    template <class etype>
+    StaticInstPtr
+    decodeSveStructLoadSIInstsByNReg(uint8_t esize, ExtMachInst machInst,
+            IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
+            int64_t imm, int numregs)
+    {
+        // Mnemonic lookup: first index is numregs, second is esize.
+        static const char* mnemonics[5][4] = {
+            { nullptr, nullptr, nullptr, nullptr},
+            { nullptr, nullptr, nullptr, nullptr},
+            { "ld2b", "ld2h", "ld2w", "ld2d" },
+            { "ld3b", "ld3h", "ld3w", "ld3d" },
+            { "ld4b", "ld4h", "ld4w", "ld4d" } };
+        if (numregs == 2) {
+            return new SveLdStructSI<etype,
+                                     SveLoadRegImmMicroop,
+                                     SveDeIntrlv2Microop>(
+                    mnemonics[numregs][esize], machInst, MemReadOp,
+                    zt, pg, xn, imm, numregs);
+        } else if (numregs == 3) {
+            return new SveLdStructSI<etype,
+                                     SveLoadRegImmMicroop,
+                                     SveDeIntrlv3Microop>(
+                    mnemonics[numregs][esize], machInst, MemReadOp,
+                    zt, pg, xn, imm, numregs);
+        } else if (numregs == 4) {
+            return new SveLdStructSI<etype,
+                                     SveLoadRegImmMicroop,
+                                     SveDeIntrlv4Microop>(
+                    mnemonics[numregs][esize], machInst, MemReadOp,
+                    zt, pg, xn, imm, numregs);
+        }
+        return new Unknown64(machInst);
+    }
+
+    // Picks the element type for LD{2,3,4} (scalar plus immediate) by esize.
+    StaticInstPtr
+    decodeSveStructLoadSIInsts(uint8_t esize, ExtMachInst machInst,
+            IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
+            int64_t imm, int numregs)
+    {
+        if (esize == 0) {
+            return decodeSveStructLoadSIInstsByNReg<uint8_t>(
+                    esize, machInst, zt, pg, xn, imm, numregs);
+        } else if (esize == 1) {
+            return decodeSveStructLoadSIInstsByNReg<uint16_t>(
+                    esize, machInst, zt, pg, xn, imm, numregs);
+        } else if (esize == 2) {
+            return decodeSveStructLoadSIInstsByNReg<uint32_t>(
+                    esize, machInst, zt, pg, xn, imm, numregs);
+        } else if (esize == 3) {
+            return decodeSveStructLoadSIInstsByNReg<uint64_t>(
+                    esize, machInst, zt, pg, xn, imm, numregs);
+        }
+        return new Unknown64(machInst);
+    }
+
+    // Creates the ST{2,3,4}{B,H,W,D} (scalar plus immediate) macro-op.
+    template <class etype>
+    StaticInstPtr
+    decodeSveStructStoreSIInstsByNReg(uint8_t esize, ExtMachInst machInst,
+            IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
+            int64_t imm, int numregs)
+    {
+        // Mnemonic lookup: first index is numregs, second is esize.
+        static const char* mnemonics[5][4] = {
+            { nullptr, nullptr, nullptr, nullptr},
+            { nullptr, nullptr, nullptr, nullptr},
+            { "st2b", "st2h", "st2w", "st2d" },
+            { "st3b", "st3h", "st3w", "st3d" },
+            { "st4b", "st4h", "st4w", "st4d" } };
+        if (numregs == 2) {
+            return new SveStStructSI<etype,
+                                     SveStoreRegImmMicroop,
+                                     SveIntrlv2Microop>(
+                    mnemonics[numregs][esize], machInst, MemWriteOp,
+                    zt, pg, xn, imm, numregs);
+        } else if (numregs == 3) {
+            return new SveStStructSI<etype,
+                                     SveStoreRegImmMicroop,
+                                     SveIntrlv3Microop>(
+                    mnemonics[numregs][esize], machInst, MemWriteOp,
+                    zt, pg, xn, imm, numregs);
+        } else if (numregs == 4) {
+            return new SveStStructSI<etype,
+                                     SveStoreRegImmMicroop,
+                                     SveIntrlv4Microop>(
+                    mnemonics[numregs][esize], machInst, MemWriteOp,
+                    zt, pg, xn, imm, numregs);
+        }
+        return new Unknown64(machInst);
+    }
+
+    // Picks the element type for ST{2,3,4} (scalar plus immediate) by esize.
+    StaticInstPtr
+    decodeSveStructStoreSIInsts(uint8_t esize, ExtMachInst machInst,
+            IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
+            int64_t imm, int numregs)
+    {
+        if (esize == 0) {
+            return decodeSveStructStoreSIInstsByNReg<uint8_t>(
+                    esize, machInst, zt, pg, xn, imm, numregs);
+        } else if (esize == 1) {
+            return decodeSveStructStoreSIInstsByNReg<uint16_t>(
+                    esize, machInst, zt, pg, xn, imm, numregs);
+        } else if (esize == 2) {
+            return decodeSveStructStoreSIInstsByNReg<uint32_t>(
+                    esize, machInst, zt, pg, xn, imm, numregs);
+        } else if (esize == 3) {
+            return decodeSveStructStoreSIInstsByNReg<uint64_t>(
+                    esize, machInst, zt, pg, xn, imm, numregs);
+        }
+        return new Unknown64(machInst);
+    }
+
+    // Creates the LD{2,3,4}{B,H,W,D} (scalar plus scalar) macro-op.
+    template <class etype>
+    StaticInstPtr
+    decodeSveStructLoadSSInstsByNReg(uint8_t esize, ExtMachInst machInst,
+            IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
+            IntRegIndex xm, int numregs)
+    {
+        // Mnemonic lookup: first index is numregs, second is esize.
+        static const char* mnemonics[5][4] = {
+            { nullptr, nullptr, nullptr, nullptr},
+            { nullptr, nullptr, nullptr, nullptr},
+            { "ld2b", "ld2h", "ld2w", "ld2d" },
+            { "ld3b", "ld3h", "ld3w", "ld3d" },
+            { "ld4b", "ld4h", "ld4w", "ld4d" } };
+        if (numregs == 2) {
+            return new SveLdStructSS<etype,
+                                     SveLoadRegRegMicroop,
+                                     SveDeIntrlv2Microop>(
+                    mnemonics[numregs][esize], machInst, MemReadOp,
+                    zt, pg, xn, xm, numregs);
+        } else if (numregs == 3) {
+            return new SveLdStructSS<etype,
+                                     SveLoadRegRegMicroop,
+                                     SveDeIntrlv3Microop>(
+                    mnemonics[numregs][esize], machInst, MemReadOp,
+                    zt, pg, xn, xm, numregs);
+        } else if (numregs == 4) {
+            return new SveLdStructSS<etype,
+                                     SveLoadRegRegMicroop,
+                                     SveDeIntrlv4Microop>(
+                    mnemonics[numregs][esize], machInst, MemReadOp,
+                    zt, pg, xn, xm, numregs);
+        }
+        return new Unknown64(machInst);
+    }
+
+    // Picks the element type for LD{2,3,4} (scalar plus scalar) by esize.
+    StaticInstPtr
+    decodeSveStructLoadSSInsts(uint8_t esize, ExtMachInst machInst,
+            IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
+            IntRegIndex xm, int numregs)
+    {
+        if (esize == 0) {
+            return decodeSveStructLoadSSInstsByNReg<uint8_t>(
+                    esize, machInst, zt, pg, xn, xm, numregs);
+        } else if (esize == 1) {
+            return decodeSveStructLoadSSInstsByNReg<uint16_t>(
+                    esize, machInst, zt, pg, xn, xm, numregs);
+        } else if (esize == 2) {
+            return decodeSveStructLoadSSInstsByNReg<uint32_t>(
+                    esize, machInst, zt, pg, xn, xm, numregs);
+        } else if (esize == 3) {
+            return decodeSveStructLoadSSInstsByNReg<uint64_t>(
+                    esize, machInst, zt, pg, xn, xm, numregs);
+        }
+        return new Unknown64(machInst);
+    }
+
+    // Creates the ST{2,3,4}{B,H,W,D} (scalar plus scalar) macro-op.
+    template <class etype>
+    StaticInstPtr
+    decodeSveStructStoreSSInstsByNReg(uint8_t esize, ExtMachInst machInst,
+            IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
+            IntRegIndex xm, int numregs)
+    {
+        // Mnemonic lookup: first index is numregs, second is esize.
+        static const char* mnemonics[5][4] = {
+            { nullptr, nullptr, nullptr, nullptr},
+            { nullptr, nullptr, nullptr, nullptr},
+            { "st2b", "st2h", "st2w", "st2d" },
+            { "st3b", "st3h", "st3w", "st3d" },
+            { "st4b", "st4h", "st4w", "st4d" } };
+        if (numregs == 2) {
+            return new SveStStructSS<etype,
+                                     SveStoreRegRegMicroop,
+                                     SveIntrlv2Microop>(
+                    mnemonics[numregs][esize], machInst, MemWriteOp,
+                    zt, pg, xn, xm, numregs);
+        } else if (numregs == 3) {
+            return new SveStStructSS<etype,
+                                     SveStoreRegRegMicroop,
+                                     SveIntrlv3Microop>(
+                    mnemonics[numregs][esize], machInst, MemWriteOp,
+                    zt, pg, xn, xm, numregs);
+        } else if (numregs == 4) {
+            return new SveStStructSS<etype,
+                                     SveStoreRegRegMicroop,
+                                     SveIntrlv4Microop>(
+                    mnemonics[numregs][esize], machInst, MemWriteOp,
+                    zt, pg, xn, xm, numregs);
+        }
+        return new Unknown64(machInst);
+    }
+
+    // Picks the element type for ST{2,3,4} (scalar plus scalar) by esize.
+    StaticInstPtr
+    decodeSveStructStoreSSInsts(uint8_t esize, ExtMachInst machInst,
+            IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
+            IntRegIndex xm, int numregs)
+    {
+        if (esize == 0) {
+            return decodeSveStructStoreSSInstsByNReg<uint8_t>(
+                    esize, machInst, zt, pg, xn, xm, numregs);
+        } else if (esize == 1) {
+            return decodeSveStructStoreSSInstsByNReg<uint16_t>(
+                    esize, machInst, zt, pg, xn, xm, numregs);
+        } else if (esize == 2) {
+            return decodeSveStructStoreSSInstsByNReg<uint32_t>(
+                    esize, machInst, zt, pg, xn, xm, numregs);
+        } else if (esize == 3) {
+            return decodeSveStructStoreSSInstsByNReg<uint64_t>(
+                    esize, machInst, zt, pg, xn, xm, numregs);
+        }
+        return new Unknown64(machInst);
+    }
+
     StaticInstPtr
     decodeSveGatherLoadVIInsts(uint8_t dtype, ExtMachInst machInst,
                                IntRegIndex zt, IntRegIndex pg, IntRegIndex zn,
@@ -1018,6 +1250,231 @@
         header_output += SveGatherLoadCpySrcVecMicroopDeclare.subst(iop)
         exec_output += SveGatherLoadCpySrcVecMicroopExecute.subst(iop)
 
+    def emitSveInterleaveMicroop():
+        '''Emit the SveIntrlv{2,3,4}Microop declarations and definitions.
+
+        codeN is the C++ body for an N-register structure: element i of the
+        destination is copied from source vector (absIdx % numRegs), element
+        (absIdx / numRegs), where absIdx = regIndex * eCount + i.
+        '''
+        global header_output, exec_output, decoders
+        # Two source vectors.
+        code2 = sveEnabledCheckCode + '''
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase());
+        for (unsigned int i = 0; i < eCount; ++i) {
+            unsigned int absIdx = regIndex * eCount + i;
+            unsigned int srcIdx = absIdx / numRegs;
+            unsigned int srcVec = absIdx % numRegs;
+            if (srcVec == 0)
+                AA64FpDest_x[i] = AA64FpOp1V0S_x[srcIdx];
+            else if (srcVec == 1)
+                AA64FpDest_x[i] = AA64FpOp1V1S_x[srcIdx];
+        }'''
+
+        # Three source vectors.
+        code3 = sveEnabledCheckCode + '''
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase());
+        for (unsigned int i = 0; i < eCount; ++i) {
+            unsigned int absIdx = regIndex * eCount + i;
+            unsigned int srcIdx = absIdx / numRegs;
+            unsigned int srcVec = absIdx % numRegs;
+            if (srcVec == 0)
+                AA64FpDest_x[i] = AA64FpOp1V0S_x[srcIdx];
+            else if (srcVec == 1)
+                AA64FpDest_x[i] = AA64FpOp1V1S_x[srcIdx];
+            else if (srcVec == 2)
+                AA64FpDest_x[i] = AA64FpOp1V2S_x[srcIdx];
+        }'''
+
+        # Four source vectors.
+        code4 = sveEnabledCheckCode + '''
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase());
+        for (unsigned int i = 0; i < eCount; ++i) {
+            unsigned int absIdx = regIndex * eCount + i;
+            unsigned int srcIdx = absIdx / numRegs;
+            unsigned int srcVec = absIdx % numRegs;
+            if (srcVec == 0)
+                AA64FpDest_x[i] = AA64FpOp1V0S_x[srcIdx];
+            else if (srcVec == 1)
+                AA64FpDest_x[i] = AA64FpOp1V1S_x[srcIdx];
+            else if (srcVec == 2)
+                AA64FpDest_x[i] = AA64FpOp1V2S_x[srcIdx];
+            else if (srcVec == 3)
+                AA64FpDest_x[i] = AA64FpOp1V3S_x[srcIdx];
+        }'''
+
+        # All three share the 'intrlv' mnemonic and the MicroOp base class.
+        iop2 = InstObjParams('intrlv', 'SveIntrlv2Microop', 'MicroOp',
+                             {'code': code2}, ['IsMicroop'])
+        iop3 = InstObjParams('intrlv', 'SveIntrlv3Microop', 'MicroOp',
+                             {'code': code3}, ['IsMicroop'])
+        iop4 = InstObjParams('intrlv', 'SveIntrlv4Microop', 'MicroOp',
+                             {'code': code4}, ['IsMicroop'])
+        for iop in (iop2, iop3, iop4):
+            header_output += SveIntrlvMicroopDeclare.subst(iop)
+        for iop in (iop2, iop3, iop4):
+            exec_output += SveIntrlvMicroopExecute.subst(iop)
+        # One ExecDeclare substitution per element type and register count.
+        for elemType in ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t'):
+            for nreg in range(2, 5):
+                substDict = {'targs': elemType,
+                             'class_name': 'SveIntrlv%dMicroop' % nreg}
+                exec_output += SveIntrlvMicroopExecDeclare.subst(substDict)
+
+    def emitSveDeInterleaveMicroop():
+        '''Emit the SveDeIntrlv{2,3,4}Microop declarations and definitions.
+
+        codeN is the C++ body for an N-register structure: element i of the
+        destination is copied from interleave register (absIdx / eCount),
+        element (absIdx % eCount), where absIdx = regIndex + numRegs * i.
+        '''
+        global header_output, exec_output, decoders
+        # Two interleave registers.
+        code2 = sveEnabledCheckCode + '''
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase());
+        for (unsigned int i = 0; i < eCount; ++i) {
+            unsigned int absIdx = (regIndex + numRegs * i);
+            unsigned int srcIdx = absIdx % eCount;
+            unsigned int srcVec = absIdx / eCount;
+            if (srcVec == 0)
+                AA64FpDest_x[i] = AA64IntrlvReg0_x[srcIdx];
+            else if(srcVec == 1)
+                AA64FpDest_x[i] = AA64IntrlvReg1_x[srcIdx];
+        }'''
+
+        # Three interleave registers.
+        code3 = sveEnabledCheckCode + '''
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase());
+        for (unsigned int i = 0; i < eCount; ++i) {
+            unsigned int absIdx = (regIndex + numRegs * i);
+            unsigned int srcIdx = absIdx % eCount;
+            unsigned int srcVec = absIdx / eCount;
+            if (srcVec == 0)
+                AA64FpDest_x[i] = AA64IntrlvReg0_x[srcIdx];
+            else if(srcVec == 1)
+                AA64FpDest_x[i] = AA64IntrlvReg1_x[srcIdx];
+            else if(srcVec == 2)
+                AA64FpDest_x[i] = AA64IntrlvReg2_x[srcIdx];
+        }'''
+
+        # Four interleave registers.
+        code4 = sveEnabledCheckCode + '''
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase());
+        for (unsigned int i = 0; i < eCount; ++i) {
+            unsigned int absIdx = (regIndex + numRegs * i);
+            unsigned int srcIdx = absIdx % eCount;
+            unsigned int srcVec = absIdx / eCount;
+            if (srcVec == 0)
+                AA64FpDest_x[i] = AA64IntrlvReg0_x[srcIdx];
+            else if(srcVec == 1)
+                AA64FpDest_x[i] = AA64IntrlvReg1_x[srcIdx];
+            else if(srcVec == 2)
+                AA64FpDest_x[i] = AA64IntrlvReg2_x[srcIdx];
+            else if(srcVec == 3)
+                AA64FpDest_x[i] = AA64IntrlvReg3_x[srcIdx];
+        }'''
+
+        # NOTE(review): Execute/ExecDeclare reuse the SveIntrlv* templates.
+        iop2 = InstObjParams('deintrlv', 'SveDeIntrlv2Microop', 'MicroOp',
+                             {'code': code2}, ['IsMicroop'])
+        iop3 = InstObjParams('deintrlv', 'SveDeIntrlv3Microop', 'MicroOp',
+                             {'code': code3}, ['IsMicroop'])
+        iop4 = InstObjParams('deintrlv', 'SveDeIntrlv4Microop', 'MicroOp',
+                             {'code': code4}, ['IsMicroop'])
+        for iop in (iop2, iop3, iop4):
+            header_output += SveDeIntrlvMicroopDeclare.subst(iop)
+        for iop in (iop2, iop3, iop4):
+            exec_output += SveIntrlvMicroopExecute.subst(iop)
+        # One ExecDeclare substitution per element type and register count.
+        for elemType in ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t'):
+            for nreg in range(2, 5):
+                substDict = {'targs': elemType,
+                             'class_name': 'SveDeIntrlv%dMicroop' % nreg}
+                exec_output += SveIntrlvMicroopExecDeclare.subst(substDict)
+
+    # Generates definitions for SVE struct load/store microops
+    def emitSveStructMemInsts(offsetIsImm):
+        '''Emit the load/store micro-ops used by LD{2,3,4}/ST{2,3,4}.
+
+        offsetIsImm selects the scalar-plus-immediate (RegImm) classes;
+        otherwise the scalar-plus-scalar (RegReg) classes are generated.
+        '''
+        global header_output, exec_output, decoders
+        if offsetIsImm:
+            loadName = 'SveLoadRegImmMicroop'
+            storeName = 'SveStoreRegImmMicroop'
+            declTemplate = SveStructMemSIMicroopDeclare
+        else:
+            loadName = 'SveLoadRegRegMicroop'
+            storeName = 'SveStoreRegRegMicroop'
+            declTemplate = SveStructMemSSMicroopDeclare
+        eaCode = SPAlignmentCheckCode + '''
+        int memAccessSize = eCount * sizeof(Element);
+        EA = memAccessSize * regIndex + XBase + '''
+        if offsetIsImm:
+            eaCode += '((int64_t) this->imm * eCount * sizeof(Element))'
+        else:
+            eaCode += '(XOffset * sizeof(Element));'
+        loadMemAccCode = '''
+        for (int i = 0; i < eCount; i++) {
+            int gpIdx = (regIndex * eCount + i) / numRegs;
+            if (GpOp_x[gpIdx]) {
+                AA64FpDest_x[i] = memDataView[i];
+            } else {
+                AA64FpDest_x[i] = 0;
+            }
+        }
+        '''
+        storeMemAccCode = '''
+        for (int i = 0; i < eCount; i++) {
+            int gpIdx = (regIndex * eCount + i) / numRegs;
+            if (GpOp_x[gpIdx]) {
+                memDataView[i] = AA64FpDest_x[i];
+            } else {
+                memDataView[i] = 0;
+                for (int j = 0; j < sizeof(Element); j++) {
+                    wrEn[sizeof(Element) * i + j] = false;
+                }
+            }
+        }
+        '''
+        storeWrEnableCode = '''
+        auto wrEn = std::vector<bool>(sizeof(Element) * eCount, true);
+        '''
+        loadIop = InstObjParams('ldxx', loadName, 'MicroOp',
+            {'targs': 'Element',
+             'memacc_code': loadMemAccCode,
+             'ea_code' : sveEnabledCheckCode + eaCode,
+             'fa_code' : ''},
+            ['IsMemRef', 'IsLoad', 'IsMicroop'])
+        storeIop = InstObjParams('stxx', storeName, 'MicroOp',
+            {'targs': 'Element',
+             'wren_code': storeWrEnableCode,
+             'memacc_code': storeMemAccCode,
+             'ea_code' : sveEnabledCheckCode + eaCode,
+             'fa_code' : ''},
+            ['IsMemRef', 'IsStore', 'IsMicroop'])
+        header_output += declTemplate.subst(loadIop)
+        header_output += declTemplate.subst(storeIop)
+        exec_output += (
+            SveStructLoadExecute.subst(loadIop) +
+            SveStructLoadInitiateAcc.subst(loadIop) +
+            SveStructLoadCompleteAcc.subst(loadIop) +
+            SveStructStoreExecute.subst(storeIop) +
+            SveStructStoreInitiateAcc.subst(storeIop) +
+            SveStructStoreCompleteAcc.subst(storeIop))
+        # Per element type: the load class first, then the store class.
+        for elemType in ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t'):
+            for className in (loadName, storeName):
+                exec_output += SveStructMemExecDeclare.subst(
+                    {'targs': elemType, 'class_name': className})
+
     # LD1[S]{B,H,W,D} (scalar plus immediate)
     # ST1[S]{B,H,W,D} (scalar plus immediate)
     # LDNF1[S]{B,H,W,D} (scalar plus immediate)
@@ -1030,6 +1487,13 @@
     # LD1R[S]{B,H,W,D}
     emitSveLoadAndRepl()
 
+    # LD{2,3,4}{B,H,W,D} (scalar plus immediate)
+    # ST{2,3,4}{B,H,W,D} (scalar plus immediate)
+    emitSveStructMemInsts(offsetIsImm = True)
+    # LD{2,3,4}{B,H,W,D} (scalar plus scalar)
+    # ST{2,3,4}{B,H,W,D} (scalar plus scalar)
+    emitSveStructMemInsts(offsetIsImm = False)
+
     # LDR (predicate), STR (predicate)
     emitSveMemFillSpill(True)
     # LDR (vector), STR (vector)
@@ -1049,4 +1513,8 @@
 
     # Source vector copy microop for gather loads
     emitSveGatherLoadCpySrcVecMicroop()
+
+    # ST/LD struct de/interleave microops
+    emitSveInterleaveMicroop()
+    emitSveDeInterleaveMicroop()
 }};
diff --git a/src/arch/arm/isa/operands.isa b/src/arch/arm/isa/operands.isa
index aaa64e7..5eae9b4 100644
--- a/src/arch/arm/isa/operands.isa
+++ b/src/arch/arm/isa/operands.isa
@@ -530,6 +530,51 @@
         'AA64FpDestQV1L':  vectorRegElem('0', 'tud', zeroing = True)
     }),
 
+    # Temporary registers for SVE interleaving
+    'AA64IntrlvReg0': vectorReg('INTRLVREG0',
+    {
+        'AA64IntrlvReg0P0': vectorRegElem('0'),
+        'AA64IntrlvReg0P1': vectorRegElem('1'),
+        'AA64IntrlvReg0P2': vectorRegElem('2'),
+        'AA64IntrlvReg0P3': vectorRegElem('3'),
+        'AA64IntrlvReg0S':  vectorRegElem('0', 'sf', zeroing = True),
+        'AA64IntrlvReg0D':  vectorRegElem('0', 'df', zeroing = True),
+        'AA64IntrlvReg0Q':  vectorRegElem('0', 'tud', zeroing = True)
+    }),
+
+    'AA64IntrlvReg1': vectorReg('INTRLVREG1',
+    {
+        'AA64IntrlvReg1P0': vectorRegElem('0'),
+        'AA64IntrlvReg1P1': vectorRegElem('1'),
+        'AA64IntrlvReg1P2': vectorRegElem('2'),
+        'AA64IntrlvReg1P3': vectorRegElem('3'),
+        'AA64IntrlvReg1S':  vectorRegElem('0', 'sf', zeroing = True),
+        'AA64IntrlvReg1D':  vectorRegElem('0', 'df', zeroing = True),
+        'AA64IntrlvReg1Q':  vectorRegElem('0', 'tud', zeroing = True)
+    }),
+
+    'AA64IntrlvReg2': vectorReg('INTRLVREG2',
+    {
+        'AA64IntrlvReg2P0': vectorRegElem('0'),
+        'AA64IntrlvReg2P1': vectorRegElem('1'),
+        'AA64IntrlvReg2P2': vectorRegElem('2'),
+        'AA64IntrlvReg2P3': vectorRegElem('3'),
+        'AA64IntrlvReg2S':  vectorRegElem('0', 'sf', zeroing = True),
+        'AA64IntrlvReg2D':  vectorRegElem('0', 'df', zeroing = True),
+        'AA64IntrlvReg2Q':  vectorRegElem('0', 'tud', zeroing = True)
+    }),
+
+    'AA64IntrlvReg3': vectorReg('INTRLVREG3',
+    {
+        'AA64IntrlvReg3P0': vectorRegElem('0'),
+        'AA64IntrlvReg3P1': vectorRegElem('1'),
+        'AA64IntrlvReg3P2': vectorRegElem('2'),
+        'AA64IntrlvReg3P3': vectorRegElem('3'),
+        'AA64IntrlvReg3S':  vectorRegElem('0', 'sf', zeroing = True),
+        'AA64IntrlvReg3D':  vectorRegElem('0', 'df', zeroing = True),
+        'AA64IntrlvReg3Q':  vectorRegElem('0', 'tud', zeroing = True)
+    }),
+
     'AA64FpDestMerge':       vectorReg('dest',
     {
         'AA64FpDestMergeP0': vectorRegElem('0'),
diff --git a/src/arch/arm/isa/templates/sve_mem.isa b/src/arch/arm/isa/templates/sve_mem.isa
index 5e2e553..dced5f4 100644
--- a/src/arch/arm/isa/templates/sve_mem.isa
+++ b/src/arch/arm/isa/templates/sve_mem.isa
@@ -815,3 +815,449 @@
         return fault;
     }
 }};
+
+def template SveStructMemSIMicroopDeclare {{
+    /**
+     * Memory transfer microop of an SVE structure load/store that uses
+     * scalar-plus-immediate addressing. Each instance moves the data of
+     * one of the numRegs registers (regIndex) of the macroop.
+     */
+    template<class _Element>
+    class %(class_name)s : public %(base_class)s
+    {
+      protected:
+        typedef _Element Element;
+        typedef _Element TPElem;
+
+        IntRegIndex dest;  // vector register or INTRLVREG temporary
+        IntRegIndex gp;    // predicate register
+        IntRegIndex base;  // scalar base register (may be SP)
+        int64_t imm;       // offset; disassembled times sizeof(Element)
+
+        uint8_t numRegs;   // registers in the structure
+        int regIndex;      // position of this microop's register
+
+        unsigned memAccessFlags;
+
+        bool baseIsSP;     // isSP(base), cached by the constructor
+
+      public:
+        %(class_name)s(const char* mnem, ExtMachInst machInst,
+            IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
+            int64_t _imm, uint8_t _numRegs, int _regIndex)
+            : %(base_class)s(mnem, machInst, %(op_class)s),
+              dest(_dest), gp(_gp), base(_base), imm(_imm),
+              numRegs(_numRegs), regIndex(_regIndex),
+              memAccessFlags(ArmISA::TLB::AllowUnaligned |
+                             ArmISA::TLB::MustBeOne)
+        {
+            %(constructor)s;
+            baseIsSP = isSP(_base);
+        }
+
+        Fault execute(ExecContext *, Trace::InstRecord *) const;
+        Fault initiateAcc(ExecContext *, Trace::InstRecord *) const;
+        Fault completeAcc(PacketPtr, ExecContext *, Trace::InstRecord *) const;
+
+        virtual void
+        annotateFault(ArmFault *fault)
+        {
+            %(fa_code)s
+        }
+
+        /** Disassemble this microop, tagged with its register index. */
+        std::string
+        generateDisassembly(Addr pc, const SymbolTable *symtab) const
+        {
+            std::stringstream ss;
+            printMnemonic(ss, "", false);
+            ccprintf(ss, "{");
+            // INTRLVREG0..3 are consecutive indices, so the temporary
+            // registers are named after their distance from INTRLVREG0.
+            if (dest >= INTRLVREG0 && dest <= INTRLVREG3) {
+                ccprintf(ss, "INTRLV%d", dest - INTRLVREG0);
+            } else {
+                printVecReg(ss, dest, true);
+            }
+            ccprintf(ss, "}, ");
+            printVecPredReg(ss, gp);
+            if (_opClass == MemReadOp) {
+                ccprintf(ss, "/z");
+            }
+            ccprintf(ss, ", [");
+            // The base is a scalar register (it may be SP), not a vector.
+            printIntReg(ss, base);
+            if (imm != 0) {
+                // Multiply as signed: sizeof() is unsigned and would turn
+                // a negative immediate into a huge positive value.
+                ccprintf(ss, ", #%d",
+                         imm * static_cast<int64_t>(sizeof(Element)));
+            }
+            ccprintf(ss, "] (uop reg %d tfer)", regIndex);
+            return ss.str();
+        }
+    };
+}};
+
+def template SveStructMemExecDeclare {{
+    template  // explicit instantiation of execute() for one element type
+    Fault %(class_name)s<%(targs)s>::execute(ExecContext *,
+        Trace::InstRecord *) const;
+    // Same for initiateAcc() and completeAcc() below.
+    template
+    Fault %(class_name)s<%(targs)s>::initiateAcc(ExecContext *,
+        Trace::InstRecord *) const;
+
+    template
+    Fault %(class_name)s<%(targs)s>::completeAcc(PacketPtr,
+        ExecContext *, Trace::InstRecord *) const;
+}};
+
+def template SveStructLoadExecute {{
+    template <class Element>
+    Fault %(class_name)s<Element>::execute(ExecContext *xc,
+        Trace::InstRecord *traceData) const
+    {
+        Addr EA;
+        Fault fault = NoFault;
+        bool aarch64 M5_VAR_USED = true;
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+            xc->tcBase());
+        // Whole load in one call: read at EA, run memacc, write back.
+        %(op_decl)s;
+        %(op_rd)s;
+        %(ea_code)s;  // presumably sets EA and memAccessSize -- confirm
+        // Landing buffer for the loaded bytes, also viewed as Elements.
+        TheISA::VecRegContainer memData;
+        auto memDataView = memData.as<Element>();
+
+        if (fault == NoFault) {
+            fault = xc->readMem(EA, memData.raw_ptr<uint8_t>(), memAccessSize,
+                this->memAccessFlags);
+            %(memacc_code)s;  // NOTE(review): runs even if readMem faulted
+        }
+
+        if (fault == NoFault) {
+            %(op_wb)s;
+        }
+
+        return fault;
+    }
+}};
+
+def template SveStructLoadInitiateAcc {{
+    template <class Element>
+    Fault %(class_name)s<Element>::initiateAcc(ExecContext *xc,
+        Trace::InstRecord *traceData) const
+    {
+        Addr EA;
+        Fault fault = NoFault;
+        bool aarch64 M5_VAR_USED = true;
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+            xc->tcBase());
+        // Only source operands are declared: nothing is written back here.
+        %(op_src_decl)s;
+        %(op_rd)s;
+
+        %(ea_code)s;  // presumably sets EA and memAccessSize -- confirm
+
+        if (fault == NoFault) {
+            fault = xc->initiateMemRead(EA, memAccessSize,
+                this->memAccessFlags);  // data is consumed in completeAcc()
+        }
+
+        return fault;
+    }
+}};
+
+def template SveStructLoadCompleteAcc {{
+    template <class Element>
+    Fault %(class_name)s<Element>::completeAcc(PacketPtr pkt,
+        ExecContext *xc, Trace::InstRecord *traceData) const
+    {
+        Fault fault = NoFault;
+        bool aarch64 M5_VAR_USED = true;
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+            xc->tcBase());
+
+        %(op_decl)s;
+        %(op_rd)s;
+
+        TheISA::VecRegContainer memData;
+        auto memDataView = memData.as<Element>();
+        // Stage the packet payload in memData before memacc_code runs.
+        memcpy(memData.raw_ptr<uint8_t>(), pkt->getPtr<uint8_t>(),
+            pkt->getSize());  // NOTE(review): no bounds check on size
+
+        if (fault == NoFault) {
+            %(memacc_code)s;
+        }
+
+        if (fault == NoFault) {
+            %(op_wb)s;
+        }
+
+        return fault;
+    }
+}};
+
+def template SveStructStoreExecute {{
+    template <class Element>
+    Fault %(class_name)s<Element>::execute(ExecContext *xc,
+        Trace::InstRecord *traceData) const
+    {
+        Addr EA;
+        Fault fault = NoFault;
+        bool aarch64 M5_VAR_USED = true;
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+            xc->tcBase());
+        // Whole store in one call: build memData, then write it at EA.
+        %(op_decl)s;
+        %(op_rd)s;
+        %(ea_code)s;  // presumably sets EA and memAccessSize -- confirm
+
+        TheISA::VecRegContainer memData;
+        auto memDataView = memData.as<Element>();
+        // wrEn (used by writeMem() below) is not declared in this template.
+        %(wren_code)s;  // presumably defines wrEn -- confirm
+
+        if (fault == NoFault) {
+            %(memacc_code)s;  // presumably fills memData -- confirm
+        }
+
+        if (fault == NoFault) {
+            fault = xc->writeMem(memData.raw_ptr<uint8_t>(), memAccessSize, EA,
+                this->memAccessFlags, NULL, wrEn);
+        }
+
+        if (fault == NoFault) {
+            %(op_wb)s;
+        }
+
+        return fault;
+    }
+}};
+
+def template SveStructStoreInitiateAcc {{
+    template <class Element>
+    Fault %(class_name)s<Element>::initiateAcc(ExecContext *xc,
+        Trace::InstRecord *traceData) const
+    {
+        Addr EA;
+        Fault fault = NoFault;
+        bool aarch64 M5_VAR_USED = true;
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+            xc->tcBase());
+        // Build memData and issue the write; no operand write-back here.
+        %(op_decl)s;
+        %(op_rd)s;
+        %(ea_code)s;  // presumably sets EA and memAccessSize -- confirm
+
+        TheISA::VecRegContainer memData;
+        auto memDataView = memData.as<Element>();
+        // wrEn (used by writeMem() below) is not declared in this template.
+        %(wren_code)s;  // presumably defines wrEn -- confirm
+
+        if (fault == NoFault) {
+            %(memacc_code)s;  // presumably fills memData -- confirm
+        }
+
+        if (fault == NoFault) {
+            fault = xc->writeMem(memData.raw_ptr<uint8_t>(), memAccessSize, EA,
+                this->memAccessFlags, NULL, wrEn);
+        }
+
+        return fault;
+    }
+}};
+
+def template SveStructStoreCompleteAcc {{
+    template <class Element>
+    Fault %(class_name)s<Element>::completeAcc(PacketPtr pkt,
+        ExecContext *xc, Trace::InstRecord *traceData) const
+    {
+        return NoFault;  // no work is done when a store completes
+    }
+}};
+
+def template SveStructMemSSMicroopDeclare {{
+    /**
+     * Memory transfer microop of an SVE structure load/store that uses
+     * scalar-plus-scalar addressing. Each instance moves the data of one
+     * of the numRegs registers (regIndex) of the macroop.
+     */
+    template <class _Element>
+    class %(class_name)s : public %(base_class)s
+    {
+      protected:
+        typedef _Element Element;
+        typedef _Element TPElem;
+
+        IntRegIndex dest;    // vector register or INTRLVREG temporary
+        IntRegIndex gp;      // predicate register
+        IntRegIndex base;    // scalar base register (may be SP)
+        IntRegIndex offset;  // scalar offset register
+
+        uint8_t numRegs;     // registers in the structure
+        int regIndex;        // position of this microop's register
+
+        unsigned memAccessFlags;
+
+        bool baseIsSP;
+
+      public:
+        %(class_name)s(const char* mnem, ExtMachInst machInst,
+            IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
+            IntRegIndex _offset, uint8_t _numRegs, int _regIndex)
+            : %(base_class)s(mnem, machInst, %(op_class)s),
+              dest(_dest), gp(_gp), base(_base), offset(_offset),
+              numRegs(_numRegs), regIndex(_regIndex),
+              memAccessFlags(ArmISA::TLB::AllowUnaligned |
+                             ArmISA::TLB::MustBeOne)
+        {
+            %(constructor)s;
+            // Remember whether the base register is SP.
+            baseIsSP = isSP(_base);
+        }
+
+        Fault execute(ExecContext *, Trace::InstRecord *) const;
+        Fault initiateAcc(ExecContext *, Trace::InstRecord *) const;
+        Fault completeAcc(PacketPtr, ExecContext *, Trace::InstRecord *) const;
+
+        virtual void
+        annotateFault(ArmFault *fault)
+        {
+            %(fa_code)s
+        }
+
+        /** Disassemble this microop, tagged with its register index. */
+        std::string
+        generateDisassembly(Addr pc, const SymbolTable *symtab) const
+        {
+            std::stringstream ss;
+            printMnemonic(ss, "", false);
+            ccprintf(ss, "{");
+            // INTRLVREG0..3 are consecutive indices, so the temporary
+            // registers are named after their distance from INTRLVREG0.
+            if (dest >= INTRLVREG0 && dest <= INTRLVREG3) {
+                ccprintf(ss, "INTRLV%d", dest - INTRLVREG0);
+            } else {
+                printVecReg(ss, dest, true);
+            }
+            ccprintf(ss, "}, ");
+            printVecPredReg(ss, gp);
+            if (_opClass == MemReadOp) {
+                ccprintf(ss, "/z");
+            }
+            ccprintf(ss, ", [");
+            printIntReg(ss, base);
+            ccprintf(ss, ", ");
+            // Scalar-plus-scalar form: the offset is an integer register
+            // just like the base, so it must not be printed as a vector.
+            printIntReg(ss, offset);
+            // Tag the output with the structure register this microop moves.
+            ccprintf(ss, "] (uop reg %d tfer)", regIndex);
+            return ss.str();
+        }
+    };
+}};
+
+def template SveIntrlvMicroopDeclare {{
+    template <class _Element>
+    class %(class_name)s: public %(base_class)s
+    {  // Interleave microop for SVE structure accesses
+      protected:
+        typedef _Element Element;
+        typedef _Element TPElem;
+        IntRegIndex dest;
+        IntRegIndex op1;
+        uint8_t numRegs;
+        int regIndex;
+
+        StaticInst *macroOp;  // macroop whose disassembly is reused below
+
+      public:
+        %(class_name)s(const char* mnem, ExtMachInst machInst,
+            IntRegIndex _dest, IntRegIndex _op1,
+            uint8_t _numRegs, int _regIndex, StaticInst *_macroOp)
+            : MicroOp(mnem, machInst, SimdAluOp),  // NOTE(review): hard-coded
+            dest(_dest), op1(_op1), numRegs(_numRegs), regIndex(_regIndex),
+            macroOp(_macroOp)  // base_class and op_class are not used here
+        {
+            %(constructor)s;
+        }
+
+        Fault execute(ExecContext *, Trace::InstRecord *) const;
+        // Print the macroop's disassembly followed by an interleave tag.
+        std::string
+        generateDisassembly(Addr pc, const SymbolTable *symtab) const
+        {
+            std::stringstream ss;
+            ccprintf(ss, "%s", macroOp->disassemble(pc, symtab));
+            ccprintf(ss, " (uop interleave)");
+            return ss.str();
+        }
+    };
+}};
+
+def template SveDeIntrlvMicroopDeclare {{
+    template <class _Element>
+    class %(class_name)s : public %(base_class)s
+    {  // Deinterleave microop for SVE structure accesses
+      protected:
+        typedef _Element Element;
+        typedef _Element TPElem;
+        IntRegIndex dest;
+        uint8_t numRegs;
+        int regIndex;
+
+        StaticInst *macroOp;  // macroop whose disassembly is reused below
+
+      public:
+        %(class_name)s(const char* mnem, ExtMachInst machInst,
+            IntRegIndex _dest, uint8_t _numRegs, int _regIndex,
+            StaticInst *_macroOp)
+            : MicroOp(mnem, machInst, SimdAluOp),  // NOTE(review): hard-coded
+            dest(_dest), numRegs(_numRegs), regIndex(_regIndex),
+            macroOp(_macroOp)  // base_class and op_class are not used here
+        {
+            %(constructor)s;
+        }
+
+        Fault execute(ExecContext *, Trace::InstRecord *) const;
+        // Print the macroop's disassembly followed by a deinterleave tag.
+        std::string
+        generateDisassembly(Addr pc, const SymbolTable *symtab) const
+        {
+            std::stringstream ss;
+            ccprintf(ss, "%s", macroOp->disassemble(pc, symtab));
+            ccprintf(ss, " (uop deinterleave)");
+            return ss.str();
+        }
+    };
+}};
+
+def template SveIntrlvMicroopExecDeclare {{
+    template  // explicit instantiation of execute() for one element type
+    Fault %(class_name)s<%(targs)s>::execute(
+            ExecContext *, Trace::InstRecord *) const;
+}};
+
+def template SveIntrlvMicroopExecute {{
+    template <class Element>
+    Fault %(class_name)s<Element>::execute(ExecContext *xc,
+            Trace::InstRecord *traceData) const
+    {
+        Fault fault = NoFault;
+        %(op_decl)s;
+        %(op_rd)s;
+        // The microop body is whatever snippet is substituted for 'code'.
+        %(code)s;
+        if (fault == NoFault)  // write back only when no fault was raised
+        {
+            %(op_wb)s;
+        }
+
+        return fault;
+    }
+}};
diff --git a/src/arch/arm/registers.hh b/src/arch/arm/registers.hh
index 3790d9d..4a8e960 100644
--- a/src/arch/arm/registers.hh
+++ b/src/arch/arm/registers.hh
@@ -85,15 +85,20 @@
 const int NumVecV8ArchRegs  = 32;
 const int NumVecSpecialRegs = 8;
 
+const int NumVecIntrlvRegs = 4;
 const int NumIntRegs = NUM_INTREGS;
 const int NumFloatRegs = NumFloatV8ArchRegs + NumFloatSpecialRegs;
-const int NumVecRegs = NumVecV8ArchRegs + NumVecSpecialRegs;
+const int NumVecRegs = NumVecV8ArchRegs + NumVecSpecialRegs + NumVecIntrlvRegs;
 const int VECREG_UREG0 = 32;
 const int NumVecPredRegs = 18;  // P0-P15, FFR, UREG0
 const int PREDREG_FFR = 16;
 const int PREDREG_UREG0 = 17;
 const int NumCCRegs = NUM_CCREGS;
 const int NumMiscRegs = NUM_MISCREGS;
+const int INTRLVREG0 = NumVecV8ArchRegs + NumVecSpecialRegs;  // 32 + 8
+const int INTRLVREG1 = INTRLVREG0 + 1;  // SVE (de)interleave temporaries,
+const int INTRLVREG2 = INTRLVREG0 + 2;  // placed after the architectural
+const int INTRLVREG3 = INTRLVREG0 + 3;  // and special vector registers
 
 #define ISA_HAS_CC_REGS