| /* |
| * Copyright (c) 2010-2014, 2020 ARM Limited |
| * All rights reserved |
| * |
| * The license below extends only to copyright in the software and shall |
| * not be construed as granting a license to any other intellectual |
| * property including but not limited to intellectual property relating |
| * to a hardware implementation of the functionality of the software |
| * licensed hereunder. You may use the software subject to the license |
| * terms below provided that you ensure that this notice is replicated |
| * unmodified and in its entirety in all distributions of the software, |
| * modified or unmodified, in source code or in binary form. |
| * |
| * Copyright (c) 2007-2008 The Florida State University |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are |
| * met: redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer; |
| * redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution; |
| * neither the name of the copyright holders nor the names of its |
| * contributors may be used to endorse or promote products derived from |
| * this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include "arch/arm/insts/macromem.hh" |
| |
| #include <sstream> |
| |
| #include "arch/arm/generated/decoder.hh" |
| #include "arch/arm/insts/neon64_mem.hh" |
| #include "base/compiler.hh" |
| |
| namespace gem5 |
| { |
| |
| using namespace ArmISAInst; |
| |
| namespace ArmISA |
| { |
| |
| MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst, |
| OpClass __opClass, IntRegIndex rn, |
| bool index, bool up, bool user, bool writeback, |
| bool load, uint32_t reglist) : |
| PredMacroOp(mnem, machInst, __opClass) |
| { |
| uint32_t regs = reglist; |
| uint32_t ones = number_of_ones(reglist); |
| uint32_t mem_ops = ones; |
| |
    // Copy the base address register if we overwrite it, or if this
    // instruction is basically a no-op (we have to do something).
| bool copy_base = (bits(reglist, rn) && load) || !ones; |
    bool force_user = user && !bits(reglist, 15);
    bool exception_ret = user && bits(reglist, 15);
| bool pc_temp = load && writeback && bits(reglist, 15); |
| |
| if (!ones) { |
| numMicroops = 1; |
| } else if (load) { |
| numMicroops = ((ones + 1) / 2) |
| + ((ones % 2 == 0 && exception_ret) ? 1 : 0) |
| + (copy_base ? 1 : 0) |
| + (writeback? 1 : 0) |
| + (pc_temp ? 1 : 0); |
| } else { |
| numMicroops = ones + (writeback ? 1 : 0); |
| } |
| |
| microOps = new StaticInstPtr[numMicroops]; |
| |
| uint32_t addr = 0; |
| |
| if (!up) |
| addr = (ones << 2) - 4; |
| |
| if (!index) |
| addr += 4; |
| |
| StaticInstPtr *uop = microOps; |
| |
| // Add 0 to Rn and stick it in ureg0. |
| // This is equivalent to a move. |
| if (copy_base) |
| *uop++ = new MicroAddiUop(machInst, INTREG_UREG0, rn, 0); |
| |
| unsigned reg = 0; |
| while (mem_ops != 0) { |
| // Do load operations in pairs if possible |
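        // The PC load of an exception return is never folded into a pair;
        // it is handled by the special-case single-load path below.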
        if (load && mem_ops >= 2 &&
            !(mem_ops == 2 && bits(regs, INTREG_PC) && exception_ret)) {
| // 64-bit memory operation |
| // Find 2 set register bits (clear them after finding) |
| unsigned reg_idx1; |
| unsigned reg_idx2; |
| |
| // Find the first register |
| while (!bits(regs, reg)) reg++; |
| replaceBits(regs, reg, 0); |
| reg_idx1 = force_user ? intRegInMode(MODE_USER, reg) : reg; |
| |
| // Find the second register |
| while (!bits(regs, reg)) reg++; |
| replaceBits(regs, reg, 0); |
| reg_idx2 = force_user ? intRegInMode(MODE_USER, reg) : reg; |
| |
| // Load into temp reg if necessary |
| if (reg_idx2 == INTREG_PC && pc_temp) |
| reg_idx2 = INTREG_UREG1; |
| |
| // Actually load both registers from memory |
| *uop = new MicroLdr2Uop(machInst, reg_idx1, reg_idx2, |
| copy_base ? INTREG_UREG0 : rn, up, addr); |
| |
| if (!writeback && reg_idx2 == INTREG_PC) { |
                // Loading the PC with no writeback makes this micro-op the
                // branch; set the appropriate control flags.
| (*uop)->setFlag(StaticInst::IsControl); |
| (*uop)->setFlag(StaticInst::IsIndirectControl); |
| |
| if (!(condCode == COND_AL || condCode == COND_UC)) |
| (*uop)->setFlag(StaticInst::IsCondControl); |
| else |
| (*uop)->setFlag(StaticInst::IsUncondControl); |
| } |
| |
| if (up) addr += 8; |
| else addr -= 8; |
| mem_ops -= 2; |
| } else { |
| // 32-bit memory operation |
| // Find register for operation |
| unsigned reg_idx; |
| while (!bits(regs, reg)) reg++; |
| replaceBits(regs, reg, 0); |
| reg_idx = force_user ? intRegInMode(MODE_USER, reg) : reg; |
| |
| if (load) { |
| if (writeback && reg_idx == INTREG_PC) { |
| // If this instruction changes the PC and performs a |
| // writeback, ensure the pc load/branch is the last uop. |
| // Load into a temp reg here. |
| *uop = new MicroLdrUop(machInst, INTREG_UREG1, |
| copy_base ? INTREG_UREG0 : rn, up, addr); |
| } else if (reg_idx == INTREG_PC && exception_ret) { |
| // Special handling for exception return |
| *uop = new MicroLdrRetUop(machInst, reg_idx, |
| copy_base ? INTREG_UREG0 : rn, up, addr); |
| } else { |
| // standard single load uop |
| *uop = new MicroLdrUop(machInst, reg_idx, |
| copy_base ? INTREG_UREG0 : rn, up, addr); |
| } |
| |
| // Loading pc as last operation? Set appropriate flags. |
| if (!writeback && reg_idx == INTREG_PC) { |
| (*uop)->setFlag(StaticInst::IsControl); |
| (*uop)->setFlag(StaticInst::IsIndirectControl); |
| |
| if (!(condCode == COND_AL || condCode == COND_UC)) |
| (*uop)->setFlag(StaticInst::IsCondControl); |
| else |
| (*uop)->setFlag(StaticInst::IsUncondControl); |
| } |
| } else { |
| *uop = new MicroStrUop(machInst, reg_idx, rn, up, addr); |
| } |
| |
| if (up) addr += 4; |
| else addr -= 4; |
| --mem_ops; |
| } |
| |
| // Load/store micro-op generated, go to next uop |
| ++uop; |
| } |
| |
| if (writeback && ones) { |
| // Perform writeback uop operation |
| if (up) |
| *uop++ = new MicroAddiUop(machInst, rn, rn, ones * 4); |
| else |
| *uop++ = new MicroSubiUop(machInst, rn, rn, ones * 4); |
| |
| // Write PC after address writeback? |
| if (pc_temp) { |
| if (exception_ret) { |
| *uop = new MicroUopRegMovRet(machInst, 0, INTREG_UREG1); |
| } else { |
| *uop = new MicroUopRegMov(machInst, INTREG_PC, INTREG_UREG1); |
| } |
| (*uop)->setFlag(StaticInst::IsControl); |
| (*uop)->setFlag(StaticInst::IsIndirectControl); |
| |
| if (!(condCode == COND_AL || condCode == COND_UC)) |
| (*uop)->setFlag(StaticInst::IsCondControl); |
| else |
| (*uop)->setFlag(StaticInst::IsUncondControl); |
| |
| if (rn == INTREG_SP) |
| (*uop)->setFlag(StaticInst::IsReturn); |
| |
| ++uop; |
| } |
| } |
| |
| --uop; |
| (*uop)->setLastMicroop(); |
| microOps[0]->setFirstMicroop(); |
| |
| /* Take the control flags from the last microop for the macroop */ |
| if ((*uop)->isControl()) |
| setFlag(StaticInst::IsControl); |
| if ((*uop)->isCondCtrl()) |
| setFlag(StaticInst::IsCondControl); |
| if ((*uop)->isUncondCtrl()) |
| setFlag(StaticInst::IsUncondControl); |
| if ((*uop)->isIndirectCtrl()) |
| setFlag(StaticInst::IsIndirectControl); |
| if ((*uop)->isReturn()) |
| setFlag(StaticInst::IsReturn); |
| |
| for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) { |
| (*uop)->setDelayedCommit(); |
| } |
| } |
| |
| PairMemOp::PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, |
| uint32_t size, bool fp, bool load, bool noAlloc, |
| bool signExt, bool exclusive, bool acrel, |
| int64_t imm, AddrMode mode, |
| IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2) : |
| PredMacroOp(mnem, machInst, __opClass) |
| { |
| bool post = (mode == AddrMd_PostIndex); |
| bool writeback = (mode != AddrMd_Offset); |
| |
| if (load) { |
| // Use integer rounding to round up loads of size 4 |
| numMicroops = (post ? 0 : 1) + ((size + 4) / 8) + (writeback ? 1 : 0); |
| } else { |
| numMicroops = (post ? 0 : 1) + (size / 4) + (writeback ? 1 : 0); |
| } |
| microOps = new StaticInstPtr[numMicroops]; |
| |
| StaticInstPtr *uop = microOps; |
| |
| rn = makeSP(rn); |
| |
| if (!post) { |
| *uop++ = new MicroAddXiSpAlignUop(machInst, INTREG_UREG0, rn, |
| post ? 0 : imm); |
| } |
| |
| if (fp) { |
| if (size == 16) { |
| if (load) { |
| *uop++ = new MicroLdFp16Uop(machInst, rt, |
| post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); |
| *uop++ = new MicroLdFp16Uop(machInst, rt2, |
| post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel); |
| } else { |
| *uop++ = new MicroStrQBFpXImmUop(machInst, rt, |
| post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); |
| *uop++ = new MicroStrQTFpXImmUop(machInst, rt, |
| post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); |
| *uop++ = new MicroStrQBFpXImmUop(machInst, rt2, |
| post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel); |
| *uop++ = new MicroStrQTFpXImmUop(machInst, rt2, |
| post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel); |
| } |
| } else if (size == 8) { |
| if (load) { |
| *uop++ = new MicroLdPairFp8Uop(machInst, rt, rt2, |
| post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); |
| } else { |
| *uop++ = new MicroStrFpXImmUop(machInst, rt, |
| post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); |
| *uop++ = new MicroStrFpXImmUop(machInst, rt2, |
| post ? rn : INTREG_UREG0, 8, noAlloc, exclusive, acrel); |
| } |
| } else if (size == 4) { |
| if (load) { |
| *uop++ = new MicroLdrDFpXImmUop(machInst, rt, rt2, |
| post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); |
| } else { |
| *uop++ = new MicroStrDFpXImmUop(machInst, rt, rt2, |
| post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); |
| } |
| } |
| } else { |
| if (size == 8) { |
| if (load) { |
| *uop++ = new MicroLdPairUop(machInst, rt, rt2, |
| post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); |
| } else { |
| *uop++ = new MicroStrXImmUop(machInst, rt, post ? rn : INTREG_UREG0, |
| 0, noAlloc, exclusive, acrel); |
| *uop++ = new MicroStrXImmUop(machInst, rt2, post ? rn : INTREG_UREG0, |
| size, noAlloc, exclusive, acrel); |
| } |
| } else if (size == 4) { |
| if (load) { |
| if (signExt) { |
| *uop++ = new MicroLdrDSXImmUop(machInst, rt, rt2, |
| post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); |
| } else { |
| *uop++ = new MicroLdrDUXImmUop(machInst, rt, rt2, |
| post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); |
| } |
| } else { |
| *uop++ = new MicroStrDXImmUop(machInst, rt, rt2, |
| post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); |
| } |
| } |
| } |
| |
| if (writeback) { |
| *uop++ = new MicroAddXiUop(machInst, rn, post ? rn : INTREG_UREG0, |
| post ? imm : 0); |
| } |
| |
    assert(uop == &microOps[numMicroops]);
| (*--uop)->setLastMicroop(); |
| microOps[0]->setFirstMicroop(); |
| |
| for (StaticInstPtr *curUop = microOps; |
| !(*curUop)->isLastMicroop(); curUop++) { |
| (*curUop)->setDelayedCommit(); |
| } |
| } |
| |
| BigFpMemImmOp::BigFpMemImmOp(const char *mnem, ExtMachInst machInst, |
| OpClass __opClass, bool load, IntRegIndex dest, |
| IntRegIndex base, int64_t imm) : |
| PredMacroOp(mnem, machInst, __opClass) |
| { |
| numMicroops = load ? 1 : 2; |
| microOps = new StaticInstPtr[numMicroops]; |
| |
| StaticInstPtr *uop = microOps; |
| |
| if (load) { |
| *uop = new MicroLdFp16Uop(machInst, dest, base, imm); |
| } else { |
| *uop = new MicroStrQBFpXImmUop(machInst, dest, base, imm); |
| (*uop)->setDelayedCommit(); |
| *++uop = new MicroStrQTFpXImmUop(machInst, dest, base, imm); |
| } |
| (*uop)->setLastMicroop(); |
| microOps[0]->setFirstMicroop(); |
| } |
| |
| BigFpMemPostOp::BigFpMemPostOp(const char *mnem, ExtMachInst machInst, |
| OpClass __opClass, bool load, IntRegIndex dest, |
| IntRegIndex base, int64_t imm) : |
| PredMacroOp(mnem, machInst, __opClass) |
| { |
| numMicroops = load ? 2 : 3; |
| microOps = new StaticInstPtr[numMicroops]; |
| |
| StaticInstPtr *uop = microOps; |
| |
| if (load) { |
| *uop++ = new MicroLdFp16Uop(machInst, dest, base, 0); |
| } else { |
        *uop++ = new MicroStrQBFpXImmUop(machInst, dest, base, 0);
| *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, 0); |
| } |
| *uop = new MicroAddXiUop(machInst, base, base, imm); |
| (*uop)->setLastMicroop(); |
| microOps[0]->setFirstMicroop(); |
| |
| for (StaticInstPtr *curUop = microOps; |
| !(*curUop)->isLastMicroop(); curUop++) { |
| (*curUop)->setDelayedCommit(); |
| } |
| } |
| |
| BigFpMemPreOp::BigFpMemPreOp(const char *mnem, ExtMachInst machInst, |
| OpClass __opClass, bool load, IntRegIndex dest, |
| IntRegIndex base, int64_t imm) : |
| PredMacroOp(mnem, machInst, __opClass) |
| { |
| numMicroops = load ? 2 : 3; |
| microOps = new StaticInstPtr[numMicroops]; |
| |
| StaticInstPtr *uop = microOps; |
| |
| if (load) { |
| *uop++ = new MicroLdFp16Uop(machInst, dest, base, imm); |
| } else { |
| *uop++ = new MicroStrQBFpXImmUop(machInst, dest, base, imm); |
| *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, imm); |
| } |
| *uop = new MicroAddXiUop(machInst, base, base, imm); |
| (*uop)->setLastMicroop(); |
| microOps[0]->setFirstMicroop(); |
| |
| for (StaticInstPtr *curUop = microOps; |
| !(*curUop)->isLastMicroop(); curUop++) { |
| (*curUop)->setDelayedCommit(); |
| } |
| } |
| |
| BigFpMemRegOp::BigFpMemRegOp(const char *mnem, ExtMachInst machInst, |
| OpClass __opClass, bool load, IntRegIndex dest, |
| IntRegIndex base, IntRegIndex offset, |
| ArmExtendType type, int64_t imm) : |
| PredMacroOp(mnem, machInst, __opClass) |
| { |
| numMicroops = load ? 1 : 2; |
| microOps = new StaticInstPtr[numMicroops]; |
| |
| StaticInstPtr *uop = microOps; |
| |
| if (load) { |
| *uop = new MicroLdFp16RegUop(machInst, dest, base, |
| offset, type, imm); |
| } else { |
| *uop = new MicroStrQBFpXRegUop(machInst, dest, base, |
| offset, type, imm); |
| (*uop)->setDelayedCommit(); |
| *++uop = new MicroStrQTFpXRegUop(machInst, dest, base, |
| offset, type, imm); |
| } |
| |
| (*uop)->setLastMicroop(); |
| microOps[0]->setFirstMicroop(); |
| } |
| |
| BigFpMemLitOp::BigFpMemLitOp(const char *mnem, ExtMachInst machInst, |
| OpClass __opClass, IntRegIndex dest, |
| int64_t imm) : |
| PredMacroOp(mnem, machInst, __opClass) |
| { |
| numMicroops = 1; |
| microOps = new StaticInstPtr[numMicroops]; |
| |
| microOps[0] = new MicroLdFp16LitUop(machInst, dest, imm); |
| microOps[0]->setLastMicroop(); |
| microOps[0]->setFirstMicroop(); |
| } |
| |
| VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, |
| unsigned elems, RegIndex rn, RegIndex vd, unsigned regs, |
| unsigned inc, uint32_t size, uint32_t align, RegIndex rm) : |
| PredMacroOp(mnem, machInst, __opClass) |
| { |
| assert(regs > 0 && regs <= 4); |
| assert(regs % elems == 0); |
| |
| numMicroops = (regs > 2) ? 2 : 1; |
| bool wb = (rm != 15); |
| bool deinterleave = (elems > 1); |
| |
| if (wb) numMicroops++; |
| if (deinterleave) numMicroops += (regs / elems); |
| microOps = new StaticInstPtr[numMicroops]; |
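    // For illustration, a writeback VLD2 loading four D registers
    // (regs == 4, elems == 2) takes 2 memory micro-ops, 1 writeback
    // micro-op and 2 deinterleave micro-ops.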
| |
| RegIndex rMid = deinterleave ? VecSpecialElem : vd * 2; |
| |
| uint32_t noAlign = 0; |
| |
| unsigned uopIdx = 0; |
| switch (regs) { |
| case 4: |
| microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>( |
| size, machInst, rMid, rn, 0, align); |
| microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>( |
| size, machInst, rMid + 4, rn, 16, noAlign); |
| break; |
| case 3: |
| microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>( |
| size, machInst, rMid, rn, 0, align); |
| microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>( |
| size, machInst, rMid + 4, rn, 16, noAlign); |
| break; |
| case 2: |
| microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>( |
| size, machInst, rMid, rn, 0, align); |
| break; |
| case 1: |
| microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>( |
| size, machInst, rMid, rn, 0, align); |
| break; |
| default: |
| // Unknown number of registers |
| microOps[uopIdx++] = new Unknown(machInst); |
| } |
| if (wb) { |
| if (rm != 15 && rm != 13) { |
| microOps[uopIdx++] = |
| new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL); |
| } else { |
| microOps[uopIdx++] = |
| new MicroAddiUop(machInst, rn, rn, regs * 8); |
| } |
| } |
| if (deinterleave) { |
| switch (elems) { |
| case 4: |
| assert(regs == 4); |
| microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>( |
| size, machInst, vd * 2, rMid, inc * 2); |
| break; |
| case 3: |
| assert(regs == 3); |
| microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>( |
| size, machInst, vd * 2, rMid, inc * 2); |
| break; |
| case 2: |
| assert(regs == 4 || regs == 2); |
| if (regs == 4) { |
| microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>( |
| size, machInst, vd * 2, rMid, inc * 2); |
| microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>( |
| size, machInst, vd * 2 + 2, rMid + 4, inc * 2); |
| } else { |
| microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>( |
| size, machInst, vd * 2, rMid, inc * 2); |
| } |
| break; |
| default: |
| // Bad number of elements to deinterleave |
| microOps[uopIdx++] = new Unknown(machInst); |
| } |
| } |
| assert(uopIdx == numMicroops); |
| |
| for (unsigned i = 0; i < numMicroops - 1; i++) { |
| MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get()); |
| assert(uopPtr); |
| uopPtr->setDelayedCommit(); |
| } |
| microOps[0]->setFirstMicroop(); |
| microOps[numMicroops - 1]->setLastMicroop(); |
| } |
| |
| VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst, |
| OpClass __opClass, bool all, unsigned elems, |
| RegIndex rn, RegIndex vd, unsigned regs, |
| unsigned inc, uint32_t size, uint32_t align, |
| RegIndex rm, unsigned lane) : |
| PredMacroOp(mnem, machInst, __opClass) |
| { |
| assert(regs > 0 && regs <= 4); |
| assert(regs % elems == 0); |
| |
| unsigned eBytes = (1 << size); |
| unsigned loadSize = eBytes * elems; |
| [[maybe_unused]] unsigned loadRegs = |
| (loadSize + sizeof(uint32_t) - 1) / sizeof(uint32_t); |
| |
| assert(loadRegs > 0 && loadRegs <= 4); |
| |
| numMicroops = 1; |
| bool wb = (rm != 15); |
| |
| if (wb) numMicroops++; |
| numMicroops += (regs / elems); |
| microOps = new StaticInstPtr[numMicroops]; |
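    // For illustration, a single-lane VLD4 of 16-bit elements (elems == 4,
    // eBytes == 2) does one 8-byte load into the scratch register, an
    // optional writeback, and a single unpack micro-op.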
| |
| RegIndex ufp0 = VecSpecialElem; |
| |
| unsigned uopIdx = 0; |
| switch (loadSize) { |
| case 1: |
| microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>( |
| machInst, ufp0, rn, 0, align); |
| break; |
| case 2: |
| if (eBytes == 2) { |
| microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>( |
| machInst, ufp0, rn, 0, align); |
| } else { |
| microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>( |
| machInst, ufp0, rn, 0, align); |
| } |
| break; |
| case 3: |
| microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>( |
| machInst, ufp0, rn, 0, align); |
| break; |
| case 4: |
| switch (eBytes) { |
| case 1: |
| microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>( |
| machInst, ufp0, rn, 0, align); |
| break; |
| case 2: |
| microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>( |
| machInst, ufp0, rn, 0, align); |
| break; |
| case 4: |
| microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>( |
| machInst, ufp0, rn, 0, align); |
| break; |
| } |
| break; |
| case 6: |
| microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>( |
| machInst, ufp0, rn, 0, align); |
| break; |
| case 8: |
| switch (eBytes) { |
| case 2: |
| microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>( |
| machInst, ufp0, rn, 0, align); |
| break; |
| case 4: |
| microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>( |
| machInst, ufp0, rn, 0, align); |
| break; |
| } |
| break; |
| case 12: |
| microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>( |
| machInst, ufp0, rn, 0, align); |
| break; |
| case 16: |
| microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>( |
| machInst, ufp0, rn, 0, align); |
| break; |
| default: |
| // Unrecognized load size |
| microOps[uopIdx++] = new Unknown(machInst); |
| } |
| if (wb) { |
| if (rm != 15 && rm != 13) { |
| microOps[uopIdx++] = |
| new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL); |
| } else { |
| microOps[uopIdx++] = |
| new MicroAddiUop(machInst, rn, rn, loadSize); |
| } |
| } |
| switch (elems) { |
| case 4: |
| assert(regs == 4); |
| switch (size) { |
| case 0: |
| if (all) { |
| microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>( |
| machInst, vd * 2, ufp0, inc * 2); |
| } else { |
| microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>( |
| machInst, vd * 2, ufp0, inc * 2, lane); |
| } |
| break; |
| case 1: |
| if (all) { |
| microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>( |
| machInst, vd * 2, ufp0, inc * 2); |
| } else { |
| microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>( |
| machInst, vd * 2, ufp0, inc * 2, lane); |
| } |
| break; |
| case 2: |
| if (all) { |
| microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>( |
| machInst, vd * 2, ufp0, inc * 2); |
| } else { |
| microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>( |
| machInst, vd * 2, ufp0, inc * 2, lane); |
| } |
| break; |
| default: |
| // Bad size |
| microOps[uopIdx++] = new Unknown(machInst); |
| break; |
| } |
| break; |
| case 3: |
| assert(regs == 3); |
| switch (size) { |
| case 0: |
| if (all) { |
| microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>( |
| machInst, vd * 2, ufp0, inc * 2); |
| } else { |
| microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>( |
| machInst, vd * 2, ufp0, inc * 2, lane); |
| } |
| break; |
| case 1: |
| if (all) { |
| microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>( |
| machInst, vd * 2, ufp0, inc * 2); |
| } else { |
| microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>( |
| machInst, vd * 2, ufp0, inc * 2, lane); |
| } |
| break; |
| case 2: |
| if (all) { |
| microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>( |
| machInst, vd * 2, ufp0, inc * 2); |
| } else { |
| microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>( |
| machInst, vd * 2, ufp0, inc * 2, lane); |
| } |
| break; |
| default: |
| // Bad size |
| microOps[uopIdx++] = new Unknown(machInst); |
| break; |
| } |
| break; |
| case 2: |
| assert(regs == 2); |
| assert(loadRegs <= 2); |
| switch (size) { |
| case 0: |
| if (all) { |
| microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>( |
| machInst, vd * 2, ufp0, inc * 2); |
| } else { |
| microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>( |
| machInst, vd * 2, ufp0, inc * 2, lane); |
| } |
| break; |
| case 1: |
| if (all) { |
| microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>( |
| machInst, vd * 2, ufp0, inc * 2); |
| } else { |
| microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>( |
| machInst, vd * 2, ufp0, inc * 2, lane); |
| } |
| break; |
| case 2: |
| if (all) { |
| microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>( |
| machInst, vd * 2, ufp0, inc * 2); |
| } else { |
| microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>( |
| machInst, vd * 2, ufp0, inc * 2, lane); |
| } |
| break; |
| default: |
| // Bad size |
| microOps[uopIdx++] = new Unknown(machInst); |
| break; |
| } |
| break; |
| case 1: |
| assert(regs == 1 || (all && regs == 2)); |
| assert(loadRegs <= 2); |
| for (unsigned offset = 0; offset < regs; offset++) { |
| switch (size) { |
| case 0: |
| if (all) { |
| microOps[uopIdx++] = |
| new MicroUnpackAllNeon2to2Uop<uint8_t>( |
| machInst, (vd + offset) * 2, ufp0, inc * 2); |
| } else { |
| microOps[uopIdx++] = |
| new MicroUnpackNeon2to2Uop<uint8_t>( |
| machInst, (vd + offset) * 2, ufp0, inc * 2, lane); |
| } |
| break; |
| case 1: |
| if (all) { |
| microOps[uopIdx++] = |
| new MicroUnpackAllNeon2to2Uop<uint16_t>( |
| machInst, (vd + offset) * 2, ufp0, inc * 2); |
| } else { |
| microOps[uopIdx++] = |
| new MicroUnpackNeon2to2Uop<uint16_t>( |
| machInst, (vd + offset) * 2, ufp0, inc * 2, lane); |
| } |
| break; |
| case 2: |
| if (all) { |
| microOps[uopIdx++] = |
| new MicroUnpackAllNeon2to2Uop<uint32_t>( |
| machInst, (vd + offset) * 2, ufp0, inc * 2); |
| } else { |
| microOps[uopIdx++] = |
| new MicroUnpackNeon2to2Uop<uint32_t>( |
| machInst, (vd + offset) * 2, ufp0, inc * 2, lane); |
| } |
| break; |
| default: |
| // Bad size |
| microOps[uopIdx++] = new Unknown(machInst); |
| break; |
| } |
| } |
| break; |
| default: |
| // Bad number of elements to unpack |
| microOps[uopIdx++] = new Unknown(machInst); |
| } |
| assert(uopIdx == numMicroops); |
| |
| for (unsigned i = 0; i < numMicroops - 1; i++) { |
| MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get()); |
| assert(uopPtr); |
| uopPtr->setDelayedCommit(); |
| } |
| microOps[0]->setFirstMicroop(); |
| microOps[numMicroops - 1]->setLastMicroop(); |
| } |
| |
| VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, |
| unsigned elems, RegIndex rn, RegIndex vd, unsigned regs, |
| unsigned inc, uint32_t size, uint32_t align, RegIndex rm) : |
| PredMacroOp(mnem, machInst, __opClass) |
| { |
| assert(regs > 0 && regs <= 4); |
| assert(regs % elems == 0); |
| |
| numMicroops = (regs > 2) ? 2 : 1; |
| bool wb = (rm != 15); |
| bool interleave = (elems > 1); |
| |
| if (wb) numMicroops++; |
| if (interleave) numMicroops += (regs / elems); |
| microOps = new StaticInstPtr[numMicroops]; |
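    // For illustration, a writeback VST2 storing four D registers
    // (regs == 4, elems == 2) takes 2 interleave micro-ops, 2 store
    // micro-ops and 1 writeback micro-op.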
| |
| uint32_t noAlign = 0; |
| |
| RegIndex rMid = interleave ? VecSpecialElem : vd * 2; |
| |
| unsigned uopIdx = 0; |
| if (interleave) { |
| switch (elems) { |
| case 4: |
| assert(regs == 4); |
| microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>( |
| size, machInst, rMid, vd * 2, inc * 2); |
| break; |
| case 3: |
| assert(regs == 3); |
| microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>( |
| size, machInst, rMid, vd * 2, inc * 2); |
| break; |
| case 2: |
| assert(regs == 4 || regs == 2); |
| if (regs == 4) { |
| microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>( |
| size, machInst, rMid, vd * 2, inc * 2); |
| microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>( |
| size, machInst, rMid + 4, vd * 2 + 2, inc * 2); |
| } else { |
| microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>( |
| size, machInst, rMid, vd * 2, inc * 2); |
| } |
| break; |
| default: |
| // Bad number of elements to interleave |
| microOps[uopIdx++] = new Unknown(machInst); |
| } |
| } |
| switch (regs) { |
| case 4: |
| microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>( |
| size, machInst, rMid, rn, 0, align); |
| microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>( |
| size, machInst, rMid + 4, rn, 16, noAlign); |
| break; |
| case 3: |
| microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>( |
| size, machInst, rMid, rn, 0, align); |
| microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>( |
| size, machInst, rMid + 4, rn, 16, noAlign); |
| break; |
| case 2: |
| microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>( |
| size, machInst, rMid, rn, 0, align); |
| break; |
| case 1: |
| microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>( |
| size, machInst, rMid, rn, 0, align); |
| break; |
| default: |
| // Unknown number of registers |
| microOps[uopIdx++] = new Unknown(machInst); |
| } |
| if (wb) { |
| if (rm != 15 && rm != 13) { |
| microOps[uopIdx++] = |
| new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL); |
| } else { |
| microOps[uopIdx++] = |
| new MicroAddiUop(machInst, rn, rn, regs * 8); |
| } |
| } |
| assert(uopIdx == numMicroops); |
| |
| for (unsigned i = 0; i < numMicroops - 1; i++) { |
| MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get()); |
| assert(uopPtr); |
| uopPtr->setDelayedCommit(); |
| } |
| microOps[0]->setFirstMicroop(); |
| microOps[numMicroops - 1]->setLastMicroop(); |
| } |
| |
| VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst, |
| OpClass __opClass, bool all, unsigned elems, |
| RegIndex rn, RegIndex vd, unsigned regs, |
| unsigned inc, uint32_t size, uint32_t align, |
| RegIndex rm, unsigned lane) : |
| PredMacroOp(mnem, machInst, __opClass) |
| { |
| assert(!all); |
| assert(regs > 0 && regs <= 4); |
| assert(regs % elems == 0); |
| |
| unsigned eBytes = (1 << size); |
| unsigned storeSize = eBytes * elems; |
| [[maybe_unused]] unsigned storeRegs = |
| (storeSize + sizeof(uint32_t) - 1) / sizeof(uint32_t); |
| |
| assert(storeRegs > 0 && storeRegs <= 4); |
| |
| numMicroops = 1; |
| bool wb = (rm != 15); |
| |
| if (wb) numMicroops++; |
| numMicroops += (regs / elems); |
| microOps = new StaticInstPtr[numMicroops]; |
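    // For illustration, a single-lane VST4 of 32-bit elements (elems == 4,
    // eBytes == 4) packs the lanes into the scratch register with one
    // micro-op, then issues a single 16-byte store plus an optional
    // writeback.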
| |
| RegIndex ufp0 = VecSpecialElem; |
| |
| unsigned uopIdx = 0; |
| switch (elems) { |
| case 4: |
| assert(regs == 4); |
| switch (size) { |
| case 0: |
| microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>( |
| machInst, ufp0, vd * 2, inc * 2, lane); |
| break; |
| case 1: |
| microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>( |
| machInst, ufp0, vd * 2, inc * 2, lane); |
| break; |
| case 2: |
| microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>( |
| machInst, ufp0, vd * 2, inc * 2, lane); |
| break; |
| default: |
| // Bad size |
| microOps[uopIdx++] = new Unknown(machInst); |
| break; |
| } |
| break; |
| case 3: |
| assert(regs == 3); |
| switch (size) { |
| case 0: |
| microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>( |
| machInst, ufp0, vd * 2, inc * 2, lane); |
| break; |
| case 1: |
| microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>( |
| machInst, ufp0, vd * 2, inc * 2, lane); |
| break; |
| case 2: |
| microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>( |
| machInst, ufp0, vd * 2, inc * 2, lane); |
| break; |
| default: |
| // Bad size |
| microOps[uopIdx++] = new Unknown(machInst); |
| break; |
| } |
| break; |
| case 2: |
| assert(regs == 2); |
| assert(storeRegs <= 2); |
| switch (size) { |
| case 0: |
| microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>( |
| machInst, ufp0, vd * 2, inc * 2, lane); |
| break; |
| case 1: |
| microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>( |
| machInst, ufp0, vd * 2, inc * 2, lane); |
| break; |
| case 2: |
| microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>( |
| machInst, ufp0, vd * 2, inc * 2, lane); |
| break; |
| default: |
| // Bad size |
| microOps[uopIdx++] = new Unknown(machInst); |
| break; |
| } |
| break; |
| case 1: |
| assert(regs == 1 || (all && regs == 2)); |
| assert(storeRegs <= 2); |
| for (unsigned offset = 0; offset < regs; offset++) { |
| switch (size) { |
| case 0: |
| microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>( |
| machInst, ufp0, (vd + offset) * 2, inc * 2, lane); |
| break; |
| case 1: |
| microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>( |
| machInst, ufp0, (vd + offset) * 2, inc * 2, lane); |
| break; |
| case 2: |
| microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>( |
| machInst, ufp0, (vd + offset) * 2, inc * 2, lane); |
| break; |
| default: |
| // Bad size |
| microOps[uopIdx++] = new Unknown(machInst); |
| break; |
| } |
| } |
| break; |
| default: |
| // Bad number of elements to unpack |
| microOps[uopIdx++] = new Unknown(machInst); |
| } |
| switch (storeSize) { |
| case 1: |
| microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>( |
| machInst, ufp0, rn, 0, align); |
| break; |
| case 2: |
| if (eBytes == 2) { |
| microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>( |
| machInst, ufp0, rn, 0, align); |
| } else { |
| microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>( |
| machInst, ufp0, rn, 0, align); |
| } |
| break; |
| case 3: |
| microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>( |
| machInst, ufp0, rn, 0, align); |
| break; |
| case 4: |
| switch (eBytes) { |
| case 1: |
| microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>( |
| machInst, ufp0, rn, 0, align); |
| break; |
| case 2: |
| microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>( |
| machInst, ufp0, rn, 0, align); |
| break; |
| case 4: |
| microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>( |
| machInst, ufp0, rn, 0, align); |
| break; |
| } |
| break; |
| case 6: |
| microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>( |
| machInst, ufp0, rn, 0, align); |
| break; |
| case 8: |
| switch (eBytes) { |
| case 2: |
| microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>( |
| machInst, ufp0, rn, 0, align); |
| break; |
| case 4: |
| microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>( |
| machInst, ufp0, rn, 0, align); |
| break; |
| } |
| break; |
| case 12: |
| microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>( |
| machInst, ufp0, rn, 0, align); |
| break; |
| case 16: |
| microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>( |
| machInst, ufp0, rn, 0, align); |
| break; |
| default: |
| // Bad store size |
| microOps[uopIdx++] = new Unknown(machInst); |
| } |
| if (wb) { |
| if (rm != 15 && rm != 13) { |
| microOps[uopIdx++] = |
| new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL); |
| } else { |
| microOps[uopIdx++] = |
| new MicroAddiUop(machInst, rn, rn, storeSize); |
| } |
| } |
| assert(uopIdx == numMicroops); |
| |
| for (unsigned i = 0; i < numMicroops - 1; i++) { |
| MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get()); |
| assert(uopPtr); |
| uopPtr->setDelayedCommit(); |
| } |
| microOps[0]->setFirstMicroop(); |
| microOps[numMicroops - 1]->setLastMicroop(); |
| } |
| |
| VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst, |
| OpClass __opClass, RegIndex rn, RegIndex vd, |
| RegIndex rm, uint8_t eSize, uint8_t dataSize, |
| uint8_t numStructElems, uint8_t numRegs, bool wb) : |
| PredMacroOp(mnem, machInst, __opClass) |
| { |
| RegIndex vx = NumVecV8ArchRegs; |
| RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); |
| bool baseIsSP = isSP((IntRegIndex) rnsp); |
| |
| numMicroops = wb ? 1 : 0; |
| |
| int totNumBytes = numRegs * dataSize / 8; |
| assert(totNumBytes <= 64); |
| |
| // The guiding principle here is that no more than 16 bytes can be |
| // transferred at a time |
| int numMemMicroops = totNumBytes / 16; |
| int residuum = totNumBytes % 16; |
| if (residuum) |
| ++numMemMicroops; |
| numMicroops += numMemMicroops; |
| |
| int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0); |
| numMicroops += numMarshalMicroops; |
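    // For illustration, an LD4 of four 128-bit registers (numRegs == 4,
    // dataSize == 128, 64 bytes in total) becomes 4 load micro-ops,
    // 2 deinterleave micro-ops and an optional writeback micro-op.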
| |
| microOps = new StaticInstPtr[numMicroops]; |
| unsigned uopIdx = 0; |
| uint32_t memaccessFlags = (MMU::ArmFlags)eSize | MMU::AllowUnaligned; |
| |
| int i = 0; |
| for (; i < numMemMicroops - 1; ++i) { |
| microOps[uopIdx++] = new MicroNeonLoad64( |
| machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, |
| baseIsSP, 16 /* accSize */, eSize); |
| } |
| microOps[uopIdx++] = new MicroNeonLoad64( |
| machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, |
| residuum ? residuum : 16 /* accSize */, eSize); |
| |
    // Writeback micro-op: the post-increment amount comes from "Rm", which
    // is either a 64-bit general register or, when encoded as '11111', an
    // immediate equal to the total number of bytes transferred (i.e. 8, 16,
    // 24, 32, 48 or 64)
| if (wb) { |
| if (rm != ((RegIndex) INTREG_X31)) { |
| microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, |
| UXTX, 0); |
| } else { |
| microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, |
| totNumBytes); |
| } |
| } |
| |
| for (int i = 0; i < numMarshalMicroops; ++i) { |
| switch(numRegs) { |
| case 1: microOps[uopIdx++] = new MicroDeintNeon64_1Reg( |
| machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize, |
| numStructElems, 1, i /* step */); |
| break; |
| case 2: microOps[uopIdx++] = new MicroDeintNeon64_2Reg( |
| machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize, |
| numStructElems, 2, i /* step */); |
| break; |
| case 3: microOps[uopIdx++] = new MicroDeintNeon64_3Reg( |
| machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize, |
| numStructElems, 3, i /* step */); |
| break; |
| case 4: microOps[uopIdx++] = new MicroDeintNeon64_4Reg( |
| machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize, |
| numStructElems, 4, i /* step */); |
| break; |
| default: panic("Invalid number of registers"); |
| } |
| |
| } |
| |
| assert(uopIdx == numMicroops); |
| |
| for (int i = 0; i < numMicroops - 1; ++i) { |
| microOps[i]->setDelayedCommit(); |
| } |
| microOps[0]->setFirstMicroop(); |
| microOps[numMicroops - 1]->setLastMicroop(); |
| } |
| |
| VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst, |
| OpClass __opClass, RegIndex rn, RegIndex vd, |
| RegIndex rm, uint8_t eSize, uint8_t dataSize, |
| uint8_t numStructElems, uint8_t numRegs, bool wb) : |
| PredMacroOp(mnem, machInst, __opClass) |
| { |
| RegIndex vx = NumVecV8ArchRegs; |
| RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); |
| bool baseIsSP = isSP((IntRegIndex) rnsp); |
| |
| numMicroops = wb ? 1 : 0; |
| |
| int totNumBytes = numRegs * dataSize / 8; |
| assert(totNumBytes <= 64); |
| |
| // The guiding principle here is that no more than 16 bytes can be |
| // transferred at a time |
| int numMemMicroops = totNumBytes / 16; |
| int residuum = totNumBytes % 16; |
| if (residuum) |
| ++numMemMicroops; |
| numMicroops += numMemMicroops; |
| |
| int numMarshalMicroops = totNumBytes > 32 ? 2 : 1; |
| numMicroops += numMarshalMicroops; |
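    // For illustration, an ST4 of four 128-bit registers (numRegs == 4,
    // dataSize == 128, 64 bytes in total) becomes 2 interleave micro-ops,
    // 4 store micro-ops and an optional writeback micro-op.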
| |
| microOps = new StaticInstPtr[numMicroops]; |
| unsigned uopIdx = 0; |
| |
| for (int i = 0; i < numMarshalMicroops; ++i) { |
| switch (numRegs) { |
| case 1: microOps[uopIdx++] = new MicroIntNeon64_1Reg( |
| machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize, |
| numStructElems, 1, i /* step */); |
| break; |
| case 2: microOps[uopIdx++] = new MicroIntNeon64_2Reg( |
| machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize, |
| numStructElems, 2, i /* step */); |
| break; |
| case 3: microOps[uopIdx++] = new MicroIntNeon64_3Reg( |
| machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize, |
| numStructElems, 3, i /* step */); |
| break; |
| case 4: microOps[uopIdx++] = new MicroIntNeon64_4Reg( |
| machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize, |
| numStructElems, 4, i /* step */); |
| break; |
| default: panic("Invalid number of registers"); |
| } |
| } |
| |
| uint32_t memaccessFlags = (MMU::ArmFlags)eSize | MMU::AllowUnaligned; |
| |
| int i = 0; |
| for (; i < numMemMicroops - 1; ++i) { |
| microOps[uopIdx++] = new MicroNeonStore64( |
| machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, |
| baseIsSP, 16 /* accSize */, eSize); |
| } |
| microOps[uopIdx++] = new MicroNeonStore64( |
| machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, |
| residuum ? residuum : 16 /* accSize */, eSize); |
| |
    // Writeback micro-op: the post-increment amount comes from "Rm", which
    // is either a 64-bit general register or, when encoded as '11111', an
    // immediate equal to the total number of bytes transferred (i.e. 8, 16,
    // 24, 32, 48 or 64)
| if (wb) { |
| if (rm != ((RegIndex) INTREG_X31)) { |
| microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, |
| UXTX, 0); |
| } else { |
| microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, |
| totNumBytes); |
| } |
| } |
| |
| assert(uopIdx == numMicroops); |
| |
| for (int i = 0; i < numMicroops - 1; i++) { |
| microOps[i]->setDelayedCommit(); |
| } |
| microOps[0]->setFirstMicroop(); |
| microOps[numMicroops - 1]->setLastMicroop(); |
| } |
| |
| VldSingleOp64::VldSingleOp64(const char *mnem, ExtMachInst machInst, |
| OpClass __opClass, RegIndex rn, RegIndex vd, |
| RegIndex rm, uint8_t eSize, uint8_t dataSize, |
| uint8_t numStructElems, uint8_t index, bool wb, |
| bool replicate) : |
| PredMacroOp(mnem, machInst, __opClass), |
| eSize(0), dataSize(0), numStructElems(0), index(0), |
| wb(false), replicate(false) |
| |
| { |
| RegIndex vx = NumVecV8ArchRegs; |
| RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); |
| bool baseIsSP = isSP((IntRegIndex) rnsp); |
| |
| numMicroops = wb ? 1 : 0; |
| |
| int eSizeBytes = 1 << eSize; |
| int totNumBytes = numStructElems * eSizeBytes; |
| assert(totNumBytes <= 64); |
| |
| // The guiding principle here is that no more than 16 bytes can be |
| // transferred at a time |
| int numMemMicroops = totNumBytes / 16; |
| int residuum = totNumBytes % 16; |
| if (residuum) |
| ++numMemMicroops; |
| numMicroops += numMemMicroops; |
| |
| int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 1 : 0); |
| numMicroops += numMarshalMicroops; |
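    // For illustration, an LD4 to a single lane of 32-bit elements
    // (numStructElems == 4, eSize == 2, 16 bytes in total) takes 1 load
    // micro-op, 2 unpack micro-ops and an optional writeback micro-op.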
| |
| microOps = new StaticInstPtr[numMicroops]; |
| unsigned uopIdx = 0; |
| |
| uint32_t memaccessFlags = (MMU::ArmFlags)eSize | MMU::AllowUnaligned; |
| |
| int i = 0; |
| for (; i < numMemMicroops - 1; ++i) { |
| microOps[uopIdx++] = new MicroNeonLoad64( |
| machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, |
| baseIsSP, 16 /* accSize */, eSize); |
| } |
| microOps[uopIdx++] = new MicroNeonLoad64( |
| machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, |
| residuum ? residuum : 16 /* accSize */, eSize); |
| |
    // Writeback micro-op: the post-increment amount comes from "Rm", which
    // is either a 64-bit general register or, when encoded as '11111', an
    // immediate equal to the total number of bytes transferred (i.e. 8, 16,
    // 24, 32, 48 or 64)
| if (wb) { |
| if (rm != ((RegIndex) INTREG_X31)) { |
| microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, |
| UXTX, 0); |
| } else { |
| microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, |
| totNumBytes); |
| } |
| } |
| |
| for (int i = 0; i < numMarshalMicroops; ++i) { |
| microOps[uopIdx++] = new MicroUnpackNeon64( |
| machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize, |
| numStructElems, index, i /* step */, replicate); |
| } |
| |
| assert(uopIdx == numMicroops); |
| |
| for (int i = 0; i < numMicroops - 1; i++) { |
| microOps[i]->setDelayedCommit(); |
| } |
| microOps[0]->setFirstMicroop(); |
| microOps[numMicroops - 1]->setLastMicroop(); |
| } |
| |
| VstSingleOp64::VstSingleOp64(const char *mnem, ExtMachInst machInst, |
| OpClass __opClass, RegIndex rn, RegIndex vd, |
| RegIndex rm, uint8_t eSize, uint8_t dataSize, |
| uint8_t numStructElems, uint8_t index, bool wb, |
| bool replicate) : |
| PredMacroOp(mnem, machInst, __opClass), |
| eSize(0), dataSize(0), numStructElems(0), index(0), |
| wb(false), replicate(false) |
| { |
| RegIndex vx = NumVecV8ArchRegs; |
| RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); |
| bool baseIsSP = isSP((IntRegIndex) rnsp); |
| |
| numMicroops = wb ? 1 : 0; |
| |
| int eSizeBytes = 1 << eSize; |
| int totNumBytes = numStructElems * eSizeBytes; |
| assert(totNumBytes <= 64); |
| |
| // The guiding principle here is that no more than 16 bytes can be |
| // transferred at a time |
| int numMemMicroops = totNumBytes / 16; |
| int residuum = totNumBytes % 16; |
| if (residuum) |
| ++numMemMicroops; |
| numMicroops += numMemMicroops; |
| |
| int numMarshalMicroops = totNumBytes > 32 ? 2 : 1; |
| numMicroops += numMarshalMicroops; |
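    // For illustration, an ST4 from a single lane of 32-bit elements
    // (numStructElems == 4, eSize == 2, 16 bytes in total) takes 1 pack
    // micro-op, 1 store micro-op and an optional writeback micro-op.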
| |
| microOps = new StaticInstPtr[numMicroops]; |
| unsigned uopIdx = 0; |
| |
| for (int i = 0; i < numMarshalMicroops; ++i) { |
| microOps[uopIdx++] = new MicroPackNeon64( |
| machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize, |
| numStructElems, index, i /* step */, replicate); |
| } |
| |
| uint32_t memaccessFlags = (MMU::ArmFlags)eSize | MMU::AllowUnaligned; |
| |
| int i = 0; |
| for (; i < numMemMicroops - 1; ++i) { |
| microOps[uopIdx++] = new MicroNeonStore64( |
| machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, |
            baseIsSP, 16 /* accSize */, eSize);
| } |
| microOps[uopIdx++] = new MicroNeonStore64( |
| machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, |
| residuum ? residuum : 16 /* accSize */, eSize); |
| |
    // Writeback micro-op: the post-increment amount comes from "Rm", which
    // is either a 64-bit general register or, when encoded as '11111', an
    // immediate equal to the total number of bytes transferred (i.e. 8, 16,
    // 24, 32, 48 or 64)
| if (wb) { |
| if (rm != ((RegIndex) INTREG_X31)) { |
| microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, |
| UXTX, 0); |
| } else { |
| microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, |
| totNumBytes); |
| } |
| } |
| |
| assert(uopIdx == numMicroops); |
| |
| for (int i = 0; i < numMicroops - 1; i++) { |
| microOps[i]->setDelayedCommit(); |
| } |
| microOps[0]->setFirstMicroop(); |
| microOps[numMicroops - 1]->setLastMicroop(); |
| } |
| |
| MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst, |
| OpClass __opClass, IntRegIndex rn, |
| RegIndex vd, bool single, bool up, |
| bool writeback, bool load, uint32_t offset) : |
| PredMacroOp(mnem, machInst, __opClass) |
| { |
| int i = 0; |
| |
| // The lowest order bit selects fldmx (set) or fldmd (clear). These seem |
| // to be functionally identical except that fldmx is deprecated. For now |
    // we'll assume they're otherwise interchangeable.
| int count = (single ? offset : (offset / 2)); |
| numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0); |
| microOps = new StaticInstPtr[numMicroops]; |
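    // For illustration, a vldmia of four double registers (offset == 8,
    // single == false) splits into eight word-sized load micro-ops (bottom
    // and top half of each double) plus an optional writeback micro-op.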
| |
| int64_t addr = 0; |
| |
| if (!up) |
| addr = 4 * offset; |
| |
| bool tempUp = up; |
| for (int j = 0; j < count; j++) { |
| if (load) { |
| if (single) { |
| microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn, |
| tempUp, addr); |
| } else { |
| microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn, |
| tempUp, addr); |
| microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp, |
| addr + (up ? 4 : -4)); |
| } |
| } else { |
| if (single) { |
| microOps[i++] = new MicroStrFpUop(machInst, vd++, rn, |
| tempUp, addr); |
| } else { |
| microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn, |
| tempUp, addr); |
| microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp, |
| addr + (up ? 4 : -4)); |
| } |
| } |
| if (!tempUp) { |
| addr -= (single ? 4 : 8); |
            // The micro-ops don't handle negative displacement, so once the
            // offset reaches zero, flip the direction and start adding.
| if (addr <= 0) { |
| tempUp = true; |
| addr = -addr; |
| } |
| } else { |
| addr += (single ? 4 : 8); |
| } |
| } |
| |
| if (writeback) { |
| if (up) { |
| microOps[i++] = |
| new MicroAddiUop(machInst, rn, rn, 4 * offset); |
| } else { |
| microOps[i++] = |
| new MicroSubiUop(machInst, rn, rn, 4 * offset); |
| } |
| } |
| |
| assert(numMicroops == i); |
| microOps[0]->setFirstMicroop(); |
| microOps[numMicroops - 1]->setLastMicroop(); |
| |
| for (StaticInstPtr *curUop = microOps; |
| !(*curUop)->isLastMicroop(); curUop++) { |
| MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get()); |
| assert(uopPtr); |
| uopPtr->setDelayedCommit(); |
| } |
| } |
| |
| std::string |
| MicroIntImmOp::generateDisassembly( |
| Addr pc, const loader::SymbolTable *symtab) const |
| { |
| std::stringstream ss; |
| printMnemonic(ss); |
| printIntReg(ss, ura); |
| ss << ", "; |
| printIntReg(ss, urb); |
| ss << ", "; |
| ccprintf(ss, "#%d", imm); |
| return ss.str(); |
| } |
| |
| std::string |
| MicroIntImmXOp::generateDisassembly( |
| Addr pc, const loader::SymbolTable *symtab) const |
| { |
| std::stringstream ss; |
| printMnemonic(ss); |
| printIntReg(ss, ura); |
| ss << ", "; |
| printIntReg(ss, urb); |
| ss << ", "; |
| ccprintf(ss, "#%d", imm); |
| return ss.str(); |
| } |
| |
| std::string |
| MicroSetPCCPSR::generateDisassembly( |
| Addr pc, const loader::SymbolTable *symtab) const |
| { |
| std::stringstream ss; |
| printMnemonic(ss); |
| ss << "[PC,CPSR]"; |
| return ss.str(); |
| } |
| |
| std::string |
| MicroIntRegXOp::generateDisassembly( |
| Addr pc, const loader::SymbolTable *symtab) const |
| { |
| std::stringstream ss; |
| printMnemonic(ss); |
| printIntReg(ss, ura); |
| ccprintf(ss, ", "); |
| printIntReg(ss, urb); |
| printExtendOperand(false, ss, (IntRegIndex)urc, type, shiftAmt); |
| return ss.str(); |
| } |
| |
| std::string |
| MicroIntMov::generateDisassembly( |
| Addr pc, const loader::SymbolTable *symtab) const |
| { |
| std::stringstream ss; |
| printMnemonic(ss); |
| printIntReg(ss, ura); |
| ss << ", "; |
| printIntReg(ss, urb); |
| return ss.str(); |
| } |
| |
| std::string |
| MicroIntOp::generateDisassembly( |
| Addr pc, const loader::SymbolTable *symtab) const |
| { |
| std::stringstream ss; |
| printMnemonic(ss); |
| printIntReg(ss, ura); |
| ss << ", "; |
| printIntReg(ss, urb); |
| ss << ", "; |
| printIntReg(ss, urc); |
| return ss.str(); |
| } |
| |
| std::string |
| MicroMemOp::generateDisassembly( |
| Addr pc, const loader::SymbolTable *symtab) const |
| { |
| std::stringstream ss; |
| printMnemonic(ss); |
| if (isFloating()) |
| printFloatReg(ss, ura); |
| else |
| printIntReg(ss, ura); |
| ss << ", ["; |
| printIntReg(ss, urb); |
| ss << ", "; |
| ccprintf(ss, "#%d", imm); |
| ss << "]"; |
| return ss.str(); |
| } |
| |
| std::string |
| MicroMemPairOp::generateDisassembly( |
| Addr pc, const loader::SymbolTable *symtab) const |
| { |
| std::stringstream ss; |
| printMnemonic(ss); |
| printIntReg(ss, dest); |
| ss << ","; |
| printIntReg(ss, dest2); |
| ss << ", ["; |
| printIntReg(ss, urb); |
| ss << ", "; |
| ccprintf(ss, "#%d", imm); |
| ss << "]"; |
| return ss.str(); |
| } |
| |
| } // namespace ArmISA |
| } // namespace gem5 |