| // -*- mode:c++ -*- |
| |
| // Copyright (c) 2010-2011, 2015, 2019 ARM Limited |
| // All rights reserved |
| // |
| // The license below extends only to copyright in the software and shall |
| // not be construed as granting a license to any other intellectual |
| // property including but not limited to intellectual property relating |
| // to a hardware implementation of the functionality of the software |
| // licensed hereunder. You may use the software subject to the license |
| // terms below provided that you ensure that this notice is replicated |
| // unmodified and in its entirety in all distributions of the software, |
| // modified or unmodified, in source code or in binary form. |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are |
| // met: redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer; |
| // redistributions in binary form must reproduce the above copyright |
| // notice, this list of conditions and the following disclaimer in the |
| // documentation and/or other materials provided with the distribution; |
| // neither the name of the copyright holders nor the names of its |
| // contributors may be used to endorse or promote products derived from |
| // this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| output header {{ |
| template <template <typename T> class Base> |
| StaticInstPtr |
| decodeNeonUThreeUReg(unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1, RegIndex op2) |
| { |
| switch (size) { |
| case 0: |
| return new Base<uint8_t>(machInst, dest, op1, op2); |
| case 1: |
| return new Base<uint16_t>(machInst, dest, op1, op2); |
| case 2: |
| return new Base<uint32_t>(machInst, dest, op1, op2); |
| case 3: |
| return new Base<uint64_t>(machInst, dest, op1, op2); |
| default: |
| return new Unknown(machInst); |
| } |
| } |
| |
| template <class BaseS, class BaseD> |
| StaticInstPtr |
| decodeNeonSizeSingleDouble(unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1, RegIndex op2) |
| { |
| switch (size) { |
| case 2: |
| return new BaseS(machInst, dest, op1, op2); |
| case 3: |
| return new BaseD(machInst, dest, op1, op2); |
| default: |
| return new Unknown(machInst); |
| } |
| } |
| |
| template <template <typename T> class Base> |
| StaticInstPtr |
| decodeNeonSThreeUReg(unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1, RegIndex op2) |
| { |
| switch (size) { |
| case 0: |
| return new Base<int8_t>(machInst, dest, op1, op2); |
| case 1: |
| return new Base<int16_t>(machInst, dest, op1, op2); |
| case 2: |
| return new Base<int32_t>(machInst, dest, op1, op2); |
| case 3: |
| return new Base<int64_t>(machInst, dest, op1, op2); |
| default: |
| return new Unknown(machInst); |
| } |
| } |
| |
| template <template <typename T> class Base> |
| StaticInstPtr |
| decodeNeonUSThreeUReg(bool notSigned, unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1, RegIndex op2) |
| { |
| if (notSigned) { |
| return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2); |
| } else { |
| return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2); |
| } |
| } |
| |
| template <template <typename T> class Base> |
| StaticInstPtr |
| decodeNeonUThreeUSReg(unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1, RegIndex op2) |
| { |
| switch (size) { |
| case 0: |
| return new Base<uint8_t>(machInst, dest, op1, op2); |
| case 1: |
| return new Base<uint16_t>(machInst, dest, op1, op2); |
| case 2: |
| return new Base<uint32_t>(machInst, dest, op1, op2); |
| default: |
| return new Unknown(machInst); |
| } |
| } |
| |
| template <template <typename T> class Base> |
| StaticInstPtr |
| decodeNeonSThreeUSReg(unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1, RegIndex op2) |
| { |
| switch (size) { |
| case 0: |
| return new Base<int8_t>(machInst, dest, op1, op2); |
| case 1: |
| return new Base<int16_t>(machInst, dest, op1, op2); |
| case 2: |
| return new Base<int32_t>(machInst, dest, op1, op2); |
| default: |
| return new Unknown(machInst); |
| } |
| } |
| |
| template <template <typename T> class Base> |
| StaticInstPtr |
| decodeNeonSThreeHAndWReg(unsigned size, ExtMachInst machInst, |
| RegIndex dest, RegIndex op1, |
| RegIndex op2) |
| { |
| switch (size) { |
| case 1: |
| return new Base<int16_t>(machInst, dest, op1, op2); |
| case 2: |
| return new Base<int32_t>(machInst, dest, op1, op2); |
| default: |
| return new Unknown(machInst); |
| } |
| } |
| |
| template <template <typename T> class Base> |
| StaticInstPtr |
| decodeNeonSThreeImmHAndWReg(unsigned size, ExtMachInst machInst, |
| RegIndex dest, RegIndex op1, |
| RegIndex op2, uint64_t imm) |
| { |
| switch (size) { |
| case 1: |
| return new Base<int16_t>(machInst, dest, op1, op2, imm); |
| case 2: |
| return new Base<int32_t>(machInst, dest, op1, op2, imm); |
| default: |
| return new Unknown(machInst); |
| } |
| } |
| |
| template <template <typename T> class Base> |
| StaticInstPtr |
| decodeNeonUSThreeUSReg(bool notSigned, unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1, RegIndex op2) |
| { |
| if (notSigned) { |
| return decodeNeonUThreeUSReg<Base>( |
| size, machInst, dest, op1, op2); |
| } else { |
| return decodeNeonSThreeUSReg<Base>( |
| size, machInst, dest, op1, op2); |
| } |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonUThreeSReg(bool q, unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1, RegIndex op2) |
| { |
| if (q) { |
| return decodeNeonUThreeUSReg<BaseQ>( |
| size, machInst, dest, op1, op2); |
| } else { |
| return decodeNeonUThreeUSReg<BaseD>( |
| size, machInst, dest, op1, op2); |
| } |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonSThreeSReg(bool q, unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1, RegIndex op2) |
| { |
| if (q) { |
| return decodeNeonSThreeUSReg<BaseQ>( |
| size, machInst, dest, op1, op2); |
| } else { |
| return decodeNeonSThreeUSReg<BaseD>( |
| size, machInst, dest, op1, op2); |
| } |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonSThreeXReg(bool q, unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1, RegIndex op2) |
| { |
| if (q) { |
| return decodeNeonSThreeUReg<BaseQ>( |
| size, machInst, dest, op1, op2); |
| } else { |
| return decodeNeonSThreeUSReg<BaseD>( |
| size, machInst, dest, op1, op2); |
| } |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonUThreeXReg(bool q, unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1, RegIndex op2) |
| { |
| if (q) { |
| return decodeNeonUThreeUReg<BaseQ>( |
| size, machInst, dest, op1, op2); |
| } else { |
| return decodeNeonUThreeUSReg<BaseD>( |
| size, machInst, dest, op1, op2); |
| } |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1, RegIndex op2) |
| { |
| if (notSigned) { |
| return decodeNeonUThreeSReg<BaseD, BaseQ>( |
| q, size, machInst, dest, op1, op2); |
| } else { |
| return decodeNeonSThreeSReg<BaseD, BaseQ>( |
| q, size, machInst, dest, op1, op2); |
| } |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonUThreeReg(bool q, unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1, RegIndex op2) |
| { |
| if (q) { |
| return decodeNeonUThreeUReg<BaseQ>( |
| size, machInst, dest, op1, op2); |
| } else { |
| return decodeNeonUThreeUReg<BaseD>( |
| size, machInst, dest, op1, op2); |
| } |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonSThreeReg(bool q, unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1, RegIndex op2) |
| { |
| if (q) { |
| return decodeNeonSThreeUReg<BaseQ>( |
| size, machInst, dest, op1, op2); |
| } else { |
| return decodeNeonSThreeUReg<BaseD>( |
| size, machInst, dest, op1, op2); |
| } |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1, RegIndex op2) |
| { |
| if (notSigned) { |
| return decodeNeonUThreeReg<BaseD, BaseQ>( |
| q, size, machInst, dest, op1, op2); |
| } else { |
| return decodeNeonSThreeReg<BaseD, BaseQ>( |
| q, size, machInst, dest, op1, op2); |
| } |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonUThreeFpReg(bool q, unsigned size, ExtMachInst machInst, |
| RegIndex dest, RegIndex op1, RegIndex op2) |
| { |
| if (q) { |
| if (size) |
| return new BaseQ<uint64_t>(machInst, dest, op1, op2); |
| else |
| return new BaseQ<uint32_t>(machInst, dest, op1, op2); |
| } else { |
| if (size) |
| return new Unknown(machInst); |
| else |
| return new BaseD<uint32_t>(machInst, dest, op1, op2); |
| } |
| } |
| |
| template <template <typename T> class Base> |
| StaticInstPtr |
| decodeNeonUThreeScFpReg(bool size, ExtMachInst machInst, |
| RegIndex dest, RegIndex op1, RegIndex op2) |
| { |
| if (size) |
| return new Base<uint64_t>(machInst, dest, op1, op2); |
| else |
| return new Base<uint32_t>(machInst, dest, op1, op2); |
| } |
| |
| template <template <typename T> class Base> |
| StaticInstPtr |
| decodeNeonUThreeImmScFpReg(bool size, ExtMachInst machInst, |
| RegIndex dest, RegIndex op1, |
| RegIndex op2, uint64_t imm) |
| { |
| if (size) |
| return new Base<uint64_t>(machInst, dest, op1, op2, imm); |
| else |
| return new Base<uint32_t>(machInst, dest, op1, op2, imm); |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonUThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst, |
| RegIndex dest, RegIndex op1, |
| RegIndex op2, uint64_t imm) |
| { |
| if (q) { |
| switch (size) { |
| case 1: |
| return new BaseQ<uint16_t>(machInst, dest, op1, op2, imm); |
| case 2: |
| return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm); |
| default: |
| return new Unknown(machInst); |
| } |
| } else { |
| switch (size) { |
| case 1: |
| return new BaseD<uint16_t>(machInst, dest, op1, op2, imm); |
| case 2: |
| return new BaseD<uint32_t>(machInst, dest, op1, op2, imm); |
| default: |
| return new Unknown(machInst); |
| } |
| } |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonSThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst, |
| RegIndex dest, RegIndex op1, |
| RegIndex op2, uint64_t imm) |
| { |
| if (q) { |
| switch (size) { |
| case 1: |
| return new BaseQ<int16_t>(machInst, dest, op1, op2, imm); |
| case 2: |
| return new BaseQ<int32_t>(machInst, dest, op1, op2, imm); |
| default: |
| return new Unknown(machInst); |
| } |
| } else { |
| switch (size) { |
| case 1: |
| return new BaseD<int16_t>(machInst, dest, op1, op2, imm); |
| case 2: |
| return new BaseD<int32_t>(machInst, dest, op1, op2, imm); |
| default: |
| return new Unknown(machInst); |
| } |
| } |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonUThreeImmFpReg(bool q, unsigned size, ExtMachInst machInst, |
| RegIndex dest, RegIndex op1, |
| RegIndex op2, uint64_t imm) |
| { |
| if (q) { |
| if (size) |
| return new BaseQ<uint64_t>(machInst, dest, op1, op2, imm); |
| else |
| return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm); |
| } else { |
| if (size) |
| return new Unknown(machInst); |
| else |
| return new BaseD<uint32_t>(machInst, dest, op1, op2, imm); |
| } |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonUTwoShiftReg(bool q, unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1, uint64_t imm) |
| { |
| if (q) { |
| switch (size) { |
| case 0: |
| return new BaseQ<uint8_t>(machInst, dest, op1, imm); |
| case 1: |
| return new BaseQ<uint16_t>(machInst, dest, op1, imm); |
| case 2: |
| return new BaseQ<uint32_t>(machInst, dest, op1, imm); |
| case 3: |
| return new BaseQ<uint64_t>(machInst, dest, op1, imm); |
| default: |
| return new Unknown(machInst); |
| } |
| } else { |
| switch (size) { |
| case 0: |
| return new BaseD<uint8_t>(machInst, dest, op1, imm); |
| case 1: |
| return new BaseD<uint16_t>(machInst, dest, op1, imm); |
| case 2: |
| return new BaseD<uint32_t>(machInst, dest, op1, imm); |
| case 3: |
| return new BaseD<uint64_t>(machInst, dest, op1, imm); |
| default: |
| return new Unknown(machInst); |
| } |
| } |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonSTwoShiftReg(bool q, unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1, uint64_t imm) |
| { |
| if (q) { |
| switch (size) { |
| case 0: |
| return new BaseQ<int8_t>(machInst, dest, op1, imm); |
| case 1: |
| return new BaseQ<int16_t>(machInst, dest, op1, imm); |
| case 2: |
| return new BaseQ<int32_t>(machInst, dest, op1, imm); |
| case 3: |
| return new BaseQ<int64_t>(machInst, dest, op1, imm); |
| default: |
| return new Unknown(machInst); |
| } |
| } else { |
| switch (size) { |
| case 0: |
| return new BaseD<int8_t>(machInst, dest, op1, imm); |
| case 1: |
| return new BaseD<int16_t>(machInst, dest, op1, imm); |
| case 2: |
| return new BaseD<int32_t>(machInst, dest, op1, imm); |
| case 3: |
| return new BaseD<int64_t>(machInst, dest, op1, imm); |
| default: |
| return new Unknown(machInst); |
| } |
| } |
| } |
| |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1, uint64_t imm) |
| { |
| if (notSigned) { |
| return decodeNeonUTwoShiftReg<BaseD, BaseQ>( |
| q, size, machInst, dest, op1, imm); |
| } else { |
| return decodeNeonSTwoShiftReg<BaseD, BaseQ>( |
| q, size, machInst, dest, op1, imm); |
| } |
| } |
| |
| template <template <typename T> class Base> |
| StaticInstPtr |
| decodeNeonUTwoShiftUSReg(unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1, uint64_t imm) |
| { |
| switch (size) { |
| case 0: |
| return new Base<uint8_t>(machInst, dest, op1, imm); |
| case 1: |
| return new Base<uint16_t>(machInst, dest, op1, imm); |
| case 2: |
| return new Base<uint32_t>(machInst, dest, op1, imm); |
| default: |
| return new Unknown(machInst); |
| } |
| } |
| |
| template <template <typename T> class Base> |
| StaticInstPtr |
| decodeNeonUTwoShiftUReg(unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1, uint64_t imm) |
| { |
| switch (size) { |
| case 0: |
| return new Base<uint8_t>(machInst, dest, op1, imm); |
| case 1: |
| return new Base<uint16_t>(machInst, dest, op1, imm); |
| case 2: |
| return new Base<uint32_t>(machInst, dest, op1, imm); |
| case 3: |
| return new Base<uint64_t>(machInst, dest, op1, imm); |
| default: |
| return new Unknown(machInst); |
| } |
| } |
| |
| template <template <typename T> class Base> |
| StaticInstPtr |
| decodeNeonSTwoShiftUReg(unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1, uint64_t imm) |
| { |
| switch (size) { |
| case 0: |
| return new Base<int8_t>(machInst, dest, op1, imm); |
| case 1: |
| return new Base<int16_t>(machInst, dest, op1, imm); |
| case 2: |
| return new Base<int32_t>(machInst, dest, op1, imm); |
| case 3: |
| return new Base<int64_t>(machInst, dest, op1, imm); |
| default: |
| return new Unknown(machInst); |
| } |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonUTwoShiftSReg(bool q, unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1, uint64_t imm) |
| { |
| if (q) { |
| return decodeNeonUTwoShiftUSReg<BaseQ>( |
| size, machInst, dest, op1, imm); |
| } else { |
| return decodeNeonUTwoShiftUSReg<BaseD>( |
| size, machInst, dest, op1, imm); |
| } |
| } |
| |
| template <template <typename T> class Base> |
| StaticInstPtr |
| decodeNeonSTwoShiftUSReg(unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1, uint64_t imm) |
| { |
| switch (size) { |
| case 0: |
| return new Base<int8_t>(machInst, dest, op1, imm); |
| case 1: |
| return new Base<int16_t>(machInst, dest, op1, imm); |
| case 2: |
| return new Base<int32_t>(machInst, dest, op1, imm); |
| default: |
| return new Unknown(machInst); |
| } |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonSTwoShiftSReg(bool q, unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1, uint64_t imm) |
| { |
| if (q) { |
| return decodeNeonSTwoShiftUSReg<BaseQ>( |
| size, machInst, dest, op1, imm); |
| } else { |
| return decodeNeonSTwoShiftUSReg<BaseD>( |
| size, machInst, dest, op1, imm); |
| } |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1, uint64_t imm) |
| { |
| if (notSigned) { |
| return decodeNeonUTwoShiftSReg<BaseD, BaseQ>( |
| q, size, machInst, dest, op1, imm); |
| } else { |
| return decodeNeonSTwoShiftSReg<BaseD, BaseQ>( |
| q, size, machInst, dest, op1, imm); |
| } |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonUTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst, |
| RegIndex dest, RegIndex op1, uint64_t imm) |
| { |
| if (q) { |
| return decodeNeonUTwoShiftUReg<BaseQ>( |
| size, machInst, dest, op1, imm); |
| } else { |
| return decodeNeonUTwoShiftUSReg<BaseD>( |
| size, machInst, dest, op1, imm); |
| } |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonSTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst, |
| RegIndex dest, RegIndex op1, uint64_t imm) |
| { |
| if (q) { |
| return decodeNeonSTwoShiftUReg<BaseQ>( |
| size, machInst, dest, op1, imm); |
| } else { |
| return decodeNeonSTwoShiftUSReg<BaseD>( |
| size, machInst, dest, op1, imm); |
| } |
| } |
| |
| template <template <typename T> class Base> |
| StaticInstPtr |
| decodeNeonUTwoShiftUFpReg(unsigned size, ExtMachInst machInst, |
| RegIndex dest, RegIndex op1, uint64_t imm) |
| { |
| if (size) |
| return new Base<uint64_t>(machInst, dest, op1, imm); |
| else |
| return new Base<uint32_t>(machInst, dest, op1, imm); |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonUTwoShiftFpReg(bool q, unsigned size, ExtMachInst machInst, |
| RegIndex dest, RegIndex op1, uint64_t imm) |
| { |
| if (q) { |
| if (size) |
| return new BaseQ<uint64_t>(machInst, dest, op1, imm); |
| else |
| return new BaseQ<uint32_t>(machInst, dest, op1, imm); |
| } else { |
| if (size) |
| return new Unknown(machInst); |
| else |
| return new BaseD<uint32_t>(machInst, dest, op1, imm); |
| } |
| } |
| |
| template <template <typename T> class Base> |
| StaticInstPtr |
| decodeNeonUTwoMiscUSReg(unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1) |
| { |
| switch (size) { |
| case 0: |
| return new Base<uint8_t>(machInst, dest, op1); |
| case 1: |
| return new Base<uint16_t>(machInst, dest, op1); |
| case 2: |
| return new Base<uint32_t>(machInst, dest, op1); |
| default: |
| return new Unknown(machInst); |
| } |
| } |
| |
| template <template <typename T> class Base> |
| StaticInstPtr |
| decodeNeonSTwoMiscUSReg(unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1) |
| { |
| switch (size) { |
| case 0: |
| return new Base<int8_t>(machInst, dest, op1); |
| case 1: |
| return new Base<int16_t>(machInst, dest, op1); |
| case 2: |
| return new Base<int32_t>(machInst, dest, op1); |
| default: |
| return new Unknown(machInst); |
| } |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonUTwoMiscSReg(bool q, unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1) |
| { |
| if (q) { |
| return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1); |
| } else { |
| return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1); |
| } |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonSTwoMiscSReg(bool q, unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1) |
| { |
| if (q) { |
| return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1); |
| } else { |
| return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1); |
| } |
| } |
| |
| template <template <typename T> class Base> |
| StaticInstPtr |
| decodeNeonUTwoMiscUReg(unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1) |
| { |
| switch (size) { |
| case 0: |
| return new Base<uint8_t>(machInst, dest, op1); |
| case 1: |
| return new Base<uint16_t>(machInst, dest, op1); |
| case 2: |
| return new Base<uint32_t>(machInst, dest, op1); |
| case 3: |
| return new Base<uint64_t>(machInst, dest, op1); |
| default: |
| return new Unknown(machInst); |
| } |
| } |
| |
| template <template <typename T> class Base> |
| StaticInstPtr |
| decodeNeonSTwoMiscUReg(unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1) |
| { |
| switch (size) { |
| case 0: |
| return new Base<int8_t>(machInst, dest, op1); |
| case 1: |
| return new Base<int16_t>(machInst, dest, op1); |
| case 2: |
| return new Base<int32_t>(machInst, dest, op1); |
| case 3: |
| return new Base<int64_t>(machInst, dest, op1); |
| default: |
| return new Unknown(machInst); |
| } |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonSTwoMiscReg(bool q, unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1) |
| { |
| if (q) { |
| return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1); |
| } else { |
| return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1); |
| } |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonUTwoMiscReg(bool q, unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1) |
| { |
| if (q) { |
| return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1); |
| } else { |
| return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1); |
| } |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size, |
| ExtMachInst machInst, RegIndex dest, |
| RegIndex op1) |
| { |
| if (notSigned) { |
| return decodeNeonUTwoShiftSReg<BaseD, BaseQ>( |
| q, size, machInst, dest, op1); |
| } else { |
| return decodeNeonSTwoShiftSReg<BaseD, BaseQ>( |
| q, size, machInst, dest, op1); |
| } |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonUTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst, |
| RegIndex dest, RegIndex op1) |
| { |
| if (q) { |
| return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1); |
| } else { |
| return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1); |
| } |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonSTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst, |
| RegIndex dest, RegIndex op1) |
| { |
| if (q) { |
| return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1); |
| } else { |
| return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1); |
| } |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonUTwoMiscFpReg(bool q, unsigned size, ExtMachInst machInst, |
| RegIndex dest, RegIndex op1) |
| { |
| if (q) { |
| if (size) |
| return new BaseQ<uint64_t>(machInst, dest, op1); |
| else |
| return new BaseQ<uint32_t>(machInst, dest, op1); |
| } else { |
| if (size) |
| return new Unknown(machInst); |
| else |
| return new BaseD<uint32_t>(machInst, dest, op1); |
| } |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonUTwoMiscPwiseScFpReg(unsigned size, ExtMachInst machInst, |
| RegIndex dest, RegIndex op1) |
| { |
| if (size) |
| return new BaseQ<uint64_t>(machInst, dest, op1); |
| else |
| return new BaseD<uint32_t>(machInst, dest, op1); |
| } |
| |
| template <template <typename T> class Base> |
| StaticInstPtr |
| decodeNeonUTwoMiscScFpReg(unsigned size, ExtMachInst machInst, |
| RegIndex dest, RegIndex op1) |
| { |
| if (size) |
| return new Base<uint64_t>(machInst, dest, op1); |
| else |
| return new Base<uint32_t>(machInst, dest, op1); |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst, |
| RegIndex dest, RegIndex op1) |
| { |
| if (q) { |
| switch (size) { |
| case 0x0: |
| return new BaseQ<uint8_t>(machInst, dest, op1); |
| case 0x1: |
| return new BaseQ<uint16_t>(machInst, dest, op1); |
| case 0x2: |
| return new BaseQ<uint32_t>(machInst, dest, op1); |
| default: |
| return new Unknown(machInst); |
| } |
| } else { |
| switch (size) { |
| case 0x0: |
| return new BaseD<uint8_t>(machInst, dest, op1); |
| case 0x1: |
| return new BaseD<uint16_t>(machInst, dest, op1); |
| default: |
| return new Unknown(machInst); |
| } |
| } |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ, |
| template <typename T> class BaseBQ> |
| StaticInstPtr |
| decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst, |
| RegIndex dest, RegIndex op1) |
| { |
| if (q) { |
| switch (size) { |
| case 0x0: |
| return new BaseQ<uint8_t>(machInst, dest, op1); |
| case 0x1: |
| return new BaseQ<uint16_t>(machInst, dest, op1); |
| case 0x2: |
| return new BaseBQ<uint32_t>(machInst, dest, op1); |
| default: |
| return new Unknown(machInst); |
| } |
| } else { |
| switch (size) { |
| case 0x0: |
| return new BaseD<uint8_t>(machInst, dest, op1); |
| case 0x1: |
| return new BaseD<uint16_t>(machInst, dest, op1); |
| default: |
| return new Unknown(machInst); |
| } |
| } |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ> |
| StaticInstPtr |
| decodeNeonSAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst, |
| RegIndex dest, RegIndex op1) |
| { |
| if (q) { |
| switch (size) { |
| case 0x0: |
| return new BaseQ<int8_t>(machInst, dest, op1); |
| case 0x1: |
| return new BaseQ<int16_t>(machInst, dest, op1); |
| case 0x2: |
| return new BaseQ<int32_t>(machInst, dest, op1); |
| default: |
| return new Unknown(machInst); |
| } |
| } else { |
| switch (size) { |
| case 0x0: |
| return new BaseD<int8_t>(machInst, dest, op1); |
| case 0x1: |
| return new BaseD<int16_t>(machInst, dest, op1); |
| default: |
| return new Unknown(machInst); |
| } |
| } |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ, |
| template <typename T> class BaseBQ> |
| StaticInstPtr |
| decodeNeonUAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst, |
| RegIndex dest, RegIndex op1) |
| { |
| if (q) { |
| switch (size) { |
| case 0x0: |
| return new BaseQ<uint8_t>(machInst, dest, op1); |
| case 0x1: |
| return new BaseQ<uint16_t>(machInst, dest, op1); |
| case 0x2: |
| return new BaseBQ<uint32_t>(machInst, dest, op1); |
| default: |
| return new Unknown(machInst); |
| } |
| } else { |
| switch (size) { |
| case 0x0: |
| return new BaseD<uint8_t>(machInst, dest, op1); |
| case 0x1: |
| return new BaseD<uint16_t>(machInst, dest, op1); |
| default: |
| return new Unknown(machInst); |
| } |
| } |
| } |
| |
| template <template <typename T> class BaseD, |
| template <typename T> class BaseQ, |
| template <typename T> class BaseBQ> |
| StaticInstPtr |
| decodeNeonSAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst, |
| RegIndex dest, RegIndex op1) |
| { |
| if (q) { |
| switch (size) { |
| case 0x0: |
| return new BaseQ<int8_t>(machInst, dest, op1); |
| case 0x1: |
| return new BaseQ<int16_t>(machInst, dest, op1); |
| case 0x2: |
| return new BaseBQ<int32_t>(machInst, dest, op1); |
| default: |
| return new Unknown(machInst); |
| } |
| } else { |
| switch (size) { |
| case 0x0: |
| return new BaseD<int8_t>(machInst, dest, op1); |
| case 0x1: |
| return new BaseD<int16_t>(machInst, dest, op1); |
| default: |
| return new Unknown(machInst); |
| } |
| } |
| } |
| }}; |
| |
| let {{ |
# Accumulators for generated text; both start empty and are appended to
# by the code that follows.
| header_output = "" |
| exec_output = "" |
| |
# C++ source, held as a Python string, for three float comparison helpers
# (greater-than, greater-or-equal, equal). Each helper returns:
#   2.0 when an operand is NaN (vceqFunc tests isSnan() rather than
#       std::isnan()),
#   0.0 when the comparison holds,
#   1.0 when it does not.
# NOTE(review): 0.0 meaning "true" looks inverted; presumably callers test
# the result against 0 -- confirm before changing.
| vcompares = ''' |
| static float |
| vcgtFunc(float op1, float op2) |
| { |
| if (std::isnan(op1) || std::isnan(op2)) |
| return 2.0; |
| return (op1 > op2) ? 0.0 : 1.0; |
| } |
| |
| static float |
| vcgeFunc(float op1, float op2) |
| { |
| if (std::isnan(op1) || std::isnan(op2)) |
| return 2.0; |
| return (op1 >= op2) ? 0.0 : 1.0; |
| } |
| |
| static float |
| vceqFunc(float op1, float op2) |
| { |
| if (isSnan(op1) || isSnan(op2)) |
| return 2.0; |
| return (op1 == op2) ? 0.0 : 1.0; |
| } |
| ''' |
# C++ source, held as a Python string, for the less-or-equal and less-than
# float comparison helpers. Each returns 2.0 when either operand is NaN,
# 0.0 when the comparison holds and 1.0 when it does not.
# This string is only defined here; where it is emitted is not visible in
# this part of the file.
| vcomparesL = ''' |
| static float |
| vcleFunc(float op1, float op2) |
| { |
| if (std::isnan(op1) || std::isnan(op2)) |
| return 2.0; |
| return (op1 <= op2) ? 0.0 : 1.0; |
| } |
| |
| static float |
| vcltFunc(float op1, float op2) |
| { |
| if (std::isnan(op1) || std::isnan(op2)) |
| return 2.0; |
| return (op1 < op2) ? 0.0 : 1.0; |
| } |
| ''' |
# C++ source, held as a Python string, for the absolute-value comparison
# helpers. Both apply fabsf() to each operand before comparing and return
# 2.0 when either operand is NaN, 0.0 when the comparison holds and 1.0
# when it does not.
| vacomparesG = ''' |
| static float |
| vacgtFunc(float op1, float op2) |
| { |
| if (std::isnan(op1) || std::isnan(op2)) |
| return 2.0; |
| return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0; |
| } |
| |
| static float |
| vacgeFunc(float op1, float op2) |
| { |
| if (std::isnan(op1) || std::isnan(op2)) |
| return 2.0; |
| return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0; |
| } |
| ''' |
| |
# Emit the helpers into the exec output. Only vcompares and vacomparesG
# are appended here; vcomparesL is not part of this concatenation.
| exec_output += vcompares + vacomparesG |
| |
# Tuples of C++ integer type names. The "small" tuples hold the 8, 16 and
# 32-bit types; the unprefixed ones add the 64-bit type. smallTypes and
# allTypes concatenate the unsigned tuple followed by the signed one.
| smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t") |
| unsignedTypes = smallUnsignedTypes + ("uint64_t",) |
| smallSignedTypes = ("int8_t", "int16_t", "int32_t") |
| signedTypes = smallSignedTypes + ("int64_t",) |
| smallTypes = smallUnsignedTypes + smallSignedTypes |
| allTypes = unsignedTypes + signedTypes |
| |
def threeEqualRegInst(name, Name, opClass, types, rCount, op,
                      readDest=False, pairwise=False, byElem=False,
                      standardFpcsr=False, complex=False, extra=''):
    '''Generate a three register instruction whose operands all span
    rCount 32-bit register pieces.

    op is the C++ snippet computing destElem from srcElem1/srcElem2 (or,
    when complex is set, the whole body that replaces the element loop).
    readDest preloads the destination, pairwise feeds adjacent elements
    of the concatenated sources to op, byElem reads all four pieces of
    the second operand and selects the immediate-taking base class,
    standardFpcsr brackets the code with a local "standard" FPSCR copy and
    extra is pasted in front of the plain element loop. One
    NeonExecDeclare substitution is emitted per entry of types.
    '''
    global header_output, exec_output

    walk = simdEnabledCheckCode + '''
        RegVect srcReg1, destReg;
    '''
    # The by-element form needs the second operand in full.
    if byElem:
        walk += '''
        FullRegVect srcReg2;
        '''
    else:
        walk += '''
        RegVect srcReg2;
        '''

    for piece in range(rCount):
        pieceArgs = { "reg" : piece }
        walk += '''
        srcReg1.regs[%(reg)d] = htole(FpOp1P%(reg)d_uw);
        srcReg2.regs[%(reg)d] = htole(FpOp2P%(reg)d_uw);
        ''' % pieceArgs
        if readDest:
            walk += '''
        destReg.regs[%(reg)d] = htole(FpDestP%(reg)d_uw);
        ''' % pieceArgs
    if byElem:
        # Load the pieces of the second operand not covered above.
        for piece in range(rCount, 4):
            walk += '''
        srcReg2.regs[%(reg)d] = htole(FpOp2P%(reg)d_uw);
        ''' % { "reg" : piece }

    if standardFpcsr:
        walk += '''
        FPSCR fpscr = fpStandardFPSCRValue((FPSCR)FpscrExc);
        '''
    loopArgs = { "op" : op,
                 "readDest" : ('destElem = letoh(destReg.elements[i]);'
                               if readDest else '') }

    if complex:
        walk += op
    elif pairwise:
        walk += '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = letoh(2 * i < eCount ?
                                     srcReg1.elements[2 * i] :
                                     srcReg2.elements[2 * i - eCount]);
            Element srcElem2 = letoh(2 * i < eCount ?
                                     srcReg1.elements[2 * i + 1] :
                                     srcReg2.elements[2 * i + 1 - eCount]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htole(destElem);
        }
        ''' % loopArgs
    else:
        walk += extra
        walk += '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = letoh(srcReg1.elements[i]);
            Element srcElem2 = letoh(srcReg2.elements[i]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htole(destElem);
        }
        ''' % loopArgs

    if standardFpcsr:
        walk += '''
        FpscrExc = fpscr;
        '''
    for piece in range(rCount):
        walk += '''
        FpDestP%(reg)d_uw = letoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : piece }

    iop = ArmInstObjParams(name, Name,
                           "RegRegRegImmOp" if byElem else "RegRegRegOp",
                           { "code": walk,
                             "r_count": rCount,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    declare = (NeonRegRegRegImmOpDeclare if byElem
               else NeonRegRegRegOpDeclare)
    header_output += declare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for targs in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targs,
                                               "class_name" : Name })
| |
def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
                        readDest=False, pairwise=False, toInt=False):
    '''Generate a three register floating point instruction working on
    rCount single precision registers.

    op is the C++ snippet computing destReg from srcReg1/srcReg2. With
    toInt the destination is kept as raw 32-bit words in a RegVect
    instead of a float array. readDest preloads the destination and
    pairwise feeds adjacent registers of the concatenated sources to op.
    One NeonExecDeclare substitution is emitted per entry of types.
    '''
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
        typedef float FloatVect[rCount];
        FloatVect srcRegs1, srcRegs2;
    '''
    if toInt:
        eWalkCode += 'RegVect destRegs;\n'
    else:
        eWalkCode += 'FloatVect destRegs;\n'
    for reg in range(rCount):
        eWalkCode += '''
        srcRegs1[%(reg)d] = FpOp1P%(reg)d;
        srcRegs2[%(reg)d] = FpOp2P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            if toInt:
                eWalkCode += '''
        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
        ''' % { "reg" : reg }
            else:
                eWalkCode += '''
        destRegs[%(reg)d] = FpDestP%(reg)d;
        ''' % { "reg" : reg }
    # A RegVect destination is accessed through its regs member, a float
    # array directly. Use the same accessor for the read-back and the
    # write so that toInt combined with readDest produces valid code.
    if toInt:
        destType = 'uint32_t'
        destSlot = 'destRegs.regs[r]'
    else:
        destType = 'float'
        destSlot = 'destRegs[r]'
    readDestCode = ''
    if readDest:
        readDestCode = 'destReg = %s;' % destSlot
    writeDest = '%s = destReg;' % destSlot
    if pairwise:
        eWalkCode += '''
        for (unsigned r = 0; r < rCount; r++) {
            float srcReg1 = (2 * r < rCount) ?
                srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
            float srcReg2 = (2 * r < rCount) ?
                srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
            %(destType)s destReg;
            %(readDest)s
            %(op)s
            %(writeDest)s
        }
        ''' % { "op" : op,
                "readDest" : readDestCode,
                "destType" : destType,
                "writeDest" : writeDest }
    else:
        eWalkCode += '''
        for (unsigned r = 0; r < rCount; r++) {
            float srcReg1 = srcRegs1[r];
            float srcReg2 = srcRegs2[r];
            %(destType)s destReg;
            %(readDest)s
            %(op)s
            %(writeDest)s
        }
        ''' % { "op" : op,
                "readDest" : readDestCode,
                "destType" : destType,
                "writeDest" : writeDest }
    for reg in range(rCount):
        if toInt:
            eWalkCode += '''
        FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
        ''' % { "reg" : reg }
        else:
            eWalkCode += '''
        FpDestP%(reg)d = destRegs[%(reg)d];
        ''' % { "reg" : reg }
    iop = ArmInstObjParams(name, Name,
                           "FpRegRegRegOp",
                           { "code": eWalkCode,
                             "r_count": rCount,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
| |
def threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1, bigSrc2, bigDest, readDest):
    '''Generate a three register instruction whose operands may differ
    in width.

    Each of bigSrc1, bigSrc2 and bigDest selects the "Big" register
    vector and element type, loaded from or stored to four 32-bit
    register pieces instead of two. op is the C++ snippet computing
    destElem from srcElem1/srcElem2; readDest preloads the destination.
    One NeonExecDeclare substitution is emitted per entry of types.
    '''
    global header_output, exec_output
    src1Cnt = src2Cnt = destCnt = 2
    src1Prefix = src2Prefix = destPrefix = ''
    if bigSrc1:
        src1Cnt = 4
        src1Prefix = 'Big'
    if bigSrc2:
        src2Cnt = 4
        src2Prefix = 'Big'
    if bigDest:
        destCnt = 4
        destPrefix = 'Big'
    eWalkCode = simdEnabledCheckCode + '''
        %sRegVect srcReg1;
        %sRegVect srcReg2;
        %sRegVect destReg;
    ''' % (src1Prefix, src2Prefix, destPrefix)
    for reg in range(src1Cnt):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htole(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
    for reg in range(src2Cnt):
        eWalkCode += '''
        srcReg2.regs[%(reg)d] = htole(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(destCnt):
            eWalkCode += '''
        destReg.regs[%(reg)d] = htole(FpDestP%(reg)d_uw);
        ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = letoh(destReg.elements[i]);'
    # srcElem2 is typed after the second operand's own vector (it used to
    # borrow the first operand's prefix, leaving src2Prefix unused).
    eWalkCode += '''
        for (unsigned i = 0; i < eCount; i++) {
            %(src1Prefix)sElement srcElem1 = letoh(srcReg1.elements[i]);
            %(src2Prefix)sElement srcElem2 = letoh(srcReg2.elements[i]);
            %(destPrefix)sElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htole(destElem);
        }
        ''' % { "op" : op, "readDest" : readDestCode,
                "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
                "destPrefix" : destPrefix }
    for reg in range(destCnt):
        eWalkCode += '''
        FpDestP%(reg)d_uw = letoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = ArmInstObjParams(name, Name,
                           "RegRegRegOp",
                           { "code": eWalkCode,
                             "r_count": 2,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
| |
def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
    '''Three register form with both sources big and a regular
    destination.'''
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=True, bigSrc2=True, bigDest=False,
                        readDest=readDest)
| |
def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
    '''Three register form with regular sources and a big destination.'''
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=False, bigSrc2=False, bigDest=True,
                        readDest=readDest)
| |
def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
    '''Three register form with a big first source, a regular second
    source and a big destination.'''
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=True, bigSrc2=False, bigDest=True,
                        readDest=readDest)
| |
def twoEqualRegInst(name, Name, opClass, types, rCount, op,
                    readDest=False, extra=''):
    '''Generate an instruction combining every element of the first
    source with the single element of the second source picked by imm.

    All operands span rCount 32-bit register pieces. op is the C++
    snippet computing destElem from srcElem1/srcElem2, extra is pasted in
    front of the element loop and readDest preloads the destination. The
    generated code returns an UndefinedInstruction fault when imm is not
    below eCount. One NeonExecDeclare substitution is emitted per entry
    of types.
    '''
    global header_output, exec_output

    body = simdEnabledCheckCode + '''
        RegVect srcReg1, srcReg2, destReg;
    '''
    for piece in range(rCount):
        pieceArgs = { "reg" : piece }
        body += '''
        srcReg1.regs[%(reg)d] = htole(FpOp1P%(reg)d_uw);
        srcReg2.regs[%(reg)d] = htole(FpOp2P%(reg)d_uw);
        ''' % pieceArgs
        if readDest:
            body += '''
        destReg.regs[%(reg)d] = htole(FpDestP%(reg)d_uw);
        ''' % pieceArgs

    destPreload = ('destElem = letoh(destReg.elements[i]);'
                   if readDest else '')
    body += extra
    body += '''
        if (imm >= eCount) {
            return std::make_shared<UndefinedInstruction>(machInst, false,
                                                          mnemonic);
        } else {
            for (unsigned i = 0; i < eCount; i++) {
                Element srcElem1 = letoh(srcReg1.elements[i]);
                Element srcElem2 = letoh(srcReg2.elements[imm]);
                Element destElem;
                %(readDest)s
                %(op)s
                destReg.elements[i] = htole(destElem);
            }
        }
        ''' % { "op" : op, "readDest" : destPreload }
    for piece in range(rCount):
        body += '''
        FpDestP%(reg)d_uw = letoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : piece }

    iop = ArmInstObjParams(name, Name,
                           "RegRegRegImmOp",
                           { "code": body,
                             "r_count": rCount,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for targs in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targs,
                                               "class_name" : Name })
| |
def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
    '''Generate an instruction combining every element of the first
    source with the element of the second source picked by imm, producing
    a big (four register piece) destination from two-piece sources.

    op is the C++ snippet computing destElem from srcElem1/srcElem2 and
    readDest preloads the destination. The generated code sets fault to
    an UndefinedInstruction when imm is not below eCount; destReg is
    zero-initialised so the trailing register copies stay well defined in
    that case. One NeonExecDeclare substitution is emitted per entry of
    types.
    '''
    global header_output, exec_output
    rCount = 2
    eWalkCode = simdEnabledCheckCode + '''
        RegVect srcReg1, srcReg2;
        BigRegVect destReg = {};
    '''
    for reg in range(rCount):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htole(FpOp1P%(reg)d_uw);
        srcReg2.regs[%(reg)d] = htole(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(2 * rCount):
            eWalkCode += '''
        destReg.regs[%(reg)d] = htole(FpDestP%(reg)d_uw);
        ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = letoh(destReg.elements[i]);'
    eWalkCode += '''
        if (imm >= eCount) {
            fault = std::make_shared<UndefinedInstruction>(machInst, false,
                                                          mnemonic);
        } else {
            for (unsigned i = 0; i < eCount; i++) {
                Element srcElem1 = letoh(srcReg1.elements[i]);
                Element srcElem2 = letoh(srcReg2.elements[imm]);
                BigElement destElem;
                %(readDest)s
                %(op)s
                destReg.elements[i] = htole(destElem);
            }
        }
        ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(2 * rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = letoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = ArmInstObjParams(name, Name,
                           "RegRegRegImmOp",
                           { "code": eWalkCode,
                             "r_count": rCount,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
| |
def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
    '''Generate a floating point instruction combining every register of
    the first source with the register of the second source picked by
    imm.

    All operands are rCount single precision registers. op is the C++
    snippet computing destReg from srcReg1/srcReg2 and readDest preloads
    the destination. The generated code returns an UndefinedInstruction
    fault when imm is not below eCount. One NeonExecDeclare substitution
    is emitted per entry of types.
    '''
    global header_output, exec_output

    body = simdEnabledCheckCode + '''
        typedef float FloatVect[rCount];
        FloatVect srcRegs1, srcRegs2, destRegs;
    '''
    for piece in range(rCount):
        pieceArgs = { "reg" : piece }
        body += '''
        srcRegs1[%(reg)d] = FpOp1P%(reg)d;
        srcRegs2[%(reg)d] = FpOp2P%(reg)d;
        ''' % pieceArgs
        if readDest:
            body += '''
        destRegs[%(reg)d] = FpDestP%(reg)d;
        ''' % pieceArgs

    destPreload = 'destReg = destRegs[i];' if readDest else ''
    body += '''
        if (imm >= eCount) {
            return std::make_shared<UndefinedInstruction>(machInst, false,
                                                          mnemonic);
        } else {
            for (unsigned i = 0; i < rCount; i++) {
                float srcReg1 = srcRegs1[i];
                float srcReg2 = srcRegs2[imm];
                float destReg;
                %(readDest)s
                %(op)s
                destRegs[i] = destReg;
            }
        }
        ''' % { "op" : op, "readDest" : destPreload }
    for piece in range(rCount):
        body += '''
        FpDestP%(reg)d = destRegs[%(reg)d];
        ''' % { "reg" : piece }

    iop = ArmInstObjParams(name, Name,
                           "FpRegRegRegImmOp",
                           { "code": body,
                             "r_count": rCount,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for targs in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targs,
                                               "class_name" : Name })
| |
def twoRegShiftInst(name, Name, opClass, types, rCount, op,
                    readDest=False, toInt=False, fromInt=False):
    '''Generate a register/immediate instruction with equally sized
    source and destination of rCount 32-bit register pieces.

    op is the C++ snippet run once per loop iteration. By default it
    sees the source as "Element srcElem1" and writes "Element destElem";
    fromInt exposes the source as the raw word "uint32_t srcReg1" and
    toInt makes the result the raw word "uint32_t destReg". readDest
    preloads the destination. One NeonExecDeclare substitution is emitted
    per entry of types.
    '''
    global header_output, exec_output

    body = simdEnabledCheckCode + '''
        RegVect srcRegs1, destRegs;
    '''
    for piece in range(rCount):
        pieceArgs = { "reg" : piece }
        body += '''
        srcRegs1.regs[%(reg)d] = htole(FpOp1P%(reg)d_uw);
        ''' % pieceArgs
        if readDest:
            body += '''
        destRegs.regs[%(reg)d] = htole(FpDestP%(reg)d_uw);
        ''' % pieceArgs

    # How the source is presented to op.
    if fromInt:
        srcFetch = 'uint32_t srcReg1 = letoh(srcRegs1.regs[i]);'
    else:
        srcFetch = 'Element srcElem1 = letoh(srcRegs1.elements[i]);'

    # How the destination is declared, preloaded and stored.
    if toInt:
        destDecl = 'uint32_t destReg;'
        destPreload = 'destReg = letoh(destRegs.regs[i]);'
        destStore = 'destRegs.regs[i] = htole(destReg);'
    else:
        destDecl = 'Element destElem;'
        destPreload = 'destElem = letoh(destRegs.elements[i]);'
        destStore = 'destRegs.elements[i] = htole(destElem);'
    if not readDest:
        destPreload = ''

    body += '''
        for (unsigned i = 0; i < eCount; i++) {
            %(readOp)s
            %(declDest)s
            %(readDest)s
            %(op)s
            %(writeDest)s
        }
        ''' % { "readOp" : srcFetch,
                "declDest" : destDecl,
                "readDest" : destPreload,
                "op" : op,
                "writeDest" : destStore }
    for piece in range(rCount):
        body += '''
        FpDestP%(reg)d_uw = letoh(destRegs.regs[%(reg)d]);
        ''' % { "reg" : piece }

    iop = ArmInstObjParams(name, Name,
                           "RegRegImmOp",
                           { "code": body,
                             "r_count": rCount,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for targs in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targs,
                                               "class_name" : Name })
| |
def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
    '''Generate a register/immediate instruction reading a big source
    (four 32-bit register pieces) and writing a regular destination (two
    pieces).

    op is the C++ snippet computing "Element destElem" from
    "BigElement srcElem1"; readDest preloads the destination. One
    NeonExecDeclare substitution is emitted per entry of types.
    '''
    global header_output, exec_output
    srcPieces, destPieces = 4, 2

    body = simdEnabledCheckCode + '''
        BigRegVect srcReg1;
        RegVect destReg;
    '''
    for piece in range(srcPieces):
        body += '''
        srcReg1.regs[%(reg)d] = htole(FpOp1P%(reg)d_uw);
        ''' % { "reg" : piece }
    if readDest:
        for piece in range(destPieces):
            body += '''
        destReg.regs[%(reg)d] = htole(FpDestP%(reg)d_uw);
        ''' % { "reg" : piece }

    destPreload = ('destElem = letoh(destReg.elements[i]);'
                   if readDest else '')
    body += '''
        for (unsigned i = 0; i < eCount; i++) {
            BigElement srcElem1 = letoh(srcReg1.elements[i]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htole(destElem);
        }
        ''' % { "op" : op, "readDest" : destPreload }
    for piece in range(destPieces):
        body += '''
        FpDestP%(reg)d_uw = letoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : piece }

    iop = ArmInstObjParams(name, Name,
                           "RegRegImmOp",
                           { "code": body,
                             "r_count": 2,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for targs in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targs,
                                               "class_name" : Name })
| |
def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
    '''Generate a register/immediate instruction reading a regular source
    (two 32-bit register pieces) and writing a big destination (four
    pieces).

    op is the C++ snippet computing "BigElement destElem" from
    "Element srcElem1"; readDest preloads the destination. One
    NeonExecDeclare substitution is emitted per entry of types.
    '''
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
        RegVect srcReg1;
        BigRegVect destReg = {};
    '''
    for reg in range(2):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htole(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
        destReg.regs[%(reg)d] = htole(FpDestP%(reg)d_uw);
        ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # Preload the per-element temporary; destReg is the whole
        # register vector and cannot be assigned an element.
        readDestCode = 'destElem = letoh(destReg.elements[i]);'
    eWalkCode += '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = letoh(srcReg1.elements[i]);
            BigElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htole(destElem);
        }
        ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
        FpDestP%(reg)d_uw = letoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = ArmInstObjParams(name, Name,
                           "RegRegImmOp",
                           { "code": eWalkCode,
                             "r_count": 2,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
| |
def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
    '''Generate a two register instruction with equally sized source and
    destination of rCount 32-bit register pieces.

    op is the C++ snippet computing destElem from srcElem1. The result is
    stored at index j, which starts out equal to the loop index i and is
    visible to op. readDest preloads the destination. One NeonExecDeclare
    substitution is emitted per entry of types.
    '''
    global header_output, exec_output

    body = simdEnabledCheckCode + '''
        RegVect srcReg1, destReg;
    '''
    for piece in range(rCount):
        pieceArgs = { "reg" : piece }
        body += '''
        srcReg1.regs[%(reg)d] = htole(FpOp1P%(reg)d_uw);
        ''' % pieceArgs
        if readDest:
            body += '''
        destReg.regs[%(reg)d] = htole(FpDestP%(reg)d_uw);
        ''' % pieceArgs

    destPreload = ('destElem = letoh(destReg.elements[i]);'
                   if readDest else '')
    body += '''
        for (unsigned i = 0; i < eCount; i++) {
            unsigned j = i;
            Element srcElem1 = letoh(srcReg1.elements[i]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[j] = htole(destElem);
        }
        ''' % { "op" : op, "readDest" : destPreload }
    for piece in range(rCount):
        body += '''
        FpDestP%(reg)d_uw = letoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : piece }

    iop = ArmInstObjParams(name, Name,
                           "RegRegOp",
                           { "code": body,
                             "r_count": rCount,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for targs in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targs,
                                               "class_name" : Name })
| |
def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
    '''Generate a two register instruction that feeds the single source
    element picked by imm to every destination element.

    Source and destination span rCount 32-bit register pieces. op is the
    C++ snippet computing destElem from srcElem1; readDest preloads the
    destination. The generated code does not range-check imm. One
    NeonExecDeclare substitution is emitted per entry of types.
    '''
    global header_output, exec_output

    body = simdEnabledCheckCode + '''
        RegVect srcReg1, destReg;
    '''
    for piece in range(rCount):
        pieceArgs = { "reg" : piece }
        body += '''
        srcReg1.regs[%(reg)d] = htole(FpOp1P%(reg)d_uw);
        ''' % pieceArgs
        if readDest:
            body += '''
        destReg.regs[%(reg)d] = htole(FpDestP%(reg)d_uw);
        ''' % pieceArgs

    destPreload = ('destElem = letoh(destReg.elements[i]);'
                   if readDest else '')
    body += '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = letoh(srcReg1.elements[imm]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htole(destElem);
        }
        ''' % { "op" : op, "readDest" : destPreload }
    for piece in range(rCount):
        body += '''
        FpDestP%(reg)d_uw = letoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : piece }

    iop = ArmInstObjParams(name, Name,
                           "RegRegImmOp",
                           { "code": body,
                             "r_count": rCount,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for targs in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targs,
                                               "class_name" : Name })
| |
def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
    '''Generate a two register instruction whose op rewrites both
    registers in place.

    Both registers (rCount 32-bit pieces each) are always loaded before
    op runs and both are written back afterwards. op is inserted
    verbatim, without an element loop around it. readDest is accepted for
    signature parity with the other generators but has no effect, since
    the destination is read unconditionally. One NeonExecDeclare
    substitution is emitted per entry of types.
    '''
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
        RegVect srcReg1, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htole(FpOp1P%(reg)d_uw);
        destReg.regs[%(reg)d] = htole(FpDestP%(reg)d_uw);
        ''' % { "reg" : reg }
    eWalkCode += op
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = letoh(destReg.regs[%(reg)d]);
        FpOp1P%(reg)d_uw = letoh(srcReg1.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = ArmInstObjParams(name, Name,
                           "RegRegOp",
                           { "code": eWalkCode,
                             "r_count": rCount,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
| |
def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
                     readDest=False, toInt=False):
    '''Generate a two register floating point instruction working on
    rCount single precision registers.

    op is the C++ snippet computing destReg from srcReg1. With toInt the
    destination is kept as raw 32-bit words in a RegVect instead of a
    float array. readDest preloads the destination. One NeonExecDeclare
    substitution is emitted per entry of types.
    '''
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
        typedef float FloatVect[rCount];
        FloatVect srcRegs1;
    '''
    if toInt:
        eWalkCode += 'RegVect destRegs;\n'
    else:
        eWalkCode += 'FloatVect destRegs;\n'
    for reg in range(rCount):
        eWalkCode += '''
        srcRegs1[%(reg)d] = FpOp1P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            if toInt:
                eWalkCode += '''
        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
        ''' % { "reg" : reg }
            else:
                eWalkCode += '''
        destRegs[%(reg)d] = FpDestP%(reg)d;
        ''' % { "reg" : reg }
    # A RegVect destination is accessed through its regs member, a float
    # array directly. The generated loop counts with r, so the read-back
    # must index with r as well (it used to name a non-existent i).
    if toInt:
        destType = 'uint32_t'
        destSlot = 'destRegs.regs[r]'
    else:
        destType = 'float'
        destSlot = 'destRegs[r]'
    readDestCode = ''
    if readDest:
        readDestCode = 'destReg = %s;' % destSlot
    writeDest = '%s = destReg;' % destSlot
    eWalkCode += '''
        for (unsigned r = 0; r < rCount; r++) {
            float srcReg1 = srcRegs1[r];
            %(destType)s destReg;
            %(readDest)s
            %(op)s
            %(writeDest)s
        }
        ''' % { "op" : op,
                "readDest" : readDestCode,
                "destType" : destType,
                "writeDest" : writeDest }
    for reg in range(rCount):
        if toInt:
            eWalkCode += '''
        FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
        ''' % { "reg" : reg }
        else:
            eWalkCode += '''
        FpDestP%(reg)d = destRegs[%(reg)d];
        ''' % { "reg" : reg }
    iop = ArmInstObjParams(name, Name,
                           "FpRegRegOp",
                           { "code": eWalkCode,
                             "r_count": rCount,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
| |
def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
    '''Generate a two register instruction that folds each pair of
    adjacent source elements into one big destination element.

    rCount 32-bit register pieces are read from the source and written to
    the destination. op is the C++ snippet computing
    "BigElement destElem" from srcElem1/srcElem2 (elements 2*i and
    2*i+1); readDest preloads the destination. One NeonExecDeclare
    substitution is emitted per entry of types.
    '''
    global header_output, exec_output

    body = simdEnabledCheckCode + '''
        RegVect srcRegs;
        BigRegVect destReg = {};
    '''
    for piece in range(rCount):
        pieceArgs = { "reg" : piece }
        body += '''
        srcRegs.regs[%(reg)d] = htole(FpOp1P%(reg)d_uw);
        ''' % pieceArgs
        if readDest:
            body += '''
        destReg.regs[%(reg)d] = htole(FpDestP%(reg)d_uw);
        ''' % pieceArgs

    destPreload = ('destElem = letoh(destReg.elements[i]);'
                   if readDest else '')
    body += '''
        for (unsigned i = 0; i < eCount / 2; i++) {
            Element srcElem1 = letoh(srcRegs.elements[2 * i]);
            Element srcElem2 = letoh(srcRegs.elements[2 * i + 1]);
            BigElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htole(destElem);
        }
        ''' % { "op" : op, "readDest" : destPreload }
    for piece in range(rCount):
        body += '''
        FpDestP%(reg)d_uw = letoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : piece }

    iop = ArmInstObjParams(name, Name,
                           "RegRegOp",
                           { "code": body,
                             "r_count": rCount,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for targs in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targs,
                                               "class_name" : Name })
| |
def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
    '''Generate a two register instruction reading a big source (four
    32-bit register pieces) and writing a regular destination (two
    pieces).

    op is the C++ snippet computing "Element destElem" from
    "BigElement srcElem1"; readDest preloads the destination. One
    NeonExecDeclare substitution is emitted per entry of types.
    '''
    global header_output, exec_output
    srcPieces, destPieces = 4, 2

    body = simdEnabledCheckCode + '''
        BigRegVect srcReg1;
        RegVect destReg;
    '''
    for piece in range(srcPieces):
        body += '''
        srcReg1.regs[%(reg)d] = htole(FpOp1P%(reg)d_uw);
        ''' % { "reg" : piece }
    if readDest:
        for piece in range(destPieces):
            body += '''
        destReg.regs[%(reg)d] = htole(FpDestP%(reg)d_uw);
        ''' % { "reg" : piece }

    destPreload = ('destElem = letoh(destReg.elements[i]);'
                   if readDest else '')
    body += '''
        for (unsigned i = 0; i < eCount; i++) {
            BigElement srcElem1 = letoh(srcReg1.elements[i]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htole(destElem);
        }
        ''' % { "op" : op, "readDest" : destPreload }
    for piece in range(destPieces):
        body += '''
        FpDestP%(reg)d_uw = letoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : piece }

    iop = ArmInstObjParams(name, Name,
                           "RegRegOp",
                           { "code": body,
                             "r_count": 2,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for targs in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targs,
                                               "class_name" : Name })
| |
def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
    '''Generate a one register plus immediate instruction writing rCount
    32-bit register pieces.

    op is the C++ snippet computing destElem for each element; readDest
    preloads the destination. One NeonExecDeclare substitution is emitted
    per entry of types.
    '''
    global header_output, exec_output

    body = simdEnabledCheckCode + '''
        RegVect destReg;
    '''
    destPreload = ''
    if readDest:
        for piece in range(rCount):
            body += '''
        destReg.regs[%(reg)d] = htole(FpDestP%(reg)d_uw);
        ''' % { "reg" : piece }
        destPreload = 'destElem = letoh(destReg.elements[i]);'

    body += '''
        for (unsigned i = 0; i < eCount; i++) {
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htole(destElem);
        }
        ''' % { "op" : op, "readDest" : destPreload }
    for piece in range(rCount):
        body += '''
        FpDestP%(reg)d_uw = letoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : piece }

    iop = ArmInstObjParams(name, Name,
                           "RegImmOp",
                           { "code": body,
                             "r_count": rCount,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for targs in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targs,
                                               "class_name" : Name })
| |
def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
    '''Generate a two register instruction reading a regular source (two
    32-bit register pieces) and writing a big destination (four pieces).

    op is the C++ snippet computing "BigElement destElem" from
    "Element srcElem1"; readDest preloads the destination. One
    NeonExecDeclare substitution is emitted per entry of types.
    '''
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
        RegVect srcReg1;
        BigRegVect destReg = {};
    '''
    for reg in range(2):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htole(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
        destReg.regs[%(reg)d] = htole(FpDestP%(reg)d_uw);
        ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # Preload the per-element temporary; destReg is the whole
        # register vector and cannot be assigned an element.
        readDestCode = 'destElem = letoh(destReg.elements[i]);'
    eWalkCode += '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = letoh(srcReg1.elements[i]);
            BigElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htole(destElem);
        }
        ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
        FpDestP%(reg)d_uw = letoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = ArmInstObjParams(name, Name,
                           "RegRegOp",
                           { "code": eWalkCode,
                             "r_count": 2,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
| |
# Halving add: sum of the halved operands plus the carry out of the two
# low bits.
vhaddCode = '''
    Element carryBit =
        (((unsigned)srcElem1 & 0x1) +
         ((unsigned)srcElem2 & 0x1)) >> 1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) +
                ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vhadd", "Vhadd" + regSuffix, "SimdAddOp", allTypes,
                      regCount, vhaddCode)

# Rounding halving add: as above with an extra 1 added to the low bits.
vrhaddCode = '''
    Element carryBit =
        (((unsigned)srcElem1 & 0x1) +
         ((unsigned)srcElem2 & 0x1) + 1) >> 1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) +
                ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vrhadd", "Vrhadd" + regSuffix, "SimdAddOp", allTypes,
                      regCount, vrhaddCode)

# Halving subtract: difference of the halved operands minus the borrow
# out of the two low bits.
vhsubCode = '''
    Element barrowBit =
        (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) -
                ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vhsub", "Vhsub" + regSuffix, "SimdAddOp", allTypes,
                      regCount, vhsubCode)
| |
# Bitwise operations, each generated for the 2-piece (D) and 4-piece (Q)
# register widths in that order.
vandCode = '''
    destElem = srcElem1 & srcElem2;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vand", "Vand" + regSuffix, "SimdAluOp",
                      unsignedTypes, regCount, vandCode)

vbicCode = '''
    destElem = srcElem1 & ~srcElem2;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbic", "Vbic" + regSuffix, "SimdAluOp",
                      unsignedTypes, regCount, vbicCode)

vorrCode = '''
    destElem = srcElem1 | srcElem2;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vorr", "Vorr" + regSuffix, "SimdAluOp",
                      unsignedTypes, regCount, vorrCode)

# vmov reuses the OR snippet under a different name and op class.
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmov", "Vmov" + regSuffix, "SimdMiscOp",
                      unsignedTypes, regCount, vorrCode)

vornCode = '''
    destElem = srcElem1 | ~srcElem2;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vorn", "Vorn" + regSuffix, "SimdAluOp",
                      unsignedTypes, regCount, vornCode)

veorCode = '''
    destElem = srcElem1 ^ srcElem2;
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("veor", "Veor" + regSuffix, "SimdAluOp",
                      unsignedTypes, regCount, veorCode)

# The three bit-select flavours read the destination as a third input.
vbifCode = '''
    destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbif", "Vbif" + regSuffix, "SimdAluOp",
                      unsignedTypes, regCount, vbifCode, readDest=True)
vbitCode = '''
    destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbit", "Vbit" + regSuffix, "SimdAluOp",
                      unsignedTypes, regCount, vbitCode, readDest=True)
vbslCode = '''
    destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
'''
for regSuffix, regCount in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbsl", "Vbsl" + regSuffix, "SimdAluOp",
                      unsignedTypes, regCount, vbslCode, readDest=True)
| |
| vmaxCode = ''' |
| destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2; |
| ''' |
| threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode) |
| threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode) |
| |
| vminCode = ''' |
| destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2; |
| ''' |
| threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode) |
| threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode) |
| |
| vaddCode = ''' |
| destElem = srcElem1 + srcElem2; |
| ''' |
| threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode) |
| threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode) |
| |
| threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes, |
| 2, vaddCode, pairwise=True) |
| vaddlwCode = ''' |
| destElem = (BigElement)srcElem1 + (BigElement)srcElem2; |
| ''' |
| threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode) |
| threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode) |
| # VADDHN: add the operands at BigElement width, then shift right by the |
| # bit width of Element (sizeof(Element) * 8) so that the upper part of the |
| # sum is what gets assigned to destElem. |
| vaddhnCode = ''' |
| destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >> |
| (sizeof(Element) * 8); |
| ''' |
| threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode) |
| # VRADDHN: same as VADDHN, but 1 << (Element bit width - 1) is added to |
| # the sum before the shift, i.e. the discarded part is rounded rather than |
| # truncated. |
| vraddhnCode = ''' |
| destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 + |
| ((BigElement)1 << (sizeof(Element) * 8 - 1))) >> |
| (sizeof(Element) * 8); |
| ''' |
| threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode) |
| |
| vsubCode = ''' |
| destElem = srcElem1 - srcElem2; |
| ''' |
| threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode) |
| threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode) |
| vsublwCode = ''' |
| destElem = (BigElement)srcElem1 - (BigElement)srcElem2; |
| ''' |
| threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode) |
| threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode) |
| |
| vqaddUCode = ''' |
| destElem = srcElem1 + srcElem2; |
| FPSCR fpscr = (FPSCR) FpscrQc; |
| if (destElem < srcElem1 || destElem < srcElem2) { |
| destElem = (Element)(-1); |
| fpscr.qc = 1; |
| } |
| FpscrQc = fpscr; |
| ''' |
| threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode) |
| threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode) |
| # VSUBHN: subtract at BigElement width, then shift right by the bit width |
| # of Element so that the upper part of the difference is assigned to |
| # destElem. |
| vsubhnCode = ''' |
| destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >> |
| (sizeof(Element) * 8); |
| ''' |
| threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode) |
| # VRSUBHN: same as VSUBHN, but 1 << (Element bit width - 1) is added to |
| # the difference before the shift, i.e. the discarded part is rounded |
| # rather than truncated. |
| vrsubhnCode = ''' |
| destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 + |
| ((BigElement)1 << (sizeof(Element) * 8 - 1))) >> |
| (sizeof(Element) * 8); |
| ''' |
| threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode) |
| |
| vcaddCode = ''' |
| bool rot = bits(machInst, 24); |
| Element el1; |
| Element el3; |
| |
| for (int i = 0; i < eCount/2; ++i) { |
| Element srcElem1_1 = letoh(srcReg1.elements[2*i]); |
| Element srcElem1_2 = letoh(srcReg1.elements[2*i+1]); |
| Element srcElem2_1 = letoh(srcReg2.elements[2*i]); |
| Element srcElem2_2 = letoh(srcReg2.elements[2*i+1]); |
| Element destElem_1; |
| Element destElem_2; |
| if (rot) { |
| el1 = srcElem2_2; |
| el3 = fplibNeg<Element>(srcElem2_1); |
| } else { |
| el1 = fplibNeg<Element>(srcElem2_2); |
| el3 = srcElem2_1; |
| } |
| |
| destElem_1 = fplibAdd<Element>(srcElem1_1, el1, fpscr); |
| destElem_2 = fplibAdd<Element>(srcElem1_2, el3, fpscr); |
| destReg.elements[2*i] = htole(destElem_1); |
| destReg.elements[2*i+1] = htole(destElem_2); |
| } |
| ''' |
| |
| # VCADD |
| threeEqualRegInst("vcadd", "VcaddD", "SimdFloatAddOp", |
| ("uint16_t", "uint32_t"), 2, vcaddCode, |
| standardFpcsr=True, complex=True) |
| threeEqualRegInst("vcadd", "VcaddQ", "SimdFloatAddOp", |
| ("uint16_t", "uint32_t"), 4, |
| vcaddCode, standardFpcsr=True, complex=True) |
| |
| # VCMLA template, specialised below with the % operator: |
| #   %(rot)s   - bit range of machInst holding the 2-bit rotation |
| #   %(index)s - expression selecting the element pair of the second source |
| # For every pair of adjacent elements the rotation picks which second- |
| # source element (possibly negated) is multiplied with which first-source |
| # element, and the two products are accumulated onto the existing |
| # destination pair with fplibMulAdd. |
| vcmlaCode = ''' |
| uint8_t rot = bits(machInst, %(rot)s); |
| Element el1; |
| Element el2; |
| Element el3; |
| Element el4; |
| for (int i = 0; i < eCount/2; ++i) { |
| |
| Element srcElem1_1 = letoh(srcReg1.elements[2*i]); |
| Element srcElem1_2 = letoh(srcReg1.elements[2*i+1]); |
| Element srcElem2_1 = letoh(srcReg2.elements[2*%(index)s]); |
| Element srcElem2_2 = letoh(srcReg2.elements[2*%(index)s+1]); |
| Element destElem_1 = letoh(destReg.elements[2*i]); |
| Element destElem_2 = letoh(destReg.elements[2*i+1]); |
| |
| switch (rot) { |
| case 0x0: |
| { |
| el1 = srcElem2_1; |
| el2 = srcElem1_1; |
| el3 = srcElem2_2; |
| el4 = srcElem1_1; |
| break; |
| } |
| case 0x1: |
| { |
| el1 = fplibNeg<Element>(srcElem2_2); |
| el2 = srcElem1_2; |
| el3 = srcElem2_1; |
| el4 = srcElem1_2; |
| break; |
| } |
| case 0x2: |
| { |
| el1 = fplibNeg<Element>(srcElem2_1); |
| el2 = srcElem1_1; |
| el3 = fplibNeg<Element>(srcElem2_2); |
| el4 = srcElem1_1; |
| break; |
| } |
| case 0x3: |
| { |
| el1 = srcElem2_2; |
| el2 = srcElem1_2; |
| el3 = fplibNeg<Element>(srcElem2_1); |
| el4 = srcElem1_2; |
| break; |
| } |
| } |
| |
| destElem_1 = fplibMulAdd<Element>(destElem_1, el2, el1, fpscr); |
| destElem_2 = fplibMulAdd<Element>(destElem_2, el4, el3, fpscr); |
| |
| destReg.elements[2*i] = htole(destElem_1); |
| destReg.elements[2*i+1] = htole(destElem_2); |
| } |
| ''' |
| |
| # VCMLA (by element): rotation in machInst bits 21:20; the second-source |
| # pair is selected by "imm" and is therefore the same for every i. |
| vcmla_imm = vcmlaCode % {'rot': '21, 20', 'index': 'imm'} |
| threeEqualRegInst("vcmla", "VcmlaElemD", "SimdFloatMultAccOp", |
| ("uint16_t", "uint32_t"), 2, vcmla_imm, |
| readDest=True, byElem=True, standardFpcsr=True, |
| complex=True) |
| threeEqualRegInst("vcmla", "VcmlaElemQ", "SimdFloatMultAccOp", |
| ("uint16_t", "uint32_t"), 4, vcmla_imm, |
| readDest=True, byElem=True, standardFpcsr=True, |
| complex=True) |
| |
| # VCMLA (vector): rotation in machInst bits 24:23; the second-source pair |
| # follows the loop index i. |
| vcmla_vec = vcmlaCode % {'rot': '24, 23', 'index': 'i'} |
| threeEqualRegInst("vcmla", "VcmlaD", "SimdFloatMultAccOp", |
| ("uint16_t", "uint32_t"), 2, vcmla_vec, |
| readDest=True, standardFpcsr=True, complex=True) |
| threeEqualRegInst("vcmla", "VcmlaQ", "SimdFloatMultAccOp", |
| ("uint16_t", "uint32_t"), 4, vcmla_vec, |
| readDest=True, standardFpcsr=True, complex=True) |
| |
| vqaddSCode = ''' |
| destElem = srcElem1 + srcElem2; |
| FPSCR fpscr = (FPSCR) FpscrQc; |
| bool negDest = (destElem < 0); |
| bool negSrc1 = (srcElem1 < 0); |
| bool negSrc2 = (srcElem2 < 0); |
| if ((negDest != negSrc1) && (negSrc1 == negSrc2)) { |
| if (negDest) |
| /* If (>=0) plus (>=0) yields (<0), saturate to +. */ |
| destElem = std::numeric_limits<Element>::max(); |
| else |
| /* If (<0) plus (<0) yields (>=0), saturate to -. */ |
| destElem = std::numeric_limits<Element>::min(); |
| fpscr.qc = 1; |
| } |
| FpscrQc = fpscr; |
| ''' |
| threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode) |
| threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode) |
| |
| vqsubUCode = ''' |
| destElem = srcElem1 - srcElem2; |
| FPSCR fpscr = (FPSCR) FpscrQc; |
| if (destElem > srcElem1) { |
| destElem = 0; |
| fpscr.qc = 1; |
| } |
| FpscrQc = fpscr; |
| ''' |
| threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode) |
| threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode) |
| |
| vqsubSCode = ''' |
| destElem = srcElem1 - srcElem2; |
| FPSCR fpscr = (FPSCR) FpscrQc; |
| bool negDest = (destElem < 0); |
| bool negSrc1 = (srcElem1 < 0); |
| bool posSrc2 = (srcElem2 >= 0); |
| if ((negDest != negSrc1) && (negSrc1 == posSrc2)) { |
| if (negDest) |
| /* If (>=0) minus (<0) yields (<0), saturate to +. */ |
| destElem = std::numeric_limits<Element>::max(); |
| else |
| /* If (<0) minus (>=0) yields (>=0), saturate to -. */ |
| destElem = std::numeric_limits<Element>::min(); |
| fpscr.qc = 1; |
| } |
| FpscrQc = fpscr; |
| ''' |
| threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode) |
| threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode) |
| |
| vcgtCode = ''' |
| destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0; |
| ''' |
| threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode) |
| threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode) |
| |
| vcgeCode = ''' |
| destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0; |
| ''' |
| threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode) |
| threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode) |
| |
| vceqCode = ''' |
| destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0; |
| ''' |
| threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode) |
| threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode) |
| |
| vshlCode = ''' |
| int16_t shiftAmt = (int8_t)srcElem2; |
| if (shiftAmt < 0) { |
| shiftAmt = -shiftAmt; |
| if (shiftAmt >= sizeof(Element) * 8) { |
| shiftAmt = sizeof(Element) * 8 - 1; |
| destElem = 0; |
| } else { |
| destElem = (srcElem1 >> shiftAmt); |
| } |
| // Make sure the right shift sign extended when it should. |
| if (ltz(srcElem1) && !ltz(destElem)) { |
| destElem |= -((Element)1 << (sizeof(Element) * 8 - |
| 1 - shiftAmt)); |
| } |
| } else { |
| if (shiftAmt >= sizeof(Element) * 8) { |
| destElem = 0; |
| } else { |
| destElem = srcElem1 << shiftAmt; |
| } |
| } |
| ''' |
| threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode) |
| threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode) |
| |
| vrshlCode = ''' |
| int16_t shiftAmt = (int8_t)srcElem2; |
| if (shiftAmt < 0) { |
| shiftAmt = -shiftAmt; |
| Element rBit = 0; |
| if (shiftAmt <= sizeof(Element) * 8) |
| rBit = bits(srcElem1, shiftAmt - 1); |
| if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1)) |
| rBit = 1; |
| if (shiftAmt >= sizeof(Element) * 8) { |
| shiftAmt = sizeof(Element) * 8 - 1; |
| destElem = 0; |
| } else { |
| destElem = (srcElem1 >> shiftAmt); |
| } |
| // Make sure the right shift sign extended when it should. |
| if (ltz(srcElem1) && !ltz(destElem)) { |
| destElem |= -((Element)1 << (sizeof(Element) * 8 - |
| 1 - shiftAmt)); |
| } |
| destElem += rBit; |
| } else if (shiftAmt > 0) { |
| if (shiftAmt >= sizeof(Element) * 8) { |
| destElem = 0; |
| } else { |
| destElem = srcElem1 << shiftAmt; |
| } |
| } else { |
| destElem = srcElem1; |
| } |
| ''' |
| threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode) |
| threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode) |
| |
| vqshlUCode = ''' |
| int16_t shiftAmt = (int8_t)srcElem2; |
| FPSCR fpscr = (FPSCR) FpscrQc; |
| if (shiftAmt < 0) { |
| shiftAmt = -shiftAmt; |
| if (shiftAmt >= sizeof(Element) * 8) { |
| shiftAmt = sizeof(Element) * 8 - 1; |
| destElem = 0; |
| } else { |
| destElem = (srcElem1 >> shiftAmt); |
| } |
| } else if (shiftAmt > 0) { |
| if (shiftAmt >= sizeof(Element) * 8) { |
| if (srcElem1 != 0) { |
| destElem = mask(sizeof(Element) * 8); |
| fpscr.qc = 1; |
| } else { |
| destElem = 0; |
| } |
| } else { |
| if (bits(srcElem1, sizeof(Element) * 8 - 1, |
| sizeof(Element) * 8 - shiftAmt)) { |
| destElem = mask(sizeof(Element) * 8); |
| fpscr.qc = 1; |
| } else { |
| destElem = srcElem1 << shiftAmt; |
| } |
| } |
| } else { |
| destElem = srcElem1; |
| } |
| FpscrQc = fpscr; |
| ''' |
| threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode) |
| threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode) |
| |
| vqshlSCode = ''' |
| int16_t shiftAmt = (int8_t)srcElem2; |
| FPSCR fpscr = (FPSCR) FpscrQc; |
| if (shiftAmt < 0) { |
| shiftAmt = -shiftAmt; |
| if (shiftAmt >= sizeof(Element) * 8) { |
| shiftAmt = sizeof(Element) * 8 - 1; |
| destElem = 0; |
| } else { |
| destElem = (srcElem1 >> shiftAmt); |
| } |
| // Make sure the right shift sign extended when it should. |
| if (srcElem1 < 0 && destElem >= 0) { |
| destElem |= -((Element)1 << (sizeof(Element) * 8 - |
| 1 - shiftAmt)); |
| } |
| } else if (shiftAmt > 0) { |
| bool sat = false; |
| if (shiftAmt >= sizeof(Element) * 8) { |
| if (srcElem1 != 0) |
| sat = true; |
| else |
| destElem = 0; |
| } else { |
| if (bits(srcElem1, sizeof(Element) * 8 - 1, |
| sizeof(Element) * 8 - 1 - shiftAmt) != |
| ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) { |
| sat = true; |
| } else { |
| destElem = srcElem1 << shiftAmt; |
| } |
| } |
| if (sat) { |
| fpscr.qc = 1; |
| destElem = mask(sizeof(Element) * 8 - 1); |
| if (srcElem1 < 0) |
| destElem = ~destElem; |
| } |
| } else { |
| destElem = srcElem1; |
| } |
| FpscrQc = fpscr; |
| ''' |
| threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode) |
| threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode) |
| |
| vqrshlUCode = ''' |
| int16_t shiftAmt = (int8_t)srcElem2; |
| FPSCR fpscr = (FPSCR) FpscrQc; |
| if (shiftAmt < 0) { |
| shiftAmt = -shiftAmt; |
| Element rBit = 0; |
| if (shiftAmt <= sizeof(Element) * 8) |
| rBit = bits(srcElem1, shiftAmt - 1); |
| if (shiftAmt >= sizeof(Element) * 8) { |
| shiftAmt = sizeof(Element) * 8 - 1; |
| destElem = 0; |
| } else { |
| destElem = (srcElem1 >> shiftAmt); |
| } |
| destElem += rBit; |
| } else { |
| if (shiftAmt >= sizeof(Element) * 8) { |
| if (srcElem1 != 0) { |
| destElem = mask(sizeof(Element) * 8); |
| fpscr.qc = 1; |
| } else { |
| destElem = 0; |
| } |
| } else { |
| if (bits(srcElem1, sizeof(Element) * 8 - 1, |
| sizeof(Element) * 8 - shiftAmt)) { |
| destElem = mask(sizeof(Element) * 8); |
| fpscr.qc = 1; |
| } else { |
| destElem = srcElem1 << shiftAmt; |
| } |
| } |
| } |
| FpscrQc = fpscr; |
| ''' |
| threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode) |
| threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode) |
| |
| vqrshlSCode = ''' |
| int16_t shiftAmt = (int8_t)srcElem2; |
| FPSCR fpscr = (FPSCR) FpscrQc; |
| if (shiftAmt < 0) { |
| shiftAmt = -shiftAmt; |
| Element rBit = 0; |
| if (shiftAmt <= sizeof(Element) * 8) |
| rBit = bits(srcElem1, shiftAmt - 1); |
| if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0) |
| rBit = 1; |
| if (shiftAmt >= sizeof(Element) * 8) { |
| shiftAmt = sizeof(Element) * 8 - 1; |
| destElem = 0; |
| } else { |
| destElem = (srcElem1 >> shiftAmt); |
| } |
| // Make sure the right shift sign extended when it should. |
| if (srcElem1 < 0 && destElem >= 0) { |
| destElem |= -((Element)1 << (sizeof(Element) * 8 - |
| 1 - shiftAmt)); |
| } |
| destElem += rBit; |
| } else if (shiftAmt > 0) { |
| bool sat = false; |
| if (shiftAmt >= sizeof(Element) * 8) { |
| if (srcElem1 != 0) |
| sat = true; |
| else |
| destElem = 0; |
| } else { |
| if (bits(srcElem1, sizeof(Element) * 8 - 1, |
| sizeof(Element) * 8 - 1 - shiftAmt) != |
| ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) { |
| sat = true; |
| } else { |
| destElem = srcElem1 << shiftAmt; |
| } |
| } |
| if (sat) { |
| fpscr.qc = 1; |
| destElem = mask(sizeof(Element) * 8 - 1); |
| if (srcElem1 < 0) |
| destElem = ~destElem; |
| } |
| } else { |
| destElem = srcElem1; |
| } |
| FpscrQc = fpscr; |
| ''' |
| threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode) |
| threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode) |
| |
| vabaCode = ''' |
| destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) : |
| (srcElem2 - srcElem1); |
| ''' |
| threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True) |
| threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True) |
| vabalCode = ''' |
| destElem += (srcElem1 > srcElem2) ? |
| ((BigElement)srcElem1 - (BigElement)srcElem2) : |
| ((BigElement)srcElem2 - (BigElement)srcElem1); |
| ''' |
| threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True) |
| |
| vabdCode = ''' |
| destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) : |
| (srcElem2 - srcElem1); |
| ''' |
| threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode) |
| threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode) |
| vabdlCode = ''' |
| destElem = (srcElem1 > srcElem2) ? |
| ((BigElement)srcElem1 - (BigElement)srcElem2) : |
| ((BigElement)srcElem2 - (BigElement)srcElem1); |
| ''' |
| threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode) |
| |
| vtstCode = ''' |
| destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0; |
| ''' |
| threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode) |
| threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode) |
| |
| vmulCode = ''' |
| destElem = srcElem1 * srcElem2; |
| ''' |
| threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode) |
| threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode) |
| vmullCode = ''' |
| destElem = (BigElement)srcElem1 * (BigElement)srcElem2; |
| ''' |
| threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode) |
| |
| vmlaCode = ''' |
| destElem = destElem + srcElem1 * srcElem2; |
| ''' |
| threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True) |
| threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True) |
| vmlalCode = ''' |
| destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2; |
| ''' |
| threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True) |
| |
| vqdmlalCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrQc; |
| BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2); |
| Element maxNeg = std::numeric_limits<Element>::min(); |
| Element halfNeg = maxNeg / 2; |
| if ((srcElem1 == maxNeg && srcElem2 == maxNeg) || |
| (srcElem1 == halfNeg && srcElem2 == maxNeg) || |
| (srcElem1 == maxNeg && srcElem2 == halfNeg)) { |
| midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8)); |
| fpscr.qc = 1; |
| } |
| bool negPreDest = ltz(destElem); |
| destElem += midElem; |
| bool negDest = ltz(destElem); |
| bool negMid = ltz(midElem); |
| if (negPreDest == negMid && negMid != negDest) { |
| destElem = mask(sizeof(BigElement) * 8 - 1); |
| if (negPreDest) |
| destElem = ~destElem; |
| fpscr.qc = 1; |
| } |
| FpscrQc = fpscr; |
| ''' |
| threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True) |
| |
| vqdmlslCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrQc; |
| BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2); |
| Element maxNeg = std::numeric_limits<Element>::min(); |
| Element halfNeg = maxNeg / 2; |
| if ((srcElem1 == maxNeg && srcElem2 == maxNeg) || |
| (srcElem1 == halfNeg && srcElem2 == maxNeg) || |
| (srcElem1 == maxNeg && srcElem2 == halfNeg)) { |
| midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8)); |
| fpscr.qc = 1; |
| } |
| bool negPreDest = ltz(destElem); |
| destElem -= midElem; |
| bool negDest = ltz(destElem); |
| bool posMid = ltz((BigElement)-midElem); |
| if (negPreDest == posMid && posMid != negDest) { |
| destElem = mask(sizeof(BigElement) * 8 - 1); |
| if (negPreDest) |
| destElem = ~destElem; |
| fpscr.qc = 1; |
| } |
| FpscrQc = fpscr; |
| ''' |
| threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True) |
| |
| # VQDMULL: doubling multiply long. The product 2 * srcElem1 * srcElem2 is |
| # assigned to destElem; when both sources equal the most negative Element |
| # value the result is instead set to the complement of that value shifted |
| # up by the Element bit width, and QC is set in the FPSCR copy that is |
| # written back to FpscrQc. |
| vqdmullCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrQc; |
| destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2); |
| if (srcElem1 == srcElem2 && |
| srcElem1 == (Element)(std::numeric_limits<Element>::min())) { |
| destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8)); |
| fpscr.qc = 1; |
| } |
| FpscrQc = fpscr; |
| ''' |
| threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode) |
| |
| vmlsCode = ''' |
| destElem = destElem - srcElem1 * srcElem2; |
| ''' |
| threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True) |
| threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True) |
| vmlslCode = ''' |
| destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2; |
| ''' |
| threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True) |
| |
| vmulpCode = ''' |
| destElem = 0; |
| for (unsigned j = 0; j < sizeof(Element) * 8; j++) { |
| if (bits(srcElem2, j)) |
| destElem ^= srcElem1 << j; |
| } |
| ''' |
| threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode) |
| threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode) |
| vmullpCode = ''' |
| destElem = 0; |
| for (unsigned j = 0; j < sizeof(Element) * 8; j++) { |
| if (bits(srcElem2, j)) |
| destElem ^= (BigElement)srcElem1 << j; |
| } |
| ''' |
| threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode) |
| |
| threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", smallTypes, 2, vmaxCode, pairwise=True) |
| |
| threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", smallTypes, 2, vminCode, pairwise=True) |
| |
| vqdmulhCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrQc; |
| destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >> |
| (sizeof(Element) * 8); |
| if (srcElem1 == srcElem2 && |
| srcElem1 == (Element)(std::numeric_limits<Element>::min())) { |
| destElem = ~srcElem1; |
| fpscr.qc = 1; |
| } |
| FpscrQc = fpscr; |
| ''' |
| threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode) |
| threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode) |
| |
| |
| # Shared template for VQRDMLAH / VQRDMLSH. %(code)s is replaced below by |
| # "+" or "-". The destination is shifted up by the element width, the |
| # doubled product is added to / subtracted from it, a rounding constant |
| # (1 << (nbits - 1)) is added, and the total is shifted back down. A value |
| # outside the Element range is clamped to the Element max/min and QC is |
| # set in the FPSCR copy written back to FpscrQc. |
| vqrdmCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrQc; |
| int nbits = sizeof(Element)*8; |
| |
| auto val_max = std::numeric_limits<Element>::max(); |
| auto val_min = std::numeric_limits<Element>::min(); |
| BigElement unsat_value = ((BigElement)destElem << nbits) %(code)s |
| ((BigElement)srcElem1 * (BigElement)srcElem2 * 2) + |
| ((BigElement)1 << (nbits - 1)); |
| unsat_value >>= nbits; |
| |
| if (unsat_value > val_max) { |
| fpscr.qc = 1; |
| destElem = val_max; |
| } else if (unsat_value < val_min) { |
| fpscr.qc = 1; |
| destElem = val_min; |
| } else { |
| destElem = unsat_value; |
| } |
| FpscrQc = fpscr; |
| ''' |
| # Accumulating variant: the product is added to the shifted destination. |
| code_add = "+" |
| vqrdmlahCode = vqrdmCode % {'code': code_add} |
| # Extra code passed via extra= to every VQRDMLAH/VQRDMLSH instantiation: |
| # returns an UndefinedInstruction fault unless ID_ISAR5 bits 27:24 read as |
| # 0x1 and the size field (machInst bits 21:20) is neither 0 nor 3. It also |
| # typedefs BigElement as __int128_t for the template above. |
| rdm_check = ''' |
| int sz = bits(machInst, 21, 20); |
| RegVal isar5 = xc->tcBase()->readMiscReg(MISCREG_ID_ISAR5); |
| if (!(bits(isar5, 27, 24) == 0x1) || sz == 3 || sz == 0) |
| return std::make_shared<UndefinedInstruction>(machInst, true); |
| typedef __int128_t BigElement; |
| ''' |
| threeEqualRegInst("vqrdmlah", "VqrdmlahD", |
| "SimdMultOp", smallSignedTypes, 2, vqrdmlahCode, readDest=True, |
| extra=rdm_check) |
| threeEqualRegInst("vqrdmlah", "VqrdmlahQ", |
| "SimdMultOp", smallSignedTypes, 4, vqrdmlahCode, readDest=True, |
| extra=rdm_check) |
| |
| # Subtracting variant: the product is subtracted from the shifted |
| # destination. |
| code_sub = "-" |
| vqrdmlshCode = vqrdmCode % {'code': code_sub} |
| threeEqualRegInst("vqrdmlsh", "VqrdmlshD", |
| "SimdMultOp", smallSignedTypes, 2, vqrdmlshCode, readDest=True, |
| extra=rdm_check) |
| threeEqualRegInst("vqrdmlsh", "VqrdmlshQ", |
| "SimdMultOp", smallSignedTypes, 4, vqrdmlshCode, readDest=True, |
| extra=rdm_check) |
| |
| |
| vqrdmulhCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrQc; |
| destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 + |
| ((int64_t)1 << (sizeof(Element) * 8 - 1))) >> |
| (sizeof(Element) * 8); |
| Element maxNeg = std::numeric_limits<Element>::min(); |
| Element halfNeg = maxNeg / 2; |
| if ((srcElem1 == maxNeg && srcElem2 == maxNeg) || |
| (srcElem1 == halfNeg && srcElem2 == maxNeg) || |
| (srcElem1 == maxNeg && srcElem2 == halfNeg)) { |
| if (destElem < 0) { |
| destElem = mask(sizeof(Element) * 8 - 1); |
| } else { |
| destElem = std::numeric_limits<Element>::min(); |
| } |
| fpscr.qc = 1; |
| } |
| FpscrQc = fpscr; |
| ''' |
| threeEqualRegInst("vqrdmulh", "VqrdmulhD", |
| "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode) |
| threeEqualRegInst("vqrdmulh", "VqrdmulhQ", |
| "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode) |
| |
| vMinMaxFpCode = ''' |
| destElem = fplib%s<Element>(srcElem1, srcElem2, fpscr); |
| ''' |
| vMinMaxInsts = [ |
| ("vmax", "VmaxDFp", 2, "Max", False, ), |
| ("vmax", "VmaxQFp", 4, "Max", False, ), |
| ("vmaxnm", "VmaxnmDFp", 2, "MaxNum", False, ), |
| ("vmaxnm", "VmaxnmQFp", 4, "MaxNum", False, ), |
| ("vpmax", "VpmaxDFp", 2, "Max", True, ), |
| ("vpmax", "VpmaxQFp", 4, "Max", True, ), |
| ("vmin", "VminDFp", 2, "Min", False, ), |
| ("vmin", "VminQFp", 4, "Min", False, ), |
| ("vminnm", "VminnmDFp", 2, "MinNum", False, ), |
| ("vminnm", "VminnmQFp", 4, "MinNum", False, ), |
| ("vpmin", "VpminDFp", 2, "Min", True, ), |
| ("vpmin", "VpminQFp", 4, "Min", True, ), |
| ] |
| for name, Name, rCount, op, pairwise in vMinMaxInsts: |
| threeEqualRegInst( |
| name, |
| Name, |
| "SimdFloatCmpOp", |
| ("uint32_t",), |
| rCount, |
| vMinMaxFpCode % op, |
| pairwise=pairwise, |
| standardFpcsr=True, |
| ) |
| |
| vaddfpCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrExc; |
| destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS, |
| true, true, VfpRoundNearest); |
| FpscrExc = fpscr; |
| ''' |
| threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode) |
| threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode) |
| |
| threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",), |
| 2, vaddfpCode, pairwise=True) |
| threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",), |
| 4, vaddfpCode, pairwise=True) |
| |
| vsubfpCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrExc; |
| destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS, |
| true, true, VfpRoundNearest); |
| FpscrExc = fpscr; |
| ''' |
| threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode) |
| threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode) |
| |
| vmulfpCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrExc; |
| destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS, |
| true, true, VfpRoundNearest); |
| FpscrExc = fpscr; |
| ''' |
| threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode) |
| threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode) |
| |
| vmlafpCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrExc; |
| float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS, |
| true, true, VfpRoundNearest); |
| destReg = binaryOp(fpscr, mid, destReg, fpAddS, |
| true, true, VfpRoundNearest); |
| FpscrExc = fpscr; |
| ''' |
| threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True) |
| threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True) |
| |
| vfmafpCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrExc; |
| destReg = ternaryOp(fpscr, srcReg1, srcReg2, destReg, fpMulAdd<float>, |
| true, true, VfpRoundNearest); |
| FpscrExc = fpscr; |
| ''' |
| threeEqualRegInstFp("vfma", "NVfmaDFp", "SimdFloatMultAccOp", ("float",), 2, vfmafpCode, True) |
| threeEqualRegInstFp("vfma", "NVfmaQFp", "SimdFloatMultAccOp", ("float",), 4, vfmafpCode, True) |
| |
| vfmsfpCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrExc; |
| destReg = ternaryOp(fpscr, -srcReg1, srcReg2, destReg, fpMulAdd<float>, |
| true, true, VfpRoundNearest); |
| FpscrExc = fpscr; |
| ''' |
| threeEqualRegInstFp("vfms", "NVfmsDFp", "SimdFloatMultAccOp", ("float",), 2, vfmsfpCode, True) |
| threeEqualRegInstFp("vfms", "NVfmsQFp", "SimdFloatMultAccOp", ("float",), 4, vfmsfpCode, True) |
| |
| vmlsfpCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrExc; |
| float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS, |
| true, true, VfpRoundNearest); |
| destReg = binaryOp(fpscr, destReg, mid, fpSubS, |
| true, true, VfpRoundNearest); |
| FpscrExc = fpscr; |
| ''' |
| threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True) |
| threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True) |
| |
| vcgtfpCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrExc; |
| float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc, |
| true, true, VfpRoundNearest); |
| destReg = (res == 0) ? -1 : 0; |
| if (res == 2.0) |
| fpscr.ioc = 1; |
| FpscrExc = fpscr; |
| ''' |
| threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",), |
| 2, vcgtfpCode, toInt = True) |
| threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",), |
| 4, vcgtfpCode, toInt = True) |
| |
| vcgefpCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrExc; |
| float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc, |
| true, true, VfpRoundNearest); |
| destReg = (res == 0) ? -1 : 0; |
| if (res == 2.0) |
| fpscr.ioc = 1; |
| FpscrExc = fpscr; |
| ''' |
| threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",), |
| 2, vcgefpCode, toInt = True) |
| threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",), |
| 4, vcgefpCode, toInt = True) |
| |
| vacgtfpCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrExc; |
| float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc, |
| true, true, VfpRoundNearest); |
| destReg = (res == 0) ? -1 : 0; |
| if (res == 2.0) |
| fpscr.ioc = 1; |
| FpscrExc = fpscr; |
| ''' |
| threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",), |
| 2, vacgtfpCode, toInt = True) |
| threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",), |
| 4, vacgtfpCode, toInt = True) |
| |
| vacgefpCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrExc; |
| float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc, |
| true, true, VfpRoundNearest); |
| destReg = (res == 0) ? -1 : 0; |
| if (res == 2.0) |
| fpscr.ioc = 1; |
| FpscrExc = fpscr; |
| ''' |
| threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",), |
| 2, vacgefpCode, toInt = True) |
| threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",), |
| 4, vacgefpCode, toInt = True) |
| |
| vceqfpCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrExc; |
| float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc, |
| true, true, VfpRoundNearest); |
| destReg = (res == 0) ? -1 : 0; |
| if (res == 2.0) |
| fpscr.ioc = 1; |
| FpscrExc = fpscr; |
| ''' |
| threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",), |
| 2, vceqfpCode, toInt = True) |
| threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",), |
| 4, vceqfpCode, toInt = True) |
| |
| vrecpsCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrExc; |
| destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS, |
| true, true, VfpRoundNearest); |
| FpscrExc = fpscr; |
| ''' |
| threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode) |
| threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode) |
| |
| vrsqrtsCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrExc; |
| destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS, |
| true, true, VfpRoundNearest); |
| FpscrExc = fpscr; |
| ''' |
| threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode) |
| threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode) |
| |
| vabdfpCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrExc; |
| float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS, |
| true, true, VfpRoundNearest); |
| destReg = fabs(mid); |
| FpscrExc = fpscr; |
| ''' |
| threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode) |
| threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode) |
| |
| # Second registration pass for the multiply family. The class names carry |
| # an extra "s" (VmlasD, VmulsQ, ...) and go through the twoEqualReg* / |
| # twoRegLong* helpers; presumably these are the by-scalar encodings (confirm |
| # against the helper definitions). The code snippets (vmlaCode, vmulCode, |
| # vqdmulhCode, ...) and rdm_check are assigned outside this part of the |
| # file. The trailing True / readDest=True is passed for the accumulating |
| # forms (vmla, vmls, vqdmlal, vqdmlsl, vqrdmlah, vqrdmlsh). |
| twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2, vmlaCode, True) |
| twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True) |
| twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True) |
| twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True) |
| twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode, True) |
| |
| twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2, vmlsCode, True) |
| twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True) |
| twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True) |
| twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True) |
| twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode, True) |
| |
| twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode) |
| twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode) |
| twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode) |
| twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode) |
| twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode) |
| |
| twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode) |
| twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes, vqdmlalCode, True) |
| twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes, vqdmlslCode, True) |
| twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode) |
| twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode) |
| twoEqualRegInst("vqrdmulh", "VqrdmulhsD", |
| "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode) |
| twoEqualRegInst("vqrdmulh", "VqrdmulhsQ", |
| "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode) |
| twoEqualRegInst("vqrdmlah", "VqrdmlahsD", |
| "SimdMultOp", smallSignedTypes, 2, vqrdmlahCode, readDest=True, |
| extra=rdm_check) |
| twoEqualRegInst("vqrdmlah", "VqrdmlahsQ", |
| "SimdMultOp", smallSignedTypes, 4, vqrdmlahCode, readDest=True, |
| extra=rdm_check) |
| twoEqualRegInst("vqrdmlsh", "VqrdmlshsD", |
| "SimdMultOp", smallSignedTypes, 2, vqrdmlshCode, readDest=True, |
| extra=rdm_check) |
| twoEqualRegInst("vqrdmlsh", "VqrdmlshsQ", |
| "SimdMultOp", smallSignedTypes, 4, vqrdmlshCode, readDest=True, |
| extra=rdm_check) |
| |
| # vshr: shift each element right by imm. A shift count of at least the |
| # element width is special-cased so the C++ shift is never asked to move a |
| # full width: inputs for which ltz() is true give -1, all others give 0. |
| vshrCode = ''' |
| if (imm >= sizeof(srcElem1) * 8) { |
| if (ltz(srcElem1)) |
| destElem = -1; |
| else |
| destElem = 0; |
| } else { |
| destElem = srcElem1 >> imm; |
| } |
| ''' |
| twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode) |
| twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode) |
| |
| vsraCode = ''' |
| Element mid;; |
| if (imm >= sizeof(srcElem1) * 8) { |
| mid = ltz(srcElem1) ? -1 : 0; |
| } else { |
| mid = srcElem1 >> imm; |
| if (ltz(srcElem1) && !ltz(mid)) { |
| mid |= -(mid & ((Element)1 << |
| (sizeof(Element) * 8 - 1 - imm))); |
| } |
| } |
| destElem += mid; |
| ''' |
| twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True) |
| twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True) |
| |
| # vrshr: rounding shift right. The rounding bit is bit (imm - 1) of the |
| # source and is added to the shifted value. The shift is split into |
| # (imm - 1) followed by 1, so imm equal to the element width never becomes a |
| # single full-width shift. imm above the element width gives 0; imm == 0 |
| # copies the source unchanged. |
| vrshrCode = ''' |
| if (imm > sizeof(srcElem1) * 8) { |
| destElem = 0; |
| } else if (imm) { |
| Element rBit = bits(srcElem1, imm - 1); |
| destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit; |
| } else { |
| destElem = srcElem1; |
| } |
| ''' |
| twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode) |
| twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode) |
| |
| # vrsra: rounding shift right and accumulate. Same three cases as vrshr, |
| # but every case adds into destElem; the "+= 0" branch leaves the |
| # accumulator untouched when imm exceeds the element width. |
| vrsraCode = ''' |
| if (imm > sizeof(srcElem1) * 8) { |
| destElem += 0; |
| } else if (imm) { |
| Element rBit = bits(srcElem1, imm - 1); |
| destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit; |
| } else { |
| destElem += srcElem1; |
| } |
| ''' |
| twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2, vrsraCode, True) |
| twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4, vrsraCode, True) |
| |
| # vsri: shift right and insert. The result is srcElem1 >> imm combined with |
| # the destination bits left after clearing mask(width - imm) (assuming |
| # mask(n) is the low n bits, that keeps the top imm bits of destElem). |
| # imm at or above the element width leaves destElem as it was. |
| vsriCode = ''' |
| if (imm >= sizeof(Element) * 8) { |
| destElem = destElem; |
| } else { |
| destElem = (srcElem1 >> imm) | |
| (destElem & ~mask(sizeof(Element) * 8 - imm)); |
| } |
| ''' |
| twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes, 2, vsriCode, True) |
| twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp", unsignedTypes, 4, vsriCode, True) |
| |
| # vshl (immediate): shift left by imm. When imm reaches the element width |
| # the shift is performed as (width - 1) followed by 1, so no single shift |
| # count equals the element width. |
| vshlCode = ''' |
| if (imm >= sizeof(Element) * 8) { |
| destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1; |
| } else { |
| destElem = srcElem1 << imm; |
| } |
| ''' |
| twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes, 2, vshlCode) |
| twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes, 4, vshlCode) |
| |
| # vsli: shift left and insert. The result is srcElem1 << imm combined with |
| # destElem & mask(imm) (assuming mask(n) is the low n bits, the low imm bits |
| # of the destination survive). imm at or above the element width leaves |
| # destElem as it was. |
| vsliCode = ''' |
| if (imm >= sizeof(Element) * 8) { |
| destElem = destElem; |
| } else { |
| destElem = (srcElem1 << imm) | (destElem & mask(imm)); |
| } |
| ''' |
| twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True) |
| twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True) |
| |
| # vqshl (signed): saturating shift left by imm; FPSCR.QC is set whenever |
| # the result saturates. |
| #  - imm >= element width: any non-zero input saturates to |
| #    numeric_limits<Element>::min(), inverted for positive inputs; zero |
| #    stays zero. |
| #  - 0 < imm < width: topBits holds the imm + 1 highest source bits; unless |
| #    they are all zero or equal mask(imm + 1), the result saturates the |
| #    same way. |
| #  - imm == 0: plain copy. |
| vqshlCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrQc; |
| if (imm >= sizeof(Element) * 8) { |
| if (srcElem1 != 0) { |
| destElem = std::numeric_limits<Element>::min(); |
| if (srcElem1 > 0) |
| destElem = ~destElem; |
| fpscr.qc = 1; |
| } else { |
| destElem = 0; |
| } |
| } else if (imm) { |
| destElem = (srcElem1 << imm); |
| uint64_t topBits = bits((uint64_t)srcElem1, |
| sizeof(Element) * 8 - 1, |
| sizeof(Element) * 8 - 1 - imm); |
| if (topBits != 0 && topBits != mask(imm + 1)) { |
| destElem = std::numeric_limits<Element>::min(); |
| if (srcElem1 > 0) |
| destElem = ~destElem; |
| fpscr.qc = 1; |
| } |
| } else { |
| destElem = srcElem1; |
| } |
| FpscrQc = fpscr; |
| ''' |
| twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode) |
| twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode) |
| |
| # vqshlu, registered for the unsigned types: saturating shift left. The |
| # result saturates to mask(sizeof(Element) * 8) and sets FPSCR.QC when imm |
| # is at least the element width and the input is non-zero, or when any of |
| # the imm highest source bits (topBits) is set. imm == 0 copies the source. |
| vqshluCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrQc; |
| if (imm >= sizeof(Element) * 8) { |
| if (srcElem1 != 0) { |
| destElem = mask(sizeof(Element) * 8); |
| fpscr.qc = 1; |
| } else { |
| destElem = 0; |
| } |
| } else if (imm) { |
| destElem = (srcElem1 << imm); |
| uint64_t topBits = bits((uint64_t)srcElem1, |
| sizeof(Element) * 8 - 1, |
| sizeof(Element) * 8 - imm); |
| if (topBits != 0) { |
| destElem = mask(sizeof(Element) * 8); |
| fpscr.qc = 1; |
| } |
| } else { |
| destElem = srcElem1; |
| } |
| FpscrQc = fpscr; |
| ''' |
| twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode) |
| twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode) |
| |
| # vqshlus, registered for the signed types: saturating shift left whose |
| # saturation bounds are 0 and mask(sizeof(Element) * 8). In every branch a |
| # negative input yields 0 with FPSCR.QC set. A positive input saturates to |
| # the mask with QC set when imm is at least the element width, or when any |
| # of the imm highest source bits is set. |
| vqshlusCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrQc; |
| if (imm >= sizeof(Element) * 8) { |
| if (srcElem1 < 0) { |
| destElem = 0; |
| fpscr.qc = 1; |
| } else if (srcElem1 > 0) { |
| destElem = mask(sizeof(Element) * 8); |
| fpscr.qc = 1; |
| } else { |
| destElem = 0; |
| } |
| } else if (imm) { |
| destElem = (srcElem1 << imm); |
| uint64_t topBits = bits((uint64_t)srcElem1, |
| sizeof(Element) * 8 - 1, |
| sizeof(Element) * 8 - imm); |
| if (srcElem1 < 0) { |
| destElem = 0; |
| fpscr.qc = 1; |
| } else if (topBits != 0) { |
| destElem = mask(sizeof(Element) * 8); |
| fpscr.qc = 1; |
| } |
| } else { |
| if (srcElem1 < 0) { |
| fpscr.qc = 1; |
| destElem = 0; |
| } else { |
| destElem = srcElem1; |
| } |
| } |
| FpscrQc = fpscr; |
| ''' |
| twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode) |
| twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode) |
| |
| # vshrn: shift right by imm and assign to destElem, going through the |
| # narrowing helper. A shift of at least the source width gives 0. |
| vshrnCode = ''' |
| if (imm >= sizeof(srcElem1) * 8) { |
| destElem = 0; |
| } else { |
| destElem = srcElem1 >> imm; |
| } |
| ''' |
| twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp", smallUnsignedTypes, vshrnCode) |
| |
| # vrshrn: rounding variant of vshrn. Bit (imm - 1) of the source is added |
| # after the shift, which is split into (imm - 1) then 1. imm above the |
| # source width gives 0; imm == 0 copies the source. |
| vrshrnCode = ''' |
| if (imm > sizeof(srcElem1) * 8) { |
| destElem = 0; |
| } else if (imm) { |
| Element rBit = bits(srcElem1, imm - 1); |
| destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit; |
| } else { |
| destElem = srcElem1; |
| } |
| ''' |
| twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode) |
| |
| # vqshrn (signed): saturating, narrowing shift right. |
| #  - imm above the source width: result 0; FPSCR.QC is set unless the |
| #    source is 0 or -1. |
| #  - otherwise, for non-zero imm: the shifted value is held in a |
| #    BigElement and its high bits are filled from the shifted sign bit. |
| #    If it does not survive the cast to Element, the result saturates to |
| #    mask(sizeof(Element) * 8 - 1), complemented for negative sources, and |
| #    QC is set. |
| #  - imm == 0: plain assignment. |
| vqshrnCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrQc; |
| if (imm > sizeof(srcElem1) * 8) { |
| if (srcElem1 != 0 && srcElem1 != -1) |
| fpscr.qc = 1; |
| destElem = 0; |
| } else if (imm) { |
| BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); |
| mid |= -(mid & ((BigElement)1 << |
| (sizeof(BigElement) * 8 - 1 - imm))); |
| if (mid != (Element)mid) { |
| destElem = mask(sizeof(Element) * 8 - 1); |
| if (srcElem1 < 0) |
| destElem = ~destElem; |
| fpscr.qc = 1; |
| } else { |
| destElem = mid; |
| } |
| } else { |
| destElem = srcElem1; |
| } |
| FpscrQc = fpscr; |
| ''' |
| twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode) |
| |
| # NVqshrun, registered for the unsigned types: saturating, narrowing shift |
| # right. A shifted value that does not survive the cast to Element |
| # saturates to mask(sizeof(Element) * 8) and sets FPSCR.QC. imm above the |
| # source width gives 0 and sets QC for any non-zero source. |
| vqshrunCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrQc; |
| if (imm > sizeof(srcElem1) * 8) { |
| if (srcElem1 != 0) |
| fpscr.qc = 1; |
| destElem = 0; |
| } else if (imm) { |
| BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); |
| if (mid != (Element)mid) { |
| destElem = mask(sizeof(Element) * 8); |
| fpscr.qc = 1; |
| } else { |
| destElem = mid; |
| } |
| } else { |
| destElem = srcElem1; |
| } |
| FpscrQc = fpscr; |
| ''' |
| twoRegNarrowShiftInst("vqshrun", "NVqshrun", |
| "SimdShiftOp", smallUnsignedTypes, vqshrunCode) |
| |
| # NVqshruns, registered for the signed types under the same "vqshrun" |
| # mnemonic: saturating, narrowing shift right with bounds 0 and |
| # mask(sizeof(Element) * 8). If any bit of the shifted value above the |
| # Element width is set, a negative source gives 0 and any other source |
| # gives the mask; FPSCR.QC is set in both cases. |
| vqshrunsCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrQc; |
| if (imm > sizeof(srcElem1) * 8) { |
| if (srcElem1 != 0) |
| fpscr.qc = 1; |
| destElem = 0; |
| } else if (imm) { |
| BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); |
| if (bits(mid, sizeof(BigElement) * 8 - 1, |
| sizeof(Element) * 8) != 0) { |
| if (srcElem1 < 0) { |
| destElem = 0; |
| } else { |
| destElem = mask(sizeof(Element) * 8); |
| } |
| fpscr.qc = 1; |
| } else { |
| destElem = mid; |
| } |
| } else { |
| destElem = srcElem1; |
| } |
| FpscrQc = fpscr; |
| ''' |
| twoRegNarrowShiftInst("vqshrun", "NVqshruns", |
| "SimdShiftOp", smallSignedTypes, vqshrunsCode) |
| |
| # vqrshrn (signed): rounding, saturating, narrowing shift right. The |
| # rounding bit is bit 0 of srcElem1 >> (imm - 1); it is added after the |
| # final one-bit shift and the manual sign fill. A value that does not |
| # survive the cast to Element saturates to mask(sizeof(Element) * 8 - 1), |
| # complemented for negative sources, and sets FPSCR.QC. The imm == 0 path |
| # applies the same saturation test directly to the source. |
| vqrshrnCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrQc; |
| if (imm > sizeof(srcElem1) * 8) { |
| if (srcElem1 != 0 && srcElem1 != -1) |
| fpscr.qc = 1; |
| destElem = 0; |
| } else if (imm) { |
| BigElement mid = (srcElem1 >> (imm - 1)); |
| uint64_t rBit = mid & 0x1; |
| mid >>= 1; |
| mid |= -(mid & ((BigElement)1 << |
| (sizeof(BigElement) * 8 - 1 - imm))); |
| mid += rBit; |
| if (mid != (Element)mid) { |
| destElem = mask(sizeof(Element) * 8 - 1); |
| if (srcElem1 < 0) |
| destElem = ~destElem; |
| fpscr.qc = 1; |
| } else { |
| destElem = mid; |
| } |
| } else { |
| if (srcElem1 != (Element)srcElem1) { |
| destElem = mask(sizeof(Element) * 8 - 1); |
| if (srcElem1 < 0) |
| destElem = ~destElem; |
| fpscr.qc = 1; |
| } else { |
| destElem = srcElem1; |
| } |
| } |
| FpscrQc = fpscr; |
| ''' |
| twoRegNarrowShiftInst("vqrshrn", "NVqrshrn", |
| "SimdShiftOp", smallSignedTypes, vqrshrnCode) |
| |
| vqrshrunCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrQc; |
| if (imm > sizeof(srcElem1) * 8) { |
| if (srcElem1 != 0) |
| fpscr.qc = 1; |
| destElem = 0; |
| } else if (imm) { |
| BigElement mid = (srcElem1 >> (imm - 1)); |
| uint64_t rBit = mid & 0x1; |
| mid >>= 1; |
| mid += rBit; |
| if (mid != (Element)mid) { |
| destElem = mask(sizeof(Element) * 8); |
| fpscr.qc = 1; |
| } else { |
| destElem = mid; |
| } |
| } else { |
| if (srcElem1 != (Element)srcElem1) { |
| destElem = mask(sizeof(Element) * 8 - 1); |
| fpscr.qc = 1; |
| } else { |
| destElem = srcElem1; |
| } |
| } |
| FpscrQc = fpscr; |
| ''' |
| twoRegNarrowShiftInst("vqrshrun", "NVqrshrun", |
| "SimdShiftOp", smallUnsignedTypes, vqrshrunCode) |
| |
| # NVqrshruns, registered for the signed types under the "vqrshrun" |
| # mnemonic: rounding, saturating, narrowing shift right with bounds 0 and |
| # mask(sizeof(Element) * 8). After the sign fill and rounding add, any set |
| # bit above the Element width saturates: negative sources give 0, others |
| # give the mask, and FPSCR.QC is set. With imm == 0 a negative source gives |
| # 0 with QC set and anything else is assigned directly. |
| vqrshrunsCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrQc; |
| if (imm > sizeof(srcElem1) * 8) { |
| if (srcElem1 != 0) |
| fpscr.qc = 1; |
| destElem = 0; |
| } else if (imm) { |
| BigElement mid = (srcElem1 >> (imm - 1)); |
| uint64_t rBit = mid & 0x1; |
| mid >>= 1; |
| mid |= -(mid & ((BigElement)1 << |
| (sizeof(BigElement) * 8 - 1 - imm))); |
| mid += rBit; |
| if (bits(mid, sizeof(BigElement) * 8 - 1, |
| sizeof(Element) * 8) != 0) { |
| if (srcElem1 < 0) { |
| destElem = 0; |
| } else { |
| destElem = mask(sizeof(Element) * 8); |
| } |
| fpscr.qc = 1; |
| } else { |
| destElem = mid; |
| } |
| } else { |
| if (srcElem1 < 0) { |
| fpscr.qc = 1; |
| destElem = 0; |
| } else { |
| destElem = srcElem1; |
| } |
| } |
| FpscrQc = fpscr; |
| ''' |
| twoRegNarrowShiftInst("vqrshrun", "NVqrshruns", |
| "SimdShiftOp", smallSignedTypes, vqrshrunsCode) |
| |
| # vshll: widening shift left. The source is cast to BigElement before the |
| # shift; a shift of at least the destination width gives 0. |
| vshllCode = ''' |
| if (imm >= sizeof(destElem) * 8) { |
| destElem = 0; |
| } else { |
| destElem = (BigElement)srcElem1 << imm; |
| } |
| ''' |
| twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode) |
| |
| # vmovl: plain element copy routed through the long-shift helper. |
| vmovlCode = ''' |
| destElem = srcElem1; |
| ''' |
| twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode) |
| |
| # vcvt, float to fixed point via vfpFpToFixed<float>(srcElem1, false, 32, |
| # imm). FPSCR.IDC is set when flushToZero() reports a flush. The conversion |
| # is bracketed by prepFpState(VfpRoundNearest) / finishVfp() and by empty |
| # asm statements with memory constraints on the operands (presumably to |
| # stop the compiler moving the conversion across the FP-state changes). |
| vcvt2ufxCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrExc; |
| if (flushToZero(srcElem1)) |
| fpscr.idc = 1; |
| VfpSavedState state = prepFpState(VfpRoundNearest); |
| __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1)); |
| destReg = vfpFpToFixed<float>(srcElem1, false, 32, imm); |
| __asm__ __volatile__("" :: "m" (destReg)); |
| finishVfp(fpscr, state, true); |
| FpscrExc = fpscr; |
| ''' |
| twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",), |
| 2, vcvt2ufxCode, toInt = True) |
| twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",), |
| 4, vcvt2ufxCode, toInt = True) |
| |
| # Same sequence as vcvt2ufxCode, except that the second argument of |
| # vfpFpToFixed<float>() is true instead of false. |
| vcvt2sfxCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrExc; |
| if (flushToZero(srcElem1)) |
| fpscr.idc = 1; |
| VfpSavedState state = prepFpState(VfpRoundNearest); |
| __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1)); |
| destReg = vfpFpToFixed<float>(srcElem1, true, 32, imm); |
| __asm__ __volatile__("" :: "m" (destReg)); |
| finishVfp(fpscr, state, true); |
| FpscrExc = fpscr; |
| ''' |
| twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",), |
| 2, vcvt2sfxCode, toInt = True) |
| twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",), |
| 4, vcvt2sfxCode, toInt = True) |
| |
| # vcvt, fixed point to float via vfpUFixedToFpS(true, true, srcReg1, 32, |
| # imm), bracketed by prepFpState(VfpRoundNearest) / finishVfp() and empty |
| # asm statements with memory constraints on the operands. |
| vcvtu2fpCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrExc; |
| VfpSavedState state = prepFpState(VfpRoundNearest); |
| __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1)); |
| destElem = vfpUFixedToFpS(true, true, srcReg1, 32, imm); |
| __asm__ __volatile__("" :: "m" (destElem)); |
| finishVfp(fpscr, state, true); |
| FpscrExc = fpscr; |
| ''' |
| twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",), |
| 2, vcvtu2fpCode, fromInt = True) |
| twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",), |
| 4, vcvtu2fpCode, fromInt = True) |
| |
| # Same sequence as vcvtu2fpCode, but converting with vfpSFixedToFpS(). |
| vcvts2fpCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrExc; |
| VfpSavedState state = prepFpState(VfpRoundNearest); |
| __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1)); |
| destElem = vfpSFixedToFpS(true, true, srcReg1, 32, imm); |
| __asm__ __volatile__("" :: "m" (destElem)); |
| finishVfp(fpscr, state, true); |
| FpscrExc = fpscr; |
| ''' |
| twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",), |
| 2, vcvts2fpCode, fromInt = True) |
| twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",), |
| 4, vcvts2fpCode, fromInt = True) |
| |
| # NVcvts2h: the source bits are reinterpreted as float with bitsToFp(), |
| # FPSCR.IDC is set if flushToZero() reports a flush, and the value is |
| # converted with vcvtFpSFpH(), passing fpscr.ahp. Routed through the |
| # narrowing helper with uint16_t elements. |
| vcvts2hCode = ''' |
| destElem = 0; |
| FPSCR fpscr = (FPSCR) FpscrExc; |
| float srcFp1 = bitsToFp(srcElem1, (float)0.0); |
| if (flushToZero(srcFp1)) |
| fpscr.idc = 1; |
| VfpSavedState state = prepFpState(VfpRoundNearest); |
| __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem) |
| : "m" (srcFp1), "m" (destElem)); |
| destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest, |
| fpscr.ahp, srcFp1); |
| __asm__ __volatile__("" :: "m" (destElem)); |
| finishVfp(fpscr, state, true); |
| FpscrExc = fpscr; |
| ''' |
| twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode) |
| |
| # NVcvth2s: the opposite direction. vcvtFpHFpS() produces a float whose bit |
| # pattern is stored with fpToBits(). Routed through the long helper with |
| # uint16_t elements. |
| vcvth2sCode = ''' |
| destElem = 0; |
| FPSCR fpscr = (FPSCR) FpscrExc; |
| VfpSavedState state = prepFpState(VfpRoundNearest); |
| __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem) |
| : "m" (srcElem1), "m" (destElem)); |
| destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1)); |
| __asm__ __volatile__("" :: "m" (destElem)); |
| finishVfp(fpscr, state, true); |
| FpscrExc = fpscr; |
| ''' |
| twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode) |
| |
| # vrsqrte (uint32_t elements): delegates to unsignedRSqrtEstimate(). |
| vrsqrteCode = ''' |
| destElem = unsignedRSqrtEstimate(srcElem1); |
| ''' |
| twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode) |
| twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode) |
| |
| # vrsqrte (float): sets FPSCR.IDC when flushToZero() reports a flush, then |
| # delegates to fprSqrtEstimate(). |
| vrsqrtefpCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrExc; |
| if (flushToZero(srcReg1)) |
| fpscr.idc = 1; |
| destReg = fprSqrtEstimate(fpscr, srcReg1); |
| FpscrExc = fpscr; |
| ''' |
| twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode) |
| twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode) |
| |
| # vrecpe (uint32_t elements): delegates to unsignedRecipEstimate(). |
| vrecpeCode = ''' |
| destElem = unsignedRecipEstimate(srcElem1); |
| ''' |
| twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode) |
| twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode) |
| |
| # vrecpe (float): sets FPSCR.IDC when flushToZero() reports a flush, then |
| # delegates to fpRecipEstimate(). |
| vrecpefpCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrExc; |
| if (flushToZero(srcReg1)) |
| fpscr.idc = 1; |
| destReg = fpRecipEstimate(fpscr, srcReg1); |
| FpscrExc = fpscr; |
| ''' |
| twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode) |
| twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode) |
| |
| # vrev16 / vrev32 / vrev64: the element value is copied unchanged and only |
| # the index j is recomputed (presumably the destination index used by the |
| # surrounding template; confirm there). groupSize is the number of |
| # elements in a 2-, 4- or 8-byte group, and XOR-ing the index with |
| # groupSize - 1 mirrors the position within that group. |
| vrev16Code = ''' |
| destElem = srcElem1; |
| unsigned groupSize = ((1 << 1) / sizeof(Element)); |
| unsigned reverseMask = (groupSize - 1); |
| j = i ^ reverseMask; |
| ''' |
| twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code) |
| twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code) |
| vrev32Code = ''' |
| destElem = srcElem1; |
| unsigned groupSize = ((1 << 2) / sizeof(Element)); |
| unsigned reverseMask = (groupSize - 1); |
| j = i ^ reverseMask; |
| ''' |
| twoRegMiscInst("vrev32", "NVrev32D", |
| "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code) |
| twoRegMiscInst("vrev32", "NVrev32Q", |
| "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code) |
| vrev64Code = ''' |
| destElem = srcElem1; |
| unsigned groupSize = ((1 << 3) / sizeof(Element)); |
| unsigned reverseMask = (groupSize - 1); |
| j = i ^ reverseMask; |
| ''' |
| twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code) |
| twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code) |
| |
| # Call split('exec'), then append vcompares and vcomparesL to exec_output. |
| # split() and both strings are defined outside this part of the file. |
| split('exec') |
| exec_output += vcompares + vcomparesL |
| |
| # vpaddl: add the two source elements after widening each to BigElement. |
| vpaddlCode = ''' |
| destElem = (BigElement)srcElem1 + (BigElement)srcElem2; |
| ''' |
| twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode) |
| twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode) |
| |
| # vpadal: same widened sum, accumulated into destElem. |
| vpadalCode = ''' |
| destElem += (BigElement)srcElem1 + (BigElement)srcElem2; |
| ''' |
| twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True) |
| twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4, vpadalCode, True) |
| |
| # vcls: the value is shifted left once up front, then shifted again while |
| # its sign still matches the original sign, counting each extra shift. |
| # The count is capped at the element width minus one. |
| vclsCode = ''' |
| unsigned count = 0; |
| if (srcElem1 < 0) { |
| srcElem1 <<= 1; |
| while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) { |
| count++; |
| srcElem1 <<= 1; |
| } |
| } else { |
| srcElem1 <<= 1; |
| while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) { |
| count++; |
| srcElem1 <<= 1; |
| } |
| } |
| destElem = count; |
| ''' |
| twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode) |
| twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode) |
| |
| # vclz: shift left until the value turns negative, counting the shifts and |
| # stopping at the element width. It is registered for the signed types, |
| # which the ">= 0" test relies on to observe the top bit. |
| vclzCode = ''' |
| unsigned count = 0; |
| while (srcElem1 >= 0 && count < sizeof(Element) * 8) { |
| count++; |
| srcElem1 <<= 1; |
| } |
| destElem = count; |
| ''' |
| twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode) |
| twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode) |
| |
| # vcnt: population count. The low bit is added to the count and the value |
| # is shifted right until it reaches zero. |
| vcntCode = ''' |
| unsigned count = 0; |
| while (srcElem1 && count < sizeof(Element) * 8) { |
| count += srcElem1 & 0x1; |
| srcElem1 >>= 1; |
| } |
| destElem = count; |
| ''' |
| |
| twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode) |
| twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode) |
| |
| # vmvn (register form): bitwise NOT, instantiated for uint64_t only. |
| vmvnCode = ''' |
| destElem = ~srcElem1; |
| ''' |
| twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode) |
| twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode) |
| |
| # vqabs: saturating absolute value. The most negative value cannot be |
| # negated, so it becomes its bitwise complement and FPSCR.QC is set. |
| vqabsCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrQc; |
| if (srcElem1 == (Element)(std::numeric_limits<Element>::min())) { |
| fpscr.qc = 1; |
| destElem = ~srcElem1; |
| } else if (srcElem1 < 0) { |
| destElem = -srcElem1; |
| } else { |
| destElem = srcElem1; |
| } |
| FpscrQc = fpscr; |
| ''' |
| twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode) |
| twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode) |
| |
| # vqneg: saturating negate, with the same special case for the most |
| # negative value. |
| vqnegCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrQc; |
| if (srcElem1 == (Element)(std::numeric_limits<Element>::min())) { |
| fpscr.qc = 1; |
| destElem = ~srcElem1; |
| } else { |
| destElem = -srcElem1; |
| } |
| FpscrQc = fpscr; |
| ''' |
| twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode) |
| twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode) |
| |
| # vabs (integer): negate negative inputs, with no saturation. |
| vabsCode = ''' |
| if (srcElem1 < 0) { |
| destElem = -srcElem1; |
| } else { |
| destElem = srcElem1; |
| } |
| ''' |
| |
| twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode) |
| twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode) |
| # vabs (float): the value is viewed as uint32_t through a union and AND-ed |
| # with mask(sizeof(Element) * 8 - 1), which clears the top bit. |
| vabsfpCode = ''' |
| union |
| { |
| uint32_t i; |
| float f; |
| } cStruct; |
| cStruct.f = srcReg1; |
| cStruct.i &= mask(sizeof(Element) * 8 - 1); |
| destReg = cStruct.f; |
| ''' |
| twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode) |
| twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode) |
| |
| # vneg: arithmetic negation, in integer and float flavours. |
| vnegCode = ''' |
| destElem = -srcElem1; |
| ''' |
| twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode) |
| twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode) |
| vnegfpCode = ''' |
| destReg = -srcReg1; |
| ''' |
| twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode) |
| twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode) |
| |
| # Compare-against-zero forms of vcgt, vcge, vceq, vcle and vclt. |
| #  - Integer variants write mask(sizeof(Element) * 8) when the comparison |
| #    with 0 holds and 0 otherwise. |
| #  - Float variants run the matching vc*Func through binaryOp() with |
| #    (float)0.0 as the second operand. A result of 0 becomes -1 in the |
| #    destination and anything else becomes 0; a result of 2.0 also sets |
| #    FPSCR.IOC. The vc*Func helpers are defined outside this part of the |
| #    file. |
| # Note that vceqCode and vceqfpCode are re-assigned here. |
| vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;' |
| twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode) |
| twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode) |
| vcgtfpCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrExc; |
| float res = binaryOp(fpscr, srcReg1, (float)0.0, vcgtFunc, |
| true, true, VfpRoundNearest); |
| destReg = (res == 0) ? -1 : 0; |
| if (res == 2.0) |
| fpscr.ioc = 1; |
| FpscrExc = fpscr; |
| ''' |
| twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",), |
| 2, vcgtfpCode, toInt = True) |
| twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",), |
| 4, vcgtfpCode, toInt = True) |
| |
| vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;' |
| twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode) |
| twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode) |
| vcgefpCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrExc; |
| float res = binaryOp(fpscr, srcReg1, (float)0.0, vcgeFunc, |
| true, true, VfpRoundNearest); |
| destReg = (res == 0) ? -1 : 0; |
| if (res == 2.0) |
| fpscr.ioc = 1; |
| FpscrExc = fpscr; |
| ''' |
| twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",), |
| 2, vcgefpCode, toInt = True) |
| twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",), |
| 4, vcgefpCode, toInt = True) |
| |
| vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;' |
| twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode) |
| twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode) |
| vceqfpCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrExc; |
| float res = binaryOp(fpscr, srcReg1, (float)0.0, vceqFunc, |
| true, true, VfpRoundNearest); |
| destReg = (res == 0) ? -1 : 0; |
| if (res == 2.0) |
| fpscr.ioc = 1; |
| FpscrExc = fpscr; |
| ''' |
| twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",), |
| 2, vceqfpCode, toInt = True) |
| twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",), |
| 4, vceqfpCode, toInt = True) |
| |
| vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;' |
| twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode) |
| twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode) |
| vclefpCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrExc; |
| float res = binaryOp(fpscr, srcReg1, (float)0.0, vcleFunc, |
| true, true, VfpRoundNearest); |
| destReg = (res == 0) ? -1 : 0; |
| if (res == 2.0) |
| fpscr.ioc = 1; |
| FpscrExc = fpscr; |
| ''' |
| twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",), |
| 2, vclefpCode, toInt = True) |
| twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",), |
| 4, vclefpCode, toInt = True) |
| |
| vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;' |
| twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode) |
| twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode) |
| vcltfpCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrExc; |
| float res = binaryOp(fpscr, srcReg1, (float)0.0, vcltFunc, |
| true, true, VfpRoundNearest); |
| destReg = (res == 0) ? -1 : 0; |
| if (res == 2.0) |
| fpscr.ioc = 1; |
| FpscrExc = fpscr; |
| ''' |
| twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",), |
| 2, vcltfpCode, toInt = True) |
| twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",), |
| 4, vcltfpCode, toInt = True) |
| |
| # vswp: exchange srcReg1 and destReg one regs[] word at a time, for rCount |
| # words, using a uint32_t temporary. |
| vswpCode = ''' |
| uint32_t mid; |
| for (unsigned r = 0; r < rCount; r++) { |
| mid = srcReg1.regs[r]; |
| srcReg1.regs[r] = destReg.regs[r]; |
| destReg.regs[r] = mid; |
| } |
| ''' |
| twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode) |
| twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode) |
| |
| # vtrn: for every even index i, swap srcReg1.elements[i] with |
| # destReg.elements[i + 1]. |
| vtrnCode = ''' |
| Element mid; |
| for (unsigned i = 0; i < eCount; i += 2) { |
| mid = srcReg1.elements[i]; |
| srcReg1.elements[i] = destReg.elements[i + 1]; |
| destReg.elements[i + 1] = mid; |
| } |
| ''' |
| twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp", |
| smallUnsignedTypes, 2, vtrnCode) |
| twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp", |
| smallUnsignedTypes, 4, vtrnCode) |
| |
| # vuzp: de-interleave. A copy of srcReg1 is kept in mid because srcReg1 is |
| # overwritten while its old contents are still needed. Afterwards destReg |
| # holds the even-indexed elements (old dest first, then old src) and |
| # srcReg1 holds the odd-indexed elements in the same order. |
| vuzpCode = ''' |
| Element mid[eCount]; |
| memcpy(&mid, &srcReg1, sizeof(srcReg1)); |
| for (unsigned i = 0; i < eCount / 2; i++) { |
| srcReg1.elements[i] = destReg.elements[2 * i + 1]; |
| srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1]; |
| destReg.elements[i] = destReg.elements[2 * i]; |
| } |
| for (unsigned i = 0; i < eCount / 2; i++) { |
| destReg.elements[eCount / 2 + i] = mid[2 * i]; |
| } |
| ''' |
| twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode) |
| twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode) |
| |
| vzipCode = ''' |
| Element mid[eCount]; |
| memcpy(&mid, &destReg, sizeof(destReg)); |
| for (unsigned i = 0; i < eCount / 2; i++) { |
| destReg.elements[2 * i] = mid[i]; |
| destReg.elements[2 * i + 1] = srcReg1.elements[i]; |
| } |
| for (int i = 0; i < eCount / 2; i++) { |
| srcReg1.elements[2 * i] = mid[eCount / 2 + i]; |
| srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i]; |
| } |
| ''' |
| twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode) |
| twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode) |
| |
| # vmovn: plain element assignment routed through the narrowing helper. |
| vmovnCode = 'destElem = srcElem1;' |
| twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode) |
| |
| # vdup: plain element assignment routed through twoRegMiscScInst. |
| vdupCode = 'destElem = srcElem1;' |
| twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode) |
| twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode) |
| |
| def vdupGprInst(name, Name, opClass, types, rCount): |
| global header_output, exec_output |
| eWalkCode = simdEnabledCheckCode + ''' |
| RegVect destReg; |
| for (unsigned i = 0; i < eCount; i++) { |
| destReg.elements[i] = htole((Element)Op1); |
| } |
| ''' |
| for reg in range(rCount): |
| eWalkCode += ''' |
| FpDestP%(reg)d_uw = letoh(destReg.regs[%(reg)d]); |
| ''' % { "reg" : reg } |
| iop = ArmInstObjParams(name, Name, |
| "RegRegOp", |
| { "code": eWalkCode, |
| "r_count": rCount, |
| "predicate_test": predicateTest, |
| "op_class": opClass }, []) |
| header_output += NeonRegRegOpDeclare.subst(iop) |
| exec_output += NeonEqualRegExecute.subst(iop) |
| for type in types: |
| substDict = { "targs" : type, |
| "class_name" : Name } |
| exec_output += NeonExecDeclare.subst(substDict) |
| vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp", smallUnsignedTypes, 2) |
| vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp", smallUnsignedTypes, 4) |
| |
| # Immediate forms, all instantiated for uint64_t: vmov assigns imm, vorr |
| # ORs it in, vmvn assigns its complement and vbic clears its bits. The |
| # vorr and vbic registrations pass a trailing True since they update the |
| # existing destination value. Note that vmvnCode is re-bound here, |
| # replacing the register-form string assigned earlier in this file. |
| vmovCode = 'destElem = imm;' |
| oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode) |
| oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode) |
| |
| vorrCode = 'destElem |= imm;' |
| oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True) |
| oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True) |
| |
| vmvnCode = 'destElem = ~imm;' |
| oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode) |
| oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode) |
| |
| vbicCode = 'destElem &= ~imm;' |
| oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True) |
| oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True) |
| |
| # vqmovn (signed): saturating narrow. If widening the narrowed value back |
| # to BigElement does not reproduce the source, FPSCR.QC is set and the |
| # result becomes mask(sizeof(Element) * 8 - 1), complemented for negative |
| # sources. |
| vqmovnCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrQc; |
| destElem = srcElem1; |
| if ((BigElement)destElem != srcElem1) { |
| fpscr.qc = 1; |
| destElem = mask(sizeof(Element) * 8 - 1); |
| if (srcElem1 < 0) |
| destElem = ~destElem; |
| } |
| FpscrQc = fpscr; |
| ''' |
| twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode) |
| |
| # NVqmovun, registered for the unsigned types: same round-trip test, |
| # saturating to mask(sizeof(Element) * 8). |
| vqmovunCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrQc; |
| destElem = srcElem1; |
| if ((BigElement)destElem != srcElem1) { |
| fpscr.qc = 1; |
| destElem = mask(sizeof(Element) * 8); |
| } |
| FpscrQc = fpscr; |
| ''' |
| twoRegNarrowMiscInst("vqmovun", "NVqmovun", |
| "SimdMiscOp", smallUnsignedTypes, vqmovunCode) |
| |
| # NVqmovuns, registered for the signed types under the "vqmovun" mnemonic. |
| # A negative source, or one whose masked narrowed value differs from the |
| # source, sets QC; the result is then mask(sizeof(Element) * 8), or its |
| # complement when the source is negative. |
| vqmovunsCode = ''' |
| FPSCR fpscr = (FPSCR) FpscrQc; |
| destElem = srcElem1; |
| if (srcElem1 < 0 || |
| ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) { |
| fpscr.qc = 1; |
| destElem = mask(sizeof(Element) * 8); |
| if (srcElem1 < 0) |
| destElem = ~destElem; |
| } |
| FpscrQc = fpscr; |
| ''' |
| twoRegNarrowMiscInst("vqmovun", "NVqmovuns", |
| "SimdMiscOp", smallSignedTypes, vqmovunsCode) |
| |
| def buildVext(name, Name, opClass, types, rCount, op): |
| global header_output, exec_output |
| eWalkCode = simdEnabledCheckCode + ''' |
| RegVect srcReg1, srcReg2, destReg; |
| ''' |
| for reg in range(rCount): |
| eWalkCode += ''' |
| srcReg1.regs[%(reg)d] = htole(FpOp1P%(reg)d_uw); |
| srcReg2.regs[%(reg)d] = htole(FpOp2P%(reg)d_uw); |
| ''' % { "reg" : reg } |
| eWalkCode += op |
| for reg in range(rCount): |
| eWalkCode += ''' |
| FpDestP%(reg)d_uw = letoh(destReg.regs[%(reg)d]); |
| ''' % { "reg" : reg } |
| iop = ArmInstObjParams(name, Name, |
| "RegRegRegImmOp", |
| { "code": eWalkCode, |
| "r_count": rCount, |
| "predicate_test": predicateTest, |
| "op_class": opClass }, []) |
| header_output += NeonRegRegRegImmOpDeclare.subst(iop) |
| exec_output += NeonEqualRegExecute.subst(iop) |
| for type in types: |
| substDict = { "targs" : type, |
| "class_name" : Name } |
| exec_output += NeonExecDeclare.subst(substDict) |
| |
| # vext: destination element i comes from position i + imm of the two |
| # sources taken as one sequence. Indices below eCount read srcReg1; |
| # otherwise eCount is subtracted and srcReg2 is read. If the index is |
| # still out of range, fault is set to UndefinedInstruction. |
| vextCode = ''' |
| for (unsigned i = 0; i < eCount; i++) { |
| unsigned index = i + imm; |
| if (index < eCount) { |
| destReg.elements[i] = srcReg1.elements[index]; |
| } else { |
| index -= eCount; |
| if (index >= eCount) { |
| fault = std::make_shared<UndefinedInstruction>(machInst, |
| false, |
| mnemonic); |
| } else { |
| destReg.elements[i] = srcReg2.elements[index]; |
| } |
| } |
| } |
| ''' |
| buildVext("vext", "NVextD", "SimdMiscOp", ("uint8_t",), 2, vextCode) |
| buildVext("vext", "NVextQ", "SimdMiscOp", ("uint8_t",), 4, vextCode) |
| |
# Generate one table-lookup instruction class on the "RegRegRegOp" base.
#   name    - mnemonic passed to ArmInstObjParams
#   Name    - name of the generated class
#   opClass - value of the "op_class" template field
#   length  - integer substituted into the generated code; length * 2 table
#             words are loaded from FpOp1P<n>_uw, and indices below
#             8 * length are treated as in range
#   isVtbl  - C++ boolean literal given as a string ("true"/"false"); when
#             true, out-of-range indices write zero, otherwise the
#             destination byte keeps its previous value
# Appends to the global header_output, decoder_output and exec_output.
def buildVtbxl(name, Name, opClass, length, isVtbl):
    global header_output, decoder_output, exec_output

    # Declarations plus loads of the index vector and the current
    # destination contents.  Only this fragment is %-formatted.
    setup = '''
        union
        {
            uint8_t bytes[32];
            uint32_t regs[8];
        } table;

        union
        {
            uint8_t bytes[8];
            uint32_t regs[2];
        } destReg, srcReg2;

        const unsigned length = %(length)d;
        const bool isVtbl = %(isVtbl)s;

        srcReg2.regs[0] = htole(FpOp2P0_uw);
        srcReg2.regs[1] = htole(FpOp2P1_uw);

        destReg.regs[0] = htole(FpDestP0_uw);
        destReg.regs[1] = htole(FpDestP1_uw);
    ''' % {"length": length, "isVtbl": isVtbl}

    # Fill all eight table words: the first length * 2 come from the first
    # operand, the remainder are zeroed.
    liveWords = length * 2
    tableInit = ""
    for word in range(8):
        if word >= liveWords:
            tableInit += 'table.regs[%(reg)d] = 0;\n' % {"reg": word}
        else:
            tableInit += ('table.regs[%(reg)d] = htole(FpOp1P%(reg)d_uw);\n'
                          % {"reg": word})

    # Byte-wise lookup followed by the write-back of the destination.
    lookup = '''
        for (unsigned i = 0; i < sizeof(destReg); i++) {
            uint8_t index = srcReg2.bytes[i];
            if (index < 8 * length) {
                destReg.bytes[i] = table.bytes[index];
            } else {
                if (isVtbl)
                    destReg.bytes[i] = 0;
                // else destReg.bytes[i] unchanged
            }
        }

        FpDestP0_uw = letoh(destReg.regs[0]);
        FpDestP1_uw = letoh(destReg.regs[1]);
    '''

    code = simdEnabledCheckCode + setup + tableInit + lookup

    fields = {
        "code": code,
        "predicate_test": predicateTest,
        "op_class": opClass,
    }
    iop = ArmInstObjParams(name, Name, "RegRegRegOp", fields, [])
    header_output += RegRegRegOpDeclare.subst(iop)
    decoder_output += RegRegRegOpConstructor.subst(iop)
    exec_output += PredOpExecute.subst(iop)
| |
# "vtbl" variants for table lengths 1 through 4: the generated isVtbl flag is
# true, so out-of-range indices write zero to the destination byte.
buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", length=1, isVtbl="true")
buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", length=2, isVtbl="true")
buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", length=3, isVtbl="true")
buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", length=4, isVtbl="true")

# "vtbx" variants for table lengths 1 through 4: the generated isVtbl flag is
# false, so out-of-range indices leave the destination byte unchanged.
buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", length=1, isVtbl="false")
buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", length=2, isVtbl="false")
buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", length=3, isVtbl="false")
buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", length=4, isVtbl="false")
| }}; |