// -*- mode:c++ -*-

// Copyright (c) 2010-2011, 2015, 2019 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
// not be construed as granting a license to any other intellectual
// property including but not limited to intellectual property relating
// to a hardware implementation of the functionality of the software
// licensed hereunder.  You may use the software subject to the license
// terms below provided that you ensure that this notice is replicated
// unmodified and in its entirety in all distributions of the software,
// modified or unmodified, in source code or in binary form.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

output header {{
    // Instantiate Base for a three-register operation, choosing the unsigned
    // element type from size: 0 -> uint8_t, 1 -> uint16_t, 2 -> uint32_t,
    // 3 -> uint64_t. Any other size decodes to Unknown.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonUThreeUReg(unsigned size,
                         ExtMachInst machInst, IntRegIndex dest,
                         IntRegIndex op1, IntRegIndex op2)
    {
        if (size == 0)
            return new Base<uint8_t>(machInst, dest, op1, op2);
        if (size == 1)
            return new Base<uint16_t>(machInst, dest, op1, op2);
        if (size == 2)
            return new Base<uint32_t>(machInst, dest, op1, op2);
        if (size == 3)
            return new Base<uint64_t>(machInst, dest, op1, op2);

        // No element type is associated with this size value.
        return new Unknown(machInst);
    }

    // Choose between two concrete instruction classes based on size:
    // size 2 builds a BaseS, size 3 builds a BaseD, and every other value
    // decodes to Unknown.
    template <class BaseS, class BaseD>
    StaticInstPtr
    decodeNeonSizeSingleDouble(unsigned size,
                               ExtMachInst machInst, IntRegIndex dest,
                               IntRegIndex op1, IntRegIndex op2)
    {
        if (size == 2)
            return new BaseS(machInst, dest, op1, op2);
        if (size == 3)
            return new BaseD(machInst, dest, op1, op2);

        // Sizes other than 2 and 3 are not handled by this decoder.
        return new Unknown(machInst);
    }

    // Signed counterpart of the three-register size decode: size 0..3 map
    // to int8_t, int16_t, int32_t and int64_t respectively. Any other size
    // decodes to Unknown.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonSThreeUReg(unsigned size,
                         ExtMachInst machInst, IntRegIndex dest,
                         IntRegIndex op1, IntRegIndex op2)
    {
        if (size == 0) {
            return new Base<int8_t>(machInst, dest, op1, op2);
        } else if (size == 1) {
            return new Base<int16_t>(machInst, dest, op1, op2);
        } else if (size == 2) {
            return new Base<int32_t>(machInst, dest, op1, op2);
        } else if (size == 3) {
            return new Base<int64_t>(machInst, dest, op1, op2);
        } else {
            return new Unknown(machInst);
        }
    }

    // Signedness dispatch for the three-register decode that accepts sizes
    // 0..3: notSigned selects decodeNeonUThreeUReg, otherwise
    // decodeNeonSThreeUReg is used. All other arguments are forwarded as-is.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonUSThreeUReg(bool notSigned, unsigned size,
                          ExtMachInst machInst, IntRegIndex dest,
                          IntRegIndex op1, IntRegIndex op2)
    {
        return notSigned ?
            decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2) :
            decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
    }

    // Three-register decode restricted to the smaller unsigned element
    // types: size 0 -> uint8_t, 1 -> uint16_t, 2 -> uint32_t. Size 3 and
    // anything larger decode to Unknown.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonUThreeUSReg(unsigned size,
                          ExtMachInst machInst, IntRegIndex dest,
                          IntRegIndex op1, IntRegIndex op2)
    {
        if (size == 0)
            return new Base<uint8_t>(machInst, dest, op1, op2);
        if (size == 1)
            return new Base<uint16_t>(machInst, dest, op1, op2);
        if (size == 2)
            return new Base<uint32_t>(machInst, dest, op1, op2);

        // 64-bit elements are deliberately not offered by this decoder.
        return new Unknown(machInst);
    }

    // Three-register decode restricted to the smaller signed element types:
    // size 0 -> int8_t, 1 -> int16_t, 2 -> int32_t. Every other size decodes
    // to Unknown.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonSThreeUSReg(unsigned size,
                          ExtMachInst machInst, IntRegIndex dest,
                          IntRegIndex op1, IntRegIndex op2)
    {
        if (size == 0) {
            return new Base<int8_t>(machInst, dest, op1, op2);
        } else if (size == 1) {
            return new Base<int16_t>(machInst, dest, op1, op2);
        } else if (size == 2) {
            return new Base<int32_t>(machInst, dest, op1, op2);
        } else {
            return new Unknown(machInst);
        }
    }

    // Three-register decode that only accepts the 16-bit and 32-bit signed
    // element types: size 1 -> int16_t, size 2 -> int32_t. All other sizes
    // decode to Unknown.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonSThreeHAndWReg(unsigned size, ExtMachInst machInst,
                             IntRegIndex dest, IntRegIndex op1,
                             IntRegIndex op2)
    {
        if (size == 1)
            return new Base<int16_t>(machInst, dest, op1, op2);
        if (size == 2)
            return new Base<int32_t>(machInst, dest, op1, op2);

        return new Unknown(machInst);
    }

    // Same size selection as the 16/32-bit signed three-register decode
    // (size 1 -> int16_t, size 2 -> int32_t, otherwise Unknown), but the
    // constructed instruction additionally receives the immediate imm.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonSThreeImmHAndWReg(unsigned size, ExtMachInst machInst,
                                IntRegIndex dest, IntRegIndex op1,
                                IntRegIndex op2, uint64_t imm)
    {
        if (size == 1)
            return new Base<int16_t>(machInst, dest, op1, op2, imm);
        if (size == 2)
            return new Base<int32_t>(machInst, dest, op1, op2, imm);

        return new Unknown(machInst);
    }

    // Signedness dispatch for the three-register decode that accepts sizes
    // 0..2: notSigned selects decodeNeonUThreeUSReg, otherwise
    // decodeNeonSThreeUSReg is used.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
                           ExtMachInst machInst, IntRegIndex dest,
                           IntRegIndex op1, IntRegIndex op2)
    {
        return notSigned ?
            decodeNeonUThreeUSReg<Base>(size, machInst, dest, op1, op2) :
            decodeNeonSThreeUSReg<Base>(size, machInst, dest, op1, op2);
    }

    // Pick BaseQ when q is set and BaseD otherwise, then decode the unsigned
    // element type via decodeNeonUThreeUSReg (sizes 0..2 only) for both
    // variants.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUThreeSReg(bool q, unsigned size,
                         ExtMachInst machInst, IntRegIndex dest,
                         IntRegIndex op1, IntRegIndex op2)
    {
        if (!q) {
            return decodeNeonUThreeUSReg<BaseD>(
                    size, machInst, dest, op1, op2);
        }
        return decodeNeonUThreeUSReg<BaseQ>(size, machInst, dest, op1, op2);
    }

    // Pick BaseQ when q is set and BaseD otherwise, then decode the signed
    // element type via decodeNeonSThreeUSReg (sizes 0..2 only) for both
    // variants.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonSThreeSReg(bool q, unsigned size,
                         ExtMachInst machInst, IntRegIndex dest,
                         IntRegIndex op1, IntRegIndex op2)
    {
        return q ?
            decodeNeonSThreeUSReg<BaseQ>(size, machInst, dest, op1, op2) :
            decodeNeonSThreeUSReg<BaseD>(size, machInst, dest, op1, op2);
    }

    // Signed three-register decode with asymmetric size support: the BaseQ
    // variant (q set) goes through decodeNeonSThreeUReg and so accepts sizes
    // 0..3, while the BaseD variant goes through decodeNeonSThreeUSReg and
    // only accepts sizes 0..2.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonSThreeXReg(bool q, unsigned size,
                         ExtMachInst machInst, IntRegIndex dest,
                         IntRegIndex op1, IntRegIndex op2)
    {
        if (!q) {
            return decodeNeonSThreeUSReg<BaseD>(
                    size, machInst, dest, op1, op2);
        }
        return decodeNeonSThreeUReg<BaseQ>(size, machInst, dest, op1, op2);
    }

    // Unsigned three-register decode with asymmetric size support: the BaseQ
    // variant (q set) goes through decodeNeonUThreeUReg and so accepts sizes
    // 0..3, while the BaseD variant goes through decodeNeonUThreeUSReg and
    // only accepts sizes 0..2.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUThreeXReg(bool q, unsigned size,
                         ExtMachInst machInst, IntRegIndex dest,
                         IntRegIndex op1, IntRegIndex op2)
    {
        if (!q) {
            return decodeNeonUThreeUSReg<BaseD>(
                    size, machInst, dest, op1, op2);
        }
        return decodeNeonUThreeUReg<BaseQ>(size, machInst, dest, op1, op2);
    }

    // Signedness dispatch on top of the q/size decode limited to sizes 0..2:
    // notSigned forwards to decodeNeonUThreeSReg, otherwise to
    // decodeNeonSThreeSReg.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
                          ExtMachInst machInst, IntRegIndex dest,
                          IntRegIndex op1, IntRegIndex op2)
    {
        if (!notSigned) {
            return decodeNeonSThreeSReg<BaseD, BaseQ>(
                    q, size, machInst, dest, op1, op2);
        }
        return decodeNeonUThreeSReg<BaseD, BaseQ>(
                q, size, machInst, dest, op1, op2);
    }

    // Pick BaseQ when q is set and BaseD otherwise, then decode the unsigned
    // element type via decodeNeonUThreeUReg (sizes 0..3) for both variants.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUThreeReg(bool q, unsigned size,
                        ExtMachInst machInst, IntRegIndex dest,
                        IntRegIndex op1, IntRegIndex op2)
    {
        return q ?
            decodeNeonUThreeUReg<BaseQ>(size, machInst, dest, op1, op2) :
            decodeNeonUThreeUReg<BaseD>(size, machInst, dest, op1, op2);
    }

    // Pick BaseQ when q is set and BaseD otherwise, then decode the signed
    // element type via decodeNeonSThreeUReg (sizes 0..3) for both variants.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonSThreeReg(bool q, unsigned size,
                        ExtMachInst machInst, IntRegIndex dest,
                        IntRegIndex op1, IntRegIndex op2)
    {
        return q ?
            decodeNeonSThreeUReg<BaseQ>(size, machInst, dest, op1, op2) :
            decodeNeonSThreeUReg<BaseD>(size, machInst, dest, op1, op2);
    }

    // Signedness dispatch on top of the q/size decode that accepts sizes
    // 0..3: notSigned forwards to decodeNeonUThreeReg, otherwise to
    // decodeNeonSThreeReg.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
                         ExtMachInst machInst, IntRegIndex dest,
                         IntRegIndex op1, IntRegIndex op2)
    {
        if (!notSigned) {
            return decodeNeonSThreeReg<BaseD, BaseQ>(
                    q, size, machInst, dest, op1, op2);
        }
        return decodeNeonUThreeReg<BaseD, BaseQ>(
                q, size, machInst, dest, op1, op2);
    }

    // Three-register decode where size acts as a flag: zero selects
    // uint32_t, non-zero selects uint64_t. With q set the instruction is
    // built from BaseQ; without q only the uint32_t form of BaseD exists and
    // a non-zero size decodes to Unknown.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUThreeFpReg(bool q, unsigned size, ExtMachInst machInst,
                          IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
    {
        const bool wide = (size != 0);

        if (!q) {
            // BaseD is never instantiated with the 64-bit element type.
            if (wide)
                return new Unknown(machInst);
            return new BaseD<uint32_t>(machInst, dest, op1, op2);
        }

        if (wide)
            return new BaseQ<uint64_t>(machInst, dest, op1, op2);
        return new BaseQ<uint32_t>(machInst, dest, op1, op2);
    }

    // Two-way element type choice for a three-register instruction: a false
    // size builds Base<uint32_t>, a true size builds Base<uint64_t>. This
    // decoder never yields Unknown.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonUThreeScFpReg(bool size, ExtMachInst machInst,
                            IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
    {
        if (!size)
            return new Base<uint32_t>(machInst, dest, op1, op2);

        return new Base<uint64_t>(machInst, dest, op1, op2);
    }

    // Two-way element type choice for a three-register instruction that also
    // carries an immediate: a false size builds Base<uint32_t>, a true size
    // builds Base<uint64_t>. imm is passed through to the constructor.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonUThreeImmScFpReg(bool size, ExtMachInst machInst,
                               IntRegIndex dest, IntRegIndex op1,
                               IntRegIndex op2, uint64_t imm)
    {
        if (!size)
            return new Base<uint32_t>(machInst, dest, op1, op2, imm);

        return new Base<uint64_t>(machInst, dest, op1, op2, imm);
    }

    // Three-register-plus-immediate decode for unsigned 16-bit and 32-bit
    // elements. size 1 -> uint16_t and size 2 -> uint32_t; q chooses BaseQ
    // over BaseD. Any other size decodes to Unknown regardless of q.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
                                IntRegIndex dest, IntRegIndex op1,
                                IntRegIndex op2, uint64_t imm)
    {
        switch (size) {
          case 1:
            if (q)
                return new BaseQ<uint16_t>(machInst, dest, op1, op2, imm);
            return new BaseD<uint16_t>(machInst, dest, op1, op2, imm);
          case 2:
            if (q)
                return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
            return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
          default:
            return new Unknown(machInst);
        }
    }

    // Three-register-plus-immediate decode for signed 16-bit and 32-bit
    // elements. size 1 -> int16_t and size 2 -> int32_t; q chooses BaseQ
    // over BaseD. Any other size decodes to Unknown regardless of q.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonSThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
                                IntRegIndex dest, IntRegIndex op1,
                                IntRegIndex op2, uint64_t imm)
    {
        switch (size) {
          case 1:
            if (q)
                return new BaseQ<int16_t>(machInst, dest, op1, op2, imm);
            return new BaseD<int16_t>(machInst, dest, op1, op2, imm);
          case 2:
            if (q)
                return new BaseQ<int32_t>(machInst, dest, op1, op2, imm);
            return new BaseD<int32_t>(machInst, dest, op1, op2, imm);
          default:
            return new Unknown(machInst);
        }
    }

    // Three-register-plus-immediate decode where size acts as a flag: zero
    // selects uint32_t, non-zero selects uint64_t. With q set the
    // instruction is built from BaseQ; without q only BaseD<uint32_t> is
    // available and a non-zero size decodes to Unknown.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUThreeImmFpReg(bool q, unsigned size, ExtMachInst machInst,
                             IntRegIndex dest, IntRegIndex op1,
                             IntRegIndex op2, uint64_t imm)
    {
        const bool wide = (size != 0);

        if (!q) {
            // BaseD is never instantiated with the 64-bit element type.
            if (wide)
                return new Unknown(machInst);
            return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
        }

        if (wide)
            return new BaseQ<uint64_t>(machInst, dest, op1, op2, imm);
        return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
    }

    // Two-register-plus-immediate decode over all four unsigned element
    // types (size 0..3 -> uint8_t, uint16_t, uint32_t, uint64_t). q chooses
    // BaseQ over BaseD; sizes outside 0..3 decode to Unknown.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUTwoShiftReg(bool q, unsigned size,
                           ExtMachInst machInst, IntRegIndex dest,
                           IntRegIndex op1, uint64_t imm)
    {
        switch (size) {
          case 0:
            if (q)
                return new BaseQ<uint8_t>(machInst, dest, op1, imm);
            return new BaseD<uint8_t>(machInst, dest, op1, imm);
          case 1:
            if (q)
                return new BaseQ<uint16_t>(machInst, dest, op1, imm);
            return new BaseD<uint16_t>(machInst, dest, op1, imm);
          case 2:
            if (q)
                return new BaseQ<uint32_t>(machInst, dest, op1, imm);
            return new BaseD<uint32_t>(machInst, dest, op1, imm);
          case 3:
            if (q)
                return new BaseQ<uint64_t>(machInst, dest, op1, imm);
            return new BaseD<uint64_t>(machInst, dest, op1, imm);
          default:
            return new Unknown(machInst);
        }
    }

    // Two-register-plus-immediate decode over all four signed element types
    // (size 0..3 -> int8_t, int16_t, int32_t, int64_t). q chooses BaseQ over
    // BaseD; sizes outside 0..3 decode to Unknown.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonSTwoShiftReg(bool q, unsigned size,
                           ExtMachInst machInst, IntRegIndex dest,
                           IntRegIndex op1, uint64_t imm)
    {
        switch (size) {
          case 0:
            if (q)
                return new BaseQ<int8_t>(machInst, dest, op1, imm);
            return new BaseD<int8_t>(machInst, dest, op1, imm);
          case 1:
            if (q)
                return new BaseQ<int16_t>(machInst, dest, op1, imm);
            return new BaseD<int16_t>(machInst, dest, op1, imm);
          case 2:
            if (q)
                return new BaseQ<int32_t>(machInst, dest, op1, imm);
            return new BaseD<int32_t>(machInst, dest, op1, imm);
          case 3:
            if (q)
                return new BaseQ<int64_t>(machInst, dest, op1, imm);
            return new BaseD<int64_t>(machInst, dest, op1, imm);
          default:
            return new Unknown(machInst);
        }
    }


    // Signedness dispatch for the two-register-plus-immediate decode that
    // accepts sizes 0..3: notSigned forwards to decodeNeonUTwoShiftReg,
    // otherwise to decodeNeonSTwoShiftReg.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
                            ExtMachInst machInst, IntRegIndex dest,
                            IntRegIndex op1, uint64_t imm)
    {
        if (!notSigned) {
            return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
                    q, size, machInst, dest, op1, imm);
        }
        return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
                q, size, machInst, dest, op1, imm);
    }

    // Two-register-plus-immediate decode limited to the smaller unsigned
    // element types: size 0 -> uint8_t, 1 -> uint16_t, 2 -> uint32_t. Every
    // other size decodes to Unknown.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonUTwoShiftUSReg(unsigned size,
                             ExtMachInst machInst, IntRegIndex dest,
                             IntRegIndex op1, uint64_t imm)
    {
        if (size == 0)
            return new Base<uint8_t>(machInst, dest, op1, imm);
        if (size == 1)
            return new Base<uint16_t>(machInst, dest, op1, imm);
        if (size == 2)
            return new Base<uint32_t>(machInst, dest, op1, imm);

        // 64-bit elements are deliberately not offered by this decoder.
        return new Unknown(machInst);
    }

    // Two-register-plus-immediate decode over all four unsigned element
    // types: size 0..3 -> uint8_t, uint16_t, uint32_t, uint64_t. Sizes
    // outside that range decode to Unknown.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonUTwoShiftUReg(unsigned size,
                            ExtMachInst machInst, IntRegIndex dest,
                            IntRegIndex op1, uint64_t imm)
    {
        if (size == 0)
            return new Base<uint8_t>(machInst, dest, op1, imm);
        if (size == 1)
            return new Base<uint16_t>(machInst, dest, op1, imm);
        if (size == 2)
            return new Base<uint32_t>(machInst, dest, op1, imm);
        if (size == 3)
            return new Base<uint64_t>(machInst, dest, op1, imm);

        return new Unknown(machInst);
    }

    // Two-register-plus-immediate decode over all four signed element types:
    // size 0..3 -> int8_t, int16_t, int32_t, int64_t. Sizes outside that
    // range decode to Unknown.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonSTwoShiftUReg(unsigned size,
                            ExtMachInst machInst, IntRegIndex dest,
                            IntRegIndex op1, uint64_t imm)
    {
        if (size == 0) {
            return new Base<int8_t>(machInst, dest, op1, imm);
        } else if (size == 1) {
            return new Base<int16_t>(machInst, dest, op1, imm);
        } else if (size == 2) {
            return new Base<int32_t>(machInst, dest, op1, imm);
        } else if (size == 3) {
            return new Base<int64_t>(machInst, dest, op1, imm);
        } else {
            return new Unknown(machInst);
        }
    }

    // Pick BaseQ when q is set and BaseD otherwise, then decode the unsigned
    // element type via decodeNeonUTwoShiftUSReg (sizes 0..2 only) for both
    // variants.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUTwoShiftSReg(bool q, unsigned size,
                            ExtMachInst machInst, IntRegIndex dest,
                            IntRegIndex op1, uint64_t imm)
    {
        return q ?
            decodeNeonUTwoShiftUSReg<BaseQ>(size, machInst, dest, op1, imm) :
            decodeNeonUTwoShiftUSReg<BaseD>(size, machInst, dest, op1, imm);
    }

    // Two-register-plus-immediate decode limited to the smaller signed
    // element types: size 0 -> int8_t, 1 -> int16_t, 2 -> int32_t. Every
    // other size decodes to Unknown.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonSTwoShiftUSReg(unsigned size,
                             ExtMachInst machInst, IntRegIndex dest,
                             IntRegIndex op1, uint64_t imm)
    {
        if (size == 0) {
            return new Base<int8_t>(machInst, dest, op1, imm);
        } else if (size == 1) {
            return new Base<int16_t>(machInst, dest, op1, imm);
        } else if (size == 2) {
            return new Base<int32_t>(machInst, dest, op1, imm);
        } else {
            return new Unknown(machInst);
        }
    }

    // Pick BaseQ when q is set and BaseD otherwise, then decode the signed
    // element type via decodeNeonSTwoShiftUSReg (sizes 0..2 only) for both
    // variants.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonSTwoShiftSReg(bool q, unsigned size,
                            ExtMachInst machInst, IntRegIndex dest,
                            IntRegIndex op1, uint64_t imm)
    {
        return q ?
            decodeNeonSTwoShiftUSReg<BaseQ>(size, machInst, dest, op1, imm) :
            decodeNeonSTwoShiftUSReg<BaseD>(size, machInst, dest, op1, imm);
    }

    // Signedness dispatch for the two-register-plus-immediate decode limited
    // to sizes 0..2: notSigned forwards to decodeNeonUTwoShiftSReg, otherwise
    // to decodeNeonSTwoShiftSReg.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
                             ExtMachInst machInst, IntRegIndex dest,
                             IntRegIndex op1, uint64_t imm)
    {
        if (!notSigned) {
            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
                    q, size, machInst, dest, op1, imm);
        }
        return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
                q, size, machInst, dest, op1, imm);
    }

    // Unsigned two-register-plus-immediate decode with asymmetric size
    // support: the BaseQ variant (q set) goes through decodeNeonUTwoShiftUReg
    // and accepts sizes 0..3, while the BaseD variant goes through
    // decodeNeonUTwoShiftUSReg and only accepts sizes 0..2.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
                            IntRegIndex dest, IntRegIndex op1, uint64_t imm)
    {
        if (!q) {
            return decodeNeonUTwoShiftUSReg<BaseD>(
                size, machInst, dest, op1, imm);
        }
        return decodeNeonUTwoShiftUReg<BaseQ>(size, machInst, dest, op1, imm);
    }

    // Signed two-register-plus-immediate decode with asymmetric size
    // support: the BaseQ variant (q set) goes through decodeNeonSTwoShiftUReg
    // and accepts sizes 0..3, while the BaseD variant goes through
    // decodeNeonSTwoShiftUSReg and only accepts sizes 0..2.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonSTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
                            IntRegIndex dest, IntRegIndex op1, uint64_t imm)
    {
        if (!q) {
            return decodeNeonSTwoShiftUSReg<BaseD>(
                size, machInst, dest, op1, imm);
        }
        return decodeNeonSTwoShiftUReg<BaseQ>(size, machInst, dest, op1, imm);
    }

    // Two-way element type choice for a two-register-plus-immediate
    // instruction: size zero builds Base<uint32_t>, any non-zero size builds
    // Base<uint64_t>. This decoder never yields Unknown.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonUTwoShiftUFpReg(unsigned size, ExtMachInst machInst,
                              IntRegIndex dest, IntRegIndex op1, uint64_t imm)
    {
        if (size == 0)
            return new Base<uint32_t>(machInst, dest, op1, imm);

        return new Base<uint64_t>(machInst, dest, op1, imm);
    }

    // Two-register-plus-immediate decode where size acts as a flag: zero
    // selects uint32_t, non-zero selects uint64_t. With q set the
    // instruction is built from BaseQ; without q only BaseD<uint32_t> is
    // available and a non-zero size decodes to Unknown.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUTwoShiftFpReg(bool q, unsigned size, ExtMachInst machInst,
                             IntRegIndex dest, IntRegIndex op1, uint64_t imm)
    {
        const bool wide = (size != 0);

        if (!q) {
            // BaseD is never instantiated with the 64-bit element type.
            if (wide)
                return new Unknown(machInst);
            return new BaseD<uint32_t>(machInst, dest, op1, imm);
        }

        if (wide)
            return new BaseQ<uint64_t>(machInst, dest, op1, imm);
        return new BaseQ<uint32_t>(machInst, dest, op1, imm);
    }

    // Two-register decode limited to the smaller unsigned element types:
    // size 0 -> uint8_t, 1 -> uint16_t, 2 -> uint32_t. Every other size
    // decodes to Unknown.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonUTwoMiscUSReg(unsigned size,
                            ExtMachInst machInst, IntRegIndex dest,
                            IntRegIndex op1)
    {
        if (size == 0)
            return new Base<uint8_t>(machInst, dest, op1);
        if (size == 1)
            return new Base<uint16_t>(machInst, dest, op1);
        if (size == 2)
            return new Base<uint32_t>(machInst, dest, op1);

        // 64-bit elements are deliberately not offered by this decoder.
        return new Unknown(machInst);
    }

    // Two-register decode limited to the smaller signed element types:
    // size 0 -> int8_t, 1 -> int16_t, 2 -> int32_t. Every other size decodes
    // to Unknown.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonSTwoMiscUSReg(unsigned size,
                            ExtMachInst machInst, IntRegIndex dest,
                            IntRegIndex op1)
    {
        if (size == 0) {
            return new Base<int8_t>(machInst, dest, op1);
        } else if (size == 1) {
            return new Base<int16_t>(machInst, dest, op1);
        } else if (size == 2) {
            return new Base<int32_t>(machInst, dest, op1);
        } else {
            return new Unknown(machInst);
        }
    }

    // Pick BaseQ when q is set and BaseD otherwise, then decode the unsigned
    // element type via decodeNeonUTwoMiscUSReg (sizes 0..2 only) for both
    // variants.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUTwoMiscSReg(bool q, unsigned size,
                           ExtMachInst machInst, IntRegIndex dest,
                           IntRegIndex op1)
    {
        return q ?
            decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1) :
            decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
    }

    // Pick BaseQ when q is set and BaseD otherwise, then decode the signed
    // element type via decodeNeonSTwoMiscUSReg (sizes 0..2 only) for both
    // variants.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonSTwoMiscSReg(bool q, unsigned size,
                           ExtMachInst machInst, IntRegIndex dest,
                           IntRegIndex op1)
    {
        return q ?
            decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1) :
            decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
    }

    // Two-register decode over all four unsigned element types: size 0..3
    // -> uint8_t, uint16_t, uint32_t, uint64_t. Sizes outside that range
    // decode to Unknown.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonUTwoMiscUReg(unsigned size,
                           ExtMachInst machInst, IntRegIndex dest,
                           IntRegIndex op1)
    {
        if (size == 0)
            return new Base<uint8_t>(machInst, dest, op1);
        if (size == 1)
            return new Base<uint16_t>(machInst, dest, op1);
        if (size == 2)
            return new Base<uint32_t>(machInst, dest, op1);
        if (size == 3)
            return new Base<uint64_t>(machInst, dest, op1);

        return new Unknown(machInst);
    }

    // Two-register decode over all four signed element types: size 0..3
    // -> int8_t, int16_t, int32_t, int64_t. Sizes outside that range decode
    // to Unknown.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonSTwoMiscUReg(unsigned size,
                           ExtMachInst machInst, IntRegIndex dest,
                           IntRegIndex op1)
    {
        if (size == 0) {
            return new Base<int8_t>(machInst, dest, op1);
        } else if (size == 1) {
            return new Base<int16_t>(machInst, dest, op1);
        } else if (size == 2) {
            return new Base<int32_t>(machInst, dest, op1);
        } else if (size == 3) {
            return new Base<int64_t>(machInst, dest, op1);
        } else {
            return new Unknown(machInst);
        }
    }

    // Pick BaseQ when q is set and BaseD otherwise, then decode the signed
    // element type via decodeNeonSTwoMiscUReg (sizes 0..3) for both variants.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonSTwoMiscReg(bool q, unsigned size,
                          ExtMachInst machInst, IntRegIndex dest,
                          IntRegIndex op1)
    {
        return q ?
            decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1) :
            decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
    }

    // Pick BaseQ when q is set and BaseD otherwise, then decode the unsigned
    // element type via decodeNeonUTwoMiscUReg (sizes 0..3) for both variants.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUTwoMiscReg(bool q, unsigned size,
                          ExtMachInst machInst, IntRegIndex dest,
                          IntRegIndex op1)
    {
        return q ?
            decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1) :
            decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
    }

    // Signedness dispatch for the two-register decode limited to sizes 0..2:
    // notSigned forwards to decodeNeonUTwoMiscSReg, otherwise to
    // decodeNeonSTwoMiscSReg. q, size and the register operands are passed
    // through unchanged.
    //
    // The Misc decoders are the correct targets here: the similarly named
    // TwoShift decoders take an additional immediate operand that this
    // function does not have, so calling them with these five arguments
    // cannot compile once the template is instantiated.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
                            ExtMachInst machInst, IntRegIndex dest,
                            IntRegIndex op1)
    {
        if (notSigned) {
            return decodeNeonUTwoMiscSReg<BaseD, BaseQ>(
                    q, size, machInst, dest, op1);
        } else {
            return decodeNeonSTwoMiscSReg<BaseD, BaseQ>(
                    q, size, machInst, dest, op1);
        }
    }

    // Unsigned two-register decode with asymmetric size support: the BaseQ
    // variant (q set) goes through decodeNeonUTwoMiscUReg and accepts sizes
    // 0..3, while the BaseD variant goes through decodeNeonUTwoMiscUSReg and
    // only accepts sizes 0..2.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
                           IntRegIndex dest, IntRegIndex op1)
    {
        if (!q)
            return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);

        return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
    }

    // Signed two-register decode with asymmetric size support: the BaseQ
    // variant (q set) goes through decodeNeonSTwoMiscUReg and accepts sizes
    // 0..3, while the BaseD variant goes through decodeNeonSTwoMiscUSReg and
    // only accepts sizes 0..2.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonSTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
                           IntRegIndex dest, IntRegIndex op1)
    {
        if (!q)
            return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);

        return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
    }

    // Two-register decode where size acts as a flag: zero selects uint32_t,
    // non-zero selects uint64_t. With q set the instruction is built from
    // BaseQ; without q only BaseD<uint32_t> is available and a non-zero size
    // decodes to Unknown.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUTwoMiscFpReg(bool q, unsigned size, ExtMachInst machInst,
                            IntRegIndex dest, IntRegIndex op1)
    {
        const bool wide = (size != 0);

        if (!q) {
            // BaseD is never instantiated with the 64-bit element type.
            if (wide)
                return new Unknown(machInst);
            return new BaseD<uint32_t>(machInst, dest, op1);
        }

        if (wide)
            return new BaseQ<uint64_t>(machInst, dest, op1);
        return new BaseQ<uint32_t>(machInst, dest, op1);
    }

    // Two-register decode in which size alone picks both the template and
    // the element type: size zero builds BaseD<uint32_t>, any non-zero size
    // builds BaseQ<uint64_t>. This decoder never yields Unknown.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUTwoMiscPwiseScFpReg(unsigned size, ExtMachInst machInst,
                                   IntRegIndex dest, IntRegIndex op1)
    {
        if (size == 0)
            return new BaseD<uint32_t>(machInst, dest, op1);

        return new BaseQ<uint64_t>(machInst, dest, op1);
    }

    // A zero size builds Base<uint32_t>; any non-zero size builds
    // Base<uint64_t>.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonUTwoMiscScFpReg(unsigned size, ExtMachInst machInst,
                              IntRegIndex dest, IntRegIndex op1)
    {
        if (!size)
            return new Base<uint32_t>(machInst, dest, op1);
        return new Base<uint64_t>(machInst, dest, op1);
    }

    // Instantiate an across-lanes instruction for an unsigned element type.
    // With q set, sizes 0/1/2 build BaseQ with uint8_t/uint16_t/uint32_t;
    // with q clear, sizes 0/1 build BaseD with uint8_t/uint16_t. Every
    // other (q, size) combination decodes to Unknown.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
                              IntRegIndex dest, IntRegIndex op1)
    {
        switch (size) {
          case 0x0:
            if (q)
                return new BaseQ<uint8_t>(machInst, dest, op1);
            return new BaseD<uint8_t>(machInst, dest, op1);
          case 0x1:
            if (q)
                return new BaseQ<uint16_t>(machInst, dest, op1);
            return new BaseD<uint16_t>(machInst, dest, op1);
          case 0x2:
            // Only the q form exists for this size.
            if (q)
                return new BaseQ<uint32_t>(machInst, dest, op1);
            break;
          default:
            break;
        }
        return new Unknown(machInst);
    }

    // Three-template overload of the unsigned across-lanes decoder. It
    // matches the two-template version except that q with size 2 builds
    // BaseBQ<uint32_t> instead of BaseQ<uint32_t>. Unsupported (q, size)
    // combinations decode to Unknown.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ,
              template <typename T> class BaseBQ>
    StaticInstPtr
    decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
                              IntRegIndex dest, IntRegIndex op1)
    {
        switch (size) {
          case 0x0:
            if (q)
                return new BaseQ<uint8_t>(machInst, dest, op1);
            return new BaseD<uint8_t>(machInst, dest, op1);
          case 0x1:
            if (q)
                return new BaseQ<uint16_t>(machInst, dest, op1);
            return new BaseD<uint16_t>(machInst, dest, op1);
          case 0x2:
            // Only the q form exists for this size, and it uses BaseBQ.
            if (q)
                return new BaseBQ<uint32_t>(machInst, dest, op1);
            break;
          default:
            break;
        }
        return new Unknown(machInst);
    }

    // Instantiate an across-lanes instruction for a signed element type.
    // With q set, sizes 0/1/2 build BaseQ with int8_t/int16_t/int32_t;
    // with q clear, sizes 0/1 build BaseD with int8_t/int16_t. Every other
    // (q, size) combination decodes to Unknown.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonSAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
                              IntRegIndex dest, IntRegIndex op1)
    {
        switch (size) {
          case 0x0:
            if (q)
                return new BaseQ<int8_t>(machInst, dest, op1);
            return new BaseD<int8_t>(machInst, dest, op1);
          case 0x1:
            if (q)
                return new BaseQ<int16_t>(machInst, dest, op1);
            return new BaseD<int16_t>(machInst, dest, op1);
          case 0x2:
            // Only the q form exists for this size.
            if (q)
                return new BaseQ<int32_t>(machInst, dest, op1);
            break;
          default:
            break;
        }
        return new Unknown(machInst);
    }

    // Instantiate a "long" across-lanes instruction for an unsigned element
    // type. With q set, sizes 0/1 build BaseQ with uint8_t/uint16_t and
    // size 2 builds BaseBQ<uint32_t>; with q clear, sizes 0/1 build BaseD
    // with uint8_t/uint16_t. Anything else decodes to Unknown.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ,
              template <typename T> class BaseBQ>
    StaticInstPtr
    decodeNeonUAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
                                  IntRegIndex dest, IntRegIndex op1)
    {
        switch (size) {
          case 0x0:
            if (q)
                return new BaseQ<uint8_t>(machInst, dest, op1);
            return new BaseD<uint8_t>(machInst, dest, op1);
          case 0x1:
            if (q)
                return new BaseQ<uint16_t>(machInst, dest, op1);
            return new BaseD<uint16_t>(machInst, dest, op1);
          case 0x2:
            // Only the q form exists for this size, and it uses BaseBQ.
            if (q)
                return new BaseBQ<uint32_t>(machInst, dest, op1);
            break;
          default:
            break;
        }
        return new Unknown(machInst);
    }

    // Instantiate a "long" across-lanes instruction for a signed element
    // type. With q set, sizes 0/1 build BaseQ with int8_t/int16_t and
    // size 2 builds BaseBQ<int32_t>; with q clear, sizes 0/1 build BaseD
    // with int8_t/int16_t. Anything else decodes to Unknown.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ,
              template <typename T> class BaseBQ>
    StaticInstPtr
    decodeNeonSAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
                                  IntRegIndex dest, IntRegIndex op1)
    {
        switch (size) {
          case 0x0:
            if (q)
                return new BaseQ<int8_t>(machInst, dest, op1);
            return new BaseD<int8_t>(machInst, dest, op1);
          case 0x1:
            if (q)
                return new BaseQ<int16_t>(machInst, dest, op1);
            return new BaseD<int16_t>(machInst, dest, op1);
          case 0x2:
            // Only the q form exists for this size, and it uses BaseBQ.
            if (q)
                return new BaseBQ<int32_t>(machInst, dest, op1);
            break;
          default:
            break;
        }
        return new Unknown(machInst);
    }
}};

let {{
    header_output = ""
    exec_output = ""

    vcompares = '''
    static float
    vcgtFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 > op2) ? 0.0 : 1.0;
    }

    static float
    vcgeFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 >= op2) ? 0.0 : 1.0;
    }

    static float
    vceqFunc(float op1, float op2)
    {
        if (isSnan(op1) || isSnan(op2))
            return 2.0;
        return (op1 == op2) ? 0.0 : 1.0;
    }
'''
    vcomparesL = '''
    static float
    vcleFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 <= op2) ? 0.0 : 1.0;
    }

    static float
    vcltFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 < op2) ? 0.0 : 1.0;
    }
'''
    vacomparesG = '''
    static float
    vacgtFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
    }

    static float
    vacgeFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
    }
'''

    exec_output += vcompares + vacomparesG

    smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
    unsignedTypes = smallUnsignedTypes + ("uint64_t",)
    smallSignedTypes = ("int8_t", "int16_t", "int32_t")
    signedTypes = smallSignedTypes + ("int64_t",)
    smallTypes = smallUnsignedTypes + smallSignedTypes
    allTypes = unsignedTypes + signedTypes

    def threeEqualRegInst(name, Name, opClass, types, rCount, op,
                          readDest=False, pairwise=False, byElem=False,
                          standardFpcsr=False, complex=False):
        # Generate a three-operand instruction in which both sources and the
        # destination cover rCount 32-bit registers, and append its
        # declaration / execute code to header_output / exec_output.
        #
        #   op            C++ snippet run per element (or, when complex is
        #                 set, appended as-is in place of the element loop)
        #   readDest      load the old destination into destElem first
        #   pairwise      feed adjacent element pairs drawn from srcReg1 and
        #                 then srcReg2 instead of matching lanes
        #   byElem        declare srcReg2 as FullRegVect, load all four of
        #                 its registers, and use the RegRegRegImmOp base
        #   standardFpcsr wrap the walk in a local fpscr taken from
        #                 fpStandardFPSCRValue and written back to FpscrExc
        global header_output, exec_output

        if byElem:
            # 2nd register operand has to be read fully
            src2Decl = '''
                FullRegVect srcReg2;
                '''
        else:
            src2Decl = '''
            RegVect srcReg2;
            '''
        walk = simdEnabledCheckCode + '''
                    RegVect srcReg1, destReg;
                    ''' + src2Decl

        for idx in range(rCount):
            regArgs = { "reg" : idx }
            walk += '''
                srcReg1.regs[%(reg)d] = htole(FpOp1P%(reg)d_uw);
                srcReg2.regs[%(reg)d] = htole(FpOp2P%(reg)d_uw);
            ''' % regArgs
            if readDest:
                walk += '''
                    destReg.regs[%(reg)d] = htole(FpDestP%(reg)d_uw);
                ''' % regArgs
        if byElem:
            # 2nd operand has to be read fully: pick up the registers the
            # loop above did not cover.
            for idx in range(rCount, 4):
                walk += '''
        srcReg2.regs[%(reg)d] = htole(FpOp2P%(reg)d_uw);
        ''' % { "reg" : idx }

        if standardFpcsr:
            walk += '''
            FPSCR fpscr = fpStandardFPSCRValue((FPSCR)FpscrExc);
            '''

        loadDest = 'destElem = letoh(destReg.elements[i]);' if readDest else ''

        if complex:
            # The caller supplies the whole walk; no substitution is done.
            walk += op
        else:
            if pairwise:
                elemLoop = '''
            for (unsigned i = 0; i < eCount; i++) {
                Element srcElem1 = letoh(2 * i < eCount ?
                                        srcReg1.elements[2 * i] :
                                        srcReg2.elements[2 * i - eCount]);
                Element srcElem2 = letoh(2 * i < eCount ?
                                        srcReg1.elements[2 * i + 1] :
                                        srcReg2.elements[2 * i + 1 - eCount]);
                Element destElem;
                %(readDest)s
                %(op)s
                destReg.elements[i] = htole(destElem);
            }
            '''
            else:
                elemLoop = '''
            for (unsigned i = 0; i < eCount; i++) {
                Element srcElem1 = letoh(srcReg1.elements[i]);
                Element srcElem2 = letoh(srcReg2.elements[i]);
                Element destElem;
                %(readDest)s
                %(op)s
                destReg.elements[i] = htole(destElem);
            }
            '''
            walk += elemLoop % { "op" : op, "readDest" : loadDest }

        if standardFpcsr:
            walk += '''
            FpscrExc = fpscr;
            '''
        for idx in range(rCount):
            walk += '''
            FpDestP%(reg)d_uw = letoh(destReg.regs[%(reg)d]);
            ''' % { "reg" : idx }

        baseClass, declTemplate = (
            ("RegRegRegImmOp", NeonRegRegRegImmOpDeclare) if byElem else
            ("RegRegRegOp", NeonRegRegRegOpDeclare))
        iop = InstObjParams(name, Name, baseClass,
                            { "code": walk,
                              "r_count": rCount,
                              "predicate_test": predicateTest,
                              "op_class": opClass }, [])
        header_output += declTemplate.subst(iop)
        exec_output += NeonEqualRegExecute.subst(iop)
        # One explicit instantiation per requested element type.
        for elemType in types:
            exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                                   "class_name" : Name })

    def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
                            readDest=False, pairwise=False, toInt=False):
        # Generate a three-operand floating point instruction that walks
        # rCount 32-bit registers, and append its declaration / execute code
        # to header_output / exec_output.
        #
        #   op        C++ snippet run once per register; it sees srcReg1,
        #             srcReg2 (floats) and must set destReg
        #   readDest  preload destReg with the old destination value
        #   pairwise  feed adjacent register pairs drawn from srcRegs1 and
        #             then srcRegs2 instead of matching registers
        #   toInt     the result is a uint32_t kept in a RegVect and written
        #             back through the _uw view of the destination
        #
        # With toInt the destination is a RegVect, so every access to it has
        # to go through .regs[]. The read-back paths now do so as well;
        # previously they emitted `destRegs[r]` and `FpDestP<n>.bits`, which
        # did not match the `destRegs.regs[...]` / `FpDestP<n>_uw` forms used
        # for the write-back.
        global header_output, exec_output
        eWalkCode = simdEnabledCheckCode + '''
        typedef float FloatVect[rCount];
        FloatVect srcRegs1, srcRegs2;
        '''
        if toInt:
            eWalkCode += 'RegVect destRegs;\n'
        else:
            eWalkCode += 'FloatVect destRegs;\n'
        for reg in range(rCount):
            eWalkCode += '''
                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
            ''' % { "reg" : reg }
            if readDest:
                if toInt:
                    # Same operand view as the write-back at the end.
                    eWalkCode += '''
                        destRegs.regs[%(reg)d] = FpDestP%(reg)d_uw;
                    ''' % { "reg" : reg }
                else:
                    eWalkCode += '''
                        destRegs[%(reg)d] = FpDestP%(reg)d;
                    ''' % { "reg" : reg }

        # Per-register destination slot and the C++ type of destReg.
        if toInt:
            destType = 'uint32_t'
            destSlot = 'destRegs.regs[r]'
        else:
            destType = 'float'
            destSlot = 'destRegs[r]'
        readDestCode = ('destReg = %s;' % destSlot) if readDest else ''
        writeDest = '%s = destReg;' % destSlot

        if pairwise:
            eWalkCode += '''
            for (unsigned r = 0; r < rCount; r++) {
                float srcReg1 = (2 * r < rCount) ?
                    srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
                float srcReg2 = (2 * r < rCount) ?
                    srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
                %(destType)s destReg;
                %(readDest)s
                %(op)s
                %(writeDest)s
            }
            ''' % { "op" : op,
                    "readDest" : readDestCode,
                    "destType" : destType,
                    "writeDest" : writeDest }
        else:
            eWalkCode += '''
            for (unsigned r = 0; r < rCount; r++) {
                float srcReg1 = srcRegs1[r];
                float srcReg2 = srcRegs2[r];
                %(destType)s destReg;
                %(readDest)s
                %(op)s
                %(writeDest)s
            }
            ''' % { "op" : op,
                    "readDest" : readDestCode,
                    "destType" : destType,
                    "writeDest" : writeDest }
        for reg in range(rCount):
            if toInt:
                eWalkCode += '''
                FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
                ''' % { "reg" : reg }
            else:
                eWalkCode += '''
                FpDestP%(reg)d = destRegs[%(reg)d];
                ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "FpRegRegRegOp",
                            { "code": eWalkCode,
                              "r_count": rCount,
                              "predicate_test": predicateTest,
                              "op_class": opClass }, [])
        header_output += NeonRegRegRegOpDeclare.subst(iop)
        exec_output += NeonEqualRegExecute.subst(iop)
        # One explicit instantiation per requested element type.
        for type in types:
            substDict = { "targs" : type,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)

    def threeUnequalRegInst(name, Name, opClass, types, op,
                            bigSrc1, bigSrc2, bigDest, readDest):
        # Generate a three-operand instruction whose operands may differ in
        # width, and append its declaration / execute code to header_output /
        # exec_output. Each "big" flag switches that operand from a two
        # register RegVect to a four register BigRegVect. op is the C++
        # snippet run per element; readDest preloads destElem with the old
        # destination value.
        global header_output, exec_output

        def shape(big):
            # (number of 32-bit registers, type-name prefix)
            return (4, 'Big') if big else (2, '')

        src1Cnt, src1Prefix = shape(bigSrc1)
        src2Cnt, src2Prefix = shape(bigSrc2)
        destCnt, destPrefix = shape(bigDest)

        walk = simdEnabledCheckCode + '''
            %sRegVect srcReg1;
            %sRegVect srcReg2;
            %sRegVect destReg;
        ''' % (src1Prefix, src2Prefix, destPrefix)
        for idx in range(src1Cnt):
            walk += '''
                srcReg1.regs[%(reg)d] = htole(FpOp1P%(reg)d_uw);
            ''' % { "reg" : idx }
        for idx in range(src2Cnt):
            walk += '''
                srcReg2.regs[%(reg)d] = htole(FpOp2P%(reg)d_uw);
            ''' % { "reg" : idx }
        loadDest = ''
        if readDest:
            for idx in range(destCnt):
                walk += '''
                    destReg.regs[%(reg)d] = htole(FpDestP%(reg)d_uw);
                ''' % { "reg" : idx }
            loadDest = 'destElem = letoh(destReg.elements[i]);'

        # NOTE(review): srcElem2 is declared with src1Prefix rather than
        # src2Prefix, so it takes srcElem1's element type even when only the
        # first source is "big" -- confirm this is intended.
        walk += '''
        for (unsigned i = 0; i < eCount; i++) {
            %(src1Prefix)sElement srcElem1 = letoh(srcReg1.elements[i]);
            %(src1Prefix)sElement srcElem2 = letoh(srcReg2.elements[i]);
            %(destPrefix)sElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htole(destElem);
        }
        ''' % { "op" : op, "readDest" : loadDest,
                "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
                "destPrefix" : destPrefix }
        for idx in range(destCnt):
            walk += '''
            FpDestP%(reg)d_uw = letoh(destReg.regs[%(reg)d]);
            ''' % { "reg" : idx }

        # r_count is fixed at 2 regardless of the "big" flags.
        iop = InstObjParams(name, Name,
                            "RegRegRegOp",
                            { "code": walk,
                              "r_count": 2,
                              "predicate_test": predicateTest,
                              "op_class": opClass }, [])
        header_output += NeonRegRegRegOpDeclare.subst(iop)
        exec_output += NeonUnequalRegExecute.subst(iop)
        # One explicit instantiation per requested element type.
        for elemType in types:
            exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                                   "class_name" : Name })

    def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
        # Narrowing form: both sources are "big", the destination is not.
        threeUnequalRegInst(name, Name, opClass, types, op,
                            bigSrc1=True, bigSrc2=True, bigDest=False,
                            readDest=readDest)

    def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
        # Long form: neither source is "big", the destination is.
        threeUnequalRegInst(name, Name, opClass, types, op,
                            bigSrc1=False, bigSrc2=False, bigDest=True,
                            readDest=readDest)

    def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
        # Wide form: the first source and the destination are "big", the
        # second source is not.
        threeUnequalRegInst(name, Name, opClass, types, op,
                            bigSrc1=True, bigSrc2=False, bigDest=True,
                            readDest=readDest)

    def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
        # Generate an instruction that combines each element of the first
        # source with the single element of the second source selected by
        # imm, and append its declaration / execute code to header_output /
        # exec_output. The generated code returns an UndefinedInstruction
        # fault when imm is not below eCount. op is the C++ snippet run per
        # element; readDest preloads destElem with the old destination.
        global header_output, exec_output
        walk = simdEnabledCheckCode + '''
        RegVect srcReg1, srcReg2, destReg;
        '''
        for idx in range(rCount):
            regArgs = { "reg" : idx }
            walk += '''
                srcReg1.regs[%(reg)d] = htole(FpOp1P%(reg)d_uw);
                srcReg2.regs[%(reg)d] = htole(FpOp2P%(reg)d_uw);
            ''' % regArgs
            if readDest:
                walk += '''
                    destReg.regs[%(reg)d] = htole(FpDestP%(reg)d_uw);
                ''' % regArgs
        loadDest = 'destElem = letoh(destReg.elements[i]);' if readDest else ''
        walk += '''
        if (imm >= eCount) {
            return std::make_shared<UndefinedInstruction>(machInst, false,
                                                          mnemonic);
        } else {
            for (unsigned i = 0; i < eCount; i++) {
                Element srcElem1 = letoh(srcReg1.elements[i]);
                Element srcElem2 = letoh(srcReg2.elements[imm]);
                Element destElem;
                %(readDest)s
                %(op)s
                destReg.elements[i] = htole(destElem);
            }
        }
        ''' % { "op" : op, "readDest" : loadDest }
        for idx in range(rCount):
            walk += '''
            FpDestP%(reg)d_uw = letoh(destReg.regs[%(reg)d]);
            ''' % { "reg" : idx }
        iop = InstObjParams(name, Name,
                            "RegRegRegImmOp",
                            { "code": walk,
                              "r_count": rCount,
                              "predicate_test": predicateTest,
                              "op_class": opClass }, [])
        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
        exec_output += NeonEqualRegExecute.subst(iop)
        # One explicit instantiation per requested element type.
        for elemType in types:
            exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                                   "class_name" : Name })

    def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
        # Generate the "long" variant of the by-element instruction: two
        # register sources, a four register BigRegVect destination holding
        # BigElement results. Its declaration / execute code is appended to
        # header_output / exec_output. When imm is not below eCount the
        # generated code stores an UndefinedInstruction in `fault` (it does
        # not return early). op is the C++ snippet run per element; readDest
        # preloads destElem with the old destination.
        global header_output, exec_output
        rCount = 2
        destRegCount = 2 * rCount
        walk = simdEnabledCheckCode + '''
        RegVect srcReg1, srcReg2;
        BigRegVect destReg = {};
        '''
        for idx in range(rCount):
            walk += '''
                srcReg1.regs[%(reg)d] = htole(FpOp1P%(reg)d_uw);
                srcReg2.regs[%(reg)d] = htole(FpOp2P%(reg)d_uw);;
            ''' % { "reg" : idx }
        loadDest = ''
        if readDest:
            for idx in range(destRegCount):
                walk += '''
                    destReg.regs[%(reg)d] = htole(FpDestP%(reg)d_uw);
                ''' % { "reg" : idx }
            loadDest = 'destElem = letoh(destReg.elements[i]);'
        walk += '''
        if (imm >= eCount) {
            fault = std::make_shared<UndefinedInstruction>(machInst, false,
                                                          mnemonic);
        } else {
            for (unsigned i = 0; i < eCount; i++) {
                Element srcElem1 = letoh(srcReg1.elements[i]);
                Element srcElem2 = letoh(srcReg2.elements[imm]);
                BigElement destElem;
                %(readDest)s
                %(op)s
                destReg.elements[i] = htole(destElem);
            }
        }
        ''' % { "op" : op, "readDest" : loadDest }
        for idx in range(destRegCount):
            walk += '''
            FpDestP%(reg)d_uw = letoh(destReg.regs[%(reg)d]);
            ''' % { "reg" : idx }
        iop = InstObjParams(name, Name,
                            "RegRegRegImmOp",
                            { "code": walk,
                              "r_count": rCount,
                              "predicate_test": predicateTest,
                              "op_class": opClass }, [])
        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
        exec_output += NeonUnequalRegExecute.subst(iop)
        # One explicit instantiation per requested element type.
        for elemType in types:
            exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                                   "class_name" : Name })

    def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
        # Floating point counterpart of the by-element generator: every
        # register of the first source is combined with the register of the
        # second source selected by imm. Declaration / execute code is
        # appended to header_output / exec_output. The generated code
        # returns an UndefinedInstruction fault when imm is not below
        # eCount. op is the C++ snippet run per register (it sees srcReg1,
        # srcReg2 and sets destReg); readDest preloads destReg.
        global header_output, exec_output
        walk = simdEnabledCheckCode + '''
        typedef float FloatVect[rCount];
        FloatVect srcRegs1, srcRegs2, destRegs;
        '''
        for idx in range(rCount):
            regArgs = { "reg" : idx }
            walk += '''
                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
            ''' % regArgs
            if readDest:
                walk += '''
                    destRegs[%(reg)d] = FpDestP%(reg)d;
                ''' % regArgs
        loadDest = 'destReg = destRegs[i];' if readDest else ''
        walk += '''
        if (imm >= eCount) {
            return std::make_shared<UndefinedInstruction>(machInst, false,
                                                          mnemonic);
        } else {
            for (unsigned i = 0; i < rCount; i++) {
                float srcReg1 = srcRegs1[i];
                float srcReg2 = srcRegs2[imm];
                float destReg;
                %(readDest)s
                %(op)s
                destRegs[i] = destReg;
            }
        }
        ''' % { "op" : op, "readDest" : loadDest }
        for idx in range(rCount):
            walk += '''
            FpDestP%(reg)d = destRegs[%(reg)d];
            ''' % { "reg" : idx }
        iop = InstObjParams(name, Name,
                            "FpRegRegRegImmOp",
                            { "code": walk,
                              "r_count": rCount,
                              "predicate_test": predicateTest,
                              "op_class": opClass }, [])
        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
        exec_output += NeonEqualRegExecute.subst(iop)
        # One explicit instantiation per requested element type.
        for elemType in types:
            exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                                   "class_name" : Name })

    def twoRegShiftInst(name, Name, opClass, types, rCount, op,
            readDest=False, toInt=False, fromInt=False):
        # Generate a one-source instruction with an immediate (RegRegImmOp
        # base) over rCount registers, and append its declaration / execute
        # code to header_output / exec_output.
        #
        #   op        C++ snippet run per loop iteration
        #   readDest  preload the destination variable with its old value
        #   toInt     the destination variable is `uint32_t destReg`, stored
        #             through destRegs.regs[i], instead of
        #             `Element destElem` stored through destRegs.elements[i]
        #   fromInt   the source variable is `uint32_t srcReg1`, read from
        #             srcRegs1.regs[i], instead of `Element srcElem1`
        global header_output, exec_output
        walk = simdEnabledCheckCode + '''
        RegVect srcRegs1, destRegs;
        '''
        for idx in range(rCount):
            regArgs = { "reg" : idx }
            walk += '''
                srcRegs1.regs[%(reg)d] = htole(FpOp1P%(reg)d_uw);
            ''' % regArgs
            if readDest:
                walk += '''
                    destRegs.regs[%(reg)d] = htole(FpDestP%(reg)d_uw);
                ''' % regArgs

        # Destination declaration, preload and store, by result kind.
        if toInt:
            declDest = 'uint32_t destReg;'
            loadDest = 'destReg = letoh(destRegs.regs[i]);'
            storeDest = 'destRegs.regs[i] = htole(destReg);'
        else:
            declDest = 'Element destElem;'
            loadDest = 'destElem = letoh(destRegs.elements[i]);'
            storeDest = 'destRegs.elements[i] = htole(destElem);'
        if not readDest:
            loadDest = ''

        if fromInt:
            loadSrc = 'uint32_t srcReg1 = letoh(srcRegs1.regs[i]);'
        else:
            loadSrc = 'Element srcElem1 = letoh(srcRegs1.elements[i]);'

        walk += '''
        for (unsigned i = 0; i < eCount; i++) {
            %(readOp)s
            %(declDest)s
            %(readDest)s
            %(op)s
            %(writeDest)s
        }
        ''' % { "readOp" : loadSrc,
                "declDest" : declDest,
                "readDest" : loadDest,
                "op" : op,
                "writeDest" : storeDest }
        for idx in range(rCount):
            walk += '''
            FpDestP%(reg)d_uw = letoh(destRegs.regs[%(reg)d]);
            ''' % { "reg" : idx }
        iop = InstObjParams(name, Name,
                            "RegRegImmOp",
                            { "code": walk,
                              "r_count": rCount,
                              "predicate_test": predicateTest,
                              "op_class": opClass }, [])
        header_output += NeonRegRegImmOpDeclare.subst(iop)
        exec_output += NeonEqualRegExecute.subst(iop)
        # One explicit instantiation per requested element type.
        for elemType in types:
            exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                                   "class_name" : Name })

    def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
        # Generate the narrowing immediate form: a four register BigRegVect
        # source read as BigElement values, and a two register RegVect
        # destination written as Element values. Declaration / execute code
        # is appended to header_output / exec_output. op is the C++ snippet
        # run per element; readDest preloads destElem with the old
        # destination.
        global header_output, exec_output
        srcRegCount = 4
        destRegCount = 2
        walk = simdEnabledCheckCode + '''
        BigRegVect srcReg1;
        RegVect destReg;
        '''
        for idx in range(srcRegCount):
            walk += '''
                srcReg1.regs[%(reg)d] = htole(FpOp1P%(reg)d_uw);
            ''' % { "reg" : idx }
        loadDest = ''
        if readDest:
            for idx in range(destRegCount):
                walk += '''
                    destReg.regs[%(reg)d] = htole(FpDestP%(reg)d_uw);
                ''' % { "reg" : idx }
            loadDest = 'destElem = letoh(destReg.elements[i]);'
        walk += '''
        for (unsigned i = 0; i < eCount; i++) {
            BigElement srcElem1 = letoh(srcReg1.elements[i]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htole(destElem);
        }
        ''' % { "op" : op, "readDest" : loadDest }
        for idx in range(destRegCount):
            walk += '''
            FpDestP%(reg)d_uw = letoh(destReg.regs[%(reg)d]);
            ''' % { "reg" : idx }
        iop = InstObjParams(name, Name,
                            "RegRegImmOp",
                            { "code": walk,
                              "r_count": 2,
                              "predicate_test": predicateTest,
                              "op_class": opClass }, [])
        header_output += NeonRegRegImmOpDeclare.subst(iop)
        exec_output += NeonUnequalRegExecute.subst(iop)
        # One explicit instantiation per requested element type.
        for elemType in types:
            exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                                   "class_name" : Name })

    def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
        # Generate the lengthening immediate form: a two register RegVect
        # source read as Element values, and a four register BigRegVect
        # destination written as BigElement values. Declaration / execute
        # code is appended to header_output / exec_output. op is the C++
        # snippet run per element; readDest preloads destElem with the old
        # destination.
        global header_output, exec_output
        eWalkCode = simdEnabledCheckCode + '''
        RegVect srcReg1;
        BigRegVect destReg = {};
        '''
        for reg in range(2):
            eWalkCode += '''
                srcReg1.regs[%(reg)d] = htole(FpOp1P%(reg)d_uw);
            ''' % { "reg" : reg }
        if readDest:
            for reg in range(4):
                eWalkCode += '''
                    destReg.regs[%(reg)d] = htole(FpDestP%(reg)d_uw);
                ''' % { "reg" : reg }
        readDestCode = ''
        if readDest:
            # The per-element accumulator is destElem. The previous text
            # assigned to destReg, i.e. tried to store one element into the
            # whole register union and left destElem unset.
            readDestCode = 'destElem = letoh(destReg.elements[i]);'
        eWalkCode += '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = letoh(srcReg1.elements[i]);
            BigElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htole(destElem);
        }
        ''' % { "op" : op, "readDest" : readDestCode }
        for reg in range(4):
            eWalkCode += '''
            FpDestP%(reg)d_uw = letoh(destReg.regs[%(reg)d]);
            ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "RegRegImmOp",
                            { "code": eWalkCode,
                              "r_count": 2,
                              "predicate_test": predicateTest,
                              "op_class": opClass }, [])
        header_output += NeonRegRegImmOpDeclare.subst(iop)
        exec_output += NeonUnequalRegExecute.subst(iop)
        # One explicit instantiation per requested element type.
        for type in types:
            substDict = { "targs" : type,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)

    def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
        # Generate a one-source element-wise instruction (RegRegOp base)
        # over rCount registers, and append its declaration / execute code
        # to header_output / exec_output. op is the C++ snippet run per
        # element; the result is stored at index j, which starts out equal
        # to the source index i and is visible to op. readDest preloads
        # destElem with the old destination.
        global header_output, exec_output
        walk = simdEnabledCheckCode + '''
        RegVect srcReg1, destReg;
        '''
        for idx in range(rCount):
            regArgs = { "reg" : idx }
            walk += '''
                srcReg1.regs[%(reg)d] = htole(FpOp1P%(reg)d_uw);
            ''' % regArgs
            if readDest:
                walk += '''
                    destReg.regs[%(reg)d] = htole(FpDestP%(reg)d_uw);
                ''' % regArgs
        loadDest = 'destElem = letoh(destReg.elements[i]);' if readDest else ''
        walk += '''
        for (unsigned i = 0; i < eCount; i++) {
            unsigned j = i;
            Element srcElem1 = letoh(srcReg1.elements[i]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[j] = htole(destElem);
        }
        ''' % { "op" : op, "readDest" : loadDest }
        for idx in range(rCount):
            walk += '''
            FpDestP%(reg)d_uw = letoh(destReg.regs[%(reg)d]);
            ''' % { "reg" : idx }
        iop = InstObjParams(name, Name,
                            "RegRegOp",
                            { "code": walk,
                              "r_count": rCount,
                              "predicate_test": predicateTest,
                              "op_class": opClass }, [])
        header_output += NeonRegRegOpDeclare.subst(iop)
        exec_output += NeonEqualRegExecute.subst(iop)
        # One explicit instantiation per requested element type.
        for elemType in types:
            exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                                   "class_name" : Name })

    def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
        # Generate a one-source instruction with an immediate (RegRegImmOp
        # base) in which every destination element is computed from the same
        # source element, srcReg1.elements[imm]. Declaration / execute code
        # is appended to header_output / exec_output. The generated code
        # does not range-check imm. op is the C++ snippet run per element;
        # readDest preloads destElem with the old destination.
        global header_output, exec_output
        walk = simdEnabledCheckCode + '''
        RegVect srcReg1, destReg;
        '''
        for idx in range(rCount):
            regArgs = { "reg" : idx }
            walk += '''
                srcReg1.regs[%(reg)d] = htole(FpOp1P%(reg)d_uw);
            ''' % regArgs
            if readDest:
                walk += '''
                    destReg.regs[%(reg)d] = htole(FpDestP%(reg)d_uw);
                ''' % regArgs
        loadDest = 'destElem = letoh(destReg.elements[i]);' if readDest else ''
        walk += '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = letoh(srcReg1.elements[imm]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htole(destElem);
        }
        ''' % { "op" : op, "readDest" : loadDest }
        for idx in range(rCount):
            walk += '''
            FpDestP%(reg)d_uw = letoh(destReg.regs[%(reg)d]);
            ''' % { "reg" : idx }
        iop = InstObjParams(name, Name,
                            "RegRegImmOp",
                            { "code": walk,
                              "r_count": rCount,
                              "predicate_test": predicateTest,
                              "op_class": opClass }, [])
        header_output += NeonRegRegImmOpDeclare.subst(iop)
        exec_output += NeonEqualRegExecute.subst(iop)
        # One explicit instantiation per requested element type.
        for elemType in types:
            exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                                   "class_name" : Name })

    def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
        # Emit a templated "RegRegOp" instruction class whose op snippet
        # rearranges data between the operand and destination registers.
        #
        # Unlike the element-walking generators, op is pasted in verbatim (no
        # per-element loop is generated around it) and BOTH register groups
        # are loaded before op and written back after it.
        #
        #   name, Name -- mnemonic and class name handed to InstObjParams
        #   opClass    -- value stored under "op_class"
        #   types      -- element types; one NeonExecDeclare is emitted per entry
        #   rCount     -- number of FpOp1P<n>/FpDestP<n> register pieces
        #   op         -- C++ snippet operating on srcReg1 and destReg
        #   readDest   -- accepted for signature compatibility only; the
        #                 destination is always loaded, so the flag has no
        #                 effect on the generated code
        global header_output, exec_output
        eWalkCode = simdEnabledCheckCode + '''
        RegVect srcReg1, destReg;
        '''
        for reg in range(rCount):
            eWalkCode += '''
                srcReg1.regs[%(reg)d] = htole(FpOp1P%(reg)d_uw);
                destReg.regs[%(reg)d] = htole(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
        eWalkCode += op
        # Both unions may have been modified by op, so write both back.
        for reg in range(rCount):
            eWalkCode += '''
            FpDestP%(reg)d_uw = letoh(destReg.regs[%(reg)d]);
            FpOp1P%(reg)d_uw = letoh(srcReg1.regs[%(reg)d]);
            ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "RegRegOp",
                            { "code": eWalkCode,
                              "r_count": rCount,
                              "predicate_test": predicateTest,
                              "op_class": opClass }, [])
        header_output += NeonRegRegOpDeclare.subst(iop)
        exec_output += NeonEqualRegExecute.subst(iop)
        for elemType in types:
            substDict = { "targs" : elemType,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)

    def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
            readDest=False, toInt=False):
        # Emit a templated "FpRegRegOp" instruction class that applies op to
        # one float source register at a time.
        #
        #   name, Name -- mnemonic and class name handed to InstObjParams
        #   opClass    -- value stored under "op_class"
        #   types      -- element types; one NeonExecDeclare is emitted per entry
        #   rCount     -- number of FpOp1P<n>/FpDestP<n> register pieces
        #   op         -- C++ snippet run once per register; it can read
        #                 srcReg1 (float) and is expected to assign destReg
        #   readDest   -- when true, destReg is preloaded from the previous
        #                 destination contents before op runs
        #   toInt      -- when true, the destination is held in a RegVect and
        #                 destReg is a uint32_t; otherwise it is a float
        global header_output, exec_output
        eWalkCode = simdEnabledCheckCode + '''
        typedef float FloatVect[rCount];
        FloatVect srcRegs1;
        '''
        # destSlot is the lvalue for register r of the destination inside the
        # generated loop; it differs because RegVect is accessed through
        # its .regs member while FloatVect is a plain array.
        if toInt:
            eWalkCode += 'RegVect destRegs;\n'
            destType = 'uint32_t'
            destSlot = 'destRegs.regs[r]'
        else:
            eWalkCode += 'FloatVect destRegs;\n'
            destType = 'float'
            destSlot = 'destRegs[r]'
        for reg in range(rCount):
            eWalkCode += '''
                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
            ''' % { "reg" : reg }
            if readDest:
                if toInt:
                    eWalkCode += '''
                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
                    ''' % { "reg" : reg }
                else:
                    eWalkCode += '''
                        destRegs[%(reg)d] = FpDestP%(reg)d;
                    ''' % { "reg" : reg }
        # The generated loop index is r (there is no i in scope), and the
        # preload must read from the same slot that the write-back stores to.
        readDestCode = ''
        if readDest:
            readDestCode = 'destReg = %s;' % destSlot
        writeDest = '%s = destReg;' % destSlot
        eWalkCode += '''
        for (unsigned r = 0; r < rCount; r++) {
            float srcReg1 = srcRegs1[r];
            %(destType)s destReg;
            %(readDest)s
            %(op)s
            %(writeDest)s
        }
        ''' % { "op" : op,
                "readDest" : readDestCode,
                "destType" : destType,
                "writeDest" : writeDest }
        for reg in range(rCount):
            if toInt:
                eWalkCode += '''
                FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
                ''' % { "reg" : reg }
            else:
                eWalkCode += '''
                FpDestP%(reg)d = destRegs[%(reg)d];
                ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "FpRegRegOp",
                            { "code": eWalkCode,
                              "r_count": rCount,
                              "predicate_test": predicateTest,
                              "op_class": opClass }, [])
        header_output += NeonRegRegOpDeclare.subst(iop)
        exec_output += NeonEqualRegExecute.subst(iop)
        for elemType in types:
            substDict = { "targs" : elemType,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)

    def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
        # Emit a templated "RegRegOp" instruction class that folds each pair
        # of adjacent source elements (2*i and 2*i+1) into one BigElement of
        # the destination, so the generated loop runs eCount / 2 times.
        #
        #   name, Name -- mnemonic and class name handed to InstObjParams
        #   opClass    -- value stored under "op_class"
        #   types      -- element types; one NeonExecDeclare is emitted per entry
        #   rCount     -- number of FpOp1P<n>/FpDestP<n> register pieces
        #   op         -- C++ snippet run once per pair; it can read srcElem1
        #                 and srcElem2 and is expected to assign destElem
        #   readDest   -- when true, destElem is preloaded from the previous
        #                 destination contents before op runs
        global header_output, exec_output

        # Per-register snippets, expanded once for every register index.
        srcLoad = '''
                srcRegs.regs[%(reg)d] = htole(FpOp1P%(reg)d_uw);
            '''
        destLoad = '''
                    destReg.regs[%(reg)d] = htole(FpDestP%(reg)d_uw);
                '''
        destStore = '''
            FpDestP%(reg)d_uw = letoh(destReg.regs[%(reg)d]);
            '''

        body = simdEnabledCheckCode + '''
        RegVect srcRegs;
        BigRegVect destReg = {};
        '''
        for idx in range(rCount):
            regArg = { "reg" : idx }
            body += srcLoad % regArg
            if readDest:
                body += destLoad % regArg

        preload = 'destElem = letoh(destReg.elements[i]);' if readDest else ''
        body += '''
        for (unsigned i = 0; i < eCount / 2; i++) {
            Element srcElem1 = letoh(srcRegs.elements[2 * i]);
            Element srcElem2 = letoh(srcRegs.elements[2 * i + 1]);
            BigElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htole(destElem);
        }
        ''' % { "op" : op, "readDest" : preload }

        # Copy the result union back into the destination register pieces.
        body += ''.join(destStore % { "reg" : idx } for idx in range(rCount))

        iop = InstObjParams(name, Name,
                            "RegRegOp",
                            { "code": body,
                              "r_count": rCount,
                              "predicate_test": predicateTest,
                              "op_class": opClass }, [])
        header_output += NeonRegRegOpDeclare.subst(iop)
        exec_output += NeonUnequalRegExecute.subst(iop)
        for elemType in types:
            exec_output += NeonExecDeclare.subst(
                { "targs" : elemType,
                  "class_name" : Name })

    def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
        # Emit a templated "RegRegOp" instruction class that converts each
        # BigElement of a four-register source into an Element of a
        # two-register destination.
        #
        #   name, Name -- mnemonic and class name handed to InstObjParams
        #   opClass    -- value stored under "op_class"
        #   types      -- element types; one NeonExecDeclare is emitted per entry
        #   op         -- C++ snippet run once per element; it can read
        #                 srcElem1 (BigElement) and is expected to assign
        #                 destElem (Element)
        #   readDest   -- when true, destElem is preloaded from the previous
        #                 destination contents before op runs
        global header_output, exec_output

        srcRegCount = 4
        destRegCount = 2

        # Per-register snippets, expanded once for every register index.
        srcLoad = '''
                srcReg1.regs[%(reg)d] = htole(FpOp1P%(reg)d_uw);
            '''
        destLoad = '''
                    destReg.regs[%(reg)d] = htole(FpDestP%(reg)d_uw);
                '''
        destStore = '''
            FpDestP%(reg)d_uw = letoh(destReg.regs[%(reg)d]);
            '''

        body = simdEnabledCheckCode + '''
        BigRegVect srcReg1;
        RegVect destReg;
        '''
        for idx in range(srcRegCount):
            body += srcLoad % { "reg" : idx }

        preload = ''
        if readDest:
            for idx in range(destRegCount):
                body += destLoad % { "reg" : idx }
            preload = 'destElem = letoh(destReg.elements[i]);'

        body += '''
        for (unsigned i = 0; i < eCount; i++) {
            BigElement srcElem1 = letoh(srcReg1.elements[i]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htole(destElem);
        }
        ''' % { "op" : op, "readDest" : preload }

        # Copy the narrow result back into the destination register pieces.
        for idx in range(destRegCount):
            body += destStore % { "reg" : idx }

        iop = InstObjParams(name, Name,
                            "RegRegOp",
                            { "code": body,
                              "r_count": 2,
                              "predicate_test": predicateTest,
                              "op_class": opClass }, [])
        header_output += NeonRegRegOpDeclare.subst(iop)
        exec_output += NeonUnequalRegExecute.subst(iop)
        for elemType in types:
            exec_output += NeonExecDeclare.subst(
                { "targs" : elemType,
                  "class_name" : Name })

    def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
        # Emit a templated "RegImmOp" instruction class that has no source
        # register: op computes every destination element on its own
        # (optionally starting from the old destination value).
        #
        #   name, Name -- mnemonic and class name handed to InstObjParams
        #   opClass    -- value stored under "op_class"
        #   types      -- element types; one NeonExecDeclare is emitted per entry
        #   rCount     -- number of FpDestP<n> register pieces
        #   op         -- C++ snippet run once per element; expected to assign
        #                 destElem
        #   readDest   -- when true, destElem is preloaded from the previous
        #                 destination contents before op runs
        global header_output, exec_output

        # Per-register snippets, expanded once for every register index.
        destLoad = '''
                    destReg.regs[%(reg)d] = htole(FpDestP%(reg)d_uw);
                '''
        destStore = '''
            FpDestP%(reg)d_uw = letoh(destReg.regs[%(reg)d]);
            '''

        body = simdEnabledCheckCode + '''
        RegVect destReg;
        '''
        preload = ''
        if readDest:
            body += ''.join(destLoad % { "reg" : idx }
                            for idx in range(rCount))
            preload = 'destElem = letoh(destReg.elements[i]);'

        body += '''
        for (unsigned i = 0; i < eCount; i++) {
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htole(destElem);
        }
        ''' % { "op" : op, "readDest" : preload }

        # Copy the result union back into the destination register pieces.
        for idx in range(rCount):
            body += destStore % { "reg" : idx }

        iop = InstObjParams(name, Name,
                            "RegImmOp",
                            { "code": body,
                              "r_count": rCount,
                              "predicate_test": predicateTest,
                              "op_class": opClass }, [])
        header_output += NeonRegImmOpDeclare.subst(iop)
        exec_output += NeonEqualRegExecute.subst(iop)
        for elemType in types:
            exec_output += NeonExecDeclare.subst(
                { "targs" : elemType,
                  "class_name" : Name })

    def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
        # Emit a templated "RegRegOp" instruction class that converts each
        # Element of a two-register source into a BigElement of a
        # four-register destination.
        #
        #   name, Name -- mnemonic and class name handed to InstObjParams
        #   opClass    -- value stored under "op_class"
        #   types      -- element types; one NeonExecDeclare is emitted per entry
        #   op         -- C++ snippet run once per element; it can read
        #                 srcElem1 (Element) and is expected to assign
        #                 destElem (BigElement)
        #   readDest   -- when true, destElem is preloaded from the previous
        #                 destination contents before op runs
        global header_output, exec_output
        eWalkCode = simdEnabledCheckCode + '''
        RegVect srcReg1;
        BigRegVect destReg = {};
        '''
        for reg in range(2):
            eWalkCode += '''
                srcReg1.regs[%(reg)d] = htole(FpOp1P%(reg)d_uw);
            ''' % { "reg" : reg }
        if readDest:
            for reg in range(4):
                eWalkCode += '''
                    destReg.regs[%(reg)d] = htole(FpDestP%(reg)d_uw);
                ''' % { "reg" : reg }
        # The preload must target the per-element temporary destElem;
        # assigning to destReg would try to overwrite the whole register
        # union and leave destElem uninitialised for op.
        readDestCode = ''
        if readDest:
            readDestCode = 'destElem = letoh(destReg.elements[i]);'
        eWalkCode += '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = letoh(srcReg1.elements[i]);
            BigElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htole(destElem);
        }
        ''' % { "op" : op, "readDest" : readDestCode }
        for reg in range(4):
            eWalkCode += '''
            FpDestP%(reg)d_uw = letoh(destReg.regs[%(reg)d]);
            ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "RegRegOp",
                            { "code": eWalkCode,
                              "r_count": 2,
                              "predicate_test": predicateTest,
                              "op_class": opClass }, [])
        header_output += NeonRegRegOpDeclare.subst(iop)
        exec_output += NeonUnequalRegExecute.subst(iop)
        for elemType in types:
            substDict = { "targs" : elemType,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)

    # vhadd: each source is halved after its low bit is masked off, the
    # halves are summed, and the carry generated by the two discarded low
    # bits is added back in. Registered for rCount 2 (D) and 4 (Q).
    vhaddCode = '''
        Element carryBit =
            (((unsigned)srcElem1 & 0x1) +
             ((unsigned)srcElem2 & 0x1)) >> 1;
        // Use division instead of a shift to ensure the sign extension works
        // right. The compiler will figure out if it can be a shift. Mask the
        // inputs so they get truncated correctly.
        destElem = (((srcElem1 & ~(Element)1) / 2) +
                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
    '''
    threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode)
    threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode)

    # vrhadd: same as vhadd except that an extra 1 is added to the low-bit
    # sum before it is shifted down, so the carry rounds the result up.
    vrhaddCode = '''
        Element carryBit =
            (((unsigned)srcElem1 & 0x1) +
             ((unsigned)srcElem2 & 0x1) + 1) >> 1;
        // Use division instead of a shift to ensure the sign extension works
        // right. The compiler will figure out if it can be a shift. Mask the
        // inputs so they get truncated correctly.
        destElem = (((srcElem1 & ~(Element)1) / 2) +
                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
    '''
    threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode)
    threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode)

    # vhsub: the halved sources are subtracted and a borrow derived from the
    # difference of the two low bits is subtracted as well. ("barrowBit" in
    # the snippet is the borrow bit.)
    vhsubCode = '''
        Element barrowBit =
            (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
        // Use division instead of a shift to ensure the sign extension works
        // right. The compiler will figure out if it can be a shift. Mask the
        // inputs so they get truncated correctly.
        destElem = (((srcElem1 & ~(Element)1) / 2) -
                    ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
    '''
    threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode)
    threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode)

    # Bitwise operations. All of them are instantiated for unsignedTypes
    # only, in an rCount 2 (D) and an rCount 4 (Q) variant.

    # vand: bitwise AND of the two sources.
    vandCode = '''
        destElem = srcElem1 & srcElem2;
    '''
    threeEqualRegInst("vand", "VandD", "SimdAluOp", unsignedTypes, 2, vandCode)
    threeEqualRegInst("vand", "VandQ", "SimdAluOp", unsignedTypes, 4, vandCode)

    # vbic: first source ANDed with the complement of the second.
    vbicCode = '''
        destElem = srcElem1 & ~srcElem2;
    '''
    threeEqualRegInst("vbic", "VbicD", "SimdAluOp", unsignedTypes, 2, vbicCode)
    threeEqualRegInst("vbic", "VbicQ", "SimdAluOp", unsignedTypes, 4, vbicCode)

    # vorr: bitwise OR of the two sources.
    vorrCode = '''
        destElem = srcElem1 | srcElem2;
    '''
    threeEqualRegInst("vorr", "VorrD", "SimdAluOp", unsignedTypes, 2, vorrCode)
    threeEqualRegInst("vorr", "VorrQ", "SimdAluOp", unsignedTypes, 4, vorrCode)

    # vmov reuses the vorr snippet; only the mnemonic, class name and op
    # class ("SimdMiscOp") differ.
    threeEqualRegInst("vmov", "VmovD", "SimdMiscOp", unsignedTypes, 2, vorrCode)
    threeEqualRegInst("vmov", "VmovQ", "SimdMiscOp", unsignedTypes, 4, vorrCode)

    # vorn: first source ORed with the complement of the second.
    vornCode = '''
        destElem = srcElem1 | ~srcElem2;
    '''
    threeEqualRegInst("vorn", "VornD", "SimdAluOp", unsignedTypes, 2, vornCode)
    threeEqualRegInst("vorn", "VornQ", "SimdAluOp", unsignedTypes, 4, vornCode)

    # veor: bitwise exclusive OR of the two sources.
    veorCode = '''
        destElem = srcElem1 ^ srcElem2;
    '''
    threeEqualRegInst("veor", "VeorD", "SimdAluOp", unsignedTypes, 2, veorCode)
    threeEqualRegInst("veor", "VeorQ", "SimdAluOp", unsignedTypes, 4, veorCode)

    # The three bit-select operations below pass True as the seventh
    # positional argument because their snippets read the incoming destElem.

    # vbif: keeps destination bits where srcElem2 is 1 and takes srcElem1
    # bits where srcElem2 is 0.
    vbifCode = '''
        destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
    '''
    threeEqualRegInst("vbif", "VbifD", "SimdAluOp", unsignedTypes, 2, vbifCode, True)
    threeEqualRegInst("vbif", "VbifQ", "SimdAluOp", unsignedTypes, 4, vbifCode, True)
    # vbit: takes srcElem1 bits where srcElem2 is 1 and keeps destination
    # bits where srcElem2 is 0.
    vbitCode = '''
        destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
    '''
    threeEqualRegInst("vbit", "VbitD", "SimdAluOp", unsignedTypes, 2, vbitCode, True)
    threeEqualRegInst("vbit", "VbitQ", "SimdAluOp", unsignedTypes, 4, vbitCode, True)
    # vbsl: the old destination acts as the selector; srcElem1 bits are
    # taken where it is 1 and srcElem2 bits where it is 0.
    vbslCode = '''
        destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
    '''
    threeEqualRegInst("vbsl", "VbslD", "SimdAluOp", unsignedTypes, 2, vbslCode, True)
    threeEqualRegInst("vbsl", "VbslQ", "SimdAluOp", unsignedTypes, 4, vbslCode, True)

    # vmax: per-element maximum; srcElem2 is chosen when the two are equal.
    vmaxCode = '''
        destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
    '''
    threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode)
    threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode)

    # vmin: per-element minimum; srcElem2 is chosen when the two are equal.
    vminCode = '''
        destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
    '''
    threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode)
    threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)

    # vadd: plain element-wise addition (class names carry an "N" prefix).
    vaddCode = '''
        destElem = srcElem1 + srcElem2;
    '''
    threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
    threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)

    # vpadd reuses the vadd snippet with pairwise=True; only an rCount 2
    # variant over smallUnsignedTypes is registered.
    threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes,
                      2, vaddCode, pairwise=True)
    # vaddl / vaddw share one snippet: both sources are cast to BigElement
    # before they are added.
    vaddlwCode = '''
        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
    '''
    threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
    threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
    # vaddhn: the BigElement sum is shifted right by the bit width of
    # Element, i.e. the upper half of the sum is kept.
    vaddhnCode = '''
        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
                   (sizeof(Element) * 8);
    '''
    threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
    # vraddhn: like vaddhn, but 1 << (Element bit width - 1) is added before
    # the shift so the discarded half rounds the result.
    vraddhnCode = '''
        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
                   (sizeof(Element) * 8);
    '''
    threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)

    # vsub: plain element-wise subtraction (class names carry an "N" prefix).
    vsubCode = '''
        destElem = srcElem1 - srcElem2;
    '''
    threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
    threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
    # vsubl / vsubw share one snippet: both sources are cast to BigElement
    # before they are subtracted.
    vsublwCode = '''
        destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
    '''
    threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
    threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)

    # vqadd (unsigned): if the sum is smaller than either source it has
    # wrapped, so the result becomes (Element)(-1) and fpscr.qc is set.
    vqaddUCode = '''
        destElem = srcElem1 + srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (destElem < srcElem1 || destElem < srcElem2) {
            destElem = (Element)(-1);
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
    threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
    # vsubhn: the BigElement difference is shifted right by the bit width of
    # Element, i.e. the upper half of the difference is kept.
    vsubhnCode = '''
        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
                   (sizeof(Element) * 8);
    '''
    threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
    # vrsubhn: like vsubhn, but 1 << (Element bit width - 1) is added before
    # the shift so the discarded half rounds the result.
    vrsubhnCode = '''
        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
                   (sizeof(Element) * 8);
    '''
    threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)

    # vcadd: the snippet walks the registers itself, two elements at a time
    # (indices 2*i and 2*i+1). Bit 24 of machInst selects which of the two
    # second-operand elements is negated (via fplibNeg) before the pair is
    # swapped and added to the first operand with fplibAdd.
    vcaddCode = '''
        bool rot = bits(machInst, 24);
        Element el1;
        Element el3;

        for (int i = 0; i < eCount/2; ++i) {
            Element srcElem1_1 = letoh(srcReg1.elements[2*i]);
            Element srcElem1_2 = letoh(srcReg1.elements[2*i+1]);
            Element srcElem2_1 = letoh(srcReg2.elements[2*i]);
            Element srcElem2_2 = letoh(srcReg2.elements[2*i+1]);
            Element destElem_1;
            Element destElem_2;
            if (rot) {
                el1 = srcElem2_2;
                el3 = fplibNeg<Element>(srcElem2_1);
            } else {
                el1 = fplibNeg<Element>(srcElem2_2);
                el3 = srcElem2_1;
            }

            destElem_1 = fplibAdd<Element>(srcElem1_1, el1, fpscr);
            destElem_2 = fplibAdd<Element>(srcElem1_2, el3, fpscr);
            destReg.elements[2*i] = htole(destElem_1);
            destReg.elements[2*i+1] = htole(destElem_2);
         }
         '''

    # VCADD
    # Instantiated for uint16_t and uint32_t element containers with
    # standardFpcsr=True and complex=True, in rCount 2 (D) and 4 (Q) variants.
    threeEqualRegInst("vcadd", "VcaddD", "SimdFloatAddOp",
                            ("uint16_t", "uint32_t"), 2, vcaddCode,
                            standardFpcsr=True, complex=True)
    threeEqualRegInst("vcadd", "VcaddQ", "SimdFloatAddOp",
                            ("uint16_t", "uint32_t"), 4,
                           vcaddCode, standardFpcsr=True, complex=True)

    # vcmla: snippet template with two Python-level placeholders:
    #   %(rot)s   -- arguments to bits(machInst, ...) giving the rotation field
    #   %(index)s -- which element pair of srcReg2 is read ('i' or 'imm')
    # For each element pair the rotation value (0..3) picks the multiplicands
    # el1..el4, optionally negated with fplibNeg, and the products are
    # accumulated onto the existing destination pair with fplibMulAdd.
    vcmlaCode = '''
        uint8_t rot = bits(machInst, %(rot)s);
        Element el1;
        Element el2;
        Element el3;
        Element el4;
        for (int i = 0; i < eCount/2; ++i) {

            Element srcElem1_1 = letoh(srcReg1.elements[2*i]);
            Element srcElem1_2 = letoh(srcReg1.elements[2*i+1]);
            Element srcElem2_1 = letoh(srcReg2.elements[2*%(index)s]);
            Element srcElem2_2 = letoh(srcReg2.elements[2*%(index)s+1]);
            Element destElem_1 = letoh(destReg.elements[2*i]);
            Element destElem_2 = letoh(destReg.elements[2*i+1]);

            switch (rot) {
              case 0x0:
                {
                  el1 = srcElem2_1;
                  el2 = srcElem1_1;
                  el3 = srcElem2_2;
                  el4 = srcElem1_1;
                  break;
                }
              case 0x1:
                {
                  el1 = fplibNeg<Element>(srcElem2_2);
                  el2 = srcElem1_2;
                  el3 = srcElem2_1;
                  el4 = srcElem1_2;
                  break;
                }
              case 0x2:
                {
                  el1 = fplibNeg<Element>(srcElem2_1);
                  el2 = srcElem1_1;
                  el3 = fplibNeg<Element>(srcElem2_2);
                  el4 = srcElem1_1;
                  break;
                }
              case 0x3:
                {
                  el1 = srcElem2_2;
                  el2 = srcElem1_2;
                  el3 = fplibNeg<Element>(srcElem2_1);
                  el4 = srcElem1_2;
                  break;
                }
            }

            destElem_1 = fplibMulAdd<Element>(destElem_1, el2, el1, fpscr);
            destElem_2 = fplibMulAdd<Element>(destElem_2, el4, el3, fpscr);

            destReg.elements[2*i] = htole(destElem_1);
            destReg.elements[2*i+1] = htole(destElem_2);
         }
         '''

    # VCMLA (by element)
    # Rotation comes from machInst bits 21:20 and the second operand pair is
    # always the one selected by imm.
    vcmla_imm = vcmlaCode % {'rot': '21, 20', 'index': 'imm'}
    threeEqualRegInst("vcmla", "VcmlaElemD", "SimdFloatMultAccOp",
                           ("uint16_t", "uint32_t"), 2, vcmla_imm,
                           readDest=True, byElem=True, standardFpcsr=True,
                           complex=True)
    threeEqualRegInst("vcmla", "VcmlaElemQ", "SimdFloatMultAccOp",
                           ("uint16_t", "uint32_t"), 4, vcmla_imm,
                           readDest=True, byElem=True, standardFpcsr=True,
                           complex=True)

    # VCMLA (vector)
    # Rotation comes from machInst bits 24:23 and the second operand pair
    # tracks the loop index i.
    vcmla_vec = vcmlaCode % {'rot': '24, 23', 'index': 'i'}
    threeEqualRegInst("vcmla", "VcmlaD", "SimdFloatMultAccOp",
                            ("uint16_t", "uint32_t"), 2, vcmla_vec,
                             readDest=True, standardFpcsr=True, complex=True)
    threeEqualRegInst("vcmla", "VcmlaQ", "SimdFloatMultAccOp",
                            ("uint16_t", "uint32_t"), 4, vcmla_vec,
                             readDest=True, standardFpcsr=True, complex=True)

    # vqadd (signed): overflow is detected when both sources have the same
    # sign and the sum's sign differs; the result is then clamped to the
    # numeric_limits max or min of Element and fpscr.qc is set.
    vqaddSCode = '''
        destElem = srcElem1 + srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        bool negDest = (destElem < 0);
        bool negSrc1 = (srcElem1 < 0);
        bool negSrc2 = (srcElem2 < 0);
        if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
            if (negDest)
                /* If (>=0) plus (>=0) yields (<0), saturate to +. */
                destElem = std::numeric_limits<Element>::max();
            else
                /* If (<0) plus (<0) yields (>=0), saturate to -. */
                destElem = std::numeric_limits<Element>::min();
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
    threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)

    # vqsub (unsigned): a difference larger than srcElem1 means the
    # subtraction wrapped, so the result becomes 0 and fpscr.qc is set.
    vqsubUCode = '''
        destElem = srcElem1 - srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (destElem > srcElem1) {
            destElem = 0;
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
    threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)

    # vqsub (signed): overflow is detected when the sources have opposite
    # signs and the difference's sign differs from srcElem1's; the result is
    # then clamped to the numeric_limits max or min of Element and fpscr.qc
    # is set.
    vqsubSCode = '''
        destElem = srcElem1 - srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        bool negDest = (destElem < 0);
        bool negSrc1 = (srcElem1 < 0);
        bool posSrc2 = (srcElem2 >= 0);
        if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
            if (negDest)
                /* If (>=0) minus (<0) yields (<0), saturate to +. */
                destElem = std::numeric_limits<Element>::max();
            else
                /* If (<0) minus (>=0) yields (>=0), saturate to -. */
                destElem = std::numeric_limits<Element>::min();
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
    threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)

    # Comparisons: each element becomes (Element)(-1) when the predicate
    # holds and 0 otherwise.

    # vcgt: srcElem1 > srcElem2.
    vcgtCode = '''
        destElem =  (srcElem1 > srcElem2) ? (Element)(-1) : 0;
    '''
    threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
    threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)

    # vcge: srcElem1 >= srcElem2.
    vcgeCode = '''
        destElem =  (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
    '''
    threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
    threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)

    # vceq: srcElem1 == srcElem2 (instantiated for unsignedTypes only).
    vceqCode = '''
        destElem =  (srcElem1 == srcElem2) ? (Element)(-1) : 0;
    '''
    threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
    threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)

    # vshl: the shift amount is srcElem2 truncated to a signed 8-bit value.
    # A negative amount shifts right by its magnitude (with the sign bits
    # filled in by hand for negative inputs); a non-negative amount shifts
    # left. Amounts of at least the element width give 0 before sign fill.
    vshlCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
            // Make sure the right shift sign extended when it should.
            if (ltz(srcElem1) && !ltz(destElem)) {
                destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                             1 - shiftAmt));
            }
        } else {
            if (shiftAmt >= sizeof(Element) * 8) {
                destElem = 0;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    '''
    threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode)
    threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode)

    # vrshl: same shift-amount convention as vshl, but on a right shift the
    # last bit shifted out (rBit) is added to the result. A shift amount of
    # zero copies srcElem1 unchanged.
    vrshlCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            Element rBit = 0;
            if (shiftAmt <= sizeof(Element) * 8)
                rBit = bits(srcElem1, shiftAmt - 1);
            if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
                rBit = 1;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
            // Make sure the right shift sign extended when it should.
            if (ltz(srcElem1) && !ltz(destElem)) {
                destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                             1 - shiftAmt));
            }
            destElem += rBit;
        } else if (shiftAmt > 0) {
            if (shiftAmt >= sizeof(Element) * 8) {
                destElem = 0;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        } else {
            destElem = srcElem1;
        }
    '''
    # NOTE(review): vrshl is registered with "SimdAluOp" while vshl above
    # uses "SimdShiftOp" -- confirm whether the difference is intentional.
    threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
    threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)

    # vqshl (unsigned): shift amount is srcElem2 truncated to a signed 8-bit
    # value. Negative amounts shift right. Positive amounts shift left, and
    # if any set bit would be shifted out the result becomes all ones and
    # fpscr.qc is set. A zero amount copies srcElem1 unchanged.
    vqshlUCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
        } else if (shiftAmt > 0) {
            if (shiftAmt >= sizeof(Element) * 8) {
                if (srcElem1 != 0) {
                    destElem = mask(sizeof(Element) * 8);
                    fpscr.qc = 1;
                } else {
                    destElem = 0;
                }
            } else {
                if (bits(srcElem1, sizeof(Element) * 8 - 1,
                            sizeof(Element) * 8 - shiftAmt)) {
                    destElem = mask(sizeof(Element) * 8);
                    fpscr.qc = 1;
                } else {
                    destElem = srcElem1 << shiftAmt;
                }
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
    threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)

    # vqshl (signed): same shift-amount convention. Right shifts fill in the
    # sign bits by hand. Left shifts saturate (sat) when the bits shifted out,
    # together with the new top bit, are not all copies of the sign; the
    # result is then mask(width - 1), complemented for negative inputs, and
    # fpscr.qc is set.
    vqshlSCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
            // Make sure the right shift sign extended when it should.
            if (srcElem1 < 0 && destElem >= 0) {
                destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                             1 - shiftAmt));
            }
        } else if (shiftAmt > 0) {
            bool sat = false;
            if (shiftAmt >= sizeof(Element) * 8) {
                if (srcElem1 != 0)
                    sat = true;
                else
                    destElem = 0;
            } else {
                if (bits(srcElem1, sizeof(Element) * 8 - 1,
                            sizeof(Element) * 8 - 1 - shiftAmt) !=
                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                    sat = true;
                } else {
                    destElem = srcElem1 << shiftAmt;
                }
            }
            if (sat) {
                fpscr.qc = 1;
                destElem = mask(sizeof(Element) * 8 - 1);
                if (srcElem1 < 0)
                    destElem = ~destElem;
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
    '''
    # NOTE(review): the signed variants use "SimdCmpOp" while the unsigned
    # ones above use "SimdAluOp" -- confirm whether that is intentional.
    threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
    threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)

    # vqrshl (unsigned): shift amount is srcElem2 truncated to a signed 8-bit
    # value. Negative amounts shift right and add the last bit shifted out
    # (rBit). Non-negative amounts shift left, and if any set bit would be
    # shifted out the result becomes all ones and fpscr.qc is set.
    # NOTE(review): unlike vqshlUCode there is no separate shiftAmt == 0
    # branch here, so a zero amount goes through the bits(...) overflow
    # test -- confirm that bits() behaves as intended in that case.
    vqrshlUCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            Element rBit = 0;
            if (shiftAmt <= sizeof(Element) * 8)
                rBit = bits(srcElem1, shiftAmt - 1);
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
            destElem += rBit;
        } else {
            if (shiftAmt >= sizeof(Element) * 8) {
                if (srcElem1 != 0) {
                    destElem = mask(sizeof(Element) * 8);
                    fpscr.qc = 1;
                } else {
                    destElem = 0;
                }
            } else {
                if (bits(srcElem1, sizeof(Element) * 8 - 1,
                            sizeof(Element) * 8 - shiftAmt)) {
                    destElem = mask(sizeof(Element) * 8);
                    fpscr.qc = 1;
                } else {
                    destElem = srcElem1 << shiftAmt;
                }
            }
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
    threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)

    # VQRSHL on signed element types: saturating, rounding shift of srcElem1
    # by a per-element amount taken from the low byte of srcElem2
    # (interpreted as signed).
    #  - Negative amount: right shift by the magnitude. rBit is the last bit
    #    shifted out (forced to 1 for a negative source when the magnitude
    #    exceeds the element width); the shifted value is explicitly
    #    sign-extended and rBit is then added to round it.
    #  - Positive amount: left shift; if the bits shifted out together with
    #    the new sign bit are not a pure sign extension, the result saturates
    #    to the extreme value matching the sign of srcElem1 and fpscr.qc is
    #    set.
    #  - Zero amount: the element is copied unchanged.
    vqrshlSCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            Element rBit = 0;
            if (shiftAmt <= sizeof(Element) * 8)
                rBit = bits(srcElem1, shiftAmt - 1);
            if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
                rBit = 1;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
            // Make sure the right shift sign extended when it should.
            if (srcElem1 < 0 && destElem >= 0) {
                destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                             1 - shiftAmt));
            }
            destElem += rBit;
        } else if (shiftAmt > 0) {
            bool sat = false;
            if (shiftAmt >= sizeof(Element) * 8) {
                if (srcElem1 != 0)
                    sat = true;
                else
                    destElem = 0;
            } else {
                if (bits(srcElem1, sizeof(Element) * 8 - 1,
                            sizeof(Element) * 8 - 1 - shiftAmt) !=
                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                    sat = true;
                } else {
                    destElem = srcElem1 << shiftAmt;
                }
            }
            if (sat) {
                fpscr.qc = 1;
                destElem = mask(sizeof(Element) * 8 - 1);
                if (srcElem1 < 0)
                    destElem = ~destElem;
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
    '''
    # Registered for both the 2- and 4-count instruction classes.
    threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
    threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)

    # VABA: add the absolute difference of the two source elements (larger
    # minus smaller) to the destination element.
    vabaCode = '''
        destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                            (srcElem2 - srcElem1);
    '''
    for Name, rCount in (("VabaD", 2), ("VabaQ", 4)):
        threeEqualRegInst("vaba", Name, "SimdAddAccOp", allTypes, rCount,
                          vabaCode, True)
    # VABAL: same accumulation, with both operands widened to BigElement
    # before subtracting.
    vabalCode = '''
        destElem += (srcElem1 > srcElem2) ?
            ((BigElement)srcElem1 - (BigElement)srcElem2) :
            ((BigElement)srcElem2 - (BigElement)srcElem1);
    '''
    threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes,
                     vabalCode, True)

    # VABD: destination element = absolute difference of the source elements
    # (larger minus smaller).
    vabdCode = '''
        destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                           (srcElem2 - srcElem1);
    '''
    for Name, rCount in (("VabdD", 2), ("VabdQ", 4)):
        threeEqualRegInst("vabd", Name, "SimdAddOp", allTypes, rCount,
                          vabdCode)
    # VABDL: same difference computed on operands widened to BigElement.
    vabdlCode = '''
        destElem = (srcElem1 > srcElem2) ?
            ((BigElement)srcElem1 - (BigElement)srcElem2) :
            ((BigElement)srcElem2 - (BigElement)srcElem1);
    '''
    threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes,
                     vabdlCode)

    # VTST: all-ones result element when the sources share any set bit,
    # zero otherwise.
    vtstCode = '''
        destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
    '''
    for Name, rCount in (("VtstD", 2), ("VtstQ", 4)):
        threeEqualRegInst("vtst", Name, "SimdAluOp", unsignedTypes, rCount,
                          vtstCode)

    # VMUL (integer): element-wise product at element width.
    vmulCode = '''
        destElem = srcElem1 * srcElem2;
    '''
    for Name, rCount in (("NVmulD", 2), ("NVmulQ", 4)):
        threeEqualRegInst("vmul", Name, "SimdMultOp", allTypes, rCount,
                          vmulCode)
    # VMULL: product of the operands after widening each to BigElement.
    vmullCode = '''
        destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
    '''
    threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes,
                     vmullCode)

    # VMLA (integer): add the element-wise product to the destination.
    vmlaCode = '''
        destElem = destElem + srcElem1 * srcElem2;
    '''
    for Name, rCount in (("NVmlaD", 2), ("NVmlaQ", 4)):
        threeEqualRegInst("vmla", Name, "SimdMultAccOp", allTypes, rCount,
                          vmlaCode, True)
    # VMLAL: as above with the operands widened to BigElement first.
    vmlalCode = '''
        destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
    '''
    threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes,
                     vmlalCode, True)

    # VQDMLAL: saturating doubling multiply-accumulate long.
    #  1. midElem = 2 * srcElem1 * srcElem2 in the wide (BigElement) type.
    #     The only operand pair whose doubled product does not fit is
    #     maxNeg * maxNeg; it is clamped to the largest positive BigElement
    #     and fpscr.qc is set. (The previous code also clamped
    #     halfNeg * maxNeg, whose doubled product 2^(2N-2) is representable,
    #     producing a wrong result and a spurious QC flag.)
    #  2. midElem is added to destElem; if both had the same sign and the sum
    #     has the opposite sign, the sum overflowed, so destElem saturates in
    #     the direction of its original sign and fpscr.qc is set.
    vqdmlalCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
        Element maxNeg = std::numeric_limits<Element>::min();
        // Only maxNeg * maxNeg doubles to a value outside BigElement.
        if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
            fpscr.qc = 1;
        }
        bool negPreDest = ltz(destElem);
        destElem += midElem;
        bool negDest = ltz(destElem);
        bool negMid = ltz(midElem);
        if (negPreDest == negMid && negMid != negDest) {
            destElem = mask(sizeof(BigElement) * 8 - 1);
            if (negPreDest)
                destElem = ~destElem;
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
    '''
    threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)

    # VQDMLSL: saturating doubling multiply-subtract long.
    #  1. midElem = 2 * srcElem1 * srcElem2 in the wide (BigElement) type.
    #     Only maxNeg * maxNeg overflows when doubled; that case is clamped
    #     to the largest positive BigElement and fpscr.qc is set. (The
    #     previous code also clamped halfNeg * maxNeg, whose doubled product
    #     2^(2N-2) is representable.)
    #  2. midElem is subtracted from destElem. posMid is true when -midElem is
    #     negative; if the original destElem sign equals posMid but the
    #     result sign differs, the subtraction overflowed, so destElem
    #     saturates in the direction of its original sign and fpscr.qc is
    #     set.
    vqdmlslCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
        Element maxNeg = std::numeric_limits<Element>::min();
        // Only maxNeg * maxNeg doubles to a value outside BigElement.
        if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
            fpscr.qc = 1;
        }
        bool negPreDest = ltz(destElem);
        destElem -= midElem;
        bool negDest = ltz(destElem);
        bool posMid = ltz((BigElement)-midElem);
        if (negPreDest == posMid && posMid != negDest) {
            destElem = mask(sizeof(BigElement) * 8 - 1);
            if (negPreDest)
                destElem = ~destElem;
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
    '''
    threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)

    # VQDMULL: saturating doubling multiply long. The result is
    # 2 * srcElem1 * srcElem2 in the wide type; when both operands are the
    # most negative Element value the doubled product does not fit, so the
    # result is clamped to the largest positive BigElement and fpscr.qc is
    # set.
    vqdmullCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
        if (srcElem1 == srcElem2 &&
                srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
            destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
    '''
    # This form does not accumulate into the destination, so it uses the
    # plain multiply op class, matching the by-scalar "Vqdmulls" registration
    # and the other non-accumulating multiplies in this file.
    threeRegLongInst("vqdmull", "Vqdmull", "SimdMultOp", smallTypes, vqdmullCode)

    # VMLS (integer): subtract the element-wise product from the destination.
    vmlsCode = '''
        destElem = destElem - srcElem1 * srcElem2;
    '''
    for Name, rCount in (("NVmlsD", 2), ("NVmlsQ", 4)):
        threeEqualRegInst("vmls", Name, "SimdMultAccOp", allTypes, rCount,
                          vmlsCode, True)
    # VMLSL: as above with the operands widened to BigElement first.
    vmlslCode = '''
        destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
    '''
    threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes,
                     vmlslCode, True)

    # Carry-less multiply at element width: for every set bit j of srcElem2,
    # srcElem1 << j is XORed into the result.
    vmulpCode = '''
        destElem = 0;
        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
            if (bits(srcElem2, j))
                destElem ^= srcElem1 << j;
        }
    '''
    for Name, rCount in (("NVmulpD", 2), ("NVmulpQ", 4)):
        threeEqualRegInst("vmul", Name, "SimdMultOp", unsignedTypes, rCount,
                          vmulpCode)
    # Long form: srcElem1 is widened to BigElement before shifting, so no
    # partial-product bits are lost.
    vmullpCode = '''
        destElem = 0;
        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
            if (bits(srcElem2, j))
                destElem ^= (BigElement)srcElem1 << j;
        }
    '''
    threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes,
                     vmullpCode)

    # Pairwise integer max/min reuse the element-wise vmaxCode/vminCode
    # snippets, registered with pairwise=True.
    for name, Name, pCode in (("vpmax", "VpmaxD", vmaxCode),
                              ("vpmin", "VpminD", vminCode)):
        threeEqualRegInst(name, Name, "SimdCmpOp", smallTypes, 2, pCode,
                          pairwise=True)

    # VQDMULH: saturating doubling multiply returning the high half. The
    # doubled 64-bit product is shifted right by the element width. When both
    # operands equal the most negative Element value the result is replaced
    # by ~srcElem1 (the most positive value) and fpscr.qc is set.
    vqdmulhCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
                   (sizeof(Element) * 8);
        if (srcElem1 == srcElem2 &&
                srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
            destElem = ~srcElem1;
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
    '''
    # Registered for both the 2- and 4-count instruction classes.
    threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
    threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)

    # VQRDMULH: saturating rounding doubling multiply returning the high
    # half. The doubled 64-bit product plus a rounding constant of
    # 1 << (element bits - 1) is shifted right by the element width.
    # The rounded result only leaves the Element range when both operands are
    # the most negative value (the true result would be +2^(N-1)); that case
    # saturates to the most positive Element and sets fpscr.qc. (The previous
    # code also treated halfNeg * maxNeg as overflowing, although its result
    # 2^(N-2) is representable, and returned the most negative value for it.)
    vqrdmulhCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
                    ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
                   (sizeof(Element) * 8);
        Element maxNeg = std::numeric_limits<Element>::min();
        // Only maxNeg * maxNeg overflows, and always in the positive
        // direction.
        if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
            destElem = mask(sizeof(Element) * 8 - 1);
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqrdmulh", "VqrdmulhD",
            "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
    threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
            "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)

    # Floating-point max/min family. The snippet is a format string whose
    # %s is filled with the fplib operation suffix (Max, MaxNum, Min,
    # MinNum).
    vMinMaxFpCode = '''
        destElem = fplib%s<Element>(srcElem1, srcElem2, fpscr);
    '''
    # One (mnemonic, class name, count, fplib suffix, pairwise) tuple per
    # instruction: each base entry is expanded into its "DFp" (count 2) and
    # "QFp" (count 4) variants, in that order.
    vMinMaxInsts = [
        (name, Name + suffix, rCount, op, pairwise)
        for name, Name, op, pairwise in (
            ("vmax",   "Vmax",   "Max",    False),
            ("vmaxnm", "Vmaxnm", "MaxNum", False),
            ("vpmax",  "Vpmax",  "Max",    True),
            ("vmin",   "Vmin",   "Min",    False),
            ("vminnm", "Vminnm", "MinNum", False),
            ("vpmin",  "Vpmin",  "Min",    True),
        )
        for suffix, rCount in (("DFp", 2), ("QFp", 4))
    ]
    for name, Name, rCount, op, pairwise in vMinMaxInsts:
        threeEqualRegInst(name, Name, "SimdFloatCmpOp", ("uint32_t",),
                          rCount, vMinMaxFpCode % op,
                          pairwise=pairwise, standardFpcsr=True)

    # Single-precision add: binaryOp() is invoked with fpAddS on the two
    # source registers; the FPSCR copy it updates is written back to
    # FpscrExc.
    vaddfpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
                           true, true, VfpRoundNearest);
        FpscrExc = fpscr;
    '''
    for Name, rCount in (("VaddDFp", 2), ("VaddQFp", 4)):
        threeEqualRegInstFp("vadd", Name, "SimdFloatAddOp", ("float",),
                            rCount, vaddfpCode)
    # The pairwise add shares the same snippet, registered with
    # pairwise=True.
    for Name, rCount in (("VpaddDFp", 2), ("VpaddQFp", 4)):
        threeEqualRegInstFp("vpadd", Name, "SimdFloatAddOp", ("float",),
                            rCount, vaddfpCode, pairwise=True)

    # Single-precision subtract: binaryOp() with fpSubS; FPSCR changes are
    # written back to FpscrExc.
    vsubfpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
                           true, true, VfpRoundNearest);
        FpscrExc = fpscr;
    '''
    for Name, rCount in (("VsubDFp", 2), ("VsubQFp", 4)):
        threeEqualRegInstFp("vsub", Name, "SimdFloatAddOp", ("float",),
                            rCount, vsubfpCode)

    # Single-precision multiply: binaryOp() with fpMulS; FPSCR changes are
    # written back to FpscrExc.
    vmulfpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                           true, true, VfpRoundNearest);
        FpscrExc = fpscr;
    '''
    for Name, rCount in (("NVmulDFp", 2), ("NVmulQFp", 4)):
        threeEqualRegInstFp("vmul", Name, "SimdFloatMultOp", ("float",),
                            rCount, vmulfpCode)

    # Single-precision multiply-accumulate done as two separate binaryOp()
    # steps: the product (fpMulS) is formed first and then added (fpAddS) to
    # the destination register.
    vmlafpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                             true, true, VfpRoundNearest);
        destReg = binaryOp(fpscr, mid, destReg, fpAddS,
                           true, true, VfpRoundNearest);
        FpscrExc = fpscr;
    '''
    for Name, rCount in (("NVmlaDFp", 2), ("NVmlaQFp", 4)):
        threeEqualRegInstFp("vmla", Name, "SimdFloatMultAccOp", ("float",),
                            rCount, vmlafpCode, True)

    # Fused multiply-add: a single ternaryOp() call with fpMulAdd<float>
    # over the two sources and the destination register.
    vfmafpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        destReg = ternaryOp(fpscr, srcReg1, srcReg2, destReg, fpMulAdd<float>,
                            true, true, VfpRoundNearest);
        FpscrExc = fpscr;
    '''
    for Name, rCount in (("NVfmaDFp", 2), ("NVfmaQFp", 4)):
        threeEqualRegInstFp("vfma", Name, "SimdFloatMultAccOp", ("float",),
                            rCount, vfmafpCode, True)

    # Fused multiply-subtract: same ternaryOp()/fpMulAdd<float> call as the
    # fused multiply-add, with the first source negated.
    vfmsfpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        destReg = ternaryOp(fpscr, -srcReg1, srcReg2, destReg, fpMulAdd<float>,
                            true, true, VfpRoundNearest);
        FpscrExc = fpscr;
    '''
    for Name, rCount in (("NVfmsDFp", 2), ("NVfmsQFp", 4)):
        threeEqualRegInstFp("vfms", Name, "SimdFloatMultAccOp", ("float",),
                            rCount, vfmsfpCode, True)

    # Single-precision multiply-subtract done as two separate binaryOp()
    # steps: the product (fpMulS) is formed first and then subtracted
    # (fpSubS) from the destination register.
    vmlsfpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                             true, true, VfpRoundNearest);
        destReg = binaryOp(fpscr, destReg, mid, fpSubS,
                           true, true, VfpRoundNearest);
        FpscrExc = fpscr;
    '''
    for Name, rCount in (("NVmlsDFp", 2), ("NVmlsQFp", 4)):
        threeEqualRegInstFp("vmls", Name, "SimdFloatMultAccOp", ("float",),
                            rCount, vmlsfpCode, True)

    # Floating-point compare greater-than. binaryOp() runs vcgtFunc; a result
    # of 0 yields an all-ones destination, any other result yields 0, and a
    # result of 2.0 additionally sets fpscr.ioc.
    vcgtfpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
                             true, true, VfpRoundNearest);
        destReg = (res == 0) ? -1 : 0;
        if (res == 2.0)
            fpscr.ioc = 1;
        FpscrExc = fpscr;
    '''
    for Name, rCount in (("VcgtDFp", 2), ("VcgtQFp", 4)):
        threeEqualRegInstFp("vcgt", Name, "SimdFloatCmpOp", ("float",),
                            rCount, vcgtfpCode, toInt=True)

    # Floating-point compare greater-or-equal. binaryOp() runs vcgeFunc; a
    # result of 0 yields an all-ones destination, any other result yields 0,
    # and a result of 2.0 additionally sets fpscr.ioc.
    vcgefpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
                             true, true, VfpRoundNearest);
        destReg = (res == 0) ? -1 : 0;
        if (res == 2.0)
            fpscr.ioc = 1;
        FpscrExc = fpscr;
    '''
    for Name, rCount in (("VcgeDFp", 2), ("VcgeQFp", 4)):
        threeEqualRegInstFp("vcge", Name, "SimdFloatCmpOp", ("float",),
                            rCount, vcgefpCode, toInt=True)

    # Floating-point "vacgt" compare. binaryOp() runs vacgtFunc; a result of
    # 0 yields an all-ones destination, any other result yields 0, and a
    # result of 2.0 additionally sets fpscr.ioc.
    vacgtfpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
                             true, true, VfpRoundNearest);
        destReg = (res == 0) ? -1 : 0;
        if (res == 2.0)
            fpscr.ioc = 1;
        FpscrExc = fpscr;
    '''
    for Name, rCount in (("VacgtDFp", 2), ("VacgtQFp", 4)):
        threeEqualRegInstFp("vacgt", Name, "SimdFloatCmpOp", ("float",),
                            rCount, vacgtfpCode, toInt=True)

    # Floating-point "vacge" compare. binaryOp() runs vacgeFunc; a result of
    # 0 yields an all-ones destination, any other result yields 0, and a
    # result of 2.0 additionally sets fpscr.ioc.
    vacgefpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
                             true, true, VfpRoundNearest);
        destReg = (res == 0) ? -1 : 0;
        if (res == 2.0)
            fpscr.ioc = 1;
        FpscrExc = fpscr;
    '''
    for Name, rCount in (("VacgeDFp", 2), ("VacgeQFp", 4)):
        threeEqualRegInstFp("vacge", Name, "SimdFloatCmpOp", ("float",),
                            rCount, vacgefpCode, toInt=True)

    # Floating-point compare equal. binaryOp() runs vceqFunc; a result of 0
    # yields an all-ones destination, any other result yields 0, and a
    # result of 2.0 additionally sets fpscr.ioc.
    vceqfpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
                             true, true, VfpRoundNearest);
        destReg = (res == 0) ? -1 : 0;
        if (res == 2.0)
            fpscr.ioc = 1;
        FpscrExc = fpscr;
    '''
    for Name, rCount in (("VceqDFp", 2), ("VceqQFp", 4)):
        threeEqualRegInstFp("vceq", Name, "SimdFloatCmpOp", ("float",),
                            rCount, vceqfpCode, toInt=True)

    # "vrecps": binaryOp() with fpRecpsS on the two source registers; FPSCR
    # changes are written back to FpscrExc.
    vrecpsCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
                           true, true, VfpRoundNearest);
        FpscrExc = fpscr;
    '''
    for Name, rCount in (("VrecpsDFp", 2), ("VrecpsQFp", 4)):
        threeEqualRegInstFp("vrecps", Name, "SimdFloatMultAccOp", ("float",),
                            rCount, vrecpsCode)

    # "vrsqrts": binaryOp() with fpRSqrtsS on the two source registers; FPSCR
    # changes are written back to FpscrExc.
    vrsqrtsCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
                           true, true, VfpRoundNearest);
        FpscrExc = fpscr;
    '''
    for Name, rCount in (("VrsqrtsDFp", 2), ("VrsqrtsQFp", 4)):
        threeEqualRegInstFp("vrsqrts", Name, "SimdFloatMiscOp", ("float",),
                            rCount, vrsqrtsCode)

    # Floating-point absolute difference: subtract via binaryOp()/fpSubS,
    # then take fabs() of the intermediate result.
    vabdfpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
                             true, true, VfpRoundNearest);
        destReg = fabs(mid);
        FpscrExc = fpscr;
    '''
    for Name, rCount in (("VabdDFp", 2), ("VabdQFp", 4)):
        threeEqualRegInstFp("vabd", Name, "SimdFloatAddOp", ("float",),
                            rCount, vabdfpCode)

    # "vmla" variants built with the two-register helpers; they reuse the
    # integer, floating-point and long multiply-accumulate snippets.
    for Name, rCount in (("VmlasD", 2), ("VmlasQ", 4)):
        twoEqualRegInst("vmla", Name, "SimdMultAccOp", unsignedTypes,
                        rCount, vmlaCode, True)
    for Name, rCount in (("VmlasDFp", 2), ("VmlasQFp", 4)):
        twoEqualRegInstFp("vmla", Name, "SimdFloatMultAccOp", ("float",),
                          rCount, vmlafpCode, True)
    twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes,
                   vmlalCode, True)

    # "vmls" variants built with the two-register helpers; they reuse the
    # integer, floating-point and long multiply-subtract snippets.
    for Name, rCount in (("VmlssD", 2), ("VmlssQ", 4)):
        twoEqualRegInst("vmls", Name, "SimdMultAccOp", allTypes,
                        rCount, vmlsCode, True)
    for Name, rCount in (("VmlssDFp", 2), ("VmlssQFp", 4)):
        twoEqualRegInstFp("vmls", Name, "SimdFloatMultAccOp", ("float",),
                          rCount, vmlsfpCode, True)
    twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes,
                   vmlslCode, True)

    # "vmul" variants built with the two-register helpers; they reuse the
    # integer, floating-point and long multiply snippets.
    for Name, rCount in (("VmulsD", 2), ("VmulsQ", 4)):
        twoEqualRegInst("vmul", Name, "SimdMultOp", allTypes,
                        rCount, vmulCode)
    for Name, rCount in (("VmulsDFp", 2), ("VmulsQFp", 4)):
        twoEqualRegInstFp("vmul", Name, "SimdFloatMultOp", ("float",),
                          rCount, vmulfpCode)
    twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes,
                   vmullCode)

    # Saturating doubling multiplies built with the two-register helpers.
    # Each tuple holds the exact positional arguments of one registration,
    # so the entry without a trailing flag keeps the helper's default.
    for longArgs in (
            ("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode),
            ("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes,
             vqdmlalCode, True),
            ("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes,
             vqdmlslCode, True)):
        twoRegLongInst(*longArgs)
    for Name, rCount in (("VqdmulhsD", 2), ("VqdmulhsQ", 4)):
        twoEqualRegInst("vqdmulh", Name, "SimdMultOp", smallSignedTypes,
                        rCount, vqdmulhCode)
    for Name, rCount in (("VqrdmulhsD", 2), ("VqrdmulhsQ", 4)):
        twoEqualRegInst("vqrdmulh", Name, "SimdMultOp", smallSignedTypes,
                        rCount, vqrdmulhCode)

    # VSHR by immediate. A shift of at least the element width yields -1
    # when ltz(srcElem1) holds and 0 otherwise; smaller shifts use the
    # native >> operator.
    vshrCode = '''
        if (imm >= sizeof(srcElem1) * 8) {
            if (ltz(srcElem1))
                destElem = -1;
            else
                destElem = 0;
        } else {
            destElem = srcElem1 >> imm;
        }
    '''
    for Name, rCount in (("NVshrD", 2), ("NVshrQ", 4)):
        twoRegShiftInst("vshr", Name, "SimdShiftOp", allTypes, rCount,
                        vshrCode)

    # VSRA: shift right by immediate and accumulate into the destination.
    # mid is the shifted source: -1 or 0 (chosen by ltz(srcElem1)) when the
    # shift is at least the element width, otherwise srcElem1 >> imm with an
    # explicit sign-extension fix-up for the case where ltz(srcElem1) holds
    # but the shifted value does not come out negative. mid is then added to
    # destElem.
    # (The declaration of mid previously ended in a stray second semicolon,
    # which emitted an empty statement.)
    vsraCode = '''
        Element mid;
        if (imm >= sizeof(srcElem1) * 8) {
            mid = ltz(srcElem1) ? -1 : 0;
        } else {
            mid = srcElem1 >> imm;
            if (ltz(srcElem1) && !ltz(mid)) {
                mid |= -(mid & ((Element)1 <<
                            (sizeof(Element) * 8 - 1 - imm)));
            }
        }
        destElem += mid;
    '''
    twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True)
    twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True)

    # VRSHR: rounding shift right by immediate.
    #  - imm greater than the element width: result is 0.
    #  - non-zero imm: the shift is split as (>> (imm - 1)) >> 1 so that a
    #    shift equal to the element width never shifts by the full width in
    #    one step; rBit (bit imm - 1 of the source) is added to round.
    #  - imm == 0: the element is copied.
    vrshrCode = '''
        if (imm > sizeof(srcElem1) * 8) {
            destElem = 0;
        } else if (imm) {
            Element rBit = bits(srcElem1, imm - 1);
            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
        } else {
            destElem = srcElem1;
        }
    '''
    # Registered for both the 2- and 4-count instruction classes.
    twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
    twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)

    # VRSRA: rounding shift right by immediate and accumulate. The shifted,
    # rounded value is computed with the same split shift
    # ((>> (imm - 1)) >> 1, plus bit imm - 1 as the rounding bit) and added
    # to destElem. An imm larger than the element width adds 0; imm == 0 adds
    # the source unchanged.
    vrsraCode = '''
        if (imm > sizeof(srcElem1) * 8) {
            destElem += 0;
        } else if (imm) {
            Element rBit = bits(srcElem1, imm - 1);
            destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
        } else {
            destElem += srcElem1;
        }
    '''
    # Registered for both the 2- and 4-count instruction classes; the
    # trailing True is passed because the snippet reads destElem.
    twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2, vrsraCode, True)
    twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4, vrsraCode, True)

    # VSRI: shift right and insert. The shifted source replaces the low
    # (element bits - imm) bits of the destination while the top imm bits of
    # the destination are preserved. A shift of the full element width or
    # more leaves the destination unchanged.
    vsriCode = '''
        if (imm >= sizeof(Element) * 8) {
            destElem = destElem;
        } else {
            destElem = (srcElem1 >> imm) |
                (destElem & ~mask(sizeof(Element) * 8 - imm));
        }
    '''
    for Name, rCount in (("NVsriD", 2), ("NVsriQ", 4)):
        twoRegShiftInst("vsri", Name, "SimdShiftOp", unsignedTypes, rCount,
                        vsriCode, True)

    # VSHL by immediate. A shift of at least the element width is performed
    # in two steps (width - 1, then 1) so no single shift reaches the full
    # width.
    vshlCode = '''
        if (imm >= sizeof(Element) * 8) {
            destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
        } else {
            destElem = srcElem1 << imm;
        }
    '''
    for Name, rCount in (("NVshlD", 2), ("NVshlQ", 4)):
        twoRegShiftInst("vshl", Name, "SimdShiftOp", unsignedTypes, rCount,
                        vshlCode)

    # VSLI: shift left and insert. The shifted source is combined with the
    # low imm bits of the destination, which are preserved. A shift of the
    # full element width or more leaves the destination unchanged.
    vsliCode = '''
        if (imm >= sizeof(Element) * 8) {
            destElem = destElem;
        } else {
            destElem = (srcElem1 << imm) | (destElem & mask(imm));
        }
    '''
    for Name, rCount in (("NVsliD", 2), ("NVsliQ", 4)):
        twoRegShiftInst("vsli", Name, "SimdShiftOp", unsignedTypes, rCount,
                        vsliCode, True)

    # VQSHL by immediate, signed element types: saturating shift left.
    #  - imm >= element width: any non-zero source saturates (to the most
    #    negative value, or its complement for a positive source) and sets
    #    fpscr.qc; a zero source gives 0.
    #  - 0 < imm < width: the shift is performed, then the top imm + 1 bits
    #    of the source are inspected; unless they are all zero or all one
    #    (i.e. pure sign extension) the result saturates and fpscr.qc is
    #    set.
    #  - imm == 0: the element is copied.
    vqshlCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (imm >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = std::numeric_limits<Element>::min();
                if (srcElem1 > 0)
                    destElem = ~destElem;
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else if (imm) {
            destElem = (srcElem1 << imm);
            uint64_t topBits = bits((uint64_t)srcElem1,
                                    sizeof(Element) * 8 - 1,
                                    sizeof(Element) * 8 - 1 - imm);
            if (topBits != 0 && topBits != mask(imm + 1)) {
                destElem = std::numeric_limits<Element>::min();
                if (srcElem1 > 0)
                    destElem = ~destElem;
                fpscr.qc = 1;
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
    '''
    # Registered for both the 2- and 4-count instruction classes.
    twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
    twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)

    # Saturating shift left by immediate on unsigned element types
    # (registered under the "vqshlu" mnemonic).
    #  - imm >= element width: a non-zero source saturates to all ones and
    #    sets fpscr.qc; zero stays zero.
    #  - 0 < imm < width: the shift is performed; if any of the top imm bits
    #    of the source were set the result saturates to all ones and
    #    fpscr.qc is set.
    #  - imm == 0: the element is copied.
    vqshluCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (imm >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else if (imm) {
            destElem = (srcElem1 << imm);
            uint64_t topBits = bits((uint64_t)srcElem1,
                                    sizeof(Element) * 8 - 1,
                                    sizeof(Element) * 8 - imm);
            if (topBits != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
    '''
    # Registered for both the 2- and 4-count instruction classes.
    twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
    twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)

    # Saturating shift left by immediate with a signed source and an
    # unsigned-range result (registered under the "vqshlus" mnemonic).
    # In every branch a negative source produces 0 with fpscr.qc set.
    #  - imm >= element width: a positive source saturates to all ones with
    #    fpscr.qc set; zero stays zero.
    #  - 0 < imm < width: the shift is performed; if any of the top imm bits
    #    of a non-negative source were set the result saturates to all ones
    #    and fpscr.qc is set.
    #  - imm == 0: a non-negative source is copied.
    vqshlusCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (imm >= sizeof(Element) * 8) {
            if (srcElem1 < 0) {
                destElem = 0;
                fpscr.qc = 1;
            } else if (srcElem1 > 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else if (imm) {
            destElem = (srcElem1 << imm);
            uint64_t topBits = bits((uint64_t)srcElem1,
                                    sizeof(Element) * 8 - 1,
                                    sizeof(Element) * 8 - imm);
            if (srcElem1 < 0) {
                destElem = 0;
                fpscr.qc = 1;
            } else if (topBits != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            }
        } else {
            if (srcElem1 < 0) {
                fpscr.qc = 1;
                destElem = 0;
            } else {
                destElem = srcElem1;
            }
        }
        FpscrQc = fpscr;
    '''
    # Registered for both the 2- and 4-count instruction classes.
    twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
    twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)

    # VSHRN: shift right by immediate through the narrowing-shift helper. A
    # shift of at least the source width gives 0; otherwise the shifted
    # source is assigned to destElem.
    vshrnCode = '''
        if (imm >= sizeof(srcElem1) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem1 >> imm;
        }
    '''
    twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp", smallUnsignedTypes, vshrnCode)

    # VRSHRN: rounding shift right by immediate through the narrowing-shift
    # helper. An imm larger than the source width gives 0; a non-zero imm
    # shifts in two steps ((>> (imm - 1)) >> 1) and adds bit imm - 1 of the
    # source as the rounding bit; imm == 0 copies the source.
    vrshrnCode = '''
        if (imm > sizeof(srcElem1) * 8) {
            destElem = 0;
        } else if (imm) {
            Element rBit = bits(srcElem1, imm - 1);
            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
        } else {
            destElem = srcElem1;
        }
    '''
    twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode)

    # VQSHRN, signed: saturating shift right by immediate with narrowing.
    #  - imm greater than the source width: the result is 0, and fpscr.qc is
    #    set unless the source is 0 or -1.
    #  - non-zero imm: the source is shifted in two steps into mid and
    #    explicitly sign-extended; if mid does not survive a round trip
    #    through Element it saturates to the Element extreme matching the
    #    sign of the source and fpscr.qc is set.
    #  - imm == 0: the source is assigned directly.
    # NOTE(review): unlike vqrshrnCode, the imm == 0 branch performs no
    # saturation check -- confirm whether imm == 0 can reach this snippet.
    vqshrnCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (imm > sizeof(srcElem1) * 8) {
            if (srcElem1 != 0 && srcElem1 != -1)
                fpscr.qc = 1;
            destElem = 0;
        } else if (imm) {
            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
            mid |= -(mid & ((BigElement)1 <<
                        (sizeof(BigElement) * 8 - 1 - imm)));
            if (mid != (Element)mid) {
                destElem = mask(sizeof(Element) * 8 - 1);
                if (srcElem1 < 0)
                    destElem = ~destElem;
                fpscr.qc = 1;
            } else {
                destElem = mid;
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
    '''
    twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)

    # Saturating shift right by immediate with narrowing, registered for
    # unsigned element types under the "vqshrun" mnemonic.
    #  - imm greater than the source width: result 0; fpscr.qc is set for a
    #    non-zero source.
    #  - non-zero imm: the source is shifted in two steps into mid; if mid
    #    does not fit in Element the result saturates to all ones and
    #    fpscr.qc is set.
    #  - imm == 0: the source is assigned directly.
    # NOTE(review): the imm == 0 branch performs no saturation check, unlike
    # vqrshrunCode -- confirm whether imm == 0 can reach this snippet.
    vqshrunCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (imm > sizeof(srcElem1) * 8) {
            if (srcElem1 != 0)
                fpscr.qc = 1;
            destElem = 0;
        } else if (imm) {
            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
            if (mid != (Element)mid) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = mid;
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
    '''
    twoRegNarrowShiftInst("vqshrun", "NVqshrun",
                          "SimdShiftOp", smallUnsignedTypes, vqshrunCode)

    # Saturating shift right by immediate with narrowing, signed source and
    # unsigned-range result ("vqshrun" registered for signed element types).
    #  - imm greater than the source width: result 0; fpscr.qc is set for a
    #    non-zero source.
    #  - non-zero imm: the source is shifted in two steps into mid; if any
    #    bit of mid above the Element width is set, the result saturates to 0
    #    for a negative source or to all ones otherwise, and fpscr.qc is
    #    set.
    #  - imm == 0: the source is assigned directly.
    # NOTE(review): the imm == 0 branch does not clamp negative sources,
    # unlike vqrshrunsCode -- confirm whether imm == 0 can reach this
    # snippet.
    vqshrunsCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (imm > sizeof(srcElem1) * 8) {
            if (srcElem1 != 0)
                fpscr.qc = 1;
            destElem = 0;
        } else if (imm) {
            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
            if (bits(mid, sizeof(BigElement) * 8 - 1,
                          sizeof(Element) * 8) != 0) {
                if (srcElem1 < 0) {
                    destElem = 0;
                } else {
                    destElem = mask(sizeof(Element) * 8);
                }
                fpscr.qc = 1;
            } else {
                destElem = mid;
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
    '''
    twoRegNarrowShiftInst("vqshrun", "NVqshruns",
                          "SimdShiftOp", smallSignedTypes, vqshrunsCode)

    # VQRSHRN, signed: saturating rounding shift right by immediate with
    # narrowing.
    #  - imm greater than the source width: the result is 0, and fpscr.qc is
    #    set unless the source is 0 or -1.
    #  - non-zero imm: the source is shifted by imm - 1, the low bit is kept
    #    as the rounding bit, the value is shifted once more, sign-extended
    #    and the rounding bit added. If the result does not fit in Element it
    #    saturates to the Element extreme matching the sign of the source and
    #    fpscr.qc is set.
    #  - imm == 0: the source itself is range-checked and saturated the same
    #    way.
    vqrshrnCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (imm > sizeof(srcElem1) * 8) {
            if (srcElem1 != 0 && srcElem1 != -1)
                fpscr.qc = 1;
            destElem = 0;
        } else if (imm) {
            BigElement mid = (srcElem1 >> (imm - 1));
            uint64_t rBit = mid & 0x1;
            mid >>= 1;
            mid |= -(mid & ((BigElement)1 <<
                        (sizeof(BigElement) * 8 - 1 - imm)));
            mid += rBit;
            if (mid != (Element)mid) {
                destElem = mask(sizeof(Element) * 8 - 1);
                if (srcElem1 < 0)
                    destElem = ~destElem;
                fpscr.qc = 1;
            } else {
                destElem = mid;
            }
        } else {
            if (srcElem1 != (Element)srcElem1) {
                destElem = mask(sizeof(Element) * 8 - 1);
                if (srcElem1 < 0)
                    destElem = ~destElem;
                fpscr.qc = 1;
            } else {
                destElem = srcElem1;
            }
        }
        FpscrQc = fpscr;
    '''
    twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
                          "SimdShiftOp", smallSignedTypes, vqrshrnCode)

    # Saturating rounding shift right by immediate with narrowing, registered
    # for unsigned element types under the "vqrshrun" mnemonic.
    #  - imm greater than the source width: result 0; fpscr.qc is set for a
    #    non-zero source.
    #  - non-zero imm: the source is shifted by imm - 1, the low bit is kept
    #    as the rounding bit, the value is shifted once more and the rounding
    #    bit added. If the result does not fit in Element it saturates to all
    #    ones and fpscr.qc is set.
    #  - imm == 0: the source itself is range-checked; an out-of-range value
    #    saturates to all ones. (This branch previously used
    #    mask(bits - 1), the signed maximum, which is inconsistent with the
    #    unsigned saturation value used by the other branch.)
    vqrshrunCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (imm > sizeof(srcElem1) * 8) {
            if (srcElem1 != 0)
                fpscr.qc = 1;
            destElem = 0;
        } else if (imm) {
            BigElement mid = (srcElem1 >> (imm - 1));
            uint64_t rBit = mid & 0x1;
            mid >>= 1;
            mid += rBit;
            if (mid != (Element)mid) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = mid;
            }
        } else {
            if (srcElem1 != (Element)srcElem1) {
                // Unsigned saturation: every bit of the narrow element set.
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = srcElem1;
            }
        }
        FpscrQc = fpscr;
    '''
    twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
                          "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)

    # Per-element C++ body for NVqrshruns: rounding, narrowing right shift
    # of a signed source (instantiated for smallSignedTypes) that saturates
    # to a non-negative range.
    #  - imm larger than the source width in bits: the result is 0, and QC
    #    is set for any non-zero source.
    #  - non-zero imm: shift by (imm - 1), keep the low bit as the rounding
    #    bit, shift once more, re-apply the sign extension, then add the
    #    rounding bit.  If any bit at or above the Element width is set the
    #    result saturates (0 for a negative source, all Element bits set
    #    otherwise) and QC is set.
    #  - imm == 0: a negative source yields 0 and sets QC; any other source
    #    is copied as is.
    # The QC flag is read from FpscrQc and written back at the end.
    vqrshrunsCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (imm > sizeof(srcElem1) * 8) {
            if (srcElem1 != 0)
                fpscr.qc = 1;
            destElem = 0;
        } else if (imm) {
            BigElement mid = (srcElem1 >> (imm - 1));
            uint64_t rBit = mid & 0x1;
            mid >>= 1;
            mid |= -(mid & ((BigElement)1 <<
                            (sizeof(BigElement) * 8 - 1 - imm)));
            mid += rBit;
            if (bits(mid, sizeof(BigElement) * 8 - 1,
                          sizeof(Element) * 8) != 0) {
                if (srcElem1 < 0) {
                    destElem = 0;
                } else {
                    destElem = mask(sizeof(Element) * 8);
                }
                fpscr.qc = 1;
            } else {
                destElem = mid;
            }
        } else {
            if (srcElem1 < 0) {
                fpscr.qc = 1;
                destElem = 0;
            } else {
                destElem = srcElem1;
            }
        }
        FpscrQc = fpscr;
    '''
    twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
                          "SimdShiftOp", smallSignedTypes, vqrshrunsCode)

    # Per-element C++ body for NVshll: cast the source to BigElement and
    # shift it left by imm.  A shift of at least the destination width in
    # bits produces 0 instead.
    vshllCode = '''
        if (imm >= sizeof(destElem) * 8) {
            destElem = 0;
        } else {
            destElem = (BigElement)srcElem1 << imm;
        }
    '''
    twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode)

    # Per-element C++ body for NVmovl: plain copy of the source element into
    # the destination element.  It is generated through the long-shift
    # helper, so the destination is presumably the wider element type --
    # confirm against twoRegLongShiftInst.
    vmovlCode = '''
        destElem = srcElem1;
    '''
    twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode)

    # C++ body for NVcvt2ufxD/Q ("vcvt"): converts a float source element to
    # fixed point with vfpFpToFixed<float>(src, false, 32, imm).  The second
    # argument is false here and true in vcvt2sfxCode below; it is
    # presumably the signedness selector -- confirm against vfpFpToFixed.
    #  - fpscr.idc is set when flushToZero(srcElem1) returns true.
    #  - The conversion runs between prepFpState(VfpRoundNearest) and
    #    finishVfp(), and the resulting flags are written to FpscrExc.
    #  - The empty asm statements with "m" constraints presumably keep the
    #    compiler from moving the conversion across the FP-state setup and
    #    teardown -- confirm.
    vcvt2ufxCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        if (flushToZero(srcElem1))
            fpscr.idc = 1;
        VfpSavedState state = prepFpState(VfpRoundNearest);
        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
        destReg = vfpFpToFixed<float>(srcElem1, false, 32, imm);
        __asm__ __volatile__("" :: "m" (destReg));
        finishVfp(fpscr, state, true);
        FpscrExc = fpscr;
    '''
    twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
            2, vcvt2ufxCode, toInt = True)
    twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
            4, vcvt2ufxCode, toInt = True)

    # C++ body for NVcvt2sfxD/Q ("vcvt"): same sequence as vcvt2ufxCode, but
    # vfpFpToFixed<float> is called with true as its second argument.
    vcvt2sfxCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        if (flushToZero(srcElem1))
            fpscr.idc = 1;
        VfpSavedState state = prepFpState(VfpRoundNearest);
        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
        destReg = vfpFpToFixed<float>(srcElem1, true, 32, imm);
        __asm__ __volatile__("" :: "m" (destReg));
        finishVfp(fpscr, state, true);
        FpscrExc = fpscr;
    '''
    twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
            2, vcvt2sfxCode, toInt = True)
    twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
            4, vcvt2sfxCode, toInt = True)

    # C++ body for NVcvtu2fpD/Q ("vcvt"): converts a fixed-point source
    # register value to float with vfpUFixedToFpS(true, true, src, 32, imm).
    # The call sits between prepFpState(VfpRoundNearest) and finishVfp(),
    # and the resulting flags are written to FpscrExc.  The empty asm
    # statements presumably act as compiler barriers around the
    # conversion -- confirm.
    vcvtu2fpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        VfpSavedState state = prepFpState(VfpRoundNearest);
        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
        destElem = vfpUFixedToFpS(true, true, srcReg1, 32, imm);
        __asm__ __volatile__("" :: "m" (destElem));
        finishVfp(fpscr, state, true);
        FpscrExc = fpscr;
    '''
    twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
            2, vcvtu2fpCode, fromInt = True)
    twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
            4, vcvtu2fpCode, fromInt = True)

    # C++ body for NVcvts2fpD/Q ("vcvt"): same sequence as vcvtu2fpCode, but
    # the conversion helper is vfpSFixedToFpS.
    vcvts2fpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        VfpSavedState state = prepFpState(VfpRoundNearest);
        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
        destElem = vfpSFixedToFpS(true, true, srcReg1, 32, imm);
        __asm__ __volatile__("" :: "m" (destElem));
        finishVfp(fpscr, state, true);
        FpscrExc = fpscr;
    '''
    twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
            2, vcvts2fpCode, fromInt = True)
    twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
            4, vcvts2fpCode, fromInt = True)

    # C++ body for NVcvts2h ("vcvt", narrowing helper, uint16_t elements).
    # The source element is turned into a float with bitsToFp(), fpscr.idc
    # is set when flushToZero() returns true for it, and the value is
    # converted with vcvtFpSFpH() using VfpRoundNearest and fpscr.ahp.  The
    # call sits between prepFpState() and finishVfp(), and the resulting
    # flags are written to FpscrExc.  destElem is zeroed first so that it
    # is initialised before being named in the asm constraints.
    vcvts2hCode = '''
        destElem = 0;
        FPSCR fpscr = (FPSCR) FpscrExc;
        float srcFp1 = bitsToFp(srcElem1, (float)0.0);
        if (flushToZero(srcFp1))
            fpscr.idc = 1;
        VfpSavedState state = prepFpState(VfpRoundNearest);
        __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
                                : "m" (srcFp1), "m" (destElem));
        destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
                              fpscr.ahp, srcFp1);
        __asm__ __volatile__("" :: "m" (destElem));
        finishVfp(fpscr, state, true);
        FpscrExc = fpscr;
    '''
    twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)

    # C++ body for NVcvth2s ("vcvt", long helper, uint16_t elements): the
    # source element is converted with vcvtFpHFpS() using fpscr.ahp and the
    # result is stored as raw bits via fpToBits().  The call sits between
    # prepFpState() and finishVfp(), and the resulting flags are written to
    # FpscrExc.
    vcvth2sCode = '''
        destElem = 0;
        FPSCR fpscr = (FPSCR) FpscrExc;
        VfpSavedState state = prepFpState(VfpRoundNearest);
        __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
                                : "m" (srcElem1), "m" (destElem));
        destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
        __asm__ __volatile__("" :: "m" (destElem));
        finishVfp(fpscr, state, true);
        FpscrExc = fpscr;
    '''
    twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)

    # Integer "vrsqrte" (NVrsqrteD/Q, uint32_t elements): each destination
    # element is unsignedRSqrtEstimate() of the source element.
    vrsqrteCode = '''
        destElem = unsignedRSqrtEstimate(srcElem1);
    '''
    twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode)
    twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode)

    # Floating-point "vrsqrte" (NVrsqrteDFp/QFp): fpscr.idc is set when
    # flushToZero(srcReg1) returns true, the result comes from
    # fprSqrtEstimate(), and the flags are written back to FpscrExc.
    vrsqrtefpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        if (flushToZero(srcReg1))
            fpscr.idc = 1;
        destReg = fprSqrtEstimate(fpscr, srcReg1);
        FpscrExc = fpscr;
    '''
    twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
    twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)

    # Integer "vrecpe" (NVrecpeD/Q, uint32_t elements): each destination
    # element is unsignedRecipEstimate() of the source element.
    vrecpeCode = '''
        destElem = unsignedRecipEstimate(srcElem1);
    '''
    twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode)
    twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode)

    # Floating-point "vrecpe" (NVrecpeDFp/QFp): fpscr.idc is set when
    # flushToZero(srcReg1) returns true, the result comes from
    # fpRecipEstimate(), and the flags are written back to FpscrExc.
    vrecpefpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        if (flushToZero(srcReg1))
            fpscr.idc = 1;
        destReg = fpRecipEstimate(fpscr, srcReg1);
        FpscrExc = fpscr;
    '''
    twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
    twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)

    # vrev16/vrev32/vrev64 share one shape: the element value is copied
    # unchanged and only the index j is recomputed.  groupSize is the number
    # of elements in a group of (1 << k) bytes (k = 1, 2, 3), and XOR-ing
    # the element index i with (groupSize - 1) mirrors its position inside
    # that group.  j is presumably the destination element index used by
    # twoRegMiscInst -- confirm against that helper.

    # Groups of 2 bytes.
    vrev16Code = '''
        destElem = srcElem1;
        unsigned groupSize = ((1 << 1) / sizeof(Element));
        unsigned reverseMask = (groupSize - 1);
        j = i ^ reverseMask;
    '''
    twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code)
    twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code)
    # Groups of 4 bytes.
    vrev32Code = '''
        destElem = srcElem1;
        unsigned groupSize = ((1 << 2) / sizeof(Element));
        unsigned reverseMask = (groupSize - 1);
        j = i ^ reverseMask;
    '''
    twoRegMiscInst("vrev32", "NVrev32D",
            "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code)
    twoRegMiscInst("vrev32", "NVrev32Q",
            "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code)
    # Groups of 8 bytes.
    vrev64Code = '''
        destElem = srcElem1;
        unsigned groupSize = ((1 << 3) / sizeof(Element));
        unsigned reverseMask = (groupSize - 1);
        j = i ^ reverseMask;
    '''
    twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code)
    twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code)

    # NOTE(review): split('exec') presumably starts a new chunk of the
    # generated exec output -- confirm against the ISA parser.  The
    # vcompares and vcomparesL strings (built elsewhere) are appended to
    # exec_output right after it.
    split('exec')
    exec_output += vcompares + vcomparesL

    # "vpaddl" (NVpaddlD/Q): each destination element is the sum of two
    # source elements, both cast to BigElement before the addition.
    vpaddlCode = '''
        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
    '''
    twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode)
    twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode)

    # "vpadal" (NVpadalD/Q): same widened pair sum, accumulated into the
    # destination element with +=.  The trailing True presumably asks the
    # helper to load the old destination value -- confirm against
    # twoRegCondenseInst.
    vpadalCode = '''
        destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
    '''
    twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True)
    twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4, vpadalCode, True)

    # "vcls" (NVclsD/Q, signed elements): counts how many bits directly
    # below the top bit carry the same value as the top bit.  The element is
    # shifted left once to drop the sign bit, then shifted repeatedly while
    # the new top bit still matches the original sign; the count is capped
    # at (element width in bits - 1).
    vclsCode = '''
        unsigned count = 0;
        if (srcElem1 < 0) {
            srcElem1 <<= 1;
            while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
                count++;
                srcElem1 <<= 1;
            }
        } else {
            srcElem1 <<= 1;
            while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
                count++;
                srcElem1 <<= 1;
            }
        }
        destElem = count;
    '''
    twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode)
    twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode)

    # "vclz" (NVclzD/Q): counts leading zero bits.  The element is treated
    # as signed so that "top bit clear" can be tested as >= 0; the count is
    # capped at the element width in bits, which is the result for a zero
    # source.
    vclzCode = '''
        unsigned count = 0;
        while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
            count++;
            srcElem1 <<= 1;
        }
        destElem = count;
    '''
    twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode)
    twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode)

    # "vcnt" (NVcntD/Q, unsigned elements): counts set bits by adding the
    # low bit and shifting right until the element is zero.
    vcntCode = '''
        unsigned count = 0;
        while (srcElem1 && count < sizeof(Element) * 8) {
            count += srcElem1 & 0x1;
            srcElem1 >>= 1;
        }
        destElem = count;
    '''

    twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode)
    twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode)

    # Register-form "vmvn" (NVmvnD/Q, uint64_t elements): bitwise NOT of the
    # source element.
    vmvnCode = '''
        destElem = ~srcElem1;
    '''
    twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
    twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)

    # "vqabs" (NVqabsD/Q, signed elements): saturating absolute value.  The
    # most negative Element has no positive counterpart, so it sets QC and
    # yields its bitwise complement (the most positive Element).  Other
    # negative values are negated and non-negative values pass through.
    vqabsCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
            fpscr.qc = 1;
            destElem = ~srcElem1;
        } else if (srcElem1 < 0) {
            destElem = -srcElem1;
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
    '''
    twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
    twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)

    # "vqneg" (NVqnegD/Q, signed elements): saturating negation.  The most
    # negative Element sets QC and yields its bitwise complement; every
    # other value is negated.
    vqnegCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
            fpscr.qc = 1;
            destElem = ~srcElem1;
        } else {
            destElem = -srcElem1;
        }
        FpscrQc = fpscr;
    '''
    twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
    twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)

    # Integer "vabs" (NVabsD/Q, signed elements): negative elements are
    # negated, others are copied.  No saturation flag is touched here.
    vabsCode = '''
        if (srcElem1 < 0) {
            destElem = -srcElem1;
        } else {
            destElem = srcElem1;
        }
    '''

    twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode)
    twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode)
    # Floating-point "vabs" (NVabsDFp/QFp): the float is viewed as a
    # uint32_t through a union and its top bit is cleared with
    # mask(sizeof(Element) * 8 - 1); no FPSCR state is read or written.
    vabsfpCode = '''
        union
        {
            uint32_t i;
            float f;
        } cStruct;
        cStruct.f = srcReg1;
        cStruct.i &= mask(sizeof(Element) * 8 - 1);
        destReg = cStruct.f;
    '''
    twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode)
    twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode)

    # Integer "vneg" (NVnegD/Q, signed elements): arithmetic negation.
    vnegCode = '''
        destElem = -srcElem1;
    '''
    twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode)
    twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode)
    # Floating-point "vneg" (NVnegDFp/QFp): unary minus on the float value.
    vnegfpCode = '''
        destReg = -srcReg1;
    '''
    twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
    twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)

    # Compare-against-zero family (vcgt, vcge, vceq, vcle, vclt).
    #
    # Integer forms (signed elements): the destination element is all ones
    # (mask over the full element width) when the comparison with 0 holds
    # and 0 otherwise.
    #
    # Floating-point forms: the comparison is delegated to binaryOp() with
    # the matching vc*Func helper and a second operand of 0.0.  A helper
    # result of 0 produces -1 (all bits set) in the destination, any other
    # result produces 0, and a result of 2.0 additionally sets fpscr.ioc.
    # The flags are read from and written back to FpscrExc.

    # Greater than zero.
    vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
    twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
    twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
    vcgtfpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        float res = binaryOp(fpscr, srcReg1, (float)0.0, vcgtFunc,
                             true, true, VfpRoundNearest);
        destReg = (res == 0) ? -1 : 0;
        if (res == 2.0)
            fpscr.ioc = 1;
        FpscrExc = fpscr;
    '''
    twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
            2, vcgtfpCode, toInt = True)
    twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
            4, vcgtfpCode, toInt = True)

    # Greater than or equal to zero.
    vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
    twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
    twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
    vcgefpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        float res = binaryOp(fpscr, srcReg1, (float)0.0, vcgeFunc,
                             true, true, VfpRoundNearest);
        destReg = (res == 0) ? -1 : 0;
        if (res == 2.0)
            fpscr.ioc = 1;
        FpscrExc = fpscr;
    '''
    twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
            2, vcgefpCode, toInt = True)
    twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
            4, vcgefpCode, toInt = True)

    # Equal to zero.
    vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
    twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
    twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
    vceqfpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        float res = binaryOp(fpscr, srcReg1, (float)0.0, vceqFunc,
                             true, true, VfpRoundNearest);
        destReg = (res == 0) ? -1 : 0;
        if (res == 2.0)
            fpscr.ioc = 1;
        FpscrExc = fpscr;
    '''
    twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
            2, vceqfpCode, toInt = True)
    twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
            4, vceqfpCode, toInt = True)

    # Less than or equal to zero.
    vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
    twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
    twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
    vclefpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        float res = binaryOp(fpscr, srcReg1, (float)0.0, vcleFunc,
                             true, true, VfpRoundNearest);
        destReg = (res == 0) ? -1 : 0;
        if (res == 2.0)
            fpscr.ioc = 1;
        FpscrExc = fpscr;
    '''
    twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
            2, vclefpCode, toInt = True)
    twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
            4, vclefpCode, toInt = True)

    # Less than zero.
    vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
    twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
    twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
    vcltfpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        float res = binaryOp(fpscr, srcReg1, (float)0.0, vcltFunc,
                             true, true, VfpRoundNearest);
        destReg = (res == 0) ? -1 : 0;
        if (res == 2.0)
            fpscr.ioc = 1;
        FpscrExc = fpscr;
    '''
    twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",),
            2, vcltfpCode, toInt = True)
    twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",),
            4, vcltfpCode, toInt = True)

    # "vswp" (NVswpD/Q): exchanges srcReg1 and destReg one regs[] word at a
    # time, using a 32-bit temporary, for all rCount words.
    vswpCode = '''
        uint32_t mid;
        for (unsigned r = 0; r < rCount; r++) {
            mid = srcReg1.regs[r];
            srcReg1.regs[r] = destReg.regs[r];
            destReg.regs[r] = mid;
        }
    '''
    twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode)
    twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode)

    # "vtrn" (NVtrnD/Q): for every even index i, exchanges element i of
    # srcReg1 with element i + 1 of destReg.  The remaining elements of both
    # registers are left as they were.
    vtrnCode = '''
        Element mid;
        for (unsigned i = 0; i < eCount; i += 2) {
            mid = srcReg1.elements[i];
            srcReg1.elements[i] = destReg.elements[i + 1];
            destReg.elements[i + 1] = mid;
        }
    '''
    twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp",
            smallUnsignedTypes, 2, vtrnCode)
    twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp",
            smallUnsignedTypes, 4, vtrnCode)

    # "vuzp" (NVuzpD/Q): de-interleaves the pair (destReg, srcReg1).
    # Afterwards destReg holds the even-indexed elements of the old destReg
    # followed by the even-indexed elements of the old srcReg1, and srcReg1
    # holds the odd-indexed elements in the same order.
    #  - mid is a snapshot of srcReg1, needed because srcReg1 is overwritten
    #    in the first loop.
    #  - destReg is compacted in place; this is safe because index 2 * i is
    #    read before any index above i has been written.
    #  - The upper half of destReg is filled in a second loop so that the
    #    first loop can still read the original upper destReg elements.
    vuzpCode = '''
        Element mid[eCount];
        memcpy(&mid, &srcReg1, sizeof(srcReg1));
        for (unsigned i = 0; i < eCount / 2; i++) {
            srcReg1.elements[i] = destReg.elements[2 * i + 1];
            srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
            destReg.elements[i] = destReg.elements[2 * i];
        }
        for (unsigned i = 0; i < eCount / 2; i++) {
            destReg.elements[eCount / 2 + i] = mid[2 * i];
        }
    '''
    twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode)
    twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode)

    # "vzip" (NVzipD/Q): interleaves the pair (destReg, srcReg1).
    # Afterwards destReg holds the lower halves of the old destReg and old
    # srcReg1 interleaved element by element, and srcReg1 holds the upper
    # halves interleaved the same way.
    #  - mid is a snapshot of destReg, needed because destReg is overwritten
    #    in the first loop while its upper half is still required by the
    #    second.
    #  - srcReg1 is rewritten in place in the second loop; this is safe
    #    because the element read, eCount / 2 + i, is never below the
    #    highest index written so far, 2 * i + 1, while i < eCount / 2.
    #  - Both loop counters are unsigned, matching the other scramble
    #    snippets and avoiding a signed/unsigned comparison with eCount.
    vzipCode = '''
        Element mid[eCount];
        memcpy(&mid, &destReg, sizeof(destReg));
        for (unsigned i = 0; i < eCount / 2; i++) {
            destReg.elements[2 * i] = mid[i];
            destReg.elements[2 * i + 1] = srcReg1.elements[i];
        }
        for (unsigned i = 0; i < eCount / 2; i++) {
            srcReg1.elements[2 * i] = mid[eCount / 2 + i];
            srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
        }
    '''
    twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
    twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)

    # "vmovn" (NVmovn): plain element copy generated through the narrowing
    # helper, with no saturation or flag update in the snippet itself.
    vmovnCode = 'destElem = srcElem1;'
    twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode)

    # "vdup" (NVdupD/Q): plain element copy generated through
    # twoRegMiscScInst; which source element feeds each destination element
    # is decided by that helper, not by this snippet.
    vdupCode = 'destElem = srcElem1;'
    twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode)
    twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode)

    def vdupGprInst(name, Name, opClass, types, rCount):
        # Emit the declaration and execute code for a "vdup" that broadcasts
        # Op1 into every element of the destination vector.
        #
        #   name    -- mnemonic handed to InstObjParams
        #   Name    -- generated C++ class name
        #   opClass -- op class stored under "op_class"
        #   types   -- element types to emit NeonExecDeclare entries for
        #   rCount  -- number of 32-bit destination words written back
        #
        # Appends to the global header_output and exec_output strings.
        global header_output, exec_output
        # Fill every element with Op1 converted to the element type.
        fillLoop = '''
        RegVect destReg;
        for (unsigned i = 0; i < eCount; i++) {
            destReg.elements[i] = htole((Element)Op1);
        }
        '''
        # One write-back statement per destination word.
        writeBack = '''
            FpDestP%(reg)d_uw = letoh(destReg.regs[%(reg)d]);
            '''
        instCode = simdEnabledCheckCode + fillLoop + "".join(
            [writeBack % { "reg" : idx } for idx in range(rCount)])
        instParams = InstObjParams(name, Name, "RegRegOp",
                                   { "code": instCode,
                                     "r_count": rCount,
                                     "predicate_test": predicateTest,
                                     "op_class": opClass }, [])
        header_output += NeonRegRegOpDeclare.subst(instParams)
        exec_output += NeonEqualRegExecute.subst(instParams)
        # One explicit declaration per requested element type.
        for elemType in types:
            exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                                   "class_name" : Name })
    # "vdup" from a general-purpose register, D and Q variants.
    vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp", smallUnsignedTypes, 2)
    vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp", smallUnsignedTypes, 4)

    # Immediate forms, all generated through oneRegImmInst with uint64_t
    # elements.  vorr and vbic combine imm with the existing destination
    # element (|= and &= ~), and pass a trailing True that presumably makes
    # the helper load the old destination value -- confirm against
    # oneRegImmInst.

    # Destination element becomes imm.
    vmovCode = 'destElem = imm;'
    oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode)
    oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode)

    # Destination element is OR-ed with imm.
    vorrCode = 'destElem |= imm;'
    oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True)
    oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True)

    # Destination element becomes the bitwise NOT of imm.  This rebinds
    # vmvnCode, which held the register-form snippet before this point.
    vmvnCode = 'destElem = ~imm;'
    oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
    oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)

    # Bits set in imm are cleared in the destination element.
    vbicCode = 'destElem &= ~imm;'
    oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True)
    oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True)

    # "vqmovn" (NVqmovn, smallSignedTypes): saturating narrow.  The source
    # is assigned to the narrow destination; if casting that back to
    # BigElement does not reproduce the source, QC is set and the result
    # becomes mask(bits - 1), bitwise inverted for a negative source.
    vqmovnCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if ((BigElement)destElem != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8 - 1);
        if (srcElem1 < 0)
            destElem = ~destElem;
    }
    FpscrQc = fpscr;
    '''
    twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)

    # "vqmovun" as NVqmovun (smallUnsignedTypes): same round-trip check, but
    # the saturated value is all Element bits set and there is no negative
    # case.
    vqmovunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if ((BigElement)destElem != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8);
    }
    FpscrQc = fpscr;
    '''
    twoRegNarrowMiscInst("vqmovun", "NVqmovun",
            "SimdMiscOp", smallUnsignedTypes, vqmovunCode)

    # "vqmovun" as NVqmovuns (smallSignedTypes): a negative source, or one
    # whose low Element bits alone do not reproduce it, sets QC.  The
    # result is then all Element bits set, bitwise inverted (giving 0) when
    # the source is negative.
    vqmovunsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if (srcElem1 < 0 ||
            ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8);
        if (srcElem1 < 0)
            destElem = ~destElem;
    }
    FpscrQc = fpscr;
    '''
    twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
            "SimdMiscOp", smallSignedTypes, vqmovunsCode)

    def buildVext(name, Name, opClass, types, rCount, op):
        # Emit the declaration and execute code for a three-register plus
        # immediate NEON instruction whose element shuffle is given by op.
        #
        #   name    -- mnemonic handed to InstObjParams
        #   Name    -- generated C++ class name
        #   opClass -- op class stored under "op_class"
        #   types   -- element types to emit NeonExecDeclare entries for
        #   rCount  -- number of 32-bit words loaded per source register and
        #              written back to the destination
        #   op      -- C++ snippet placed between the loads and the stores
        #
        # Appends to the global header_output and exec_output strings.
        global header_output, exec_output
        # Per-word load of both source registers.
        loadRegs = '''
                srcReg1.regs[%(reg)d] = htole(FpOp1P%(reg)d_uw);
                srcReg2.regs[%(reg)d] = htole(FpOp2P%(reg)d_uw);
            '''
        # Per-word write-back of the destination register.
        storeRegs = '''
            FpDestP%(reg)d_uw = letoh(destReg.regs[%(reg)d]);
            '''
        wordIndices = range(rCount)
        pieces = [simdEnabledCheckCode + '''
        RegVect srcReg1, srcReg2, destReg;
        ''']
        pieces.extend([loadRegs % { "reg" : idx } for idx in wordIndices])
        pieces.append(op)
        pieces.extend([storeRegs % { "reg" : idx } for idx in wordIndices])
        instParams = InstObjParams(name, Name, "RegRegRegImmOp",
                                   { "code": "".join(pieces),
                                     "r_count": rCount,
                                     "predicate_test": predicateTest,
                                     "op_class": opClass }, [])
        header_output += NeonRegRegRegImmOpDeclare.subst(instParams)
        exec_output += NeonEqualRegExecute.subst(instParams)
        # One explicit declaration per requested element type.
        for elemType in types:
            exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                                   "class_name" : Name })

    # "vext" (NVextD/Q, uint8_t elements): destination element i is taken
    # from position i + imm of the sequence formed by srcReg1 followed by
    # srcReg2.  An index that runs past the end of srcReg2 leaves that
    # destination element unassigned and records an UndefinedInstruction
    # fault; the loop still visits the remaining elements.
    vextCode = '''
        for (unsigned i = 0; i < eCount; i++) {
            unsigned index = i + imm;
            if (index < eCount) {
                destReg.elements[i] = srcReg1.elements[index];
            } else {
                index -= eCount;
                if (index >= eCount) {
                    fault = std::make_shared<UndefinedInstruction>(machInst,
                                                                   false,
                                                                   mnemonic);
                } else {
                    destReg.elements[i] = srcReg2.elements[index];
                }
            }
        }
    '''
    buildVext("vext", "NVextD", "SimdMiscOp", ("uint8_t",), 2, vextCode)
    buildVext("vext", "NVextQ", "SimdMiscOp", ("uint8_t",), 4, vextCode)

    def buildVtbxl(name, Name, opClass, length, isVtbl):
        # Emit the declaration, constructor and execute code for one table
        # lookup instruction.
        #
        #   name    -- mnemonic handed to InstObjParams
        #   Name    -- generated C++ class name
        #   opClass -- op class stored under "op_class"
        #   length  -- table size in 8-byte units; indices below 8 * length
        #              select a table byte
        #   isVtbl  -- C++ boolean literal ("true" or "false"); when true an
        #              out-of-range index writes 0, otherwise the
        #              destination byte keeps its old value
        #
        # Appends to the global header_output, decoder_output and
        # exec_output strings.
        global header_output, decoder_output, exec_output
        # Declarations plus loads of the index vector and old destination.
        prologue = '''
            union
            {
                uint8_t bytes[32];
                uint32_t regs[8];
            } table;

            union
            {
                uint8_t bytes[8];
                uint32_t regs[2];
            } destReg, srcReg2;

            const unsigned length = %(length)d;
            const bool isVtbl = %(isVtbl)s;

            srcReg2.regs[0] = htole(FpOp2P0_uw);
            srcReg2.regs[1] = htole(FpOp2P1_uw);

            destReg.regs[0] = htole(FpDestP0_uw);
            destReg.regs[1] = htole(FpDestP1_uw);
        ''' % { "length" : length, "isVtbl" : isVtbl }
        # The table always has 8 words; those beyond the configured length
        # are zero-filled instead of being loaded from a register.
        usedWords = length * 2
        tableLoads = []
        for word in range(8):
            if word >= usedWords:
                tableLoads.append(
                    'table.regs[%(reg)d] = 0;\n' % { "reg" : word })
            else:
                tableLoads.append(
                    'table.regs[%(reg)d] = htole(FpOp1P%(reg)d_uw);\n' %
                    { "reg" : word })
        # Byte-wise lookup followed by write-back of the two result words.
        lookup = '''
        for (unsigned i = 0; i < sizeof(destReg); i++) {
            uint8_t index = srcReg2.bytes[i];
            if (index < 8 * length) {
                destReg.bytes[i] = table.bytes[index];
            } else {
                if (isVtbl)
                    destReg.bytes[i] = 0;
                // else destReg.bytes[i] unchanged
            }
        }

        FpDestP0_uw = letoh(destReg.regs[0]);
        FpDestP1_uw = letoh(destReg.regs[1]);
        '''
        code = simdEnabledCheckCode + prologue + "".join(tableLoads) + lookup
        instParams = InstObjParams(name, Name, "RegRegRegOp",
                                   { "code": code,
                                     "predicate_test": predicateTest,
                                     "op_class": opClass }, [])
        header_output += RegRegRegOpDeclare.subst(instParams)
        decoder_output += RegRegRegOpConstructor.subst(instParams)
        exec_output += PredOpExecute.subst(instParams)

    # "vtbl" variants for table lengths 1 to 4: isVtbl is "true", so an
    # out-of-range index writes 0 to the destination byte.
    buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", 1, "true")
    buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", 2, "true")
    buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", 3, "true")
    buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", 4, "true")

    # "vtbx" variants for table lengths 1 to 4: isVtbl is "false", so an
    # out-of-range index leaves the destination byte unchanged.
    buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", 1, "false")
    buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", 2, "false")
    buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", 3, "false")
    buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", 4, "false")
}};
