 // src/arch/arm/isa/insts/fp64.isa - testing/jenkins-gem5-prod - Git at Google

 // -*- mode:c++ -*-

 // Copyright (c) 2012-2013, 2016-2018 ARM Limited
 // All rights reserved
 //
 // The license below extends only to copyright in the software and shall
 // not be construed as granting a license to any other intellectual
 // property including but not limited to intellectual property relating
 // to a hardware implementation of the functionality of the software
 // licensed hereunder.  You may use the software subject to the license
 // terms below provided that you ensure that this notice is replicated
 // unmodified and in its entirety in all distributions of the software,
 // modified or unmodified, in source code or in binary form.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met: redistributions of source code must retain the above copyright
 // notice, this list of conditions and the following disclaimer;
 // redistributions in binary form must reproduce the above copyright
 // notice, this list of conditions and the following disclaimer in the
 // documentation and/or other materials provided with the distribution;
 // neither the name of the copyright holders nor the names of its
 // contributors may be used to endorse or promote products derived from
 // this software without specific prior written permission.
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
 // Authors: Thomas Grocutt
 //          Edmund Grimley Evans

 let {{

     header_output = ""
     decoder_output = ""
     exec_output = ""

     # FMOV (immediate), single precision: word 0 of the destination takes
     # the low 32 bits of imm and the remaining three words are cleared.
     fmovImmSCode = vfp64EnabledCheckCode + '''
         AA64FpDestP0_uw = bits(imm, 31, 0);
         AA64FpDestP1_uw = 0;
         AA64FpDestP2_uw = 0;
         AA64FpDestP3_uw = 0;
     '''
     fmovImmSIop = InstObjParams("fmov", "FmovImmS", "FpRegImmOp",
                                 { "code": fmovImmSCode,
                                   "op_class": "FloatMiscOp" }, [])
     header_output  += FpRegImmOpDeclare.subst(fmovImmSIop);
     # Use the AA64 constructor template, exactly as FmovImmD below does;
     # this instruction previously used the non-AA64 FpRegImmOpConstructor.
     # NOTE(review): the two templates are defined outside this file and are
     # assumed to share a constructor signature (both are paired with
     # FpRegImmOpDeclare here) -- confirm.
     decoder_output += AA64FpRegImmOpConstructor.subst(fmovImmSIop);
     exec_output    += BasicExecute.subst(fmovImmSIop);

     # FMOV (immediate), double precision: words 0 and 1 take the low and
     # high halves of the 64-bit imm; words 2 and 3 are cleared.
     fmovImmDCode = vfp64EnabledCheckCode + '''
         AA64FpDestP0_uw = bits(imm, 31, 0);
         AA64FpDestP1_uw = bits(imm, 63, 32);
         AA64FpDestP2_uw = 0;
         AA64FpDestP3_uw = 0;
     '''
     fmovImmDIop = InstObjParams("fmov", "FmovImmD", "FpRegImmOp",
                                 { "code": fmovImmDCode,
                                   "op_class": "FloatMiscOp" }, [])
     header_output  += FpRegImmOpDeclare.subst(fmovImmDIop);
     decoder_output += AA64FpRegImmOpConstructor.subst(fmovImmDIop);
     exec_output    += BasicExecute.subst(fmovImmDIop);

     # FMOV (register), single precision: copy word 0 of the source FP
     # register and clear the other three destination words.
     fmovRegSCode = vfp64EnabledCheckCode + '''
         AA64FpDestP0_uw = AA64FpOp1P0_uw;
         AA64FpDestP1_uw = 0;
         AA64FpDestP2_uw = 0;
         AA64FpDestP3_uw = 0;
     '''
     fmovRegSIop = InstObjParams(
         "fmov", "FmovRegS", "FpRegRegOp",
         {"code": fmovRegSCode, "op_class": "FloatMiscOp"}, [])
     header_output += FpRegRegOpDeclare.subst(fmovRegSIop)
     decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegSIop)
     exec_output += BasicExecute.subst(fmovRegSIop)

     # FMOV (register), double precision: copy words 0 and 1 of the source
     # FP register and clear words 2 and 3 of the destination.
     fmovRegDCode = vfp64EnabledCheckCode + '''
         AA64FpDestP0_uw = AA64FpOp1P0_uw;
         AA64FpDestP1_uw = AA64FpOp1P1_uw;
         AA64FpDestP2_uw = 0;
         AA64FpDestP3_uw = 0;
     '''
     fmovRegDIop = InstObjParams(
         "fmov", "FmovRegD", "FpRegRegOp",
         {"code": fmovRegDCode, "op_class": "FloatMiscOp"}, [])
     header_output += FpRegRegOpDeclare.subst(fmovRegDIop)
     decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegDIop)
     exec_output += BasicExecute.subst(fmovRegDIop)

     # FMOV from a W core register: the 32-bit source lands in word 0 and
     # the upper three words of the destination are cleared.
     fmovCoreRegWCode = vfp64EnabledCheckCode + '''
         AA64FpDestP0_uw = WOp1_uw;
         AA64FpDestP1_uw = 0;
         AA64FpDestP2_uw = 0;
         AA64FpDestP3_uw = 0;
     '''
     fmovCoreRegWIop = InstObjParams(
         "fmov", "FmovCoreRegW", "FpRegRegOp",
         {"code": fmovCoreRegWCode, "op_class": "FloatMiscOp"}, [])
     header_output += FpRegRegOpDeclare.subst(fmovCoreRegWIop)
     decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegWIop)
     exec_output += BasicExecute.subst(fmovCoreRegWIop)

     # FMOV from an X core register: the 64-bit source is split across
     # words 0 and 1; words 2 and 3 of the destination are cleared.
     fmovCoreRegXCode = vfp64EnabledCheckCode + '''
         AA64FpDestP0_uw = XOp1_ud;
         AA64FpDestP1_uw = XOp1_ud >> 32;
         AA64FpDestP2_uw = 0;
         AA64FpDestP3_uw = 0;
     '''
     fmovCoreRegXIop = InstObjParams(
         "fmov", "FmovCoreRegX", "FpRegRegOp",
         {"code": fmovCoreRegXCode, "op_class": "FloatMiscOp"}, [])
     header_output += FpRegRegOpDeclare.subst(fmovCoreRegXIop)
     decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegXIop)
     exec_output += BasicExecute.subst(fmovCoreRegXIop)

     # FMOV from an X core register into the upper half: words 2 and 3
     # receive the source while words 0 and 1 are written back unchanged.
     fmovUCoreRegXCode = vfp64EnabledCheckCode + '''
         /* Explicitly merge with previous value */
         AA64FpDestP0_uw = AA64FpDestP0_uw;
         AA64FpDestP1_uw = AA64FpDestP1_uw;
         AA64FpDestP2_uw = XOp1_ud;
         AA64FpDestP3_uw = XOp1_ud >> 32;'''
     fmovUCoreRegXIop = InstObjParams(
         "fmov", "FmovUCoreRegX", "FpRegRegOp",
         {"code": fmovUCoreRegXCode, "op_class": "FloatMiscOp"}, [])
     header_output += FpRegRegOpDeclare.subst(fmovUCoreRegXIop)
     decoder_output += AA64FpRegRegOpConstructor.subst(fmovUCoreRegXIop)
     exec_output += BasicExecute.subst(fmovUCoreRegXIop)

     # FMOV to a W core register: word 0 of the FP source.
     fmovRegCoreWCode = vfp64EnabledCheckCode + '''
         WDest = AA64FpOp1P0_uw;
     '''
     fmovRegCoreWIop = InstObjParams(
         "fmov", "FmovRegCoreW", "FpRegRegOp",
         {"code": fmovRegCoreWCode, "op_class": "FloatMiscOp"}, [])
     header_output += FpRegRegOpDeclare.subst(fmovRegCoreWIop)
     decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreWIop)
     exec_output += BasicExecute.subst(fmovRegCoreWIop)

     # FMOV to an X core register: words 1:0 of the FP source, with word 1
     # forming the high 32 bits.
     fmovRegCoreXCode = vfp64EnabledCheckCode + '''
         XDest = ( ((uint64_t) AA64FpOp1P1_uw) << 32) | AA64FpOp1P0_uw;
     '''
     fmovRegCoreXIop = InstObjParams(
         "fmov", "FmovRegCoreX", "FpRegRegOp",
         {"code": fmovRegCoreXCode, "op_class": "FloatMiscOp"}, [])
     header_output += FpRegRegOpDeclare.subst(fmovRegCoreXIop)
     decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreXIop)
     exec_output += BasicExecute.subst(fmovRegCoreXIop)

     # FMOV to an X core register from the upper half: words 3:2 of the FP
     # source, with word 3 forming the high 32 bits.
     fmovURegCoreXCode = vfp64EnabledCheckCode + '''
         XDest = ( ((uint64_t) AA64FpOp1P3_uw) << 32) | AA64FpOp1P2_uw;
     '''
     fmovURegCoreXIop = InstObjParams(
         "fmov", "FmovURegCoreX", "FpRegRegOp",
         {"code": fmovURegCoreXCode, "op_class": "FloatMiscOp"}, [])
     header_output += FpRegRegOpDeclare.subst(fmovURegCoreXIop)
     decoder_output += AA64FpRegRegOpConstructor.subst(fmovURegCoreXIop)
     exec_output += BasicExecute.subst(fmovURegCoreXIop)
 }};

 let {{

     header_output = ""
     decoder_output = ""
     exec_output = ""

     # C++ wrappers shared by the build* helpers in this block.  Each one
     # loads the source operand(s) into cOp1 (and cOp2 for the "...2"
     # variants), evaluates the expression substituted for %(op)s into
     # cDest, writes cDest to the low word(s) of the destination, clears the
     # remaining destination words and writes fpscr back to FpscrExc.

     # Half precision (16-bit locals), one source operand.
     halfIntConvCode = vfp64EnabledCheckCode + '''
         FPSCR fpscr = (FPSCR) FpscrExc;
         uint16_t cOp1  = AA64FpOp1P0_uw;
         uint16_t cDest = %(op)s;
         AA64FpDestP0_uw = cDest;
         AA64FpDestP1_uw = 0;
         AA64FpDestP2_uw = 0;
         AA64FpDestP3_uw = 0;
         FpscrExc = fpscr;
     '''

     # Half precision, two source operands.
     halfIntConvCode2 = vfp64EnabledCheckCode + '''
         FPSCR fpscr = (FPSCR) FpscrExc;
         uint16_t cOp1  = AA64FpOp1P0_uw;
         uint16_t cOp2  = AA64FpOp2P0_uw;
         uint16_t cDest = %(op)s;
         AA64FpDestP0_uw = cDest;
         AA64FpDestP1_uw = 0;
         AA64FpDestP2_uw = 0;
         AA64FpDestP3_uw = 0;
         FpscrExc = fpscr;
     '''

     # Expression templates parameterised on %(func)s.  They are not
     # referenced in the visible part of this file.
     halfBinOp = ("binaryOp(fpscr, AA64FpOp1P0, AA64FpOp2P0,"
                  "%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode)")
     halfUnaryOp = ("unaryOp(fpscr, AA64FpOp1P0,"
                    "%(func)s, fpscr.fz, fpscr.rMode)")

     # Single precision (32-bit locals), one source operand.
     singleIntConvCode = vfp64EnabledCheckCode + '''
         FPSCR fpscr = (FPSCR) FpscrExc;
         uint32_t cOp1  = AA64FpOp1P0_uw;
         uint32_t cDest = %(op)s;
         AA64FpDestP0_uw = cDest;
         AA64FpDestP1_uw = 0;
         AA64FpDestP2_uw = 0;
         AA64FpDestP3_uw = 0;
         FpscrExc = fpscr;
     '''

     # Single precision, two source operands.
     singleIntConvCode2 = vfp64EnabledCheckCode + '''
         FPSCR fpscr = (FPSCR) FpscrExc;
         uint32_t cOp1  = AA64FpOp1P0_uw;
         uint32_t cOp2  = AA64FpOp2P0_uw;
         uint32_t cDest = %(op)s;
         AA64FpDestP0_uw = cDest;
         AA64FpDestP1_uw = 0;
         AA64FpDestP2_uw = 0;
         AA64FpDestP3_uw = 0;
         FpscrExc = fpscr;
     '''

     singleBinOp = ("binaryOp(fpscr, AA64FpOp1P0, AA64FpOp2P0,"
                    "%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode)")
     singleUnaryOp = ("unaryOp(fpscr, AA64FpOp1P0, "
                      "%(func)s, fpscr.fz, fpscr.rMode)")

     # Double precision (64-bit locals assembled from words 1:0), one source
     # operand; the result is split back across destination words 0 and 1.
     doubleIntConvCode = vfp64EnabledCheckCode + '''
         FPSCR fpscr = (FPSCR) FpscrExc;
         uint64_t cOp1  = ((uint64_t) AA64FpOp1P1_uw) << 32 | AA64FpOp1P0_uw;
         uint64_t cDest = %(op)s;
         AA64FpDestP0_uw = cDest & 0xFFFFFFFF;
         AA64FpDestP1_uw = cDest >> 32;
         AA64FpDestP2_uw = 0;
         AA64FpDestP3_uw = 0;
         FpscrExc = fpscr;
     '''

     # Double precision, two source operands.
     doubleIntConvCode2 = vfp64EnabledCheckCode + '''
         FPSCR fpscr = (FPSCR) FpscrExc;
         uint64_t cOp1  = ((uint64_t) AA64FpOp1P1_uw) << 32 | AA64FpOp1P0_uw;
         uint64_t cOp2  = ((uint64_t) AA64FpOp2P1_uw) << 32 | AA64FpOp2P0_uw;
         uint64_t cDest = %(op)s;
         AA64FpDestP0_uw = cDest & 0xFFFFFFFF;
         AA64FpDestP1_uw = cDest >> 32;
         AA64FpDestP2_uw = 0;
         AA64FpDestP3_uw = 0;
         FpscrExc = fpscr;
     '''

     doubleBinOp = '''
         binaryOp(fpscr, dbl(AA64FpOp1P0_uw, AA64FpOp1P1_uw),
                         dbl(AA64FpOp2P0_uw, AA64FpOp2P1_uw),
                         %(func)s, fpscr.fz, fpscr.dn, fpscr.rMode);
     '''
     doubleUnaryOp = '''
         unaryOp(fpscr, dbl(AA64FpOp1P0_uw, AA64FpOp1P1_uw), %(func)s,
                 fpscr.fz, fpscr.rMode)
     '''

     # Emits the D, S and H variants of a three-source FP instruction.
     #   name          -- instruction name; lower-cased for the mnemonic and
     #                    suffixed with D/S/H for the generated class name
     #   opClass       -- op_class string handed to InstObjParams
     #   hOp, sOp, dOp -- C++ expressions, written in terms of cOp1, cOp2,
     #                    cOp3 and fpscr, giving the 16/32/64-bit result
     # Appends to the global header/decoder/exec output strings.
     def buildTernaryFpOp(name, opClass, hOp, sOp, dOp):
         global header_output, decoder_output, exec_output
         for suffix in "D", "S", "H":
             # Pick the operand loads, result expression and low-word
             # write-back matching this precision.
             if suffix == "H":
                 resultExpr = hOp
                 loadOps = '''
                     uint16_t cOp1 = AA64FpOp1P0_uw;
                     uint16_t cOp2 = AA64FpOp2P0_uw;
                     uint16_t cOp3 = AA64FpOp3P0_uw;
                     uint16_t cDest;
                 '''
                 storeDest = '''
                     AA64FpDestP0_uw = cDest;
                     AA64FpDestP1_uw = 0;
                 '''
             elif suffix == "S":
                 resultExpr = sOp
                 loadOps = '''
                     uint32_t cOp1 = AA64FpOp1P0_uw;
                     uint32_t cOp2 = AA64FpOp2P0_uw;
                     uint32_t cOp3 = AA64FpOp3P0_uw;
                     uint32_t cDest;
                 '''
                 storeDest = '''
                     AA64FpDestP0_uw = cDest;
                     AA64FpDestP1_uw = 0;
                 '''
             else:
                 # suffix == "D": operands and result span words 1:0.
                 resultExpr = dOp
                 loadOps = '''
                     uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
                     uint64_t cOp2 = AA64FpOp2P0_uw | (uint64_t)AA64FpOp2P1_uw << 32;
                     uint64_t cOp3 = AA64FpOp3P0_uw | (uint64_t)AA64FpOp3P1_uw << 32;
                     uint64_t cDest;
                 '''
                 storeDest = '''
                     AA64FpDestP0_uw = cDest;
                     AA64FpDestP1_uw = cDest >> 32;
                 '''

             code = vfp64EnabledCheckCode + '''
                 FPSCR fpscr = (FPSCR) FpscrExc;
             '''
             code += loadOps + "cDest = " + resultExpr + ";" + storeDest
             # Common tail: clear the upper destination words and write the
             # (possibly updated) fpscr back.
             code += '''
                 AA64FpDestP2_uw = 0;
                 AA64FpDestP3_uw = 0;
                 FpscrExc = fpscr;
             '''

             iop = InstObjParams(name.lower(), name + suffix,
                                 "FpRegRegRegRegOp",
                                 {"code": code, "op_class": opClass}, [])

             header_output += AA64FpRegRegRegRegOpDeclare.subst(iop)
             decoder_output += AA64FpRegRegRegRegOpConstructor.subst(iop)
             exec_output += BasicExecute.subst(iop)

     # Fused multiply-add family.  In every expression cOp3 is passed as the
     # first fplibMulAdd argument and cOp1/cOp2 as the second and third; the
     # variants differ only in which of cOp3/cOp1 is negated first.
     #
     # Each fplibNeg<> must use the same width as the operand it negates.
     # The half-precision FMSub and FNMSub expressions previously used
     # fplibNeg<uint32_t> on 16-bit operands, unlike FNMAdd below.
     # NOTE(review): the 32-bit instantiation presumably flips bit 31, which
     # is lost when the value is handed to fplibMulAdd<uint16_t> -- confirm
     # against fplib.
     buildTernaryFpOp("FMAdd", "FloatMultAccOp",
                      "fplibMulAdd<uint16_t>(cOp3, cOp1, cOp2, fpscr)",
                      "fplibMulAdd<uint32_t>(cOp3, cOp1, cOp2, fpscr)",
                      "fplibMulAdd<uint64_t>(cOp3, cOp1, cOp2, fpscr)" )
     buildTernaryFpOp("FMSub", "FloatMultAccOp",
         "fplibMulAdd<uint16_t>(cOp3, fplibNeg<uint16_t>(cOp1), cOp2, fpscr)",
         "fplibMulAdd<uint32_t>(cOp3, fplibNeg<uint32_t>(cOp1), cOp2, fpscr)",
         "fplibMulAdd<uint64_t>(cOp3, fplibNeg<uint64_t>(cOp1), cOp2, fpscr)" )
     buildTernaryFpOp("FNMAdd", "FloatMultAccOp",
                      "fplibMulAdd<uint16_t>(fplibNeg<uint16_t>(cOp3), " +
                      "fplibNeg<uint16_t>(cOp1), cOp2, fpscr)",
                      "fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3), " +
                      "fplibNeg<uint32_t>(cOp1), cOp2, fpscr)",
                      "fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3), " +
                      "fplibNeg<uint64_t>(cOp1), cOp2, fpscr)" )
     buildTernaryFpOp("FNMSub", "FloatMultAccOp",
         "fplibMulAdd<uint16_t>(fplibNeg<uint16_t>(cOp3), cOp1, cOp2, fpscr)",
         "fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3), cOp1, cOp2, fpscr)",
         "fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3), cOp1, cOp2, fpscr)" )

     # Emits the H, S and D variants of a two-source FP instruction.
     #   name    -- mnemonic
     #   Name    -- class-name stem; "H"/"S"/"D" is appended
     #   base    -- base class name; also used to look up the
     #              "<base>Declare" and "AA64<base>Constructor" templates
     #   opClass -- op_class string handed to InstObjParams
     #   halfOp, singleOp, doubleOp -- C++ expressions substituted for
     #              %(op)s in the two-operand wrapper templates
     # Appends to the global header/decoder/exec output strings.
     def buildBinFpOp(name, Name, base, opClass, halfOp, singleOp, doubleOp):
         global header_output, decoder_output, exec_output

         # Build all three parameter objects first, in H, S, D order.
         iops = []
         for suffix, wrapper, op in (("H", halfIntConvCode2, halfOp),
                                     ("S", singleIntConvCode2, singleOp),
                                     ("D", doubleIntConvCode2, doubleOp)):
             iops.append(InstObjParams(name, Name + suffix, base,
                                       {"code": wrapper % {"op": op},
                                        "op_class": opClass}, []))

         # Templates are resolved by name from the base class string.
         declareTempl = eval(base + "Declare")
         constructorTempl = eval("AA64" + base + "Constructor")

         for iop in iops:
             header_output += declareTempl.subst(iop)
             decoder_output += constructorTempl.subst(iop)
             exec_output += BasicExecute.subst(iop)

     # Two-source scalar FP arithmetic, each generated in half, single and
     # double precision.  The three expression arguments are the half,
     # single and double forms, in that order.
     buildBinFpOp("fadd", "FAdd", "FpRegRegRegOp", "FloatAddOp",
                  "fplibAdd<uint16_t>(cOp1, cOp2, fpscr)",
                  "fplibAdd<uint32_t>(cOp1, cOp2, fpscr)",
                  "fplibAdd<uint64_t>(cOp1, cOp2, fpscr)")
     buildBinFpOp("fsub", "FSub", "FpRegRegRegOp", "FloatAddOp",
                  "fplibSub<uint16_t>(cOp1, cOp2, fpscr)",
                  "fplibSub<uint32_t>(cOp1, cOp2, fpscr)",
                  "fplibSub<uint64_t>(cOp1, cOp2, fpscr)")
     buildBinFpOp("fdiv", "FDiv", "FpRegRegRegOp", "FloatDivOp",
                  "fplibDiv<uint16_t>(cOp1, cOp2, fpscr)",
                  "fplibDiv<uint32_t>(cOp1, cOp2, fpscr)",
                  "fplibDiv<uint64_t>(cOp1, cOp2, fpscr)")
     buildBinFpOp("fmul", "FMul", "FpRegRegRegOp", "FloatMultOp",
                  "fplibMul<uint16_t>(cOp1, cOp2, fpscr)",
                  "fplibMul<uint32_t>(cOp1, cOp2, fpscr)",
                  "fplibMul<uint64_t>(cOp1, cOp2, fpscr)")
     # The half-precision product must use the 16-bit fplibMul, as fmul
     # above does; it previously used fplibMul<uint32_t> on 16-bit operands.
     buildBinFpOp("fnmul", "FNMul", "FpRegRegRegOp", "FloatMultOp",
                  "fplibNeg<uint16_t>(fplibMul<uint16_t>(cOp1, cOp2, fpscr))",
                  "fplibNeg<uint32_t>(fplibMul<uint32_t>(cOp1, cOp2, fpscr))",
                  "fplibNeg<uint64_t>(fplibMul<uint64_t>(cOp1, cOp2, fpscr))")
     buildBinFpOp("fmin", "FMin", "FpRegRegRegOp", "FloatCmpOp",
                  "fplibMin<uint16_t>(cOp1, cOp2, fpscr)",
                  "fplibMin<uint32_t>(cOp1, cOp2, fpscr)",
                  "fplibMin<uint64_t>(cOp1, cOp2, fpscr)")
     buildBinFpOp("fmax", "FMax", "FpRegRegRegOp", "FloatCmpOp",
                  "fplibMax<uint16_t>(cOp1, cOp2, fpscr)",
                  "fplibMax<uint32_t>(cOp1, cOp2, fpscr)",
                  "fplibMax<uint64_t>(cOp1, cOp2, fpscr)")
     buildBinFpOp("fminnm", "FMinNM", "FpRegRegRegOp", "FloatCmpOp",
                  "fplibMinNum<uint16_t>(cOp1, cOp2, fpscr)",
                  "fplibMinNum<uint32_t>(cOp1, cOp2, fpscr)",
                  "fplibMinNum<uint64_t>(cOp1, cOp2, fpscr)")
     buildBinFpOp("fmaxnm", "FMaxNM", "FpRegRegRegOp", "FloatCmpOp",
                  "fplibMaxNum<uint16_t>(cOp1, cOp2, fpscr)",
                  "fplibMaxNum<uint32_t>(cOp1, cOp2, fpscr)",
                  "fplibMaxNum<uint64_t>(cOp1, cOp2, fpscr)")

     # Emits the H, S and D variants of a one-source FP instruction.
     #   name, Name, base, opClass -- as for InstObjParams; "H"/"S"/"D" is
     #              appended to Name, and base also selects the
     #              "<base>Declare" / "AA64<base>Constructor" templates
     #   halfOp, singleOp, doubleOp -- C++ expressions substituted for
     #              %(op)s; doubleOp falls back to singleOp when omitted
     # Appends to the global header/decoder/exec output strings.
     def buildUnaryFpOp(name, Name, base, opClass,
                        halfOp, singleOp, doubleOp = None):
         global header_output, decoder_output, exec_output
         if doubleOp is None:
             doubleOp = singleOp

         # Build all three parameter objects first, in H, S, D order.
         iops = []
         for suffix, wrapper, op in (("H", halfIntConvCode, halfOp),
                                     ("S", singleIntConvCode, singleOp),
                                     ("D", doubleIntConvCode, doubleOp)):
             iops.append(InstObjParams(name, Name + suffix, base,
                                       {"code": wrapper % {"op": op},
                                        "op_class": opClass}, []))

         declareTempl = eval(base + "Declare")
         constructorTempl = eval("AA64" + base + "Constructor")

         for iop in iops:
             header_output += declareTempl.subst(iop)
             decoder_output += constructorTempl.subst(iop)
             exec_output += BasicExecute.subst(iop)

     buildUnaryFpOp("fsqrt", "FSqrt", "FpRegRegOp", "FloatSqrtOp",
                    "fplibSqrt<uint16_t>(cOp1, fpscr)",
                    "fplibSqrt<uint32_t>(cOp1, fpscr)",
                    "fplibSqrt<uint64_t>(cOp1, fpscr)")

     # Emits the H, S and D variants of a one-source FP instruction.
     #   name, Name, base, opClass -- as for InstObjParams; "H"/"S"/"D" is
     #              appended to Name, and base also selects the
     #              "<base>Declare" / "AA64<base>Constructor" templates
     #   halfOp, singleOp, doubleOp -- C++ expressions substituted for
     #              %(op)s; doubleOp falls back to singleOp when omitted
     #   isIntConv  -- True selects the *IntConvCode wrappers
     # Appends to the global header/decoder/exec output strings.
     def buildSimpleUnaryFpOp(name, Name, base, opClass, halfOp, singleOp,
                              doubleOp = None, isIntConv = True):
         global header_output, decoder_output, exec_output
         if doubleOp is None:
             doubleOp = singleOp

         if isIntConv:
             wrappers = (halfIntConvCode, singleIntConvCode, doubleIntConvCode)
         else:
             # NOTE(review): halfCode/singleCode/doubleCode are not defined
             # in the visible part of this file and no visible caller passes
             # isIntConv=False -- confirm they exist before using this path.
             wrappers = (halfCode, singleCode, doubleCode)

         declareTempl = eval(base + "Declare")
         constructorTempl = eval("AA64" + base + "Constructor")

         for code, op, suffix in zip(wrappers,
                                     (halfOp, singleOp, doubleOp),
                                     "HSD"):
             iop = InstObjParams(name, Name + suffix, base,
                                 {"code": code % {"op": op},
                                  "op_class": opClass}, [])

             header_output += declareTempl.subst(iop)
             decoder_output += constructorTempl.subst(iop)
             exec_output += BasicExecute.subst(iop)

     # Sign manipulation.
     buildSimpleUnaryFpOp("fneg", "FNeg", "FpRegRegOp", "FloatMiscOp",
                          "fplibNeg<uint16_t>(cOp1)",
                          "fplibNeg<uint32_t>(cOp1)",
                          "fplibNeg<uint64_t>(cOp1)")
     buildSimpleUnaryFpOp("fabs", "FAbs", "FpRegRegOp", "FloatMiscOp",
                          "fplibAbs<uint16_t>(cOp1)",
                          "fplibAbs<uint32_t>(cOp1)",
                          "fplibAbs<uint64_t>(cOp1)")

     # Round to integral.  N/P/M/Z/A pass a fixed FPRounding_* mode; I and X
     # pass FPCRRounding(fpscr), and X alone passes true as the third
     # fplibRoundInt argument.
     buildSimpleUnaryFpOp("frintn", "FRIntN", "FpRegRegOp", "FloatMiscOp",
         "fplibRoundInt<uint16_t>(cOp1, FPRounding_TIEEVEN, false, fpscr)",
         "fplibRoundInt<uint32_t>(cOp1, FPRounding_TIEEVEN, false, fpscr)",
         "fplibRoundInt<uint64_t>(cOp1, FPRounding_TIEEVEN, false, fpscr)")
     buildSimpleUnaryFpOp("frintp", "FRIntP", "FpRegRegOp", "FloatMiscOp",
         "fplibRoundInt<uint16_t>(cOp1, FPRounding_POSINF, false, fpscr)",
         "fplibRoundInt<uint32_t>(cOp1, FPRounding_POSINF, false, fpscr)",
         "fplibRoundInt<uint64_t>(cOp1, FPRounding_POSINF, false, fpscr)")
     buildSimpleUnaryFpOp("frintm", "FRIntM", "FpRegRegOp", "FloatMiscOp",
         "fplibRoundInt<uint16_t>(cOp1, FPRounding_NEGINF, false, fpscr)",
         "fplibRoundInt<uint32_t>(cOp1, FPRounding_NEGINF, false, fpscr)",
         "fplibRoundInt<uint64_t>(cOp1, FPRounding_NEGINF, false, fpscr)")
     buildSimpleUnaryFpOp("frintz", "FRIntZ", "FpRegRegOp", "FloatMiscOp",
         "fplibRoundInt<uint16_t>(cOp1, FPRounding_ZERO, false, fpscr)",
         "fplibRoundInt<uint32_t>(cOp1, FPRounding_ZERO, false, fpscr)",
         "fplibRoundInt<uint64_t>(cOp1, FPRounding_ZERO, false, fpscr)")
     buildSimpleUnaryFpOp("frinta", "FRIntA", "FpRegRegOp", "FloatMiscOp",
         "fplibRoundInt<uint16_t>(cOp1, FPRounding_TIEAWAY, false, fpscr)",
         "fplibRoundInt<uint32_t>(cOp1, FPRounding_TIEAWAY, false, fpscr)",
         "fplibRoundInt<uint64_t>(cOp1, FPRounding_TIEAWAY, false, fpscr)")
     buildSimpleUnaryFpOp("frinti", "FRIntI", "FpRegRegOp", "FloatMiscOp",
         "fplibRoundInt<uint16_t>(cOp1, FPCRRounding(fpscr), false, fpscr)",
         "fplibRoundInt<uint32_t>(cOp1, FPCRRounding(fpscr), false, fpscr)",
         "fplibRoundInt<uint64_t>(cOp1, FPCRRounding(fpscr), false, fpscr)")
     buildSimpleUnaryFpOp("frintx", "FRIntX", "FpRegRegOp", "FloatMiscOp",
         "fplibRoundInt<uint16_t>(cOp1, FPCRRounding(fpscr), true, fpscr)",
         "fplibRoundInt<uint32_t>(cOp1, FPCRRounding(fpscr), true, fpscr)",
         "fplibRoundInt<uint64_t>(cOp1, FPCRRounding(fpscr), true, fpscr)")
 }};

 let {{

     header_output = ""
     decoder_output = ""
     exec_output = ""

     # Integer to floating point conversions (mnemonics "ucvtf"/"scvtf").
     # One instruction is generated per combination of source register
     # width (W/X), destination precision (D/S) and signedness (U/S).
     for regL, regOpL, width in (("W", "w", 32), ("X", "d", 64)):
         for isDouble in (True, False):
             # Both variants read the unsigned operand view; the signed one
             # just declares cSrc with a signed C type.
             srcArgs = (width, regL, regOpL)
             for us, usCode in (("U", "uint%d_t cSrc = %sOp1_u%s;" % srcArgs),
                                ("S", "int%d_t  cSrc = %sOp1_u%s;" % srcArgs)):
                 # Third fplibFixedToFP argument: "true" for unsigned.
                 unsignedArg = "true" if us == "U" else "false"

                 fcvtIntFpDCode = vfp64EnabledCheckCode + '''
                     FPSCR fpscr = (FPSCR) FpscrExc;
                     %s
                 ''' % usCode

                 if isDouble:
                     fcvtIntFpDCode += '''
                         uint64_t cDest = fplibFixedToFP<uint64_t>(cSrc, 0,
                             %s, FPCRRounding(fpscr), fpscr);
                         AA64FpDestP0_uw = cDest;
                         AA64FpDestP1_uw = cDest >> 32;
                     ''' % unsignedArg
                 else:
                     fcvtIntFpDCode += '''
                         uint32_t cDest = fplibFixedToFP<uint32_t>(cSrc, 0,
                             %s, FPCRRounding(fpscr), fpscr);
                         AA64FpDestP0_uw = cDest;
                         AA64FpDestP1_uw = 0;
                     ''' % unsignedArg
                 fcvtIntFpDCode += '''
                     AA64FpDestP2_uw = 0;
                     AA64FpDestP3_uw = 0;
                     FpscrExc = fpscr;
                 '''

                 instName = "Fcvt%s%sIntFp%s" % (regL, us,
                                                 "D" if isDouble else "S")
                 mnem = "%scvtf" % us.lower()
                 fcvtIntFpDIop = InstObjParams(
                     mnem, instName, "FpRegRegOp",
                     {"code": fcvtIntFpDCode, "op_class": "FloatCvtOp"}, [])
                 header_output += FpRegRegOpDeclare.subst(fcvtIntFpDIop)
                 decoder_output += \
                     AA64FpRegRegOpConstructor.subst(fcvtIntFpDIop)
                 exec_output += BasicExecute.subst(fcvtIntFpDIop)

     # Generates the floating point to integer conversion instructions in
     # various variants, e.g. signed/unsigned.
     #   isDouble -- source is double precision (else single)
     #   isSigned -- result is signed; selects the "s"/"u" mnemonic suffix
     #               and passes "false"/"true" as the unsigned flag
     #   isXReg   -- destination is an X register (else W)
     # One instruction is emitted per rounding mode (N, P, M, Z, A).
     def buildFpCvtIntOp(isDouble, isSigned, isXReg):
         global header_output, decoder_output, exec_output

         destReg = "X" if isXReg else "W"
         fpBits = "64" if isDouble else "32"
         intBits = "64" if isXReg else "32"
         unsignedArg = "false" if isSigned else "true"

         for rmode, roundingMode in [["N", "FPRounding_TIEEVEN"],
                                     ["P", "FPRounding_POSINF"],
                                     ["M", "FPRounding_NEGINF"],
                                     ["Z", "FPRounding_ZERO"],
                                     ["A", "FPRounding_TIEAWAY"]]:
             fcvtFpIntCode = vfp64EnabledCheckCode + '''
                 FPSCR fpscr = (FPSCR) FpscrExc;'''
             if isDouble:
                 fcvtFpIntCode += '''
                 uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
                 '''
             else:
                 fcvtFpIntCode += "uint32_t cOp1 = AA64FpOp1P0_uw;"

             fcvtFpIntCode += '''
                 %sDest = fplibFPToFixed<uint%s_t, uint%s_t>(cOp1, 0, %s, %s, fpscr);
                 FpscrExc = fpscr;
             ''' % (destReg, fpBits, intBits, unsignedArg, roundingMode)

             instName = "FcvtFp%sInt%s%s%s" % ("S" if isSigned else "U",
                                               destReg,
                                               "D" if isDouble else "S",
                                               rmode)
             mnem = "fcvt%s%s" % (rmode, "s" if isSigned else "u")
             fcvtFpIntIop = InstObjParams(mnem, instName, "FpRegRegOp",
                                          { "code": fcvtFpIntCode,
                                            "op_class": "FloatCvtOp" }, [])
             header_output  += FpRegRegOpDeclare.subst(fcvtFpIntIop)
             # Use the AA64 constructor template like every other FpRegRegOp
             # instruction in this file; this helper previously used the
             # non-AA64 FpRegRegOpConstructor.
             # NOTE(review): the templates are defined outside this file and
             # are assumed to share a constructor signature -- confirm.
             decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpIntIop)
             exec_output    += BasicExecute.subst(fcvtFpIntIop)

     # Now actually do the building with the different variants
     for isDouble in True, False:
         for isSigned in True, False:
             for isXReg in True, False:
                 buildFpCvtIntOp(isDouble, isSigned, isXReg)

     # FCVT single -> double: convert word 0 of the source with the rounding
     # mode taken from fpscr and write the 64-bit result to words 1:0.
     fcvtFpSFpDCode = vfp64EnabledCheckCode + '''
         FPSCR fpscr = (FPSCR) FpscrExc;
         uint64_t cDest = fplibConvert<uint32_t, uint64_t>(AA64FpOp1P0_uw,
             FPCRRounding(fpscr), fpscr);
         AA64FpDestP0_uw = cDest;
         AA64FpDestP1_uw = cDest >> 32;
         AA64FpDestP2_uw = 0;
         AA64FpDestP3_uw = 0;
         FpscrExc = fpscr;
     '''
     fcvtFpSFpDIop = InstObjParams(
         "fcvt", "FCvtFpSFpD", "FpRegRegOp",
         {"code": fcvtFpSFpDCode, "op_class": "FloatCvtOp"}, [])
     header_output += FpRegRegOpDeclare.subst(fcvtFpSFpDIop)
     decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpSFpDIop)
     exec_output += BasicExecute.subst(fcvtFpSFpDIop)

     # FCVT double -> single: assemble the 64-bit source from words 1:0 and
     # write the 32-bit result to word 0, clearing the rest.
     fcvtFpDFpSCode = vfp64EnabledCheckCode + '''
         FPSCR fpscr = (FPSCR) FpscrExc;
         uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
         AA64FpDestP0_uw = fplibConvert<uint64_t, uint32_t>(cOp1,
             FPCRRounding(fpscr), fpscr);
         AA64FpDestP1_uw = 0;
         AA64FpDestP2_uw = 0;
         AA64FpDestP3_uw = 0;
         FpscrExc = fpscr;
     '''
     fcvtFpDFpSIop = InstObjParams(
         "fcvt", "FcvtFpDFpS", "FpRegRegOp",
         {"code": fcvtFpDFpSCode, "op_class": "FloatCvtOp"}, [])
     header_output += FpRegRegOpDeclare.subst(fcvtFpDFpSIop)
     decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpDFpSIop)
     exec_output += BasicExecute.subst(fcvtFpDFpSIop)

     # Half precision to single or double precision conversion.  Generates
     # FcvtFpHFpD and FcvtFpHFpS.
     for isDouble in (True, False):
         destBits = "64" if isDouble else "32"
         code = vfp64EnabledCheckCode + '''
             FPSCR fpscr = (FPSCR) FpscrExc;
             %s cDest = fplibConvert<uint16_t, uint%s_t>(AA64FpOp1P0_uw,
                 FPCRRounding(fpscr), fpscr);
         ''' % ("uint%s_t" % destBits, destBits)
         # A double result spans words 1:0; a single result only word 0.
         if isDouble:
             code += '''
                 AA64FpDestP0_uw = cDest;
                 AA64FpDestP1_uw = cDest >> 32;
             '''
         else:
             code += '''
                 AA64FpDestP0_uw = cDest;
                 AA64FpDestP1_uw = 0;
             '''
         code += '''
             AA64FpDestP2_uw = 0;
             AA64FpDestP3_uw = 0;
             FpscrExc = fpscr;
         '''

         instName = "FcvtFpHFp%s" % ("D" if isDouble else "S")
         fcvtFpHFpIop = InstObjParams(
             "fcvt", instName, "FpRegRegOp",
             {"code": code, "op_class": "FloatCvtOp"}, [])
         header_output += FpRegRegOpDeclare.subst(fcvtFpHFpIop)
         decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpHFpIop)
         exec_output += BasicExecute.subst(fcvtFpHFpIop)

     # Single or double precision to half precision conversion.  Generates
     # FcvtFpDFpH and FcvtFpSFpH.
     for isDouble in (True, False):
         if isDouble:
             srcDecl = ("uint64_t cOp1 = AA64FpOp1P0_uw | "
                        "(uint64_t)AA64FpOp1P1_uw << 32")
             srcBits = "64"
         else:
             srcDecl = "uint32_t cOp1 = AA64FpOp1P0_uw"
             srcBits = "32"

         code = vfp64EnabledCheckCode + '''
             FPSCR fpscr = (FPSCR) FpscrExc;
             %s;
             AA64FpDestP0_uw = fplibConvert<uint%s_t, uint16_t>(cOp1,
                 FPCRRounding(fpscr), fpscr);
             AA64FpDestP1_uw = 0;
             AA64FpDestP2_uw = 0;
             AA64FpDestP3_uw = 0;
             FpscrExc = fpscr;
         ''' % (srcDecl, srcBits)

         instName = "FcvtFp%sFpH" % ("D" if isDouble else "S")
         fcvtFpFpHIop = InstObjParams(
             "fcvt", instName, "FpRegRegOp",
             {"code": code, "op_class": "FloatCvtOp"}, [])
         header_output += FpRegRegOpDeclare.subst(fcvtFpFpHIop)
         decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpFpHIop)
         exec_output += BasicExecute.subst(fcvtFpFpHIop)

     # Generate one floating point compare instruction for the requested
     # combination of quiet/signalling ("E" suffix), double/single precision
     # and immediate/register second operand. The generated declaration,
     # constructor and execute text is appended to the global output strings.
     def buildFCmpOp(isQuiet, isDouble, isImm):
         global header_output, decoder_output, exec_output

         # Per-precision pieces: the C++ type holding the raw operand bits,
         # the fplibCompare template width, and the expressions that assemble
         # each operand from its P0/P1 pieces.
         if isDouble:
             bitsType, bitsWidth = "uint64_t", "64"
             firstExpr = "AA64FpDestP0_uw | (uint64_t)AA64FpDestP1_uw << 32"
             secondRegExpr = "AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32"
         else:
             bitsType, bitsWidth = "uint32_t", "32"
             firstExpr = "AA64FpDestP0_uw"
             secondRegExpr = "AA64FpOp1P0_uw"

         # Third argument handed to fplibCompare: "true" for the "E" variants.
         signalFlag = "false" if isQuiet else "true"

         # Note that the first compare operand is read through the
         # AA64FpDest operand names, the second through AA64FpOp1 (or imm).
         fcmpCode = vfp64EnabledCheckCode + ('''
             FPSCR fpscr = (FPSCR) FpscrExc;
             %s cOp1 = %s;
         ''' % (bitsType, firstExpr))
         if isImm:
             fcmpCode += '''
                 %s cOp2 = imm;
             ''' % bitsType
         else:
             fcmpCode += '''
                 %s cOp2  = %s;
             ''' % (bitsType, secondRegExpr)
         fcmpCode += '''
             int cc = fplibCompare<uint%s_t>(cOp1, cOp2, %s, fpscr);
             CondCodesNZ = cc >> 2 & 3;
             CondCodesC = cc >> 1 & 1;
             CondCodesV = cc & 1;
             FpCondCodes = fpscr & FpCondCodesMask;
             FpscrExc    = fpscr;
         ''' % (bitsWidth, signalFlag)

         # Select the base class suffix and the matching templates directly
         # instead of building their names as strings.
         if isImm:
             typeName = "Imm"
             declareTemp = FpRegImmOpDeclare
             constructorTemp = AA64FpRegImmOpConstructor
         else:
             typeName = "Reg"
             declareTemp = FpRegRegOpDeclare
             constructorTemp = AA64FpRegRegOpConstructor

         mnemonic = "fcmp%s" % ("" if isQuiet else "e")
         instName = "FCmp%s%s%s" % ("" if isQuiet else "E", typeName,
                                    "D" if isDouble else "S")
         fcmpIop = InstObjParams(mnemonic, instName,
                                 "FpReg%sOp" % typeName,
                                 {"code":     fcmpCode,
                                  "op_class": "FloatCmpOp"}, [])

         header_output  += declareTemp.subst(fcmpIop)
         decoder_output += constructorTemp.subst(fcmpIop)
         exec_output    += BasicExecute.subst(fcmpIop)

     # Instantiate every quiet/precision/operand-kind combination of the
     # compare instruction (eight variants), in a fixed order.
     for isQuiet in (True, False):
         for isDouble in (True, False):
             for isImm in (True, False):
                 buildFCmpOp(isQuiet, isDouble, isImm)

     # Generate one conditional floating point compare instruction for the
     # requested quiet/signalling ("E" suffix) and double/single precision
     # combination, appending the generated text to the global outputs.
     def buildFCCmpOp(isQuiet, isDouble):
         global header_output, decoder_output, exec_output

         # Per-precision pieces: raw-bits C++ type, fplibCompare template
         # width, and the expressions that assemble both source operands.
         if isDouble:
             bitsType, bitsWidth = "uint64_t", "64"
             firstExpr = "AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32"
             secondExpr = "AA64FpOp2P0_uw | (uint64_t)AA64FpOp2P1_uw << 32"
         else:
             bitsType, bitsWidth = "uint32_t", "32"
             firstExpr = "AA64FpOp1P0_uw"
             secondExpr = "AA64FpOp2P0_uw"

         # Third argument handed to fplibCompare: "true" for the "E" variants.
         signalFlag = "false" if isQuiet else "true"

         # When the predicate holds the flags come from the comparison,
         # otherwise they are taken from the bits of defCc.
         fccmpCode = vfp64EnabledCheckCode + ('''
             FPSCR fpscr = (FPSCR) FpscrExc;
             if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) {
                 %s cOp1 = %s;
                 %s cOp2 = %s;
                 int cc = fplibCompare<uint%s_t>(cOp1, cOp2, %s, fpscr);
                 CondCodesNZ = cc >> 2 & 3;
                 CondCodesC = cc >> 1 & 1;
                 CondCodesV = cc & 1;
             } else {
                 CondCodesNZ = (defCc >> 2) & 0x3;
                 CondCodesC  = (defCc >> 1) & 0x1;
                 CondCodesV  = defCc & 0x1;
             }
             FpCondCodes = fpscr & FpCondCodesMask;
             FpscrExc    = fpscr;
         ''' % (bitsType, firstExpr,
                bitsType, secondExpr,
                bitsWidth, signalFlag))

         mnemonic = "fccmp%s" % ("" if isQuiet else "e")
         instName = "FCCmp%sReg%s" % ("" if isQuiet else "E",
                                      "D" if isDouble else "S")
         fccmpIop = InstObjParams(mnemonic, instName, "FpCondCompRegOp",
                                  {"code":           fccmpCode,
                                   "op_class":       "FloatCmpOp"}, [])
         header_output  += DataXCondCompRegDeclare.subst(fccmpIop)
         decoder_output += DataXCondCompRegConstructor.subst(fccmpIop)
         exec_output    += BasicExecute.subst(fccmpIop)

     # Instantiate the four quiet/precision combinations of the conditional
     # compare instruction, in a fixed order.
     for isQuiet in (True, False):
         for isDouble in (True, False):
             buildFCCmpOp(isQuiet, isDouble)

 }};

 let {{

     # Start this let block with empty output strings; the builder functions
     # defined below append their generated text to these globals.
     header_output = ""
     decoder_output = ""
     exec_output = ""

     # Generate one floating point to fixed point conversion instruction
     # (fcvtzs / fcvtzu) for the requested signedness, source precision and
     # destination register width, appending it to the global outputs.
     def buildFpCvtFixedOp(isSigned, isDouble, isXReg):
         global header_output, decoder_output, exec_output

         # Common prologue: enable check followed by a local FPSCR copy.
         prologue = vfp64EnabledCheckCode + '''
             FPSCR fpscr = (FPSCR) FpscrExc;
         '''

         # Load the raw source bits into cOp1.
         if isDouble:
             loadSrc = '''
                 uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
             '''
         else:
             loadSrc = "uint32_t cOp1 = AA64FpOp1P0_uw;"

         destPrefix = "X" if isXReg else "W"
         srcWidth = "64" if isDouble else "32"
         destWidth = "64" if isXReg else "32"
         # Fourth fplibFPToFixed argument: "false" for the signed variants.
         unsignedFlag = "false" if isSigned else "true"

         # The conversion always uses FPRounding_ZERO and passes 64 - imm as
         # the second argument.
         convert = '''
             %sDest = fplibFPToFixed<uint%s_t, uint%s_t>(cOp1, 64 - imm, %s,
                 FPRounding_ZERO, fpscr);
             FpscrExc = fpscr;
         ''' % (destPrefix, srcWidth, destWidth, unsignedFlag)

         fcvtFpFixedCode = prologue + loadSrc + convert

         instName = "FcvtFp%sFixed%s%s" % ("S" if isSigned else "U",
                                           "D" if isDouble else "S",
                                           destPrefix)
         mnem = "fcvtz%s" % ("s" if isSigned else "u")
         fcvtFpFixedIop = InstObjParams(mnem, instName, "FpRegRegImmOp",
                                        { "code": fcvtFpFixedCode,
                                          "op_class": "FloatCvtOp" }, [])
         header_output  += FpRegRegImmOpDeclare.subst(fcvtFpFixedIop)
         decoder_output += AA64FpRegRegImmOpConstructor.subst(fcvtFpFixedIop)
         exec_output    += BasicExecute.subst(fcvtFpFixedIop)

     # Generate one fixed point to floating point conversion instruction
     # (scvtf / ucvtf) for the requested signedness, result precision and
     # source register width, appending it to the global outputs.
     def buildFixedCvtFpOp(isSigned, isDouble, isXReg):
         global header_output, decoder_output, exec_output

         srcRegType = "X" if isXReg else "W"
         resultType = "uint64_t" if isDouble else "uint32_t"
         resultWidth = "64" if isDouble else "32"
         # The integer source is cast to (u)int32_t / (u)int64_t first.
         castSign = "int" if isSigned else "uint"
         castWidth = "64" if isXReg else "32"
         # Third fplibFixedToFP argument: "false" for the signed variants.
         unsignedFlag = "false" if isSigned else "true"

         convert = vfp64EnabledCheckCode + ('''
             FPSCR fpscr = (FPSCR) FpscrExc;
             %s result = fplibFixedToFP<uint%s_t>((%s%s_t)%sOp1, 64 - imm,
                 %s, FPCRRounding(fpscr), fpscr);
         ''' % (resultType, resultWidth, castSign, castWidth, srcRegType,
                unsignedFlag))

         # Write the result into P0/P1; single precision zeroes P1.
         if isDouble:
             storeLow = '''
                 AA64FpDestP0_uw = result;
                 AA64FpDestP1_uw = result >> 32;
             '''
         else:
             storeLow = '''
                 AA64FpDestP0_uw = result;
                 AA64FpDestP1_uw = 0;
             '''

         # P2 and P3 of the destination are always cleared.
         epilogue = '''
             AA64FpDestP2_uw = 0;
             AA64FpDestP3_uw = 0;
             FpscrExc = fpscr;
         '''

         fcvtFixedFpCode = convert + storeLow + epilogue

         instName = "Fcvt%sFixedFp%s%s" % ("S" if isSigned else "U",
                                           "D" if isDouble else "S",
                                           srcRegType)
         mnem = "%scvtf" % ("s" if isSigned else "u")
         fcvtFixedFpIop = InstObjParams(mnem, instName, "FpRegRegImmOp",
                                        { "code":     fcvtFixedFpCode,
                                          "op_class": "FloatCvtOp" }, [])
         header_output  += FpRegRegImmOpDeclare.subst(fcvtFixedFpIop)
         # NOTE(review): this uses FpRegRegImmOpConstructor, whereas
         # buildFpCvtFixedOp uses AA64FpRegRegImmOpConstructor -- confirm
         # whether the non-AA64 template is intended here.
         decoder_output += FpRegRegImmOpConstructor.subst(fcvtFixedFpIop)
         exec_output    += BasicExecute.subst(fcvtFixedFpIop)

     # Instantiate both conversion directions for every register width,
     # precision and signedness combination, in a fixed order.
     for isXReg in (True, False):
         for isDouble in (True, False):
             for isSigned in (True, False):
                 buildFpCvtFixedOp(isSigned, isDouble, isXReg)
                 buildFixedCvtFpOp(isSigned, isDouble, isXReg)
 }};

 let {{

     # Start this let block with empty output strings; the loop below appends
     # the generated fcsel text to them.
     header_output  = ""
     decoder_output = ""
     exec_output    = ""

     # Floating point conditional select (fcsel): FCSelD for double and
     # FCSelS for single precision. The destination receives Op1 when the
     # condition holds and Op2 otherwise.
     for isDouble in True, False:
         # Begin with the same enable check snippet that every other
         # instruction body built in this file is prefixed with; fcsel is a
         # floating point instruction and was the only one generated without
         # it (presumably the snippet faults when FP access is disabled).
         code = vfp64EnabledCheckCode + '''
             if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) {
                 AA64FpDestP0_uw = AA64FpOp1P0_uw;
         '''
         if isDouble:
             # Double precision copies both P0 and P1 from the selected
             # source operand.
             code += '''
                     AA64FpDestP1_uw = AA64FpOp1P1_uw;
                 } else {
                     AA64FpDestP0_uw = AA64FpOp2P0_uw;
                     AA64FpDestP1_uw = AA64FpOp2P1_uw;
                 }
             '''
         else:
             # Single precision copies only P0 and clears P1.
             code += '''
                 } else {
                     AA64FpDestP0_uw = AA64FpOp2P0_uw;
                 }
                 AA64FpDestP1_uw = 0;
             '''
         # P2 and P3 of the destination are always cleared.
         code += '''
             AA64FpDestP2_uw = 0;
             AA64FpDestP3_uw = 0;
         '''

         iop = InstObjParams("fcsel", "FCSel%s" %("D" if isDouble else "S"),
                             "FpCondSelOp", { "code":     code,
                                              "op_class": "FloatCvtOp" })
         header_output  += DataXCondSelDeclare.subst(iop)
         decoder_output += DataXCondSelConstructor.subst(iop)
         exec_output    += BasicExecute.subst(iop)
 }};