blob: 7decbac2577589a25b9dc39cda531ec650a2b885 [file] [log] [blame]
// -*- mode:c++ -*-
// Copyright (c) 2012-2013, 2016-2018 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
// not be construed as granting a license to any other intellectual
// property including but not limited to intellectual property relating
// to a hardware implementation of the functionality of the software
// licensed hereunder. You may use the software subject to the license
// terms below provided that you ensure that this notice is replicated
// unmodified and in its entirety in all distributions of the software,
// modified or unmodified, in source code or in binary form.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: Thomas Grocutt
// Edmund Grimley Evans
let {{
# Accumulators for the generated C++ text of this let block. Every
# instruction below appends its class declaration, constructor and execute
# method to them via the corresponding template's subst().
header_output = ""
decoder_output = ""
exec_output = ""
# C++ snippet appended to the code of instructions that write AA64FpDest.
# The %s is replaced with the destination operand name; the helper is passed
# the current SVE vector length.
# NOTE(review): presumably clears the part of the vector register above the
# four 32-bit words written explicitly - confirm against
# ArmISA::ISA::zeroSveVecRegUpperPart.
zeroSveVecRegUpperPartCode = '''
ArmISA::ISA::zeroSveVecRegUpperPart(%s,
ArmStaticInst::getCurSveVecLen<uint64_t>(xc->tcBase()));
'''
# FmovImmS: low 32 bits of the immediate go to word 0 of the destination,
# words 1-3 are zeroed.
fmovImmSCode = vfp64EnabledCheckCode + '''
AA64FpDestP0_uw = bits(imm, 31, 0);
AA64FpDestP1_uw = 0;
AA64FpDestP2_uw = 0;
AA64FpDestP3_uw = 0;
'''
fmovImmSIop = InstObjParams("fmov", "FmovImmS", "FpRegImmOp",
{ "code": fmovImmSCode,
"op_class": "FloatMiscOp" }, [])
fmovImmSIop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest"
header_output += FpRegImmOpDeclare.subst(fmovImmSIop);
# NOTE(review): this is the only fmov variant that uses the constructor
# template without the "AA64" prefix (FmovImmD uses
# AA64FpRegImmOpConstructor) - confirm whether that is intentional.
decoder_output += FpRegImmOpConstructor.subst(fmovImmSIop);
exec_output += BasicExecute.subst(fmovImmSIop);
# FmovImmD: the 64-bit immediate is split across words 0 and 1, words 2-3
# are zeroed.
fmovImmDCode = vfp64EnabledCheckCode + '''
AA64FpDestP0_uw = bits(imm, 31, 0);
AA64FpDestP1_uw = bits(imm, 63, 32);
AA64FpDestP2_uw = 0;
AA64FpDestP3_uw = 0;
'''
fmovImmDIop = InstObjParams("fmov", "FmovImmD", "FpRegImmOp",
{ "code": fmovImmDCode,
"op_class": "FloatMiscOp" }, [])
fmovImmDIop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest"
header_output += FpRegImmOpDeclare.subst(fmovImmDIop);
decoder_output += AA64FpRegImmOpConstructor.subst(fmovImmDIop);
exec_output += BasicExecute.subst(fmovImmDIop);
# FmovRegS: copies word 0 of the FP source to the destination and zeroes
# words 1-3.
fmovRegSCode = vfp64EnabledCheckCode + '''
AA64FpDestP0_uw = AA64FpOp1P0_uw;
AA64FpDestP1_uw = 0;
AA64FpDestP2_uw = 0;
AA64FpDestP3_uw = 0;
'''
fmovRegSIop = InstObjParams("fmov", "FmovRegS", "FpRegRegOp",
{ "code": fmovRegSCode,
"op_class": "FloatMiscOp" }, [])
fmovRegSIop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest"
header_output += FpRegRegOpDeclare.subst(fmovRegSIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegSIop);
exec_output += BasicExecute.subst(fmovRegSIop);
# FmovRegD: copies words 0 and 1 of the FP source and zeroes words 2-3.
fmovRegDCode = vfp64EnabledCheckCode + '''
AA64FpDestP0_uw = AA64FpOp1P0_uw;
AA64FpDestP1_uw = AA64FpOp1P1_uw;
AA64FpDestP2_uw = 0;
AA64FpDestP3_uw = 0;
'''
fmovRegDIop = InstObjParams("fmov", "FmovRegD", "FpRegRegOp",
{ "code": fmovRegDCode,
"op_class": "FloatMiscOp" }, [])
fmovRegDIop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest"
header_output += FpRegRegOpDeclare.subst(fmovRegDIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegDIop);
exec_output += BasicExecute.subst(fmovRegDIop);
# FmovCoreRegW: moves the WOp1 operand into word 0 of the FP destination
# and zeroes words 1-3.
fmovCoreRegWCode = vfp64EnabledCheckCode + '''
AA64FpDestP0_uw = WOp1_uw;
AA64FpDestP1_uw = 0;
AA64FpDestP2_uw = 0;
AA64FpDestP3_uw = 0;
'''
fmovCoreRegWIop = InstObjParams("fmov", "FmovCoreRegW", "FpRegRegOp",
{ "code": fmovCoreRegWCode,
"op_class": "FloatMiscOp" }, [])
fmovCoreRegWIop.snippets["code"] += zeroSveVecRegUpperPartCode % \
"AA64FpDest"
header_output += FpRegRegOpDeclare.subst(fmovCoreRegWIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegWIop);
exec_output += BasicExecute.subst(fmovCoreRegWIop);
# FmovCoreRegX: splits the 64-bit XOp1 operand across words 0 and 1 of the
# FP destination and zeroes words 2-3.
fmovCoreRegXCode = vfp64EnabledCheckCode + '''
AA64FpDestP0_uw = XOp1_ud;
AA64FpDestP1_uw = XOp1_ud >> 32;
AA64FpDestP2_uw = 0;
AA64FpDestP3_uw = 0;
'''
fmovCoreRegXIop = InstObjParams("fmov", "FmovCoreRegX", "FpRegRegOp",
{ "code": fmovCoreRegXCode,
"op_class": "FloatMiscOp" }, [])
fmovCoreRegXIop.snippets["code"] += zeroSveVecRegUpperPartCode % \
"AA64FpDest"
header_output += FpRegRegOpDeclare.subst(fmovCoreRegXIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegXIop);
exec_output += BasicExecute.subst(fmovCoreRegXIop);
# FmovUCoreRegX: writes the 64-bit XOp1 operand into words 2 and 3 of the
# FP destination. Words 0 and 1 are assigned to themselves so the previous
# value is kept (see the comment inside the snippet).
fmovUCoreRegXCode = vfp64EnabledCheckCode + '''
/* Explicitly merge with previous value */
AA64FpDestP0_uw = AA64FpDestP0_uw;
AA64FpDestP1_uw = AA64FpDestP1_uw;
AA64FpDestP2_uw = XOp1_ud;
AA64FpDestP3_uw = XOp1_ud >> 32;'''
fmovUCoreRegXIop = InstObjParams("fmov", "FmovUCoreRegX", "FpRegRegOp",
{ "code": fmovUCoreRegXCode,
"op_class": "FloatMiscOp" }, [])
fmovUCoreRegXIop.snippets["code"] += zeroSveVecRegUpperPartCode % \
"AA64FpDest"
header_output += FpRegRegOpDeclare.subst(fmovUCoreRegXIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fmovUCoreRegXIop);
exec_output += BasicExecute.subst(fmovUCoreRegXIop);
# FmovRegCoreW: word 0 of the FP source is written to WDest. The destination
# is not an FP register, so no zeroSveVecRegUpperPartCode is appended.
fmovRegCoreWCode = vfp64EnabledCheckCode + '''
WDest = AA64FpOp1P0_uw;
'''
fmovRegCoreWIop = InstObjParams("fmov", "FmovRegCoreW", "FpRegRegOp",
{ "code": fmovRegCoreWCode,
"op_class": "FloatMiscOp" }, [])
header_output += FpRegRegOpDeclare.subst(fmovRegCoreWIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreWIop);
exec_output += BasicExecute.subst(fmovRegCoreWIop);
# FmovRegCoreX: words 1:0 of the FP source are combined into a 64-bit value
# written to XDest.
fmovRegCoreXCode = vfp64EnabledCheckCode + '''
XDest = ( ((uint64_t) AA64FpOp1P1_uw) << 32) | AA64FpOp1P0_uw;
'''
fmovRegCoreXIop = InstObjParams("fmov", "FmovRegCoreX", "FpRegRegOp",
{ "code": fmovRegCoreXCode,
"op_class": "FloatMiscOp" }, [])
header_output += FpRegRegOpDeclare.subst(fmovRegCoreXIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreXIop);
exec_output += BasicExecute.subst(fmovRegCoreXIop);
# FmovURegCoreX: words 3:2 of the FP source are combined into a 64-bit value
# written to XDest.
fmovURegCoreXCode = vfp64EnabledCheckCode + '''
XDest = ( ((uint64_t) AA64FpOp1P3_uw) << 32) | AA64FpOp1P2_uw;
'''
fmovURegCoreXIop = InstObjParams("fmov", "FmovURegCoreX", "FpRegRegOp",
{ "code": fmovURegCoreXCode,
"op_class": "FloatMiscOp" }, [])
header_output += FpRegRegOpDeclare.subst(fmovURegCoreXIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fmovURegCoreXIop);
exec_output += BasicExecute.subst(fmovURegCoreXIop);
}};
let {{
# Fresh accumulators for the C++ text generated by this let block.
header_output = ""
decoder_output = ""
exec_output = ""
# The *IntConvCode templates below all follow the same pattern: load the
# source operand(s) into integer variables of the given width, evaluate the
# expression substituted for %(op)s into cDest, write cDest to the low
# word(s) of the destination, zero the remaining words and write the local
# fpscr copy back to FpscrExc. The "2" variants also load a second operand
# (cOp2).
#
# Half precision, one source operand (uint16_t).
halfIntConvCode = vfp64EnabledCheckCode + '''
FPSCR fpscr = (FPSCR) FpscrExc;
uint16_t cOp1 = AA64FpOp1P0_uw;
uint16_t cDest = %(op)s;
AA64FpDestP0_uw = cDest;
AA64FpDestP1_uw = 0;
AA64FpDestP2_uw = 0;
AA64FpDestP3_uw = 0;
FpscrExc = fpscr;
'''
# Half precision, two source operands.
halfIntConvCode2 = vfp64EnabledCheckCode + '''
FPSCR fpscr = (FPSCR) FpscrExc;
uint16_t cOp1 = AA64FpOp1P0_uw;
uint16_t cOp2 = AA64FpOp2P0_uw;
uint16_t cDest = %(op)s;
AA64FpDestP0_uw = cDest;
AA64FpDestP1_uw = 0;
AA64FpDestP2_uw = 0;
AA64FpDestP3_uw = 0;
FpscrExc = fpscr;
'''
# Expression templates with a %(func)s placeholder built around
# binaryOp()/unaryOp(). None of the *BinOp / *UnaryOp strings in this block
# is referenced by the code visible in this file.
halfBinOp = "binaryOp(fpscr, AA64FpOp1P0, AA64FpOp2P0," + \
"%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode)"
halfUnaryOp = "unaryOp(fpscr, AA64FpOp1P0," + \
"%(func)s, fpscr.fz, fpscr.rMode)"
# Single precision, one source operand (uint32_t).
singleIntConvCode = vfp64EnabledCheckCode + '''
FPSCR fpscr = (FPSCR) FpscrExc;
uint32_t cOp1 = AA64FpOp1P0_uw;
uint32_t cDest = %(op)s;
AA64FpDestP0_uw = cDest;
AA64FpDestP1_uw = 0;
AA64FpDestP2_uw = 0;
AA64FpDestP3_uw = 0;
FpscrExc = fpscr;
'''
# Single precision, two source operands.
singleIntConvCode2 = vfp64EnabledCheckCode + '''
FPSCR fpscr = (FPSCR) FpscrExc;
uint32_t cOp1 = AA64FpOp1P0_uw;
uint32_t cOp2 = AA64FpOp2P0_uw;
uint32_t cDest = %(op)s;
AA64FpDestP0_uw = cDest;
AA64FpDestP1_uw = 0;
AA64FpDestP2_uw = 0;
AA64FpDestP3_uw = 0;
FpscrExc = fpscr;
'''
singleBinOp = "binaryOp(fpscr, AA64FpOp1P0, AA64FpOp2P0," + \
"%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode)"
singleUnaryOp = "unaryOp(fpscr, AA64FpOp1P0, %(func)s, fpscr.fz, fpscr.rMode)"
# Double precision, one source operand: the 64-bit value is assembled from
# source words 1:0 and the result is split across destination words 1:0.
doubleIntConvCode = vfp64EnabledCheckCode + '''
FPSCR fpscr = (FPSCR) FpscrExc;
uint64_t cOp1 = ((uint64_t) AA64FpOp1P1_uw) << 32 | AA64FpOp1P0_uw;
uint64_t cDest = %(op)s;
AA64FpDestP0_uw = cDest & 0xFFFFFFFF;
AA64FpDestP1_uw = cDest >> 32;
AA64FpDestP2_uw = 0;
AA64FpDestP3_uw = 0;
FpscrExc = fpscr;
'''
# Double precision, two source operands.
doubleIntConvCode2 = vfp64EnabledCheckCode + '''
FPSCR fpscr = (FPSCR) FpscrExc;
uint64_t cOp1 = ((uint64_t) AA64FpOp1P1_uw) << 32 | AA64FpOp1P0_uw;
uint64_t cOp2 = ((uint64_t) AA64FpOp2P1_uw) << 32 | AA64FpOp2P0_uw;
uint64_t cDest = %(op)s;
AA64FpDestP0_uw = cDest & 0xFFFFFFFF;
AA64FpDestP1_uw = cDest >> 32;
AA64FpDestP2_uw = 0;
AA64FpDestP3_uw = 0;
FpscrExc = fpscr;
'''
# Double-precision expression templates. Note that doubleBinOp ends with a
# ';' while doubleUnaryOp does not.
doubleBinOp = '''
binaryOp(fpscr, dbl(AA64FpOp1P0_uw, AA64FpOp1P1_uw),
dbl(AA64FpOp2P0_uw, AA64FpOp2P1_uw),
%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode);
'''
doubleUnaryOp = '''
unaryOp(fpscr, dbl(AA64FpOp1P0_uw, AA64FpOp1P1_uw), %(func)s,
fpscr.fz, fpscr.rMode)
'''
# Emits the double ("D"), single ("S") and half ("H") precision variants, in
# that order, of a three-source FP instruction.
#   name    - class name stem; the lower-cased stem is used as the mnemonic
#             and the precision suffix is appended to form the class name
#   opClass - op_class string stored in the instruction parameters
#   hOp/sOp/dOp - C++ expressions over cOp1, cOp2, cOp3 and fpscr whose
#             value is assigned to cDest for the respective precision
# Each variant loads the three operands, evaluates the expression, writes the
# result to the low word(s) of AA64FpDest, zeroes the remaining words and
# writes fpscr back to FpscrExc.
def buildTernaryFpOp(name, opClass, hOp, sOp, dOp):
    global header_output, decoder_output, exec_output

    # Common start and end of every variant's code.
    prologue = vfp64EnabledCheckCode + "\nFPSCR fpscr = (FPSCR) FpscrExc;\n"
    epilogue = ("\nAA64FpDestP2_uw = 0;\n"
                "AA64FpDestP3_uw = 0;\n"
                "FpscrExc = fpscr;\n")

    # Operands that fit in one 32-bit word (half and single precision).
    narrowBody = '''
%(ctype)s cOp1 = AA64FpOp1P0_uw;
%(ctype)s cOp2 = AA64FpOp2P0_uw;
%(ctype)s cOp3 = AA64FpOp3P0_uw;
%(ctype)s cDest;
cDest = %(op)s;
AA64FpDestP0_uw = cDest;
AA64FpDestP1_uw = 0;
'''
    # 64-bit operands assembled from two words; result split over two words.
    wideBody = '''
uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
uint64_t cOp2 = AA64FpOp2P0_uw | (uint64_t)AA64FpOp2P1_uw << 32;
uint64_t cOp3 = AA64FpOp3P0_uw | (uint64_t)AA64FpOp3P1_uw << 32;
uint64_t cDest;
cDest = %(op)s;
AA64FpDestP0_uw = cDest;
AA64FpDestP1_uw = cDest >> 32;
'''

    variants = (("D", wideBody, {"op": dOp}),
                ("S", narrowBody, {"ctype": "uint32_t", "op": sOp}),
                ("H", narrowBody, {"ctype": "uint16_t", "op": hOp}))

    for suffix, body, fields in variants:
        code = prologue + body % fields + epilogue
        iop = InstObjParams(name.lower(), name + suffix,
                            "FpRegRegRegRegOp",
                            {"code": code, "op_class": opClass}, [])
        iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest"
        header_output += AA64FpRegRegRegRegOpDeclare.subst(iop)
        decoder_output += AA64FpRegRegRegRegOpConstructor.subst(iop)
        exec_output += BasicExecute.subst(iop)
# Fused multiply-add family. All four instructions are built from
# fplibMulAdd; they differ only in which inputs are negated first:
#   FMAdd  - nothing negated
#   FMSub  - cOp1 negated
#   FNMAdd - cOp3 and cOp1 negated
#   FNMSub - cOp3 negated
# Every expression uses the fplibNeg instantiation that matches its operand
# width. In particular the half-precision variants negate with
# fplibNeg<uint16_t>: cOp1/cOp3 are uint16_t there and the result is passed
# on as a uint16_t argument, so a wider instantiation would act on a sign-bit
# position that does not survive the narrowing back to 16 bits.
buildTernaryFpOp("FMAdd", "FloatMultAccOp",
    "fplibMulAdd<uint16_t>(cOp3, cOp1, cOp2, fpscr)",
    "fplibMulAdd<uint32_t>(cOp3, cOp1, cOp2, fpscr)",
    "fplibMulAdd<uint64_t>(cOp3, cOp1, cOp2, fpscr)" )
buildTernaryFpOp("FMSub", "FloatMultAccOp",
    "fplibMulAdd<uint16_t>(cOp3, fplibNeg<uint16_t>(cOp1), cOp2, fpscr)",
    "fplibMulAdd<uint32_t>(cOp3, fplibNeg<uint32_t>(cOp1), cOp2, fpscr)",
    "fplibMulAdd<uint64_t>(cOp3, fplibNeg<uint64_t>(cOp1), cOp2, fpscr)" )
buildTernaryFpOp("FNMAdd", "FloatMultAccOp",
    "fplibMulAdd<uint16_t>(fplibNeg<uint16_t>(cOp3), " +
    "fplibNeg<uint16_t>(cOp1), cOp2, fpscr)",
    "fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3), " +
    "fplibNeg<uint32_t>(cOp1), cOp2, fpscr)",
    "fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3), " +
    "fplibNeg<uint64_t>(cOp1), cOp2, fpscr)" )
buildTernaryFpOp("FNMSub", "FloatMultAccOp",
    "fplibMulAdd<uint16_t>(fplibNeg<uint16_t>(cOp3), cOp1, cOp2, fpscr)",
    "fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3), cOp1, cOp2, fpscr)",
    "fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3), cOp1, cOp2, fpscr)" )
# Emits the half ("H"), single ("S") and double ("D") precision variants, in
# that order, of a two-source FP instruction.
#   name    - mnemonic
#   Name    - class name stem; the precision suffix is appended
#   base    - base class name; also selects the templates "<base>Declare"
#             and "AA64<base>Constructor", which are looked up by name
#   opClass - op_class string stored in the instruction parameters
#   halfOp/singleOp/doubleOp - C++ expressions substituted for %(op)s in the
#             two-operand conversion templates
def buildBinFpOp(name, Name, base, opClass, halfOp, singleOp, doubleOp):
    global header_output, decoder_output, exec_output

    perPrecision = (("H", halfIntConvCode2, halfOp),
                    ("S", singleIntConvCode2, singleOp),
                    ("D", doubleIntConvCode2, doubleOp))

    # Build all three parameter objects first, then emit them.
    iops = []
    for suffix, template, op in perPrecision:
        iop = InstObjParams(name, Name + suffix, base,
                            {"code": template % {"op": op},
                             "op_class": opClass}, [])
        iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest"
        iops.append(iop)

    declareTempl = eval(base + "Declare")
    constructorTempl = eval("AA64" + base + "Constructor")
    for iop in iops:
        header_output += declareTempl.subst(iop)
        decoder_output += constructorTempl.subst(iop)
        exec_output += BasicExecute.subst(iop)
# Two-source arithmetic and min/max instructions. Each call passes the
# half-, single- and double-precision expression, in that order; cOp1 and
# cOp2 are the operands loaded by the two-operand conversion templates.
# Every expression uses the fplib instantiation that matches the operand
# width of its precision. For fnmul this includes the inner fplibMul: the
# half-precision variant multiplies with fplibMul<uint16_t>, like fmul, so
# that the 16-bit operands are not treated as single-precision bit patterns.
buildBinFpOp("fadd", "FAdd", "FpRegRegRegOp", "FloatAddOp",
             "fplibAdd<uint16_t>(cOp1, cOp2, fpscr)",
             "fplibAdd<uint32_t>(cOp1, cOp2, fpscr)",
             "fplibAdd<uint64_t>(cOp1, cOp2, fpscr)")
buildBinFpOp("fsub", "FSub", "FpRegRegRegOp", "FloatAddOp",
             "fplibSub<uint16_t>(cOp1, cOp2, fpscr)",
             "fplibSub<uint32_t>(cOp1, cOp2, fpscr)",
             "fplibSub<uint64_t>(cOp1, cOp2, fpscr)")
buildBinFpOp("fdiv", "FDiv", "FpRegRegRegOp", "FloatDivOp",
             "fplibDiv<uint16_t>(cOp1, cOp2, fpscr)",
             "fplibDiv<uint32_t>(cOp1, cOp2, fpscr)",
             "fplibDiv<uint64_t>(cOp1, cOp2, fpscr)")
buildBinFpOp("fmul", "FMul", "FpRegRegRegOp", "FloatMultOp",
             "fplibMul<uint16_t>(cOp1, cOp2, fpscr)",
             "fplibMul<uint32_t>(cOp1, cOp2, fpscr)",
             "fplibMul<uint64_t>(cOp1, cOp2, fpscr)")
buildBinFpOp("fnmul", "FNMul", "FpRegRegRegOp", "FloatMultOp",
             "fplibNeg<uint16_t>(fplibMul<uint16_t>(cOp1, cOp2, fpscr))",
             "fplibNeg<uint32_t>(fplibMul<uint32_t>(cOp1, cOp2, fpscr))",
             "fplibNeg<uint64_t>(fplibMul<uint64_t>(cOp1, cOp2, fpscr))")
buildBinFpOp("fmin", "FMin", "FpRegRegRegOp", "FloatCmpOp",
             "fplibMin<uint16_t>(cOp1, cOp2, fpscr)",
             "fplibMin<uint32_t>(cOp1, cOp2, fpscr)",
             "fplibMin<uint64_t>(cOp1, cOp2, fpscr)")
buildBinFpOp("fmax", "FMax", "FpRegRegRegOp", "FloatCmpOp",
             "fplibMax<uint16_t>(cOp1, cOp2, fpscr)",
             "fplibMax<uint32_t>(cOp1, cOp2, fpscr)",
             "fplibMax<uint64_t>(cOp1, cOp2, fpscr)")
buildBinFpOp("fminnm", "FMinNM", "FpRegRegRegOp", "FloatCmpOp",
             "fplibMinNum<uint16_t>(cOp1, cOp2, fpscr)",
             "fplibMinNum<uint32_t>(cOp1, cOp2, fpscr)",
             "fplibMinNum<uint64_t>(cOp1, cOp2, fpscr)")
buildBinFpOp("fmaxnm", "FMaxNM", "FpRegRegRegOp", "FloatCmpOp",
             "fplibMaxNum<uint16_t>(cOp1, cOp2, fpscr)",
             "fplibMaxNum<uint32_t>(cOp1, cOp2, fpscr)",
             "fplibMaxNum<uint64_t>(cOp1, cOp2, fpscr)")
# Emits the half ("H"), single ("S") and double ("D") precision variants, in
# that order, of a one-source FP instruction.
#   name    - mnemonic
#   Name    - class name stem; the precision suffix is appended
#   base    - base class name; also selects the templates "<base>Declare"
#             and "AA64<base>Constructor", which are looked up by name
#   opClass - op_class string stored in the instruction parameters
#   halfOp/singleOp/doubleOp - C++ expressions substituted for %(op)s in the
#             one-operand conversion templates; when doubleOp is omitted the
#             single-precision expression is reused for double precision
def buildUnaryFpOp(name, Name, base, opClass,
                   halfOp, singleOp, doubleOp = None):
    global header_output, decoder_output, exec_output

    if doubleOp is None:
        doubleOp = singleOp

    perPrecision = (("H", halfIntConvCode, halfOp),
                    ("S", singleIntConvCode, singleOp),
                    ("D", doubleIntConvCode, doubleOp))

    # Build all three parameter objects first, then emit them.
    iops = []
    for suffix, template, op in perPrecision:
        iop = InstObjParams(name, Name + suffix, base,
                            {"code": template % {"op": op},
                             "op_class": opClass}, [])
        iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest"
        iops.append(iop)

    declareTempl = eval(base + "Declare")
    constructorTempl = eval("AA64" + base + "Constructor")
    for iop in iops:
        header_output += declareTempl.subst(iop)
        decoder_output += constructorTempl.subst(iop)
        exec_output += BasicExecute.subst(iop)
# Square root: FSqrtH / FSqrtS / FSqrtD, each calling the fplibSqrt
# instantiation of the matching operand width.
buildUnaryFpOp("fsqrt", "FSqrt", "FpRegRegOp", "FloatSqrtOp",
"fplibSqrt<uint16_t>(cOp1, fpscr)",
"fplibSqrt<uint32_t>(cOp1, fpscr)",
"fplibSqrt<uint64_t>(cOp1, fpscr)")
# Emits the half ("H"), single ("S") and double ("D") precision variants, in
# that order, of a one-source FP instruction.
#   name    - mnemonic
#   Name    - class name stem; the precision suffix is appended
#   base    - base class name; also selects the templates "<base>Declare"
#             and "AA64<base>Constructor", which are looked up by name
#   opClass - op_class string stored in the instruction parameters
#   halfOp/singleOp/doubleOp - C++ expressions substituted for %(op)s; when
#             doubleOp is omitted the single-precision expression is reused
#   isIntConv - selects the *IntConvCode templates (default) or the
#             halfCode/singleCode/doubleCode templates
# NOTE(review): halfCode, singleCode and doubleCode are not defined in the
# visible part of this file, and every visible caller leaves isIntConv at its
# default - confirm they exist before passing isIntConv=False.
def buildSimpleUnaryFpOp(name, Name, base, opClass, halfOp, singleOp,
                         doubleOp = None, isIntConv = True):
    global header_output, decoder_output, exec_output

    if doubleOp is None:
        doubleOp = singleOp

    if isIntConv:
        templates = (halfIntConvCode, singleIntConvCode, doubleIntConvCode)
    else:
        templates = (halfCode, singleCode, doubleCode)

    ops = (halfOp, singleOp, doubleOp)
    for template, op, suffix in zip(templates, ops, "HSD"):
        iop = InstObjParams(name, Name + suffix, base,
                            {"code": template % {"op": op},
                             "op_class": opClass}, [])
        iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest"

        declareTempl = eval(base + "Declare")
        constructorTempl = eval("AA64" + base + "Constructor")

        header_output += declareTempl.subst(iop)
        decoder_output += constructorTempl.subst(iop)
        exec_output += BasicExecute.subst(iop)
# Sign manipulation: fneg and fabs call fplibNeg / fplibAbs on cOp1 only
# (no fpscr argument).
buildSimpleUnaryFpOp("fneg", "FNeg", "FpRegRegOp", "FloatMiscOp",
"fplibNeg<uint16_t>(cOp1)",
"fplibNeg<uint32_t>(cOp1)",
"fplibNeg<uint64_t>(cOp1)")
buildSimpleUnaryFpOp("fabs", "FAbs", "FpRegRegOp", "FloatMiscOp",
"fplibAbs<uint16_t>(cOp1)",
"fplibAbs<uint32_t>(cOp1)",
"fplibAbs<uint64_t>(cOp1)")
# Round-to-integral family, all built on fplibRoundInt. frintn/p/m/z/a pass
# a fixed FPRounding_* constant; frinti and frintx take the rounding mode
# from FPCRRounding(fpscr). The third argument is true only for frintx.
# NOTE(review): that boolean is presumably the "exact" flag of
# fplibRoundInt - confirm against its declaration.
buildSimpleUnaryFpOp("frintn", "FRIntN", "FpRegRegOp", "FloatMiscOp",
"fplibRoundInt<uint16_t>(cOp1, FPRounding_TIEEVEN, false, fpscr)",
"fplibRoundInt<uint32_t>(cOp1, FPRounding_TIEEVEN, false, fpscr)",
"fplibRoundInt<uint64_t>(cOp1, FPRounding_TIEEVEN, false, fpscr)")
buildSimpleUnaryFpOp("frintp", "FRIntP", "FpRegRegOp", "FloatMiscOp",
"fplibRoundInt<uint16_t>(cOp1, FPRounding_POSINF, false, fpscr)",
"fplibRoundInt<uint32_t>(cOp1, FPRounding_POSINF, false, fpscr)",
"fplibRoundInt<uint64_t>(cOp1, FPRounding_POSINF, false, fpscr)")
buildSimpleUnaryFpOp("frintm", "FRIntM", "FpRegRegOp", "FloatMiscOp",
"fplibRoundInt<uint16_t>(cOp1, FPRounding_NEGINF, false, fpscr)",
"fplibRoundInt<uint32_t>(cOp1, FPRounding_NEGINF, false, fpscr)",
"fplibRoundInt<uint64_t>(cOp1, FPRounding_NEGINF, false, fpscr)")
buildSimpleUnaryFpOp("frintz", "FRIntZ", "FpRegRegOp", "FloatMiscOp",
"fplibRoundInt<uint16_t>(cOp1, FPRounding_ZERO, false, fpscr)",
"fplibRoundInt<uint32_t>(cOp1, FPRounding_ZERO, false, fpscr)",
"fplibRoundInt<uint64_t>(cOp1, FPRounding_ZERO, false, fpscr)")
buildSimpleUnaryFpOp("frinta", "FRIntA", "FpRegRegOp", "FloatMiscOp",
"fplibRoundInt<uint16_t>(cOp1, FPRounding_TIEAWAY, false, fpscr)",
"fplibRoundInt<uint32_t>(cOp1, FPRounding_TIEAWAY, false, fpscr)",
"fplibRoundInt<uint64_t>(cOp1, FPRounding_TIEAWAY, false, fpscr)")
buildSimpleUnaryFpOp("frinti", "FRIntI", "FpRegRegOp", "FloatMiscOp",
"fplibRoundInt<uint16_t>(cOp1, FPCRRounding(fpscr), false, fpscr)",
"fplibRoundInt<uint32_t>(cOp1, FPCRRounding(fpscr), false, fpscr)",
"fplibRoundInt<uint64_t>(cOp1, FPCRRounding(fpscr), false, fpscr)")
buildSimpleUnaryFpOp("frintx", "FRIntX", "FpRegRegOp", "FloatMiscOp",
"fplibRoundInt<uint16_t>(cOp1, FPCRRounding(fpscr), true, fpscr)",
"fplibRoundInt<uint32_t>(cOp1, FPCRRounding(fpscr), true, fpscr)",
"fplibRoundInt<uint64_t>(cOp1, FPCRRounding(fpscr), true, fpscr)")
}};
let {{
# Fresh accumulators for the C++ text generated by this let block.
header_output = ""
decoder_output = ""
exec_output = ""

# Integer to floating point conversions (ucvtf / scvtf). One instruction is
# generated for every combination of source register width (W or X),
# destination precision (double or single) and signedness (U or S); the
# class is named Fcvt<W|X><U|S>IntFp<D|S>. The source is read through the
# unsigned operand accessor and stored in cSrc, whose C type carries the
# signedness; fplibFixedToFP is called with 0 as its second argument.
for regL, regOpL, width in (("W", "w", 32),
                            ("X", "d", 64)):
    for isDouble in (True, False):
        for us, usCode in (
                ("U", "uint%d_t cSrc = %sOp1_u%s;" % (width, regL, regOpL)),
                ("S", "int%d_t cSrc = %sOp1_u%s;" % (width, regL, regOpL))):
            fcvtIntFpDCode = vfp64EnabledCheckCode + ('''
FPSCR fpscr = (FPSCR) FpscrExc;
%s
''' % (usCode))
            # Conversion and write-back of the low word(s); only the result
            # width and the value of word 1 depend on the precision.
            fcvtIntFpDCode += '''
uint%(bits)s_t cDest = fplibFixedToFP<uint%(bits)s_t>(cSrc, 0,
%(unsigned)s, FPCRRounding(fpscr), fpscr);
AA64FpDestP0_uw = cDest;
AA64FpDestP1_uw = %(hi)s;
''' % {"bits": "64" if isDouble else "32",
       "unsigned": "true" if us == "U" else "false",
       "hi": "cDest >> 32" if isDouble else "0"}
            fcvtIntFpDCode += '''
AA64FpDestP2_uw = 0;
AA64FpDestP3_uw = 0;
FpscrExc = fpscr;
'''
            instName = "Fcvt%s%sIntFp%s" % (regL, us,
                                            "D" if isDouble else "S")
            mnem = "%scvtf" % (us.lower())
            fcvtIntFpDIop = InstObjParams(mnem, instName, "FpRegRegOp",
                                          {"code": fcvtIntFpDCode,
                                           "op_class": "FloatCvtOp"}, [])
            fcvtIntFpDIop.snippets["code"] += \
                zeroSveVecRegUpperPartCode % "AA64FpDest"
            header_output += FpRegRegOpDeclare.subst(fcvtIntFpDIop)
            decoder_output += AA64FpRegRegOpConstructor.subst(fcvtIntFpDIop)
            exec_output += BasicExecute.subst(fcvtIntFpDIop)
# Generates the floating point to integer conversion instructions for one
# combination of source precision, signedness and destination register
# width. Five instructions are emitted, one per rounding mode (N, P, M, Z,
# A); they are named FcvtFp<S|U>Int<X|W><D|S><rmode> with the mnemonic
# fcvt<rmode><s|u>. fplibFPToFixed is called with 0 as its second argument
# and its result is written to XDest or WDest.
#   isDouble - source is double precision (words 1:0) rather than single
#   isSigned - produce a signed result
#   isXReg   - destination is an X register rather than a W register
# NOTE(review): the constructor comes from FpRegRegOpConstructor rather than
# the AA64FpRegRegOpConstructor used by most FpRegRegOp instructions in this
# file - confirm that this is intentional.
def buildFpCvtIntOp(isDouble, isSigned, isXReg):
    global header_output, decoder_output, exec_output

    # Everything that does not depend on the rounding mode.
    if isDouble:
        srcBits = "64"
        precision = "D"
        loadOp1 = '''
uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
'''
    else:
        srcBits = "32"
        precision = "S"
        loadOp1 = "uint32_t cOp1 = AA64FpOp1P0_uw;"
    destReg = "X" if isXReg else "W"
    destBits = "64" if isXReg else "32"
    unsignedArg = "false" if isSigned else "true"

    roundings = (("N", "FPRounding_TIEEVEN"),
                 ("P", "FPRounding_POSINF"),
                 ("M", "FPRounding_NEGINF"),
                 ("Z", "FPRounding_ZERO"),
                 ("A", "FPRounding_TIEAWAY"))

    for rmode, roundingMode in roundings:
        convert = '''
%sDest = fplibFPToFixed<uint%s_t, uint%s_t>(cOp1, 0, %s, %s, fpscr);
FpscrExc = fpscr;
''' % (destReg, srcBits, destBits, unsignedArg, roundingMode)
        fcvtFpIntCode = (vfp64EnabledCheckCode +
                         "\nFPSCR fpscr = (FPSCR) FpscrExc;" +
                         loadOp1 + convert)

        instName = "FcvtFp%sInt%s%s%s" % ("S" if isSigned else "U",
                                          destReg, precision, rmode)
        mnem = "fcvt%s%s" % (rmode, "s" if isSigned else "u")
        fcvtFpIntIop = InstObjParams(mnem, instName, "FpRegRegOp",
                                     {"code": fcvtFpIntCode,
                                      "op_class": "FloatCvtOp"}, [])
        header_output += FpRegRegOpDeclare.subst(fcvtFpIntIop)
        decoder_output += FpRegRegOpConstructor.subst(fcvtFpIntIop)
        exec_output += BasicExecute.subst(fcvtFpIntIop)
# Instantiate the FP to integer conversions for every combination of source
# precision, signedness and destination register width.
for isDouble in (True, False):
    for isSigned in (True, False):
        for isXReg in (True, False):
            buildFpCvtIntOp(isDouble, isSigned, isXReg)
# fcvt, single to double precision: word 0 of the source is converted with
# fplibConvert<uint32_t, uint64_t> using the rounding mode from
# FPCRRounding(fpscr); the 64-bit result is split across destination words
# 0 and 1, words 2-3 are zeroed.
fcvtFpSFpDCode = vfp64EnabledCheckCode + '''
FPSCR fpscr = (FPSCR) FpscrExc;
uint64_t cDest = fplibConvert<uint32_t, uint64_t>(AA64FpOp1P0_uw,
FPCRRounding(fpscr), fpscr);
AA64FpDestP0_uw = cDest;
AA64FpDestP1_uw = cDest >> 32;
AA64FpDestP2_uw = 0;
AA64FpDestP3_uw = 0;
FpscrExc = fpscr;
'''
# NOTE(review): this class name is spelled "FCvt..." while the reverse
# conversion below is "Fcvt..."; the names presumably have to match the
# decoder, so they are left as they are.
fcvtFpSFpDIop = InstObjParams("fcvt", "FCvtFpSFpD", "FpRegRegOp",
{ "code": fcvtFpSFpDCode,
"op_class": "FloatCvtOp" }, [])
fcvtFpSFpDIop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest"
header_output += FpRegRegOpDeclare.subst(fcvtFpSFpDIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpSFpDIop);
exec_output += BasicExecute.subst(fcvtFpSFpDIop);
# fcvt, double to single precision: the 64-bit source is assembled from
# words 1:0, converted with fplibConvert<uint64_t, uint32_t> and written to
# destination word 0; words 1-3 are zeroed.
fcvtFpDFpSCode = vfp64EnabledCheckCode + '''
FPSCR fpscr = (FPSCR) FpscrExc;
uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
AA64FpDestP0_uw = fplibConvert<uint64_t, uint32_t>(cOp1,
FPCRRounding(fpscr), fpscr);
AA64FpDestP1_uw = 0;
AA64FpDestP2_uw = 0;
AA64FpDestP3_uw = 0;
FpscrExc = fpscr;
'''
fcvtFpDFpSIop = InstObjParams("fcvt", "FcvtFpDFpS", "FpRegRegOp",
{"code": fcvtFpDFpSCode,
"op_class": "FloatCvtOp" }, [])
fcvtFpDFpSIop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest"
header_output += FpRegRegOpDeclare.subst(fcvtFpDFpSIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpDFpSIop);
exec_output += BasicExecute.subst(fcvtFpDFpSIop);
# Half precision to double (FcvtFpHFpD) or single (FcvtFpHFpS) precision
# conversion. Word 0 of the source is passed to fplibConvert with a uint16_t
# source type; the result is written to the low word(s) of the destination
# and the remaining words are zeroed.
for isDouble in (True, False):
    code = vfp64EnabledCheckCode + ('''
FPSCR fpscr = (FPSCR) FpscrExc;
uint%(bits)s_t cDest = fplibConvert<uint16_t, uint%(bits)s_t>(AA64FpOp1P0_uw,
FPCRRounding(fpscr), fpscr);

AA64FpDestP0_uw = cDest;
AA64FpDestP1_uw = %(hi)s;

AA64FpDestP2_uw = 0;
AA64FpDestP3_uw = 0;
FpscrExc = fpscr;
''' % {"bits": "64" if isDouble else "32",
       "hi": "cDest >> 32" if isDouble else "0"})
    instName = "FcvtFpHFp%s" % ("D" if isDouble else "S")
    fcvtFpHFpIop = InstObjParams("fcvt", instName, "FpRegRegOp",
                                 {"code": code,
                                  "op_class": "FloatCvtOp"}, [])
    fcvtFpHFpIop.snippets["code"] += \
        zeroSveVecRegUpperPartCode % "AA64FpDest"
    header_output += FpRegRegOpDeclare.subst(fcvtFpHFpIop)
    decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpHFpIop)
    exec_output += BasicExecute.subst(fcvtFpHFpIop)
# Double (FcvtFpDFpH) or single (FcvtFpSFpH) precision to half precision
# conversion. The source is loaded into cOp1 (assembled from words 1:0 for
# double precision), converted by fplibConvert with a uint16_t destination
# type and written to destination word 0; words 1-3 are zeroed.
for isDouble in (True, False):
    code = vfp64EnabledCheckCode + ('''
FPSCR fpscr = (FPSCR) FpscrExc;
%(load)s;
AA64FpDestP0_uw = fplibConvert<uint%(bits)s_t, uint16_t>(cOp1,
FPCRRounding(fpscr), fpscr);
AA64FpDestP1_uw = 0;
AA64FpDestP2_uw = 0;
AA64FpDestP3_uw = 0;
FpscrExc = fpscr;
''' % {"load": ("uint64_t cOp1 = AA64FpOp1P0_uw | "
                "(uint64_t)AA64FpOp1P1_uw << 32") if isDouble
               else "uint32_t cOp1 = AA64FpOp1P0_uw",
       "bits": "64" if isDouble else "32"})
    instName = "FcvtFp%sFpH" % ("D" if isDouble else "S")
    fcvtFpFpHIop = InstObjParams("fcvt", instName, "FpRegRegOp",
                                 {"code": code,
                                  "op_class": "FloatCvtOp"}, [])
    fcvtFpFpHIop.snippets["code"] += \
        zeroSveVecRegUpperPartCode % "AA64FpDest"
    header_output += FpRegRegOpDeclare.subst(fcvtFpFpHIop)
    decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpFpHIop)
    exec_output += BasicExecute.subst(fcvtFpFpHIop)
# Builds one variant of the floating point compare instructions. The class
# is named FCmp[E]<Imm|Reg><D|S> with the mnemonic fcmp or fcmpe.
#   isQuiet  - True builds fcmp; False builds fcmpe, which passes "true" as
#              the third argument of fplibCompare
#   isDouble - compare 64-bit values (words 1:0) rather than 32-bit values
#   isImm    - second operand is the immediate rather than AA64FpOp1
# The first operand is read from the AA64FpDest operand. The value returned
# by fplibCompare is unpacked into CondCodesNZ (bits 3:2), CondCodesC
# (bit 1) and CondCodesV (bit 0).
def buildFCmpOp(isQuiet, isDouble, isImm):
    global header_output, decoder_output, exec_output

    bits = "64" if isDouble else "32"
    ctype = "uint%s_t" % bits

    # C++ expression reading an FP operand at the selected width.
    def readReg(reg):
        if isDouble:
            return "AA64Fp%sP0_uw | (uint64_t)AA64Fp%sP1_uw << 32" % (reg,
                                                                       reg)
        return "AA64Fp%sP0_uw" % reg

    op2Expr = "imm" if isImm else readReg("Op1")

    fcmpCode = vfp64EnabledCheckCode + ('''
FPSCR fpscr = (FPSCR) FpscrExc;
%s cOp1 = %s;
''' % (ctype, readReg("Dest")))
    fcmpCode += '''
%s cOp2 = %s;
''' % (ctype, op2Expr)
    fcmpCode += '''
int cc = fplibCompare<uint%s_t>(cOp1, cOp2, %s, fpscr);
CondCodesNZ = cc >> 2 & 3;
CondCodesC = cc >> 1 & 1;
CondCodesV = cc & 1;
FpCondCodes = fpscr & FpCondCodesMask;
FpscrExc = fpscr;
''' % (bits, "false" if isQuiet else "true")

    typeName = "Imm" if isImm else "Reg"
    signalling = "" if isQuiet else "E"
    instName = "FCmp%s%s%s" % (signalling, typeName,
                               "D" if isDouble else "S")
    fcmpIop = InstObjParams("fcmp%s" % signalling.lower(), instName,
                            "FpReg%sOp" % (typeName),
                            {"code": fcmpCode,
                             "op_class": "FloatCmpOp"}, [])

    declareTemp = eval("FpReg%sOpDeclare" % (typeName))
    constructorTemp = eval("AA64FpReg%sOpConstructor" % (typeName))
    header_output += declareTemp.subst(fcmpIop)
    decoder_output += constructorTemp.subst(fcmpIop)
    exec_output += BasicExecute.subst(fcmpIop)
# Instantiate fcmp/fcmpe for single and double precision, each with an
# immediate and a register second operand.
for isQuiet in (True, False):
    for isDouble in (True, False):
        for isImm in (True, False):
            buildFCmpOp(isQuiet, isDouble, isImm)
# Builds one variant of the conditional floating point compare
# instructions. The class is named FCCmp[E]Reg<D|S> with the mnemonic fccmp
# or fccmpe.
#   isQuiet  - True builds fccmp; False builds fccmpe, which passes "true"
#              as the third argument of fplibCompare
#   isDouble - compare 64-bit values (words 1:0) rather than 32-bit values
# When testPredicate() holds for condCode, AA64FpOp1 and AA64FpOp2 are
# compared and the condition codes are taken from the result; otherwise they
# are taken from the corresponding bits of defCc.
def buildFCCmpOp(isQuiet, isDouble):
    global header_output, decoder_output, exec_output

    if isDouble:
        bits = "64"
        op1Expr = "AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32"
        op2Expr = "AA64FpOp2P0_uw | (uint64_t)AA64FpOp2P1_uw << 32"
    else:
        bits = "32"
        op1Expr = "AA64FpOp1P0_uw"
        op2Expr = "AA64FpOp2P0_uw"

    fccmpCode = vfp64EnabledCheckCode + ('''
FPSCR fpscr = (FPSCR) FpscrExc;
if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) {
uint%(bits)s_t cOp1 = %(op1)s;
uint%(bits)s_t cOp2 = %(op2)s;
int cc = fplibCompare<uint%(bits)s_t>(cOp1, cOp2, %(trap)s, fpscr);
CondCodesNZ = cc >> 2 & 3;
CondCodesC = cc >> 1 & 1;
CondCodesV = cc & 1;
} else {
CondCodesNZ = (defCc >> 2) & 0x3;
CondCodesC = (defCc >> 1) & 0x1;
CondCodesV = defCc & 0x1;
}
FpCondCodes = fpscr & FpCondCodesMask;
FpscrExc = fpscr;
''' % {"bits": bits,
       "op1": op1Expr,
       "op2": op2Expr,
       "trap": "false" if isQuiet else "true"})

    signalling = "" if isQuiet else "E"
    instName = "FCCmp%sReg%s" % (signalling, "D" if isDouble else "S")
    fccmpIop = InstObjParams("fccmp%s" % signalling.lower(),
                             instName, "FpCondCompRegOp",
                             {"code": fccmpCode,
                              "op_class": "FloatCmpOp"}, [])
    header_output += DataXCondCompRegDeclare.subst(fccmpIop)
    decoder_output += DataXCondCompRegConstructor.subst(fccmpIop)
    exec_output += BasicExecute.subst(fccmpIop)
# Instantiate fccmp/fccmpe for single and double precision.
for isQuiet in (True, False):
    for isDouble in (True, False):
        buildFCCmpOp(isQuiet, isDouble)
}};
let {{
# Fresh accumulators for the C++ text generated by this let block; the
# builder functions below append to them.
header_output = ""
decoder_output = ""
exec_output = ""
# Generates one variant of the floating point to fixed point conversion
# instructions (fcvtzs / fcvtzu with an immediate). The class is named
# FcvtFp<S|U>Fixed<D|S><X|W>. The source is converted by fplibFPToFixed with
# "64 - imm" as its second argument and FPRounding_ZERO as the rounding
# mode; the result is written to XDest or WDest.
#   isSigned - produce a signed result
#   isDouble - source is double precision (words 1:0) rather than single
#   isXReg   - destination is an X register rather than a W register
def buildFpCvtFixedOp(isSigned, isDouble, isXReg):
    global header_output, decoder_output, exec_output

    if isDouble:
        srcBits = "64"
        loadOp1 = '''
uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
'''
    else:
        srcBits = "32"
        loadOp1 = "uint32_t cOp1 = AA64FpOp1P0_uw;"
    destReg = "X" if isXReg else "W"

    convert = '''
%sDest = fplibFPToFixed<uint%s_t, uint%s_t>(cOp1, 64 - imm, %s,
FPRounding_ZERO, fpscr);
FpscrExc = fpscr;
''' % (destReg, srcBits,
       "64" if isXReg else "32",
       "false" if isSigned else "true")

    fcvtFpFixedCode = (vfp64EnabledCheckCode +
                       "\nFPSCR fpscr = (FPSCR) FpscrExc;\n" +
                       loadOp1 + convert)

    instName = "FcvtFp%sFixed%s%s" % ("S" if isSigned else "U",
                                      "D" if isDouble else "S",
                                      destReg)
    mnem = "fcvtz%s" % ("s" if isSigned else "u")
    fcvtFpFixedIop = InstObjParams(mnem, instName, "FpRegRegImmOp",
                                   {"code": fcvtFpFixedCode,
                                    "op_class": "FloatCvtOp"}, [])
    header_output += FpRegRegImmOpDeclare.subst(fcvtFpFixedIop)
    decoder_output += AA64FpRegRegImmOpConstructor.subst(fcvtFpFixedIop)
    exec_output += BasicExecute.subst(fcvtFpFixedIop)
# Generates one variant of the fixed point to floating point conversion
# instructions (scvtf / ucvtf with an immediate). The class is named
# Fcvt<S|U>FixedFp<D|S><X|W>. The integer source register is cast to a
# signed or unsigned type of its own width and converted by fplibFixedToFP
# with "64 - imm" as its second argument; the result is written to the low
# word(s) of AA64FpDest and the remaining words are zeroed.
#   isSigned - treat the source as signed
#   isDouble - produce a double precision rather than a single precision
#              result
#   isXReg   - source is an X register rather than a W register
# NOTE(review): the constructor comes from FpRegRegImmOpConstructor, while
# the FP to fixed point conversions use AA64FpRegRegImmOpConstructor -
# confirm that this is intentional.
def buildFixedCvtFpOp(isSigned, isDouble, isXReg):
    global header_output, decoder_output, exec_output

    srcRegType = "X" if isXReg else "W"
    fields = {
        "fpType": "uint64_t" if isDouble else "uint32_t",
        "intType": "%s%s_t" % ("int" if isSigned else "uint",
                               "64" if isXReg else "32"),
        "reg": srcRegType,
        "unsigned": "false" if isSigned else "true",
    }

    fcvtFixedFpCode = vfp64EnabledCheckCode + ('''
FPSCR fpscr = (FPSCR) FpscrExc;
%(fpType)s result = fplibFixedToFP<%(fpType)s>((%(intType)s)%(reg)sOp1, 64 - imm,
%(unsigned)s, FPCRRounding(fpscr), fpscr);
''' % fields)
    # Only the value written to word 1 depends on the result precision.
    fcvtFixedFpCode += '''
AA64FpDestP0_uw = result;
AA64FpDestP1_uw = %s;
''' % ("result >> 32" if isDouble else "0")
    fcvtFixedFpCode += '''
AA64FpDestP2_uw = 0;
AA64FpDestP3_uw = 0;
FpscrExc = fpscr;
'''

    instName = "Fcvt%sFixedFp%s%s" % ("S" if isSigned else "U",
                                      "D" if isDouble else "S",
                                      srcRegType)
    mnem = "%scvtf" % ("s" if isSigned else "u")
    fcvtFixedFpIop = InstObjParams(mnem, instName, "FpRegRegImmOp",
                                   {"code": fcvtFixedFpCode,
                                    "op_class": "FloatCvtOp"}, [])
    fcvtFixedFpIop.snippets["code"] += \
        zeroSveVecRegUpperPartCode % "AA64FpDest"
    header_output += FpRegRegImmOpDeclare.subst(fcvtFixedFpIop)
    decoder_output += FpRegRegImmOpConstructor.subst(fcvtFixedFpIop)
    exec_output += BasicExecute.subst(fcvtFixedFpIop)
# Build both conversion directions for every combination of integer
# register width, FP precision and signedness.
for isXReg in (True, False):
    for isDouble in (True, False):
        for isSigned in (True, False):
            buildFpCvtFixedOp(isSigned, isDouble, isXReg)
            buildFixedCvtFpOp(isSigned, isDouble, isXReg)
}};
let {{
# Fresh accumulators for the C++ text generated by this let block.
header_output = ""
decoder_output = ""
exec_output = ""

# Conditional select (fcsel): FCSelD and FCSelS. When testPredicate() holds
# for condCode the destination receives AA64FpOp1, otherwise AA64FpOp2. The
# double variant copies words 0 and 1; the single variant copies word 0 and
# zeroes word 1. Words 2-3 are always zeroed.
# NOTE(review): unlike the other instructions in this file, the code here is
# not prefixed with vfp64EnabledCheckCode - confirm that this is intentional.
for isDouble in (True, False):
    code = '''
if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) {
AA64FpDestP0_uw = AA64FpOp1P0_uw;
''' + ('''
AA64FpDestP1_uw = AA64FpOp1P1_uw;
} else {
AA64FpDestP0_uw = AA64FpOp2P0_uw;
AA64FpDestP1_uw = AA64FpOp2P1_uw;
}
''' if isDouble else '''
} else {
AA64FpDestP0_uw = AA64FpOp2P0_uw;
}
AA64FpDestP1_uw = 0;
''') + '''
AA64FpDestP2_uw = 0;
AA64FpDestP3_uw = 0;
'''
    iop = InstObjParams("fcsel", "FCSel%s" % ("D" if isDouble else "S"),
                        "FpCondSelOp", {"code": code,
                                        "op_class": "FloatCvtOp"})
    iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest"
    header_output += DataXCondSelDeclare.subst(iop)
    decoder_output += DataXCondSelConstructor.subst(iop)
    exec_output += BasicExecute.subst(iop)
}};