blob: 7d765b21fc3cf5218cacf8522c387339c598c464 [file] [log] [blame]
// Copyright (c) 2009 The Regents of The University of Michigan
// Copyright (c) 2015 Advanced Micro Devices, Inc.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Template for the execute() method of a generated media microop class.
// It expands, in order, the op_decl, op_rd and code substitutions, and
// expands op_wb only while fault still equals NoFault; the fault value is
// returned to the caller.
def template MediaOpExecute {{
Fault
%(class_name)s::execute(ExecContext *xc,
Trace::InstRecord *traceData) const
{
Fault fault = NoFault;
%(op_decl)s;
%(op_rd)s;
%(code)s;
//Write the resulting state to the execution context
if(fault == NoFault) {
%(op_wb)s;
}
return fault;
}
}};
// Template for the class declaration of a generated media microop.
// The variadic constructor forwards mach_inst, the "%(mnemonic)s" literal,
// inst_mnem, set_flags, %(op_class)s, the args pack (as a braced list),
// src_size, dest_size and _ext to the base class constructor, then expands
// the set_reg_idx_arr and constructor substitutions in its body.
// execute() is only declared here; its body is not part of this template.
def template MediaOpDeclare {{
class %(class_name)s : public %(base_class)s
{
private:
%(reg_idx_arr_decl)s;
public:
template <typename ...Args>
%(class_name)s(ExtMachInst mach_inst, const char *inst_mnem,
uint64_t set_flags, uint8_t src_size, uint8_t dest_size,
uint16_t _ext, Args... args) :
%(base_class)s(mach_inst, "%(mnemonic)s", inst_mnem, set_flags,
%(op_class)s, { args... }, src_size, dest_size, _ext)
{
%(set_reg_idx_arr)s;
%(constructor)s;
}
Fault execute(ExecContext *, Trace::InstRecord *) const override;
};
}};
let {{
# Start the generated-output accumulators as empty strings so that later
# code can always append to them with "+=" without checking whether they
# exist yet.
header_output = ""
decoder_output = ""
exec_output = ""
class MediaOpMeta(type):
    """Metaclass that turns a media microop description into C++ classes.

    Creating a non-abstract class with this metaclass generates the C++
    declaration and execute() definition for it (appended to the
    header_output / exec_output globals) and registers the class in the
    microopClasses dictionary under its lower-cased name.  When the
    microop's code mentions the "op2" placeholder, both a register flavour
    and an immediate flavour ("<name>i" / "<Name>Imm") are produced.
    """

    # Single pattern used everywhere "op2" has to be recognised, so that
    # C++ generation and microassembler registration can never disagree
    # about whether an immediate flavour exists.  The lookbehind keeps
    # identifiers that merely end in "op2" (e.g. "loop2") from matching;
    # an optional leading "s" and a trailing "_<type>" qualifier are
    # captured so they can be carried over to the replacement name.
    _op2_matcher = re.compile(
            r"(?<!\w)(?P<prefix>s?)op2(?P<typeQual>_[^\W_]+)?")

    def buildCppClasses(self, name, Name, suffix, code, operand_types):
        """Emit the C++ class(es) for one microop.

        name          -- lower-case mnemonic handed to InstObjParams.
        Name          -- class name stem; suffix is appended to it.
        suffix        -- "" for the register flavour, "Imm" for immediate.
        code          -- C++ snippet forming the body of execute().
        operand_types -- operand descriptor classes; "dual" descriptors
                         are resolved to their FloatType or ImmType here.
        """
        # Globals to stick the output in
        global header_output
        global exec_output

        imm_operand_types = [op.ImmType if op.isDual() else op
                             for op in operand_types]
        operand_types = [op.FloatType if op.isDual() else op
                         for op in operand_types]

        # If op2 is used anywhere, make register and immediate versions
        # of this code and let the recursive calls do the real work.
        matcher = self._op2_matcher
        match = matcher.search(code)
        if match:
            typeQual = match.group("typeQual") or ""
            src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual)
            self.buildCppClasses(name, Name, suffix,
                    matcher.sub(src2_name, code), operand_types)
            self.buildCppClasses(name + "i", Name, suffix + "Imm",
                    matcher.sub("imm8", code), imm_operand_types)
            return

        cxx_params = ["X86ISA::MediaOpBase"]
        cxx_params.extend(op.cxx_class() for op in operand_types)
        base = "X86ISA::InstOperands<" + ", ".join(cxx_params) + ">"

        # Get everything ready for the substitution
        opt_args = []
        if self.op_class:
            opt_args.append(self.op_class)
        iop = InstObjParams(name, Name + suffix, base, {"code" : code},
                            opt_args)

        # Generate the actual code (finally!)
        header_output += MediaOpDeclare.subst(iop)
        exec_output += MediaOpExecute.subst(iop)

    def __new__(mcls, Name, bases, dict):
        """Create the Python class and, unless abstract, its C++ twin."""
        # "abstract" is consumed here so that subclasses do not inherit it.
        abstract = dict.pop('abstract', False)
        dict.setdefault("op_class", None)
        name = Name.lower()

        cls = super().__new__(mcls, Name, bases, dict)
        if not abstract:
            cls.className = Name
            cls.base_mnemonic = name
            code = cls.code
            operand_types = cls.operand_types

            # Set up the C++ classes
            mcls.buildCppClasses(cls, name, Name, "", code, operand_types)

            # Hook into the microassembler dict
            global microopClasses
            microopClasses[name] = cls

            # Register the immediate flavour under exactly the same
            # condition that made buildCppClasses generate it.
            if mcls._op2_matcher.search(code):
                microopClasses[name + 'i'] = cls
        return cls
class MediaOp(X86Microop, metaclass=MediaOpMeta):
    # Root of the media microop hierarchy.  It is marked abstract so the
    # metaclass does not try to generate C++ for it.
    abstract = True

    def __init__(self, *args, size=None, destSize=None, srcSize=None,
            ext=None):
        # Operands are kept as strings; getAllocator() feeds them to the
        # operand descriptor classes one after another.
        self.args = [str(arg) for arg in args]

        # "size" supplies both widths at once; an explicit srcSize or
        # destSize takes precedence over it.
        self.srcSize = size if srcSize is None else srcSize
        self.destSize = size if destSize is None else destSize
        if self.srcSize is None:
            raise Exception("Source size not set.")
        if self.destSize is None:
            raise Exception("Dest size not set.")

        self.ext = 0 if ext is None else ext

    def getAllocator(self, microFlags):
        # The immediate flavour of a microop is assembled under the
        # mnemonic "<base>i" and lives in the C++ class "<Name>Imm".
        use_imm = (self.mnemonic == self.base_mnemonic + 'i')
        className = self.className
        if use_imm:
            className += "Imm"

        # Each operand descriptor pulls what it needs from the shared
        # argument iterator; dual descriptors are first narrowed to the
        # flavour being assembled.
        remaining = iter(self.args)
        ctor_args = []
        for op_type in self.operand_types:
            if op_type.isDual():
                op_type = op_type.ImmType if use_imm else op_type.FloatType
            ctor_args.append(op_type(remaining).ctor_args())

        fields = {
            "class_name" : className,
            "flags" : self.microFlagsText(microFlags),
            "srcSize" : self.srcSize,
            "destSize" : self.destSize,
            "ext_args" : ", ".join([str(self.ext)] + ctor_args)
        }
        return '''new %(class_name)s(machInst, macrocodeBlock,
%(flags)s, %(srcSize)s, %(destSize)s, %(ext_args)s)
''' % fields
class Media0Op(MediaOp):
    # Abstract base for media microops that take no operands; only the
    # keyword arguments understood by MediaOp are accepted and forwarded.
    operand_types = ()
    abstract = True

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
class Media2Op(MediaOp):
    # Abstract base for media microops with exactly two operands; by
    # default a floating-point destination and one floating-point source.
    operand_types = (FloatDestOp, FloatSrc1Op)
    abstract = True

    def __init__(self, op1, op2, **kwargs):
        super().__init__(op1, op2, **kwargs)
class Media3Op(MediaOp):
    # Abstract base for media microops with exactly three operands; by
    # default a floating-point destination and two floating-point sources.
    operand_types = (FloatDestOp, FloatSrc1Op, FloatSrc2Op)
    abstract = True

    def __init__(self, op1, op2, op3, **kwargs):
        super().__init__(op1, op2, op3, **kwargs)
# Move one srcSize-byte element of a floating-point source register into
# an integer destination register.
# imm8 picks the element. When bit 0 of src1 and bit 0 of ext are both set,
# the index is first reduced by the number of elements that fit in 8 bytes.
# If the resulting index is outside [0, items), DestReg is written back
# unchanged.
class Mov2int(Media3Op):
# The third operand is the immediate element index; it defaults to 0.
def __init__(self, dest, src1, src2=0, **kwargs):
super().__init__(dest, src1, src2, **kwargs)
# Integer destination, floating-point source, 8-bit immediate.
operand_types = (IntDestOp, FloatSrc1Op, Imm8Op)
op_class = 'SimdMiscOp'
code = '''
int items = sizeof(double) / srcSize;
int offset = imm8;
if (bits(src1, 0) && (ext & 0x1))
offset -= items;
if (offset >= 0 && offset < items) {
uint64_t fpSrcReg1 =
bits(FpSrcReg1_uqw,
(offset + 1) * srcSize * 8 - 1,
(offset + 0) * srcSize * 8);
DestReg = merge(0, dest, fpSrcReg1, destSize);
} else {
DestReg = DestReg;
}
'''
# Insert an integer source register value into one destSize-byte element
# of a floating-point destination register.
# imm8 picks the element. When bit 0 of dest and bit 0 of ext are both set,
# the index is first reduced by the number of elements that fit in 8 bytes.
# If the resulting index is outside [0, items), the destination is written
# back unchanged.
class Mov2fp(Media3Op):
# The third operand is the immediate element index; it defaults to 0.
def __init__(self, dest, src1, src2=0, **kwargs):
super().__init__(dest, src1, src2, **kwargs)
# Floating-point destination, integer source, 8-bit immediate.
operand_types = (FloatDestOp, IntSrc1Op, Imm8Op)
op_class = 'SimdMiscOp'
code = '''
int items = sizeof(double) / destSize;
int offset = imm8;
if (bits(dest, 0) && (ext & 0x1))
offset -= items;
if (offset >= 0 && offset < items) {
FpDestReg_uqw =
insertBits(FpDestReg_uqw,
(offset + 1) * destSize * 8 - 1,
(offset + 0) * destSize * 8, PMSrcReg1);
} else {
FpDestReg_uqw = FpDestReg_uqw;
}
'''
# Collect the top bit of every srcSize-byte element of the floating-point
# source into consecutive bits of an integer result. The bits start at
# position 0, or at position "items" when bit 0 of ext is set. The result
# is ORed into the existing DestReg value rather than replacing it.
class Movsign(Media2Op):
# Integer destination, floating-point source.
operand_types = (IntDestOp, FloatSrc1Op)
op_class = 'SimdMiscOp'
code = '''
int items = sizeof(double) / srcSize;
uint64_t result = 0;
int offset = (ext & 0x1) ? items : 0;
for (int i = 0; i < items; i++) {
uint64_t picked =
bits(FpSrcReg1_uqw, (i + 1) * 8 * srcSize - 1);
result = insertBits(result, i + offset, i + offset, picked);
}
DestReg = DestReg | result;
'''
# Masked element move. For each element, the element of the first source
# replaces the destination element only when the top bit of the matching
# element in the second source is set; all other destination elements keep
# their previous value. Source and destination sizes must be equal.
class Maskmov(Media3Op):
op_class = 'SimdMiscOp'
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
if (bits(FpSrcReg2_uqw, hiIndex))
result = insertBits(result, hiIndex, loIndex, arg1Bits);
}
FpDestReg_uqw = result;
'''
# Element shuffle. Every result element is chosen by a selector field:
# 1 bit wide for 8-byte elements, 8 bits wide for 1-byte elements and
# 2 bits wide otherwise. Selectors are taken from ext, except for 1-byte
# elements where they come from the destination register's old value.
# A selector whose byte offset reaches 8 or more reads from the second
# source (after subtracting options / 2); otherwise it reads from the
# first. For 1-byte elements a selector with bit 7 set produces zero.
# NOTE(review): for size == 1 a selector in the range 16..127 still indexes
# past bit 63 after the adjustment - confirm callers never supply one.
class shuffle(Media3Op):
op_class = 'SimdMiscOp'
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = sizeof(double) / size;
int options;
int optionBits;
if (size == 8) {
options = 2;
optionBits = 1;
} else if (size == 1) {
options = 16;
optionBits = 8;
} else {
options = 4;
optionBits = 2;
}
uint64_t result = 0;
// PSHUFB stores shuffle encoding in destination XMM register
// directly (instead of passed in by ext).
uint64_t sel = (size == 1) ? FpDestReg_uqw : ext;
for (int i = 0; i < items; i++) {
uint64_t resBits;
uint8_t lsel = sel & mask(optionBits);
if (size == 1 && bits(lsel, 7)) {
// PSHUFB sets result byte to zero when highest bit of the
// corresponding shuffle encoding is 1.
resBits = 0;
} else if (lsel * size >= sizeof(double)) {
lsel -= options / 2;
resBits = bits(FpSrcReg2_uqw,
(lsel + 1) * sizeBits - 1,
(lsel + 0) * sizeBits);
} else {
resBits = bits(FpSrcReg1_uqw,
(lsel + 1) * sizeBits - 1,
(lsel + 0) * sizeBits);
}
sel >>= optionBits;
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
# Interleave elements of the two sources. Result element 2*i is taken from
# the first source and element 2*i + 1 from the second. The elements used
# are the lower half of each source's elements, or the upper half when ext
# is non-zero. The result starts from zero, so the old destination value
# does not contribute. Source and destination sizes must be equal.
class Unpack(Media3Op):
op_class = 'SimdMiscOp'
code = '''
assert(srcSize == destSize);
int size = destSize;
int items = (sizeof(double) / size) / 2;
int offset = ext ? items : 0;
uint64_t result = 0;
for (int i = 0; i < items; i++) {
uint64_t pickedLow =
bits(FpSrcReg1_uqw, (i + offset + 1) * 8 * size - 1,
(i + offset) * 8 * size);
result = insertBits(result,
(2 * i + 1) * 8 * size - 1,
(2 * i + 0) * 8 * size,
pickedLow);
uint64_t pickedHigh =
bits(FpSrcReg2_uqw, (i + offset + 1) * 8 * size - 1,
(i + offset) * 8 * size);
result = insertBits(result,
(2 * i + 2) * 8 * size - 1,
(2 * i + 1) * 8 * size,
pickedHigh);
}
FpDestReg_uqw = result;
'''
# Pack with saturation: narrows signed srcSize-byte elements to
# destSize-byte elements (srcSize must be twice destSize).  The low half
# of the result is packed from the first source and the high half from
# the second.  signedOp() selects signed saturation (clamp to the most
# negative / most positive destSize value); otherwise unsigned saturation
# is used (negative inputs become 0, too-large inputs become all ones).
class Pack(Media3Op):
    op_class = 'SimdMiscOp'
    code = '''
        assert(srcSize == destSize * 2);
        int items = (sizeof(double) / destSize);
        int destBits = destSize * 8;
        int srcBits = srcSize * 8;
        uint64_t result = 0;
        for (int i = 0; i < items; i++) {
            // Result elements [0, items / 2) come from the first source
            // and [items / 2, items) from the second; in both cases the
            // source element index restarts at zero.
            const bool fromSrc1 = i < items / 2;
            const int srcIdx = fromSrc1 ? i : i - items / 2;
            const uint64_t srcReg =
                fromSrc1 ? FpSrcReg1_uqw : FpSrcReg2_uqw;
            uint64_t picked =
                bits(srcReg, (srcIdx + 1) * srcBits - 1,
                             (srcIdx + 0) * srcBits);
            unsigned signBit = bits(picked, srcBits - 1);
            // Handle saturation.
            if (signedOp()) {
                // The value fits when every bit from the destination's
                // sign bit upwards is a copy of the source sign bit.
                uint64_t overflow =
                    bits(picked, srcBits - 1, destBits - 1);
                if (signBit) {
                    if (overflow != mask(srcBits - destBits + 1))
                        picked = (1ULL << (destBits - 1));
                } else {
                    if (overflow != 0)
                        picked = mask(destBits - 1);
                }
            } else {
                // The source is signed: negative values clamp to zero,
                // and only bits above the destination width indicate
                // that a non-negative value is too large.
                if (signBit) {
                    picked = 0;
                } else if (bits(picked, srcBits - 1, destBits) != 0) {
                    picked = mask(destBits);
                }
            }
            result = insertBits(result,
                                (i + 1) * destBits - 1,
                                (i + 0) * destBits,
                                picked);
        }
        FpDestReg_uqw = result;
    '''
# Bitwise XOR of the two 64-bit source values, written to the destination.
class Mxor(Media3Op):
# No size keywords are accepted; size is always passed as 1.
def __init__(self, dest, src1, src2):
super().__init__(dest, src1, src2, size=1)
op_class = 'SimdAluOp'
code = '''
FpDestReg_uqw = FpSrcReg1_uqw ^ FpSrcReg2_uqw;
'''
# Bitwise OR of the two 64-bit source values, written to the destination.
class Mor(Media3Op):
# No size keywords are accepted; size is always passed as 1.
def __init__(self, dest, src1, src2):
super().__init__(dest, src1, src2, size=1)
op_class = 'SimdAluOp'
code = '''
FpDestReg_uqw = FpSrcReg1_uqw | FpSrcReg2_uqw;
'''
# Bitwise AND of the two 64-bit source values, written to the destination.
class Mand(Media3Op):
# No size keywords are accepted; size is always passed as 1.
def __init__(self, dest, src1, src2):
super().__init__(dest, src1, src2, size=1)
op_class = 'SimdAluOp'
code = '''
FpDestReg_uqw = FpSrcReg1_uqw & FpSrcReg2_uqw;
'''
# Bitwise AND-NOT: the complement of the first source ANDed with the second
# source, written to the destination.
class Mandn(Media3Op):
# No size keywords are accepted; size is always passed as 1.
def __init__(self, dest, src1, src2):
super().__init__(dest, src1, src2, size=1)
op_class = 'SimdAluOp'
code = '''
FpDestReg_uqw = ~FpSrcReg1_uqw & FpSrcReg2_uqw;
'''
# Element-wise floating-point minimum for 4- or 8-byte elements.
# An element of the first source is chosen only when it compares strictly
# less than the matching element of the second source. In every other case
# (including when the comparison is false because an operand is NaN) the
# second source's bits are used. The selected raw bits are copied, so no
# rounding takes place.
class Mminf(Media3Op):
op_class = 'SimdFloatCmpOp'
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
double arg1, arg2;
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
if (size == 4) {
floatInt fi;
fi.i = arg1Bits;
arg1 = fi.f;
fi.i = arg2Bits;
arg2 = fi.f;
} else {
doubleInt di;
di.i = arg1Bits;
arg1 = di.d;
di.i = arg2Bits;
arg2 = di.d;
}
if (arg1 < arg2) {
result = insertBits(result, hiIndex, loIndex, arg1Bits);
} else {
result = insertBits(result, hiIndex, loIndex, arg2Bits);
}
}
FpDestReg_uqw = result;
'''
# Element-wise floating-point maximum for 4- or 8-byte elements.
# An element of the first source is chosen only when it compares strictly
# greater than the matching element of the second source. In every other
# case (including when the comparison is false because an operand is NaN)
# the second source's bits are used. The selected raw bits are copied, so
# no rounding takes place.
class Mmaxf(Media3Op):
op_class = 'SimdFloatCmpOp'
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
double arg1, arg2;
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
if (size == 4) {
floatInt fi;
fi.i = arg1Bits;
arg1 = fi.f;
fi.i = arg2Bits;
arg2 = fi.f;
} else {
doubleInt di;
di.i = arg1Bits;
arg1 = di.d;
di.i = arg2Bits;
arg2 = di.d;
}
if (arg1 > arg2) {
result = insertBits(result, hiIndex, loIndex, arg1Bits);
} else {
result = insertBits(result, hiIndex, loIndex, arg2Bits);
}
}
FpDestReg_uqw = result;
'''
# Element-wise integer minimum. When signedOp() is true the elements are
# compared as sign-extended 64-bit values; otherwise their raw bits are
# compared as unsigned values. On a tie the second source's bits are used.
# Source and destination sizes must be equal.
class Mmini(Media3Op):
op_class = 'SimdCmpOp'
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
int64_t arg1 = arg1Bits |
(0 - (arg1Bits & (1ULL << (sizeBits - 1))));
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
int64_t arg2 = arg2Bits |
(0 - (arg2Bits & (1ULL << (sizeBits - 1))));
uint64_t resBits;
if (signedOp()) {
if (arg1 < arg2) {
resBits = arg1Bits;
} else {
resBits = arg2Bits;
}
} else {
if (arg1Bits < arg2Bits) {
resBits = arg1Bits;
} else {
resBits = arg2Bits;
}
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
# Element-wise integer maximum. When signedOp() is true the elements are
# compared as sign-extended 64-bit values; otherwise their raw bits are
# compared as unsigned values. On a tie the second source's bits are used.
# Source and destination sizes must be equal.
class Mmaxi(Media3Op):
op_class = 'SimdCmpOp'
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
int64_t arg1 = arg1Bits |
(0 - (arg1Bits & (1ULL << (sizeBits - 1))));
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
int64_t arg2 = arg2Bits |
(0 - (arg2Bits & (1ULL << (sizeBits - 1))));
uint64_t resBits;
if (signedOp()) {
if (arg1 > arg2) {
resBits = arg1Bits;
} else {
resBits = arg2Bits;
}
} else {
if (arg1Bits > arg2Bits) {
resBits = arg1Bits;
} else {
resBits = arg2Bits;
}
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
# Element-wise floating-point square root for 4- or 8-byte elements.
# Each source element is reinterpreted as float or double through a union,
# passed to sqrt(), and the result's bits are stored in the same element
# position of the destination.
class Msqrt(Media2Op):
op_class = 'SimdFloatSqrtOp'
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t argBits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
if (size == 4) {
floatInt fi;
fi.i = argBits;
fi.f = sqrt(fi.f);
argBits = fi.i;
} else {
doubleInt di;
di.i = argBits;
di.d = sqrt(di.d);
argBits = di.i;
}
result = insertBits(result, hiIndex, loIndex, argBits);
}
FpDestReg_uqw = result;
'''
# Compute the approximate reciprocal of each element; single precision
# only (both srcSize and destSize are asserted to be 4). The value is
# computed as an ordinary division, 1.0 / x, and the quotient is stored
# back as a float.
class Mrcp(Media2Op):
op_class = 'SimdFloatAluOp'
code = '''
union floatInt
{
float f;
uint32_t i;
};
assert(srcSize == 4); // ISA defines single-precision only
assert(srcSize == destSize);
const int size = 4;
const int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t argBits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
floatInt fi;
fi.i = argBits;
// This is more accuracy than HW provides, but oh well
fi.f = 1.0 / fi.f;
argBits = fi.i;
result = insertBits(result, hiIndex, loIndex, argBits);
}
FpDestReg_uqw = result;
'''
# Element-wise floating-point addition for 4- or 8-byte elements.
# Matching elements of the two sources are reinterpreted as float or double
# through a union, added in that precision, and the sum's bits are stored
# in the same element position of the destination.
class Maddf(Media3Op):
op_class = 'SimdFloatAddOp'
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits;
if (size == 4) {
floatInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.f = arg1.f + arg2.f;
resBits = res.i;
} else {
doubleInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.d = arg1.d + arg2.d;
resBits = res.i;
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
# Element-wise floating-point subtraction (first source minus second) for
# 4- or 8-byte elements. Matching elements are reinterpreted as float or
# double through a union, subtracted in that precision, and the
# difference's bits are stored in the same element position of the
# destination.
class Msubf(Media3Op):
op_class = 'SimdFloatAddOp'
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits;
if (size == 4) {
floatInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.f = arg1.f - arg2.f;
resBits = res.i;
} else {
doubleInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.d = arg1.d - arg2.d;
resBits = res.i;
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
# Element-wise floating-point multiplication for 4- or 8-byte elements.
# Matching elements of the two sources are reinterpreted as float or double
# through a union, multiplied in that precision, and the product's bits are
# stored in the same element position of the destination.
class Mmulf(Media3Op):
op_class = 'SimdFloatMultOp'
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits;
if (size == 4) {
floatInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.f = arg1.f * arg2.f;
resBits = res.i;
} else {
doubleInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.d = arg1.d * arg2.d;
resBits = res.i;
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
# Element-wise floating-point division (first source divided by second) for
# 4- or 8-byte elements. Matching elements are reinterpreted as float or
# double through a union, divided in that precision, and the quotient's
# bits are stored in the same element position of the destination.
class Mdivf(Media3Op):
op_class = 'SimdFloatDivOp'
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits;
if (size == 4) {
floatInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.f = arg1.f / arg2.f;
resBits = res.i;
} else {
doubleInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.d = arg1.d / arg2.d;
resBits = res.i;
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
# Element-wise integer addition, optionally saturating.
# Without ext bit 0x2 the sum simply wraps within the element.
# With ext bit 0x2 set:
#   - signed ops (signedOp() true): when both operands share a sign and the
#     sum's sign differs, the result becomes the most negative value if the
#     wrapped sum looks non-negative, otherwise the most positive value.
#   - unsigned ops: the result becomes all ones when findCarry() reports a
#     carry for the element.
class Maddi(Media3Op):
op_class = 'SimdAddOp'
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits = arg1Bits + arg2Bits;
if (ext & 0x2) {
if (signedOp()) {
int arg1Sign = bits(arg1Bits, sizeBits - 1);
int arg2Sign = bits(arg2Bits, sizeBits - 1);
int resSign = bits(resBits, sizeBits - 1);
if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
if (resSign == 0)
resBits = (1ULL << (sizeBits - 1));
else
resBits = mask(sizeBits - 1);
}
} else {
if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
resBits = mask(sizeBits);
}
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
# Element-wise integer subtraction (first source minus second), optionally
# saturating.
# Without ext bit 0x2 the difference simply wraps within the element.
# With ext bit 0x2 set:
#   - signed ops (signedOp() true): the second operand's sign is inverted
#     so the overflow test used for addition applies; on overflow the
#     result becomes the most negative or most positive value.
#   - unsigned ops: the result becomes 0 when the second operand is larger
#     than the first; otherwise it becomes all ones when findCarry(), given
#     the complemented second operand, reports no carry.
class Msubi(Media3Op):
op_class = 'SimdAddOp'
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits = arg1Bits - arg2Bits;
if (ext & 0x2) {
if (signedOp()) {
int arg1Sign = bits(arg1Bits, sizeBits - 1);
int arg2Sign = !bits(arg2Bits, sizeBits - 1);
int resSign = bits(resBits, sizeBits - 1);
if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
if (resSign == 0)
resBits = (1ULL << (sizeBits - 1));
else
resBits = mask(sizeBits - 1);
}
} else {
if (arg2Bits > arg1Bits) {
resBits = 0;
} else if (!findCarry(sizeBits, resBits,
arg1Bits, ~arg2Bits)) {
resBits = mask(sizeBits);
}
}
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
# Element-wise integer multiplication of srcSize-byte elements producing
# destSize-byte results (destSize >= srcSize, at most 64 result bits).
#   - ext bit 16 enables an extra per-element source offset; ext bit 32
#     chooses between the two offset formulas used below.
#   - signedOp() selects sign-extended operands instead of raw bits.
#   - ext bit 0x4 adds 1 << (destBits - 1) to the product.
#   - multHi() shifts the product right by destBits before it is stored.
class Mmuli(Media3Op):
op_class = 'SimdMultOp'
code = '''
int srcBits = srcSize * 8;
int destBits = destSize * 8;
assert(destBits <= 64);
assert(destSize >= srcSize);
int items = numItems(destSize);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int offset = 0;
if (ext & 16) {
if (ext & 32)
offset = i * (destBits - srcBits);
else
offset = i * (destBits - srcBits) + srcBits;
}
int srcHiIndex = (i + 1) * srcBits - 1 + offset;
int srcLoIndex = (i + 0) * srcBits + offset;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, srcHiIndex, srcLoIndex);
uint64_t resBits;
if (signedOp()) {
int64_t arg1 = arg1Bits |
(0 - (arg1Bits & (1ULL << (srcBits - 1))));
int64_t arg2 = arg2Bits |
(0 - (arg2Bits & (1ULL << (srcBits - 1))));
resBits = (uint64_t)(arg1 * arg2);
} else {
resBits = arg1Bits * arg2Bits;
}
if (ext & 0x4)
resBits += (1ULL << (destBits - 1));
if (multHi())
resBits >>= destBits;
int destHiIndex = (i + 1) * destBits - 1;
int destLoIndex = (i + 0) * destBits;
result = insertBits(result, destHiIndex, destLoIndex, resBits);
}
FpDestReg_uqw = result;
'''
# Element-wise rounded average: (a + b + 1) / 2 on the unsigned element
# values, computed in 64 bits so the intermediate sum is not truncated to
# the element width. Source and destination sizes must be equal.
class Mavg(Media3Op):
op_class = 'SimdAddOp'
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2;
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
# Sum of absolute differences. For every srcSize-byte element in 8 bytes,
# the absolute difference of the two sources' elements is accumulated.
# The total, masked to destSize * 8 bits, replaces the whole destination
# value.
class Msad(Media3Op):
op_class = 'SimdAddOp'
code = '''
int srcBits = srcSize * 8;
int items = sizeof(double) / srcSize;
uint64_t sum = 0;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * srcBits - 1;
int loIndex = (i + 0) * srcBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
int64_t resBits = arg1Bits - arg2Bits;
if (resBits < 0)
resBits = -resBits;
sum += resBits;
}
FpDestReg_uqw = sum & mask(destSize * 8);
'''
# Element-wise logical right shift. Every element of the first source is
# shifted by the same amount; a shift amount of sizeBits or more yields
# zero. The shift amount is written as the "op2" placeholder, which the
# metaclass replaces to generate both a register and an immediate flavour
# of this microop.
class Msrl(Media3Op):
# Op2 is the dual (register or immediate) descriptor for the shift amount.
operand_types = (FloatDestOp, FloatSrc1Op, Op2)
op_class = 'SimdShiftOp'
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t shiftAmt = op2_uqw;
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t resBits;
if (shiftAmt >= sizeBits) {
resBits = 0;
} else {
resBits = (arg1Bits >> shiftAmt) &
mask(sizeBits - shiftAmt);
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
# Element-wise arithmetic right shift. Every element of the first source is
# shifted by the same amount and the vacated upper bits are filled with the
# element's sign bit. A shift amount of sizeBits or more yields all ones
# for negative elements and zero otherwise. The shift amount is written as
# the "op2" placeholder, which the metaclass replaces to generate both a
# register and an immediate flavour of this microop.
class Msra(Media3Op):
# Op2 is the dual (register or immediate) descriptor for the shift amount.
operand_types = (FloatDestOp, FloatSrc1Op, Op2)
op_class = 'SimdShiftOp'
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t shiftAmt = op2_uqw;
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t resBits;
if (shiftAmt >= sizeBits) {
if (bits(arg1Bits, sizeBits - 1))
resBits = mask(sizeBits);
else
resBits = 0;
} else {
resBits = (arg1Bits >> shiftAmt);
resBits = resBits |
(0 - (resBits & (1ULL << (sizeBits - 1 - shiftAmt))));
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
# Element-wise logical left shift. Every element of the first source is
# shifted by the same amount; a shift amount of sizeBits or more yields
# zero. The shift amount is written as the "op2" placeholder, which the
# metaclass replaces to generate both a register and an immediate flavour
# of this microop.
class Msll(Media3Op):
# Op2 is the dual (register or immediate) descriptor for the shift amount.
operand_types = (FloatDestOp, FloatSrc1Op, Op2)
op_class = 'SimdShiftOp'
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t shiftAmt = op2_uqw;
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t resBits;
if (shiftAmt >= sizeBits) {
resBits = 0;
} else {
resBits = (arg1Bits << shiftAmt);
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
# Convert floating-point elements (4 or 8 bytes) to signed integer elements
# (4 or 8 bytes).
#   - When the source is twice as wide as the destination, ext bit 0x2
#     places the results in the upper part of the destination.
#   - When the destination is twice as wide as the source, ext bit 0x2
#     reads the inputs from the upper part of the source.
#   - ext bit 0x4 adds 0.5 (non-negative input) or subtracts 0.5 (negative
#     input) before the C++ cast to the integer type; without it the cast
#     alone determines the result.
class Cvtf2i(Media2Op):
op_class = 'SimdFloatCvtOp'
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(destSize == 4 || destSize == 8);
assert(srcSize == 4 || srcSize == 8);
int srcSizeBits = srcSize * 8;
int destSizeBits = destSize * 8;
int items;
int srcStart = 0;
int destStart = 0;
if (srcSize == 2 * destSize) {
items = numItems(srcSize);
if (ext & 0x2)
destStart = destSizeBits * items;
} else if (destSize == 2 * srcSize) {
items = numItems(destSize);
if (ext & 0x2)
srcStart = srcSizeBits * items;
} else {
items = numItems(destSize);
}
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
double arg;
if (srcSize == 4) {
floatInt fi;
fi.i = argBits;
arg = fi.f;
} else {
doubleInt di;
di.i = argBits;
arg = di.d;
}
if (ext & 0x4) {
if (arg >= 0)
arg += 0.5;
else
arg -= 0.5;
}
if (destSize == 4) {
int32_t i_arg = (int32_t)arg;
argBits = *((uint32_t*)&i_arg);
} else {
int64_t i_arg = (int64_t)arg;
argBits = *((uint64_t*)&i_arg);
}
int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
int destLoIndex = destStart + (i + 0) * destSizeBits;
result = insertBits(result, destHiIndex, destLoIndex, argBits);
}
FpDestReg_uqw = result;
'''
# Convert signed integer elements (4 or 8 bytes) to floating-point elements
# (4 or 8 bytes). Each input is sign-extended to 64 bits and converted to
# double; for a 4-byte destination it is then narrowed to float.
#   - When the source is twice as wide as the destination, ext bit 0x2
#     places the results in the upper part of the destination.
#   - When the destination is twice as wide as the source, ext bit 0x2
#     reads the inputs from the upper part of the source.
class Cvti2f(Media2Op):
op_class = 'SimdFloatCvtOp'
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(destSize == 4 || destSize == 8);
assert(srcSize == 4 || srcSize == 8);
int srcSizeBits = srcSize * 8;
int destSizeBits = destSize * 8;
int items;
int srcStart = 0;
int destStart = 0;
if (srcSize == 2 * destSize) {
items = numItems(srcSize);
if (ext & 0x2)
destStart = destSizeBits * items;
} else if (destSize == 2 * srcSize) {
items = numItems(destSize);
if (ext & 0x2)
srcStart = srcSizeBits * items;
} else {
items = numItems(destSize);
}
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
int64_t sArg = argBits |
(0 - (argBits & (1ULL << (srcSizeBits - 1))));
double arg = sArg;
if (destSize == 4) {
floatInt fi;
fi.f = arg;
argBits = fi.i;
} else {
doubleInt di;
di.d = arg;
argBits = di.i;
}
int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
int destLoIndex = destStart + (i + 0) * destSizeBits;
result = insertBits(result, destHiIndex, destLoIndex, argBits);
}
FpDestReg_uqw = result;
'''
# Convert between floating-point widths (4 or 8 bytes on either side).
# Each input is read as float or double according to srcSize, held as a
# double, and written as float or double according to destSize.
#   - When the source is twice as wide as the destination, ext bit 0x2
#     places the results in the upper part of the destination.
#   - When the destination is twice as wide as the source, ext bit 0x2
#     reads the inputs from the upper part of the source.
class Cvtf2f(Media2Op):
op_class = 'SimdFloatCvtOp'
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(destSize == 4 || destSize == 8);
assert(srcSize == 4 || srcSize == 8);
int srcSizeBits = srcSize * 8;
int destSizeBits = destSize * 8;
int items;
int srcStart = 0;
int destStart = 0;
if (srcSize == 2 * destSize) {
items = numItems(srcSize);
if (ext & 0x2)
destStart = destSizeBits * items;
} else if (destSize == 2 * srcSize) {
items = numItems(destSize);
if (ext & 0x2)
srcStart = srcSizeBits * items;
} else {
items = numItems(destSize);
}
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
double arg;
if (srcSize == 4) {
floatInt fi;
fi.i = argBits;
arg = fi.f;
} else {
doubleInt di;
di.i = argBits;
arg = di.d;
}
if (destSize == 4) {
floatInt fi;
fi.f = arg;
argBits = fi.i;
} else {
doubleInt di;
di.d = arg;
argBits = di.i;
}
int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
int destLoIndex = destStart + (i + 0) * destSizeBits;
result = insertBits(result, destHiIndex, destLoIndex, argBits);
}
FpDestReg_uqw = result;
'''
# Element-wise signed integer compare that writes a mask to the
# destination: each element becomes all ones when the predicate holds and
# all zeros otherwise.  The predicate is "equal" when ext bit 0x2 is
# clear and "first source greater than second" when it is set.  Both
# operands are sign-extended to 64 bits before being compared.
class Mcmpi2r(Media3Op):
    # NOTE(review): the integer min/max microops in this file use
    # 'SimdCmpOp'; confirm that a conversion op class is intended here.
    op_class = 'SimdCvtOp'
    code = '''
        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        int items = numItems(size);
        uint64_t result = FpDestReg_uqw;
        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
            int64_t arg1 = arg1Bits |
                (0 - (arg1Bits & (1ULL << (sizeBits - 1))));
            uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
            int64_t arg2 = arg2Bits |
                (0 - (arg2Bits & (1ULL << (sizeBits - 1))));
            uint64_t resBits = 0;
            if (((ext & 0x2) == 0 && arg1 == arg2) ||
                ((ext & 0x2) == 0x2 && arg1 > arg2))
                resBits = mask(sizeBits);
            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg_uqw = result;
    '''
# Element-wise floating-point compare that writes a mask to the
# destination: each element becomes all ones when the predicate holds and
# all zeros otherwise. The predicate is chosen by the low three bits of
# ext:
#   0 equal              4 not equal (or unordered)
#   1 less than          5 not less than (or unordered)
#   2 less or equal      6 not less or equal (or unordered)
#   3 unordered          7 ordered
# "Unordered" means at least one operand is NaN.
class Mcmpf2r(Media3Op):
op_class = 'SimdFloatCvtOp'
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
double arg1, arg2;
if (size == 4) {
floatInt fi;
fi.i = arg1Bits;
arg1 = fi.f;
fi.i = arg2Bits;
arg2 = fi.f;
} else {
doubleInt di;
di.i = arg1Bits;
arg1 = di.d;
di.i = arg2Bits;
arg2 = di.d;
}
uint64_t resBits = 0;
bool nanop = std::isnan(arg1) || std::isnan(arg2);
switch (ext & mask(3)) {
case 0:
if (arg1 == arg2 && !nanop)
resBits = mask(sizeBits);
break;
case 1:
if (arg1 < arg2 && !nanop)
resBits = mask(sizeBits);
break;
case 2:
if (arg1 <= arg2 && !nanop)
resBits = mask(sizeBits);
break;
case 3:
if (nanop)
resBits = mask(sizeBits);
break;
case 4:
if (arg1 != arg2 || nanop)
resBits = mask(sizeBits);
break;
case 5:
if (!(arg1 < arg2) || nanop)
resBits = mask(sizeBits);
break;
case 6:
if (!(arg1 <= arg2) || nanop)
resBits = mask(sizeBits);
break;
case 7:
if (!nanop)
resBits = mask(sizeBits);
break;
};
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
# Floating-point compare of the lowest element of the two sources (4 or 8
# bytes) that reports its outcome through flags instead of a register.
# SF, AF, ZF and PF are cleared in ccFlagBits and OF and CF in cfofBits;
# then ZF, PF and CF are set for an unordered (NaN) comparison, CF alone
# for "less than", and ZF alone for "equal". "Greater than" leaves all of
# them clear.
class Mcmpf2rf(Media2Op):
# Both operands are sources; there is no register destination.
operand_types = (FloatSrc1Op, FloatSrc2Op)
op_class = 'SimdFloatCvtOp'
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
assert(srcSize == 4 || srcSize == 8);
int size = srcSize;
int sizeBits = size * 8;
double arg1, arg2;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, sizeBits - 1, 0);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, sizeBits - 1, 0);
if (size == 4) {
floatInt fi;
fi.i = arg1Bits;
arg1 = fi.f;
fi.i = arg2Bits;
arg2 = fi.f;
} else {
doubleInt di;
di.i = arg1Bits;
arg1 = di.d;
di.i = arg2Bits;
arg2 = di.d;
}
// ZF PF CF
// Unordered 1 1 1
// Greater than 0 0 0
// Less than 0 0 1
// Equal 1 0 0
// OF = SF = AF = 0
ccFlagBits = ccFlagBits & ~(SFBit | AFBit | ZFBit | PFBit);
cfofBits = cfofBits & ~(OFBit | CFBit);
if (std::isnan(arg1) || std::isnan(arg2)) {
ccFlagBits = ccFlagBits | (ZFBit | PFBit);
cfofBits = cfofBits | CFBit;
}
else if(arg1 < arg2)
cfofBits = cfofBits | CFBit;
else if(arg1 == arg2)
ccFlagBits = ccFlagBits | ZFBit;
'''
# Operand-less microop whose only effect is to set FTW to 0xFFFF.
class Emms(Media0Op):
# Takes no arguments; a size of 2 is passed only because the base class
# requires source and destination sizes to be set.
def __init__(self):
super().__init__(size=2)
op_class = 'FloatMiscOp'
code = 'FTW = 0xFFFF;'
}};