// blob: f9c6a9f9b3baf10a393d1099eaeb3f97dc65f546 [file] [log] [blame]
// Copyright (c) 2009 The Regents of The University of Michigan
// Copyright (c) 2015 Advanced Micro Devices, Inc.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: Gabe Black
// Template for the execute() method of a generated media microop class.
// The %(op_decl)s, %(op_rd)s, %(code)s and %(op_wb)s slots are filled in
// when the template is substituted (templates[2].subst(iop) in
// buildCppClasses). The write-back slot only runs while fault is still
// NoFault, and the fault value is what execute() returns.
def template MediaOpExecute {{
Fault %(class_name)s::execute(ExecContext *xc,
Trace::InstRecord *traceData) const
{
Fault fault = NoFault;
%(op_decl)s;
%(op_rd)s;
%(code)s;
//Write the resulting state to the execution context
if(fault == NoFault)
{
%(op_wb)s;
}
return fault;
}
}};
// Class declaration template for the register form of a media microop.
// The constructor takes two source register indices (_src1, _src2) and a
// destination index, followed by the source/destination sizes and the
// ext field. Only the constructor and execute() are declared here.
def template MediaOpRegDeclare {{
class %(class_name)s : public %(base_class)s
{
public:
%(class_name)s(ExtMachInst _machInst,
const char * instMnem, uint64_t setFlags,
InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
Fault execute(ExecContext *, Trace::InstRecord *) const;
};
}};
// Class declaration template for the immediate form of a media microop.
// Same shape as the register declaration, except that the second operand
// is an immediate (_imm8, passed as uint16_t) instead of a register index.
def template MediaOpImmDeclare {{
class %(class_name)s : public %(base_class)s
{
public:
%(class_name)s(ExtMachInst _machInst,
const char * instMnem, uint64_t setFlags,
InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
Fault execute(ExecContext *, Trace::InstRecord *) const;
};
}};
// Constructor definition template for the register form. It forwards all
// arguments to %(base_class)s, inserting the "%(mnemonic)s" string after
// machInst and appending %(op_class)s, then runs the %(constructor)s slot
// in the constructor body.
def template MediaOpRegConstructor {{
%(class_name)s::%(class_name)s(
ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
%(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
_src1, _src2, _dest, _srcSize, _destSize, _ext,
%(op_class)s)
{
%(constructor)s;
}
}};
// Constructor definition template for the immediate form. Identical to the
// register constructor except that _imm8 is forwarded to %(base_class)s in
// place of the second source register index.
def template MediaOpImmConstructor {{
%(class_name)s::%(class_name)s(
ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
%(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
_src1, _imm8, _dest, _srcSize, _destSize, _ext,
%(op_class)s)
{
%(constructor)s;
}
}};
let {{
# Start every output stream as an empty string so that later code can
# append to it with += unconditionally.
header_output = decoder_output = exec_output = ""

# (declaration, constructor, execute) template triples. buildCppClasses
# indexes them as [0], [1] and [2], in that order.
immTemplates = (MediaOpImmDeclare, MediaOpImmConstructor, MediaOpExecute)
regTemplates = (MediaOpRegDeclare, MediaOpRegConstructor, MediaOpExecute)
class MediaOpMeta(type):
    """Metaclass that generates the C++ classes for each media microop.

    Creating a non-abstract class with this metaclass appends the
    substituted declaration, constructor and execute templates to
    header_output, decoder_output and exec_output, and registers the class
    in microopClasses under its lower-cased name. When the class's code
    mentions "op2", a register variant and an immediate variant are both
    generated and the class is additionally registered as "<name>i".
    """

    # A standalone "op2" operand, optionally written "sop2" and optionally
    # followed by a "_type" qualifier. Shared by buildCppClasses and
    # __new__ so that both always agree on whether an immediate variant of
    # a microop exists.
    op2Matcher = re.compile(
        r"(?<!\w)(?P<prefix>s?)op2(?P<typeQual>_[^\W_]+)?")

    # A standalone "imm8" token.
    imm8Matcher = re.compile(r"(?<!\w)imm8(?!\w)")

    def buildCppClasses(self, name, Name, suffix, code):
        """Append the generated C++ for one microop to the output globals.

        name   -- mnemonic handed to InstObjParams
        Name   -- base C++ class name; suffix is appended to it
        suffix -- "" for the plain class, "Imm" for the immediate variant
        code   -- C++ snippet for the execute() body
        """
        # Globals to stick the output in
        global header_output
        global decoder_output
        global exec_output

        # If op2 is used anywhere, make register and immediate versions
        # of this code. The prefix and type qualifier of the FIRST match
        # are reused for every occurrence in the register version.
        match = self.op2Matcher.search(code)
        if match:
            typeQual = match.group("typeQual") or ""
            src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual)
            self.buildCppClasses(name, Name, suffix,
                    self.op2Matcher.sub(src2_name, code))
            self.buildCppClasses(name + "i", Name, suffix + "Imm",
                    self.op2Matcher.sub("imm8", code))
            return

        base = "X86ISA::MediaOp"

        # If imm8 shows up in the code, use the immediate templates, if
        # not, hopefully the register ones will be correct.
        if self.imm8Matcher.search(code):
            base += "Imm"
            templates = immTemplates
        else:
            base += "Reg"
            templates = regTemplates

        # Get everything ready for the substitution
        opt_args = []
        if self.op_class:
            opt_args.append(self.op_class)
        iop = InstObjParams(name, Name + suffix, base, {"code" : code},
                            opt_args)

        # Generate the actual code (finally!)
        header_output += templates[0].subst(iop)
        decoder_output += templates[1].subst(iop)
        exec_output += templates[2].subst(iop)

    def __new__(mcls, Name, bases, dict):
        """Create the class and, unless it is abstract, emit its C++.

        An "abstract" entry is removed from the class dictionary before the
        class is created; a missing "op_class" entry defaults to None.
        """
        abstract = dict.pop('abstract', False)
        name = Name.lower()
        dict.setdefault("op_class", None)

        cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
        if not abstract:
            cls.className = Name
            cls.base_mnemonic = name
            code = cls.code
            # Set up the C++ classes
            mcls.buildCppClasses(cls, name, Name, "", code)

            # Hook into the microassembler dict
            global microopClasses
            microopClasses[name] = cls

            # If op2 is used anywhere, register the immediate version too.
            # This must use the same pattern as buildCppClasses: a looser
            # test could register "<name>i" without its C++ class having
            # been generated.
            if mcls.op2Matcher.search(code):
                microopClasses[name + 'i'] = cls
        return cls
class MediaOp(X86Microop):
    """Base class for the media microops assembled from microcode.

    An instance records the operands and sizes of one microop use;
    getAllocator() renders the C++ "new ..." expression that constructs it.
    """
    __metaclass__ = MediaOpMeta
    # This class itself doesn't act as a microop
    abstract = True

    def __init__(self, dest, src1, op2,
                 size = None, destSize = None, srcSize = None, ext = None):
        """Record the operands.

        size sets both the source and destination size; srcSize and
        destSize override it individually. ext defaults to 0.

        Raises Exception when the source or destination size ends up
        unset.
        """
        self.dest = dest
        self.src1 = src1
        self.op2 = op2

        # Resolve the sizes in locals first. Testing self.srcSize before
        # it has been assigned would raise AttributeError rather than the
        # intended error when no size is given.
        if srcSize is None:
            srcSize = size
        if destSize is None:
            destSize = size
        if srcSize is None:
            raise Exception("Source size not set.")
        if destSize is None:
            raise Exception("Dest size not set.")
        self.srcSize = srcSize
        self.destSize = destSize

        self.ext = 0 if ext is None else ext

    def getAllocator(self, microFlags):
        """Return the C++ expression that allocates this microop.

        The "Imm" class is used when the instance's mnemonic is the base
        mnemonic with an "i" appended.
        """
        className = self.className
        if self.mnemonic == self.base_mnemonic + 'i':
            className += "Imm"
        allocator = ('new %(class_name)s(machInst, macrocodeBlock,\n'
                     '%(flags)s, %(src1)s, %(op2)s, %(dest)s,\n'
                     '%(srcSize)s, %(destSize)s, %(ext)s)') % {
            "class_name" : className,
            "flags" : self.microFlagsText(microFlags),
            "src1" : self.src1, "op2" : self.op2,
            "dest" : self.dest,
            "srcSize" : self.srcSize,
            "destSize" : self.destSize,
            "ext" : self.ext}
        return allocator
class Mov2int(MediaOp):
    # Copies one srcSize-byte item of FpSrcReg1 into DestReg, merged at
    # destSize. The item index is imm8, reduced by the number of items per
    # register when bit 0 of src1 and bit 0 of ext are both set. If the
    # index then falls outside this register, DestReg is written back
    # unchanged. The code reads imm8, so the immediate templates are used.
    op_class = 'SimdMiscOp'

    def __init__(self, dest, src1, src2 = 0,
                 size = None, destSize = None, srcSize = None, ext = None):
        # src2 is handed to the base class as its op2 argument.
        super(Mov2int, self).__init__(
            dest, src1, src2,
            size=size, destSize=destSize, srcSize=srcSize, ext=ext)

    code = '''
int items = sizeof(FloatRegBits) / srcSize;
int offset = imm8;
if (bits(src1, 0) && (ext & 0x1))
offset -= items;
if (offset >= 0 && offset < items) {
uint64_t fpSrcReg1 =
bits(FpSrcReg1_uqw,
(offset + 1) * srcSize * 8 - 1,
(offset + 0) * srcSize * 8);
DestReg = merge(0, fpSrcReg1, destSize);
} else {
DestReg = DestReg;
}
'''
class Mov2fp(MediaOp):
    # Inserts the low srcSize bytes of SrcReg1 into one destSize-byte item
    # of FpDestReg. The item index is imm8, reduced by the number of items
    # per register when bit 0 of dest and bit 0 of ext are both set. If the
    # index then falls outside this register, FpDestReg is written back
    # unchanged. The code reads imm8, so the immediate templates are used.
    op_class = 'SimdMiscOp'

    def __init__(self, dest, src1, src2 = 0,
                 size = None, destSize = None, srcSize = None, ext = None):
        # src2 is handed to the base class as its op2 argument.
        super(Mov2fp, self).__init__(
            dest, src1, src2,
            size=size, destSize=destSize, srcSize=srcSize, ext=ext)

    code = '''
int items = sizeof(FloatRegBits) / destSize;
int offset = imm8;
if (bits(dest, 0) && (ext & 0x1))
offset -= items;
if (offset >= 0 && offset < items) {
uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
FpDestReg_uqw =
insertBits(FpDestReg_uqw,
(offset + 1) * destSize * 8 - 1,
(offset + 0) * destSize * 8, srcReg1);
} else {
FpDestReg_uqw = FpDestReg_uqw;
}
'''
class Movsign(MediaOp):
    # Gathers the top bit of every srcSize-byte item of FpSrcReg1 into
    # consecutive bits of a mask and ORs that mask into DestReg. The mask
    # starts at bit 0, or at bit `items` when bit 0 of ext is set.
    op_class = 'SimdMiscOp'

    def __init__(self, dest, src,
                 size = None, destSize = None, srcSize = None, ext = None):
        # There is no second operand; a fixed register index is passed.
        super(Movsign, self).__init__(
            dest, src, "InstRegIndex(0)",
            size=size, destSize=destSize, srcSize=srcSize, ext=ext)

    code = '''
int items = sizeof(FloatRegBits) / srcSize;
uint64_t result = 0;
int offset = (ext & 0x1) ? items : 0;
for (int i = 0; i < items; i++) {
uint64_t picked =
bits(FpSrcReg1_uqw, (i + 1) * 8 * srcSize - 1);
result = insertBits(result, i + offset, i + offset, picked);
}
DestReg = DestReg | result;
'''
class Maskmov(MediaOp):
# Masked move. For every item, the item of FpSrcReg1 replaces the matching
# item of FpDestReg when the top bit of the same item in FpSrcReg2 is set;
# all other items keep their previous FpDestReg value. Source and
# destination sizes must be equal.
op_class = 'SimdMiscOp'
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
if (bits(FpSrcReg2_uqw, hiIndex))
result = insertBits(result, hiIndex, loIndex, arg1Bits);
}
FpDestReg_uqw = result;
'''
class shuffle(MediaOp):
# Builds each result item from the item named by a selector field taken
# from ext. Selectors are 1 bit wide when size is 8 and 2 bits wide
# otherwise, and are consumed starting from the low bits of ext. A selector
# whose byte offset reaches past one register is reduced by options / 2 and
# reads FpSrcReg2; any other selector reads FpSrcReg1. The result starts
# from zero, so FpDestReg's old value is not preserved.
op_class = 'SimdMiscOp'
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = sizeof(FloatRegBits) / size;
int options;
int optionBits;
if (size == 8) {
options = 2;
optionBits = 1;
} else {
options = 4;
optionBits = 2;
}
uint64_t result = 0;
uint8_t sel = ext;
for (int i = 0; i < items; i++) {
uint64_t resBits;
uint8_t lsel = sel & mask(optionBits);
if (lsel * size >= sizeof(FloatRegBits)) {
lsel -= options / 2;
resBits = bits(FpSrcReg2_uqw,
(lsel + 1) * sizeBits - 1,
(lsel + 0) * sizeBits);
} else {
resBits = bits(FpSrcReg1_uqw,
(lsel + 1) * sizeBits - 1,
(lsel + 0) * sizeBits);
}
sel >>= optionBits;
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Unpack(MediaOp):
# Interleaves items of the two sources. Result item 2*i comes from
# FpSrcReg1 item (i + offset) and result item 2*i + 1 from the same item of
# FpSrcReg2. items is half the number of items in a register; offset is 0,
# or items when ext is non-zero, which selects the upper half of each
# source. Source and destination sizes must be equal.
op_class = 'SimdMiscOp'
code = '''
assert(srcSize == destSize);
int size = destSize;
int items = (sizeof(FloatRegBits) / size) / 2;
int offset = ext ? items : 0;
uint64_t result = 0;
for (int i = 0; i < items; i++) {
uint64_t pickedLow =
bits(FpSrcReg1_uqw, (i + offset + 1) * 8 * size - 1,
(i + offset) * 8 * size);
result = insertBits(result,
(2 * i + 1) * 8 * size - 1,
(2 * i + 0) * 8 * size,
pickedLow);
uint64_t pickedHigh =
bits(FpSrcReg2_uqw, (i + offset + 1) * 8 * size - 1,
(i + offset) * 8 * size);
result = insertBits(result,
(2 * i + 2) * 8 * size - 1,
(2 * i + 1) * 8 * size,
pickedHigh);
}
FpDestReg_uqw = result;
'''
class Pack(MediaOp):
    # Narrows srcSize-byte items to destSize-byte items with saturation
    # (srcSize must be twice destSize). The low half of the result is
    # packed from FpSrcReg1 and the high half from FpSrcReg2.
    #
    # Each source item is treated as a signed value. With signedOp() it is
    # clamped to the signed range of the destination item; otherwise it is
    # clamped to [0, 2^destBits - 1].
    # NOTE(review): the unsigned path interprets the source as signed,
    # as the x86 PACKUS* instructions do -- confirm no user of this
    # microop expects an unsigned source.
    #
    # This version avoids two undefined shifts the code used to perform:
    # mask() was called with the negative width destBits - srcBits + 1,
    # and the second source was indexed with (i - items), i.e. negative
    # bit positions. The second source is now indexed from item zero and
    # saturation is an explicit range check on the sign-extended value.
    op_class = 'SimdMiscOp'
    code = '''
        assert(srcSize == destSize * 2);
        int items = (sizeof(FloatRegBits) / destSize);
        int destBits = destSize * 8;
        int srcBits = srcSize * 8;
        uint64_t result = 0;
        for (int i = 0; i < items; i++) {
            // Low half of the result comes from the first source, high
            // half from the second; each source is indexed from item 0.
            const int half = items / 2;
            const bool fromSrc1 = i < half;
            const int srcItem = fromSrc1 ? i : i - half;
            const uint64_t srcReg =
                fromSrc1 ? FpSrcReg1_uqw : FpSrcReg2_uqw;
            uint64_t picked =
                bits(srcReg, (srcItem + 1) * srcBits - 1,
                             (srcItem + 0) * srcBits);

            // Sign extend the source item.
            const int64_t value = picked |
                (0 - (picked & (ULL(1) << (srcBits - 1))));

            // Handle saturation.
            if (signedOp()) {
                const int64_t maxVal = mask(destBits - 1);
                const int64_t minVal = -maxVal - 1;
                if (value > maxVal)
                    picked = maxVal;
                else if (value < minVal)
                    picked = (ULL(1) << (destBits - 1));
            } else {
                if (value < 0)
                    picked = 0;
                else if ((uint64_t)value > mask(destBits))
                    picked = mask(destBits);
            }

            // insertBits() keeps only the low destBits bits of picked.
            result = insertBits(result,
                                (i + 1) * destBits - 1,
                                (i + 0) * destBits,
                                picked);
        }
        FpDestReg_uqw = result;
    '''
class Mxor(MediaOp):
    # Bitwise XOR of the two source registers into FpDestReg. The size is
    # fixed at 1 because the operation works on the whole register.
    op_class = 'SimdAluOp'
    code = '''
FpDestReg_uqw = FpSrcReg1_uqw ^ FpSrcReg2_uqw;
'''

    def __init__(self, dest, src1, src2):
        super(Mxor, self).__init__(dest, src1, src2, size=1)
class Mor(MediaOp):
    # Bitwise OR of the two source registers into FpDestReg. The size is
    # fixed at 1 because the operation works on the whole register.
    op_class = 'SimdAluOp'
    code = '''
FpDestReg_uqw = FpSrcReg1_uqw | FpSrcReg2_uqw;
'''

    def __init__(self, dest, src1, src2):
        super(Mor, self).__init__(dest, src1, src2, size=1)
class Mand(MediaOp):
    # Bitwise AND of the two source registers into FpDestReg. The size is
    # fixed at 1 because the operation works on the whole register.
    op_class = 'SimdAluOp'
    code = '''
FpDestReg_uqw = FpSrcReg1_uqw & FpSrcReg2_uqw;
'''

    def __init__(self, dest, src1, src2):
        super(Mand, self).__init__(dest, src1, src2, size=1)
class Mandn(MediaOp):
    # AND-NOT: the complement of the first source ANDed with the second
    # source, written to FpDestReg. The size is fixed at 1 because the
    # operation works on the whole register.
    op_class = 'SimdAluOp'
    code = '''
FpDestReg_uqw = ~FpSrcReg1_uqw & FpSrcReg2_uqw;
'''

    def __init__(self, dest, src1, src2):
        super(Mandn, self).__init__(dest, src1, src2, size=1)
class Mminf(MediaOp):
# Per-item floating point minimum. Items are 4 bytes (float) or 8 bytes
# (double); both are compared as double. The first source's bits are
# written when arg1 < arg2 holds, otherwise the second source's bits are
# written, so the second source is also the result whenever the comparison
# is false. Items not covered by the loop keep their FpDestReg value.
op_class = 'SimdFloatCmpOp'
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
double arg1, arg2;
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
if (size == 4) {
floatInt fi;
fi.i = arg1Bits;
arg1 = fi.f;
fi.i = arg2Bits;
arg2 = fi.f;
} else {
doubleInt di;
di.i = arg1Bits;
arg1 = di.d;
di.i = arg2Bits;
arg2 = di.d;
}
if (arg1 < arg2) {
result = insertBits(result, hiIndex, loIndex, arg1Bits);
} else {
result = insertBits(result, hiIndex, loIndex, arg2Bits);
}
}
FpDestReg_uqw = result;
'''
class Mmaxf(MediaOp):
# Per-item floating point maximum. Items are 4 bytes (float) or 8 bytes
# (double); both are compared as double. The first source's bits are
# written when arg1 > arg2 holds, otherwise the second source's bits are
# written, so the second source is also the result whenever the comparison
# is false. Items not covered by the loop keep their FpDestReg value.
op_class = 'SimdFloatCmpOp'
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
double arg1, arg2;
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
if (size == 4) {
floatInt fi;
fi.i = arg1Bits;
arg1 = fi.f;
fi.i = arg2Bits;
arg2 = fi.f;
} else {
doubleInt di;
di.i = arg1Bits;
arg1 = di.d;
di.i = arg2Bits;
arg2 = di.d;
}
if (arg1 > arg2) {
result = insertBits(result, hiIndex, loIndex, arg1Bits);
} else {
result = insertBits(result, hiIndex, loIndex, arg2Bits);
}
}
FpDestReg_uqw = result;
'''
class Mmini(MediaOp):
# Per-item integer minimum. With signedOp() the items are compared as
# sign-extended 64-bit values; otherwise their raw bits are compared as
# unsigned. On a tie the second source's bits are written. Source and
# destination sizes must be equal.
op_class = 'SimdCmpOp'
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
int64_t arg1 = arg1Bits |
(0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
int64_t arg2 = arg2Bits |
(0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
uint64_t resBits;
if (signedOp()) {
if (arg1 < arg2) {
resBits = arg1Bits;
} else {
resBits = arg2Bits;
}
} else {
if (arg1Bits < arg2Bits) {
resBits = arg1Bits;
} else {
resBits = arg2Bits;
}
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Mmaxi(MediaOp):
# Per-item integer maximum. With signedOp() the items are compared as
# sign-extended 64-bit values; otherwise their raw bits are compared as
# unsigned. On a tie the second source's bits are written. Source and
# destination sizes must be equal.
op_class = 'SimdCmpOp'
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
int64_t arg1 = arg1Bits |
(0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
int64_t arg2 = arg2Bits |
(0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
uint64_t resBits;
if (signedOp()) {
if (arg1 > arg2) {
resBits = arg1Bits;
} else {
resBits = arg2Bits;
}
} else {
if (arg1Bits > arg2Bits) {
resBits = arg1Bits;
} else {
resBits = arg2Bits;
}
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Msqrt(MediaOp):
    # Per-item square root of FpSrcReg1. Items are 4 bytes (float) or
    # 8 bytes (double), and source and destination sizes must be equal.
    # Items not covered by the loop keep their FpDestReg value.
    op_class = 'SimdFloatSqrtOp'

    def __init__(self, dest, src,
                 size = None, destSize = None, srcSize = None, ext = None):
        # There is no second operand; a fixed register index is passed.
        super(Msqrt, self).__init__(
            dest, src, "InstRegIndex(0)",
            size=size, destSize=destSize, srcSize=srcSize, ext=ext)

    code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t argBits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
if (size == 4) {
floatInt fi;
fi.i = argBits;
fi.f = sqrt(fi.f);
argBits = fi.i;
} else {
doubleInt di;
di.i = argBits;
di.d = sqrt(di.d);
argBits = di.i;
}
result = insertBits(result, hiIndex, loIndex, argBits);
}
FpDestReg_uqw = result;
'''
# Reciprocal of each single-precision item (the code asserts a 4-byte size).
class Mrcp(MediaOp):
    # Each 4-byte item of FpSrcReg1 is replaced by 1.0 divided by that
    # item; as the code itself notes, this is a full-precision divide
    # rather than a hardware-style approximation. Items not covered by the
    # loop keep their FpDestReg value.
    op_class = 'SimdFloatAluOp'

    def __init__(self, dest, src,
                 size = None, destSize = None, srcSize = None, ext = None):
        # There is no second operand; a fixed register index is passed.
        super(Mrcp, self).__init__(
            dest, src, "InstRegIndex(0)",
            size=size, destSize=destSize, srcSize=srcSize, ext=ext)

    code = '''
union floatInt
{
float f;
uint32_t i;
};
assert(srcSize == 4); // ISA defines single-precision only
assert(srcSize == destSize);
const int size = 4;
const int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t argBits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
floatInt fi;
fi.i = argBits;
// This is more accuracy than HW provides, but oh well
fi.f = 1.0 / fi.f;
argBits = fi.i;
result = insertBits(result, hiIndex, loIndex, argBits);
}
FpDestReg_uqw = result;
'''
class Maddf(MediaOp):
# Per-item floating point addition (FpSrcReg1 + FpSrcReg2). 4-byte items
# are added as float and 8-byte items as double. Source and destination
# sizes must be equal. Items not covered by the loop keep their FpDestReg
# value.
op_class = 'SimdFloatAddOp'
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits;
if (size == 4) {
floatInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.f = arg1.f + arg2.f;
resBits = res.i;
} else {
doubleInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.d = arg1.d + arg2.d;
resBits = res.i;
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Msubf(MediaOp):
# Per-item floating point subtraction (FpSrcReg1 - FpSrcReg2). 4-byte
# items are subtracted as float and 8-byte items as double. Source and
# destination sizes must be equal. Items not covered by the loop keep their
# FpDestReg value.
op_class = 'SimdFloatAddOp'
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits;
if (size == 4) {
floatInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.f = arg1.f - arg2.f;
resBits = res.i;
} else {
doubleInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.d = arg1.d - arg2.d;
resBits = res.i;
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Mmulf(MediaOp):
# Per-item floating point multiplication (FpSrcReg1 * FpSrcReg2). 4-byte
# items are multiplied as float and 8-byte items as double. Source and
# destination sizes must be equal. Items not covered by the loop keep their
# FpDestReg value.
op_class = 'SimdFloatMultOp'
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits;
if (size == 4) {
floatInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.f = arg1.f * arg2.f;
resBits = res.i;
} else {
doubleInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.d = arg1.d * arg2.d;
resBits = res.i;
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Mdivf(MediaOp):
# Per-item floating point division (FpSrcReg1 / FpSrcReg2). 4-byte items
# are divided as float and 8-byte items as double. Source and destination
# sizes must be equal. Items not covered by the loop keep their FpDestReg
# value.
op_class = 'SimdFloatDivOp'
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits;
if (size == 4) {
floatInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.f = arg1.f / arg2.f;
resBits = res.i;
} else {
doubleInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.d = arg1.d / arg2.d;
resBits = res.i;
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Maddi(MediaOp):
# Per-item integer addition. Without (ext & 0x2) the sum simply wraps,
# because insertBits keeps only the item's bits. With (ext & 0x2) the sum
# saturates: for signedOp(), when both operands share a sign that the sum
# does not, the result becomes the item's most negative value (sum looked
# non-negative) or its most positive value (sum looked negative); for
# unsigned operation a carry out of the item sets all of its bits.
op_class = 'SimdAddOp'
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits = arg1Bits + arg2Bits;
if (ext & 0x2) {
if (signedOp()) {
int arg1Sign = bits(arg1Bits, sizeBits - 1);
int arg2Sign = bits(arg2Bits, sizeBits - 1);
int resSign = bits(resBits, sizeBits - 1);
if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
if (resSign == 0)
resBits = (ULL(1) << (sizeBits - 1));
else
resBits = mask(sizeBits - 1);
}
} else {
if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
resBits = mask(sizeBits);
}
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Msubi(MediaOp):
# Per-item integer subtraction (FpSrcReg1 - FpSrcReg2). Without
# (ext & 0x2) the difference simply wraps. With (ext & 0x2) it saturates:
# for signedOp() the overflow test is the addition test applied with the
# second operand's sign inverted; for unsigned operation the result is 0
# when the second operand is larger than the first, and otherwise all ones
# when findCarry() on (arg1Bits, ~arg2Bits) reports no carry.
# NOTE(review): findCarry() is defined elsewhere; the unsigned branch is
# documented from the calls visible here only.
op_class = 'SimdAddOp'
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits = arg1Bits - arg2Bits;
if (ext & 0x2) {
if (signedOp()) {
int arg1Sign = bits(arg1Bits, sizeBits - 1);
int arg2Sign = !bits(arg2Bits, sizeBits - 1);
int resSign = bits(resBits, sizeBits - 1);
if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
if (resSign == 0)
resBits = (ULL(1) << (sizeBits - 1));
else
resBits = mask(sizeBits - 1);
}
} else {
if (arg2Bits > arg1Bits) {
resBits = 0;
} else if (!findCarry(sizeBits, resBits,
arg1Bits, ~arg2Bits)) {
resBits = mask(sizeBits);
}
}
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Mmuli(MediaOp):
# Per-item integer multiplication producing destSize-byte items from
# srcSize-byte items (destSize >= srcSize, at most 64 destination bits).
# When (ext & 16) is set the source item positions are spread out to the
# destination item spacing; (ext & 32) then selects the lower rather than
# the upper srcBits of each destination-sized slot. With signedOp() the
# operands are sign-extended before multiplying. (ext & 0x4) adds
# 1 << (destBits - 1) to the product, and multHi() shifts the product right
# by destBits before it is stored.
# NOTE(review): with destBits == 64 and multHi() the shift count equals the
# operand width -- confirm that combination is never used.
op_class = 'SimdMultOp'
code = '''
int srcBits = srcSize * 8;
int destBits = destSize * 8;
assert(destBits <= 64);
assert(destSize >= srcSize);
int items = numItems(destSize);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int offset = 0;
if (ext & 16) {
if (ext & 32)
offset = i * (destBits - srcBits);
else
offset = i * (destBits - srcBits) + srcBits;
}
int srcHiIndex = (i + 1) * srcBits - 1 + offset;
int srcLoIndex = (i + 0) * srcBits + offset;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, srcHiIndex, srcLoIndex);
uint64_t resBits;
if (signedOp()) {
int64_t arg1 = arg1Bits |
(0 - (arg1Bits & (ULL(1) << (srcBits - 1))));
int64_t arg2 = arg2Bits |
(0 - (arg2Bits & (ULL(1) << (srcBits - 1))));
resBits = (uint64_t)(arg1 * arg2);
} else {
resBits = arg1Bits * arg2Bits;
}
if (ext & 0x4)
resBits += (ULL(1) << (destBits - 1));
if (multHi())
resBits >>= destBits;
int destHiIndex = (i + 1) * destBits - 1;
int destLoIndex = (i + 0) * destBits;
result = insertBits(result, destHiIndex, destLoIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Mavg(MediaOp):
# Per-item rounded average of the two sources: (a + b + 1) / 2 computed on
# the zero-extended item values. Source and destination sizes must be
# equal. Items not covered by the loop keep their FpDestReg value.
op_class = 'SimdAddOp'
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2;
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Msad(MediaOp):
# Sum of absolute differences: for every srcSize-byte item the absolute
# value of (FpSrcReg1 item - FpSrcReg2 item) is accumulated, and the total,
# masked to destSize * 8 bits, replaces the whole of FpDestReg.
op_class = 'SimdAddOp'
code = '''
int srcBits = srcSize * 8;
int items = sizeof(FloatRegBits) / srcSize;
uint64_t sum = 0;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * srcBits - 1;
int loIndex = (i + 0) * srcBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
int64_t resBits = arg1Bits - arg2Bits;
if (resBits < 0)
resBits = -resBits;
sum += resBits;
}
FpDestReg_uqw = sum & mask(destSize * 8);
'''
class Msrl(MediaOp):
# Per-item logical right shift of FpSrcReg1 by op2. Because the code names
# op2, the metaclass generates both a register and an immediate variant. A
# shift amount of sizeBits or more yields 0 for the item. Source and
# destination sizes must be equal.
op_class = 'SimdShiftOp'
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t shiftAmt = op2_uqw;
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t resBits;
if (shiftAmt >= sizeBits) {
resBits = 0;
} else {
resBits = (arg1Bits >> shiftAmt) &
mask(sizeBits - shiftAmt);
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Msra(MediaOp):
# Per-item arithmetic right shift of FpSrcReg1 by op2. Because the code
# names op2, the metaclass generates both a register and an immediate
# variant. A shift amount of sizeBits or more fills the item with its sign
# bit (all ones or zero); smaller amounts shift right and then sign-extend
# from the position the sign bit moved to.
op_class = 'SimdShiftOp'
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t shiftAmt = op2_uqw;
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t resBits;
if (shiftAmt >= sizeBits) {
if (bits(arg1Bits, sizeBits - 1))
resBits = mask(sizeBits);
else
resBits = 0;
} else {
resBits = (arg1Bits >> shiftAmt);
resBits = resBits |
(0 - (resBits & (ULL(1) << (sizeBits - 1 - shiftAmt))));
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Msll(MediaOp):
# Per-item logical left shift of FpSrcReg1 by op2. Because the code names
# op2, the metaclass generates both a register and an immediate variant. A
# shift amount of sizeBits or more yields 0 for the item; bits shifted
# beyond the item are dropped when insertBits stores the result.
op_class = 'SimdShiftOp'
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t shiftAmt = op2_uqw;
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t resBits;
if (shiftAmt >= sizeBits) {
resBits = 0;
} else {
resBits = (arg1Bits << shiftAmt);
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Cvtf2i(MediaOp):
    # Converts floating point items of FpSrcReg1 to signed integers. Both
    # sizes must be 4 or 8 bytes. When narrowing, (ext & 0x2) moves the
    # destination start up by `items` destination items; when widening it
    # moves the source start up by `items` source items. With (ext & 0x4)
    # 0.5 is added to non-negative values and subtracted from negative
    # ones before the truncating cast; otherwise the value is truncated.
    op_class = 'SimdFloatCvtOp'

    def __init__(self, dest, src,
                 size = None, destSize = None, srcSize = None, ext = None):
        # There is no second operand; a fixed register index is passed.
        super(Cvtf2i, self).__init__(
            dest, src, "InstRegIndex(0)",
            size=size, destSize=destSize, srcSize=srcSize, ext=ext)

    code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(destSize == 4 || destSize == 8);
assert(srcSize == 4 || srcSize == 8);
int srcSizeBits = srcSize * 8;
int destSizeBits = destSize * 8;
int items;
int srcStart = 0;
int destStart = 0;
if (srcSize == 2 * destSize) {
items = numItems(srcSize);
if (ext & 0x2)
destStart = destSizeBits * items;
} else if (destSize == 2 * srcSize) {
items = numItems(destSize);
if (ext & 0x2)
srcStart = srcSizeBits * items;
} else {
items = numItems(destSize);
}
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
double arg;
if (srcSize == 4) {
floatInt fi;
fi.i = argBits;
arg = fi.f;
} else {
doubleInt di;
di.i = argBits;
arg = di.d;
}
if (ext & 0x4) {
if (arg >= 0)
arg += 0.5;
else
arg -= 0.5;
}
if (destSize == 4) {
int32_t i_arg = (int32_t)arg;
argBits = *((uint32_t*)&i_arg);
} else {
int64_t i_arg = (int64_t)arg;
argBits = *((uint64_t*)&i_arg);
}
int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
int destLoIndex = destStart + (i + 0) * destSizeBits;
result = insertBits(result, destHiIndex, destLoIndex, argBits);
}
FpDestReg_uqw = result;
'''
class Cvti2f(MediaOp):
    # Converts signed integer items of FpSrcReg1 to floating point. Both
    # sizes must be 4 or 8 bytes. Each item is sign-extended, converted to
    # double and stored as float (destSize 4) or double (destSize 8). When
    # narrowing, (ext & 0x2) moves the destination start up by `items`
    # destination items; when widening it moves the source start up by
    # `items` source items.
    op_class = 'SimdFloatCvtOp'

    def __init__(self, dest, src,
                 size = None, destSize = None, srcSize = None, ext = None):
        # There is no second operand; a fixed register index is passed.
        super(Cvti2f, self).__init__(
            dest, src, "InstRegIndex(0)",
            size=size, destSize=destSize, srcSize=srcSize, ext=ext)

    code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(destSize == 4 || destSize == 8);
assert(srcSize == 4 || srcSize == 8);
int srcSizeBits = srcSize * 8;
int destSizeBits = destSize * 8;
int items;
int srcStart = 0;
int destStart = 0;
if (srcSize == 2 * destSize) {
items = numItems(srcSize);
if (ext & 0x2)
destStart = destSizeBits * items;
} else if (destSize == 2 * srcSize) {
items = numItems(destSize);
if (ext & 0x2)
srcStart = srcSizeBits * items;
} else {
items = numItems(destSize);
}
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
int64_t sArg = argBits |
(0 - (argBits & (ULL(1) << (srcSizeBits - 1))));
double arg = sArg;
if (destSize == 4) {
floatInt fi;
fi.f = arg;
argBits = fi.i;
} else {
doubleInt di;
di.d = arg;
argBits = di.i;
}
int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
int destLoIndex = destStart + (i + 0) * destSizeBits;
result = insertBits(result, destHiIndex, destLoIndex, argBits);
}
FpDestReg_uqw = result;
'''
class Cvtf2f(MediaOp):
    # Converts floating point items of FpSrcReg1 between float and double.
    # Both sizes must be 4 or 8 bytes. Each item is read as float or
    # double according to srcSize and stored as float or double according
    # to destSize. When narrowing, (ext & 0x2) moves the destination start
    # up by `items` destination items; when widening it moves the source
    # start up by `items` source items.
    op_class = 'SimdFloatCvtOp'

    def __init__(self, dest, src,
                 size = None, destSize = None, srcSize = None, ext = None):
        # There is no second operand; a fixed register index is passed.
        super(Cvtf2f, self).__init__(
            dest, src, "InstRegIndex(0)",
            size=size, destSize=destSize, srcSize=srcSize, ext=ext)

    code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(destSize == 4 || destSize == 8);
assert(srcSize == 4 || srcSize == 8);
int srcSizeBits = srcSize * 8;
int destSizeBits = destSize * 8;
int items;
int srcStart = 0;
int destStart = 0;
if (srcSize == 2 * destSize) {
items = numItems(srcSize);
if (ext & 0x2)
destStart = destSizeBits * items;
} else if (destSize == 2 * srcSize) {
items = numItems(destSize);
if (ext & 0x2)
srcStart = srcSizeBits * items;
} else {
items = numItems(destSize);
}
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
double arg;
if (srcSize == 4) {
floatInt fi;
fi.i = argBits;
arg = fi.f;
} else {
doubleInt di;
di.i = argBits;
arg = di.d;
}
if (destSize == 4) {
floatInt fi;
fi.f = arg;
argBits = fi.i;
} else {
doubleInt di;
di.d = arg;
argBits = di.i;
}
int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
int destLoIndex = destStart + (i + 0) * destSizeBits;
result = insertBits(result, destHiIndex, destLoIndex, argBits);
}
FpDestReg_uqw = result;
'''
class Mcmpi2r(MediaOp):
# Per-item signed integer compare that writes an all-ones or all-zeros mask
# into each item of FpDestReg. With (ext & 0x2) clear the test is equality;
# with it set the test is arg1 > arg2 on the sign-extended values.
# The floatInt and doubleInt unions declared at the top of the code are not
# used by it.
op_class = 'SimdCvtOp'
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
int64_t arg1 = arg1Bits |
(0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
int64_t arg2 = arg2Bits |
(0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
uint64_t resBits = 0;
if (((ext & 0x2) == 0 && arg1 == arg2) ||
((ext & 0x2) == 0x2 && arg1 > arg2))
resBits = mask(sizeBits);
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Mcmpf2r(MediaOp):
# Per-item floating point compare that writes an all-ones or all-zeros mask
# into each item of FpDestReg. The low three bits of ext pick the
# predicate: 0 equal, 1 less than, 2 less than or equal, 3 unordered
# (either operand is NaN), 4 not equal, 5 not less than, 6 not less than or
# equal, 7 ordered (neither operand is NaN). Predicates 0-2 are false and
# predicates 4-6 are true when either operand is NaN. Items whose size is
# not 4 are read as double.
op_class = 'SimdFloatCvtOp'
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
double arg1, arg2;
if (size == 4) {
floatInt fi;
fi.i = arg1Bits;
arg1 = fi.f;
fi.i = arg2Bits;
arg2 = fi.f;
} else {
doubleInt di;
di.i = arg1Bits;
arg1 = di.d;
di.i = arg2Bits;
arg2 = di.d;
}
uint64_t resBits = 0;
bool nanop = std::isnan(arg1) || std::isnan(arg2);
switch (ext & mask(3)) {
case 0:
if (arg1 == arg2 && !nanop)
resBits = mask(sizeBits);
break;
case 1:
if (arg1 < arg2 && !nanop)
resBits = mask(sizeBits);
break;
case 2:
if (arg1 <= arg2 && !nanop)
resBits = mask(sizeBits);
break;
case 3:
if (nanop)
resBits = mask(sizeBits);
break;
case 4:
if (arg1 != arg2 || nanop)
resBits = mask(sizeBits);
break;
case 5:
if (!(arg1 < arg2) || nanop)
resBits = mask(sizeBits);
break;
case 6:
if (!(arg1 <= arg2) || nanop)
resBits = mask(sizeBits);
break;
case 7:
if (!nanop)
resBits = mask(sizeBits);
break;
};
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Mcmpf2rf(MediaOp):
    # Compares the lowest float (size 4) or double (size 8) item of the two
    # sources and reports the outcome in the flags. SF, AF, ZF, PF, OF and
    # CF are cleared first, then set as listed in the table inside the
    # code: unordered sets ZF, PF and CF; less-than sets CF; equal sets ZF;
    # greater-than sets nothing. No register result is computed.
    op_class = 'SimdFloatCvtOp'

    def __init__(self, src1, src2,
                 size = None, destSize = None, srcSize = None, ext = None):
        # There is no destination operand; a fixed register index is
        # passed in its place.
        super(Mcmpf2rf, self).__init__(
            "InstRegIndex(0)", src1, src2,
            size=size, destSize=destSize, srcSize=srcSize, ext=ext)

    code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
assert(srcSize == 4 || srcSize == 8);
int size = srcSize;
int sizeBits = size * 8;
double arg1, arg2;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, sizeBits - 1, 0);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, sizeBits - 1, 0);
if (size == 4) {
floatInt fi;
fi.i = arg1Bits;
arg1 = fi.f;
fi.i = arg2Bits;
arg2 = fi.f;
} else {
doubleInt di;
di.i = arg1Bits;
arg1 = di.d;
di.i = arg2Bits;
arg2 = di.d;
}
// ZF PF CF
// Unordered 1 1 1
// Greater than 0 0 0
// Less than 0 0 1
// Equal 1 0 0
// OF = SF = AF = 0
ccFlagBits = ccFlagBits & ~(SFBit | AFBit | ZFBit | PFBit);
cfofBits = cfofBits & ~(OFBit | CFBit);
if (std::isnan(arg1) || std::isnan(arg2)) {
ccFlagBits = ccFlagBits | (ZFBit | PFBit);
cfofBits = cfofBits | CFBit;
}
else if(arg1 < arg2)
cfofBits = cfofBits | CFBit;
else if(arg1 == arg2)
ccFlagBits = ccFlagBits | ZFBit;
'''
class Emms(MediaOp):
    # Takes no operands from the microcode: the destination is fixed to
    # InstRegIndex(MISCREG_FTW), both sources to InstRegIndex(0) and the
    # size to 2. The code stores 0xFFFF to FTW.
    op_class = 'FloatMiscOp'
    code = 'FTW = 0xFFFF;'

    def __init__(self):
        super(Emms, self).__init__(
            'InstRegIndex(MISCREG_FTW)',
            'InstRegIndex(0)',
            'InstRegIndex(0)',
            size=2)
}};