/*
* Copyright (c) 2015-2021 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "arch/amdgpu/vega/insts/instructions.hh"
#include <cmath>
#include "arch/amdgpu/vega/insts/inst_util.hh"
#include "debug/VEGA.hh"
#include "debug/GPUSync.hh"
#include "gpu-compute/shader.hh"
namespace gem5
{
namespace VegaISA
{
// --- Inst_SOP2__S_ADD_U32 class methods ---
Inst_SOP2__S_ADD_U32::Inst_SOP2__S_ADD_U32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_add_u32")
{
setFlag(ALU);
} // Inst_SOP2__S_ADD_U32
Inst_SOP2__S_ADD_U32::~Inst_SOP2__S_ADD_U32()
{
} // ~Inst_SOP2__S_ADD_U32
// --- description from .arch file ---
// D.u = S0.u + S1.u;
// SCC = (S0.u + S1.u >= 0x100000000ULL ? 1 : 0) is an unsigned
// overflow/carry-out for S_ADDC_U32.
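// Worked example (illustrative values): with S0.u = 0xFFFFFFFF and
// S1.u = 0x00000002 the full sum is 0x100000001, so D.u wraps to
// 0x00000001 and SCC = 1. The implementation below detects this by
// widening both operands to 64 bits before comparing against 2^32.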
void
Inst_SOP2__S_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = src0.rawData() + src1.rawData();
scc = ((ScalarRegU64)src0.rawData() + (ScalarRegU64)src1.rawData())
>= 0x100000000ULL ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_SUB_U32 class methods ---
Inst_SOP2__S_SUB_U32::Inst_SOP2__S_SUB_U32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_sub_u32")
{
setFlag(ALU);
} // Inst_SOP2__S_SUB_U32
Inst_SOP2__S_SUB_U32::~Inst_SOP2__S_SUB_U32()
{
} // ~Inst_SOP2__S_SUB_U32
// --- description from .arch file ---
// D.u = S0.u - S1.u;
// SCC = (S1.u > S0.u ? 1 : 0) is an unsigned overflow or carry-out for
// S_SUBB_U32.
void
Inst_SOP2__S_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = src0.rawData() - src1.rawData();
scc = (src1.rawData() > src0.rawData()) ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_ADD_I32 class methods ---
Inst_SOP2__S_ADD_I32::Inst_SOP2__S_ADD_I32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_add_i32")
{
setFlag(ALU);
} // Inst_SOP2__S_ADD_I32
Inst_SOP2__S_ADD_I32::~Inst_SOP2__S_ADD_I32()
{
} // ~Inst_SOP2__S_ADD_I32
// --- description from .arch file ---
// D.i = S0.i + S1.i;
// SCC = (S0.u[31] == S1.u[31] && S0.u[31] != D.u[31]) is a signed
// overflow.
// This opcode is not suitable for use with S_ADDC_U32 for implementing
// 64-bit operations.
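// Worked example (illustrative values): adding S0.i = 0x7FFFFFFF and
// S1.i = 1 yields D.i = 0x80000000; both sources have sign bit 0 while
// the result has sign bit 1, so SCC = 1 (signed overflow). Operands of
// opposite sign can never overflow, which is why the check below first
// compares bit 31 of the two sources.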
void
Inst_SOP2__S_ADD_I32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = src0.rawData() + src1.rawData();
scc = (bits(src0.rawData(), 31) == bits(src1.rawData(), 31)
&& bits(src0.rawData(), 31) != bits(sdst.rawData(), 31))
? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_SUB_I32 class methods ---
Inst_SOP2__S_SUB_I32::Inst_SOP2__S_SUB_I32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_sub_i32")
{
setFlag(ALU);
} // Inst_SOP2__S_SUB_I32
Inst_SOP2__S_SUB_I32::~Inst_SOP2__S_SUB_I32()
{
} // ~Inst_SOP2__S_SUB_I32
// --- description from .arch file ---
// D.i = S0.i - S1.i;
// SCC = (S0.u[31] != S1.u[31] && S0.u[31] != D.u[31]) is a signed
// overflow.
// CAUTION: The condition code behaviour for this opcode is inconsistent
// with V_SUB_I32; see V_SUB_I32 for further details.
// This opcode is not suitable for use with S_SUBB_U32 for implementing
// 64-bit operations.
void
Inst_SOP2__S_SUB_I32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = src0.rawData() - src1.rawData();
scc = (bits(src0.rawData(), 31) != bits(src1.rawData(), 31)
&& bits(src0.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_ADDC_U32 class methods ---
Inst_SOP2__S_ADDC_U32::Inst_SOP2__S_ADDC_U32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_addc_u32")
{
setFlag(ALU);
} // Inst_SOP2__S_ADDC_U32
Inst_SOP2__S_ADDC_U32::~Inst_SOP2__S_ADDC_U32()
{
} // ~Inst_SOP2__S_ADDC_U32
// --- description from .arch file ---
// D.u = S0.u + S1.u + SCC;
// SCC = (S0.u + S1.u + SCC >= 0x100000000ULL ? 1 : 0) is an unsigned
// overflow.
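// Usage sketch (illustrative assembly, not from this file): together
// with S_ADD_U32 this forms the standard 64-bit add carry chain, e.g.
//     s_add_u32  s0, s2, s4    // low halves; SCC <- carry-out
//     s_addc_u32 s1, s3, s5    // high halves + SCC carry-in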
void
Inst_SOP2__S_ADDC_U32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
scc.read();
sdst = src0.rawData() + src1.rawData() + scc.rawData();
scc = ((ScalarRegU64)src0.rawData() + (ScalarRegU64)src1.rawData()
+ (ScalarRegU64)scc.rawData()) >= 0x100000000ULL ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_SUBB_U32 class methods ---
Inst_SOP2__S_SUBB_U32::Inst_SOP2__S_SUBB_U32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_subb_u32")
{
setFlag(ALU);
} // Inst_SOP2__S_SUBB_U32
Inst_SOP2__S_SUBB_U32::~Inst_SOP2__S_SUBB_U32()
{
} // ~Inst_SOP2__S_SUBB_U32
// --- description from .arch file ---
// D.u = S0.u - S1.u - SCC;
// SCC = (S1.u + SCC > S0.u ? 1 : 0) is an unsigned overflow.
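// Usage sketch (illustrative assembly, not from this file): paired with
// S_SUB_U32 this forms a 64-bit subtract borrow chain, e.g.
//     s_sub_u32  s0, s2, s4    // low halves; SCC <- borrow-out
//     s_subb_u32 s1, s3, s5    // high halves minus SCC borrow-in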
void
Inst_SOP2__S_SUBB_U32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
scc.read();
sdst = src0.rawData() - src1.rawData() - scc.rawData();
scc = (src1.rawData() + scc.rawData()) > src0.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_MIN_I32 class methods ---
Inst_SOP2__S_MIN_I32::Inst_SOP2__S_MIN_I32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_min_i32")
{
setFlag(ALU);
} // Inst_SOP2__S_MIN_I32
Inst_SOP2__S_MIN_I32::~Inst_SOP2__S_MIN_I32()
{
} // ~Inst_SOP2__S_MIN_I32
// --- description from .arch file ---
// D.i = (S0.i < S1.i) ? S0.i : S1.i;
// SCC = 1 if S0 is chosen as the minimum value.
void
Inst_SOP2__S_MIN_I32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = std::min(src0.rawData(), src1.rawData());
scc = (src0.rawData() < src1.rawData()) ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_MIN_U32 class methods ---
Inst_SOP2__S_MIN_U32::Inst_SOP2__S_MIN_U32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_min_u32")
{
setFlag(ALU);
} // Inst_SOP2__S_MIN_U32
Inst_SOP2__S_MIN_U32::~Inst_SOP2__S_MIN_U32()
{
} // ~Inst_SOP2__S_MIN_U32
// --- description from .arch file ---
// D.u = (S0.u < S1.u) ? S0.u : S1.u;
// SCC = 1 if S0 is chosen as the minimum value.
void
Inst_SOP2__S_MIN_U32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = std::min(src0.rawData(), src1.rawData());
scc = (src0.rawData() < src1.rawData()) ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_MAX_I32 class methods ---
Inst_SOP2__S_MAX_I32::Inst_SOP2__S_MAX_I32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_max_i32")
{
setFlag(ALU);
} // Inst_SOP2__S_MAX_I32
Inst_SOP2__S_MAX_I32::~Inst_SOP2__S_MAX_I32()
{
} // ~Inst_SOP2__S_MAX_I32
// --- description from .arch file ---
// D.i = (S0.i > S1.i) ? S0.i : S1.i;
// SCC = 1 if S0 is chosen as the maximum value.
void
Inst_SOP2__S_MAX_I32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = std::max(src0.rawData(), src1.rawData());
scc = (src0.rawData() > src1.rawData()) ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_MAX_U32 class methods ---
Inst_SOP2__S_MAX_U32::Inst_SOP2__S_MAX_U32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_max_u32")
{
setFlag(ALU);
} // Inst_SOP2__S_MAX_U32
Inst_SOP2__S_MAX_U32::~Inst_SOP2__S_MAX_U32()
{
} // ~Inst_SOP2__S_MAX_U32
// --- description from .arch file ---
// D.u = (S0.u > S1.u) ? S0.u : S1.u;
// SCC = 1 if S0 is chosen as the maximum value.
void
Inst_SOP2__S_MAX_U32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = std::max(src0.rawData(), src1.rawData());
scc = (src0.rawData() > src1.rawData()) ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_CSELECT_B32 class methods ---
Inst_SOP2__S_CSELECT_B32::Inst_SOP2__S_CSELECT_B32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_cselect_b32")
{
setFlag(ALU);
} // Inst_SOP2__S_CSELECT_B32
Inst_SOP2__S_CSELECT_B32::~Inst_SOP2__S_CSELECT_B32()
{
} // ~Inst_SOP2__S_CSELECT_B32
// --- description from .arch file ---
// D.u = SCC ? S0.u : S1.u (conditional select).
void
Inst_SOP2__S_CSELECT_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
scc.read();
sdst = scc.rawData() ? src0.rawData() : src1.rawData();
sdst.write();
} // execute
// --- Inst_SOP2__S_CSELECT_B64 class methods ---
Inst_SOP2__S_CSELECT_B64::Inst_SOP2__S_CSELECT_B64(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_cselect_b64")
{
setFlag(ALU);
} // Inst_SOP2__S_CSELECT_B64
Inst_SOP2__S_CSELECT_B64::~Inst_SOP2__S_CSELECT_B64()
{
} // ~Inst_SOP2__S_CSELECT_B64
// --- description from .arch file ---
// D.u64 = SCC ? S0.u64 : S1.u64 (conditional select).
void
Inst_SOP2__S_CSELECT_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
scc.read();
sdst = scc.rawData() ? src0.rawData() : src1.rawData();
sdst.write();
} // execute
// --- Inst_SOP2__S_AND_B32 class methods ---
Inst_SOP2__S_AND_B32::Inst_SOP2__S_AND_B32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_and_b32")
{
setFlag(ALU);
} // Inst_SOP2__S_AND_B32
Inst_SOP2__S_AND_B32::~Inst_SOP2__S_AND_B32()
{
} // ~Inst_SOP2__S_AND_B32
// --- description from .arch file ---
// D.u = S0.u & S1.u;
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_AND_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = src0.rawData() & src1.rawData();
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_AND_B64 class methods ---
Inst_SOP2__S_AND_B64::Inst_SOP2__S_AND_B64(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_and_b64")
{
setFlag(ALU);
} // Inst_SOP2__S_AND_B64
Inst_SOP2__S_AND_B64::~Inst_SOP2__S_AND_B64()
{
} // ~Inst_SOP2__S_AND_B64
// --- description from .arch file ---
// D.u64 = S0.u64 & S1.u64;
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_AND_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = src0.rawData() & src1.rawData();
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_OR_B32 class methods ---
Inst_SOP2__S_OR_B32::Inst_SOP2__S_OR_B32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_or_b32")
{
setFlag(ALU);
} // Inst_SOP2__S_OR_B32
Inst_SOP2__S_OR_B32::~Inst_SOP2__S_OR_B32()
{
} // ~Inst_SOP2__S_OR_B32
// --- description from .arch file ---
// D.u = S0.u | S1.u;
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_OR_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = src0.rawData() | src1.rawData();
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_OR_B64 class methods ---
Inst_SOP2__S_OR_B64::Inst_SOP2__S_OR_B64(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_or_b64")
{
setFlag(ALU);
} // Inst_SOP2__S_OR_B64
Inst_SOP2__S_OR_B64::~Inst_SOP2__S_OR_B64()
{
} // ~Inst_SOP2__S_OR_B64
// --- description from .arch file ---
// D.u64 = S0.u64 | S1.u64;
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_OR_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = src0.rawData() | src1.rawData();
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_XOR_B32 class methods ---
Inst_SOP2__S_XOR_B32::Inst_SOP2__S_XOR_B32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_xor_b32")
{
setFlag(ALU);
} // Inst_SOP2__S_XOR_B32
Inst_SOP2__S_XOR_B32::~Inst_SOP2__S_XOR_B32()
{
} // ~Inst_SOP2__S_XOR_B32
// --- description from .arch file ---
// D.u = S0.u ^ S1.u;
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = src0.rawData() ^ src1.rawData();
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_XOR_B64 class methods ---
Inst_SOP2__S_XOR_B64::Inst_SOP2__S_XOR_B64(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_xor_b64")
{
setFlag(ALU);
} // Inst_SOP2__S_XOR_B64
Inst_SOP2__S_XOR_B64::~Inst_SOP2__S_XOR_B64()
{
} // ~Inst_SOP2__S_XOR_B64
// --- description from .arch file ---
// D.u64 = S0.u64 ^ S1.u64;
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_XOR_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = src0.rawData() ^ src1.rawData();
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_ANDN2_B32 class methods ---
Inst_SOP2__S_ANDN2_B32::Inst_SOP2__S_ANDN2_B32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_andn2_b32")
{
setFlag(ALU);
} // Inst_SOP2__S_ANDN2_B32
Inst_SOP2__S_ANDN2_B32::~Inst_SOP2__S_ANDN2_B32()
{
} // ~Inst_SOP2__S_ANDN2_B32
// --- description from .arch file ---
// D.u = S0.u & ~S1.u;
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_ANDN2_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = src0.rawData() & ~src1.rawData();
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_ANDN2_B64 class methods ---
Inst_SOP2__S_ANDN2_B64::Inst_SOP2__S_ANDN2_B64(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_andn2_b64")
{
setFlag(ALU);
} // Inst_SOP2__S_ANDN2_B64
Inst_SOP2__S_ANDN2_B64::~Inst_SOP2__S_ANDN2_B64()
{
} // ~Inst_SOP2__S_ANDN2_B64
// --- description from .arch file ---
// D.u64 = S0.u64 & ~S1.u64;
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_ANDN2_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = src0.rawData() & ~src1.rawData();
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_ORN2_B32 class methods ---
Inst_SOP2__S_ORN2_B32::Inst_SOP2__S_ORN2_B32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_orn2_b32")
{
setFlag(ALU);
} // Inst_SOP2__S_ORN2_B32
Inst_SOP2__S_ORN2_B32::~Inst_SOP2__S_ORN2_B32()
{
} // ~Inst_SOP2__S_ORN2_B32
// --- description from .arch file ---
// D.u = S0.u | ~S1.u;
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_ORN2_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = src0.rawData() | ~src1.rawData();
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_ORN2_B64 class methods ---
Inst_SOP2__S_ORN2_B64::Inst_SOP2__S_ORN2_B64(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_orn2_b64")
{
setFlag(ALU);
} // Inst_SOP2__S_ORN2_B64
Inst_SOP2__S_ORN2_B64::~Inst_SOP2__S_ORN2_B64()
{
} // ~Inst_SOP2__S_ORN2_B64
// --- description from .arch file ---
// D.u64 = S0.u64 | ~S1.u64;
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_ORN2_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = src0.rawData() | ~src1.rawData();
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_NAND_B32 class methods ---
Inst_SOP2__S_NAND_B32::Inst_SOP2__S_NAND_B32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_nand_b32")
{
setFlag(ALU);
} // Inst_SOP2__S_NAND_B32
Inst_SOP2__S_NAND_B32::~Inst_SOP2__S_NAND_B32()
{
} // ~Inst_SOP2__S_NAND_B32
// --- description from .arch file ---
// D.u = ~(S0.u & S1.u);
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_NAND_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = ~(src0.rawData() & src1.rawData());
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_NAND_B64 class methods ---
Inst_SOP2__S_NAND_B64::Inst_SOP2__S_NAND_B64(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_nand_b64")
{
setFlag(ALU);
} // Inst_SOP2__S_NAND_B64
Inst_SOP2__S_NAND_B64::~Inst_SOP2__S_NAND_B64()
{
} // ~Inst_SOP2__S_NAND_B64
// --- description from .arch file ---
// D.u64 = ~(S0.u64 & S1.u64);
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_NAND_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = ~(src0.rawData() & src1.rawData());
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_NOR_B32 class methods ---
Inst_SOP2__S_NOR_B32::Inst_SOP2__S_NOR_B32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_nor_b32")
{
setFlag(ALU);
} // Inst_SOP2__S_NOR_B32
Inst_SOP2__S_NOR_B32::~Inst_SOP2__S_NOR_B32()
{
} // ~Inst_SOP2__S_NOR_B32
// --- description from .arch file ---
// D.u = ~(S0.u | S1.u);
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_NOR_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = ~(src0.rawData() | src1.rawData());
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_NOR_B64 class methods ---
Inst_SOP2__S_NOR_B64::Inst_SOP2__S_NOR_B64(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_nor_b64")
{
setFlag(ALU);
} // Inst_SOP2__S_NOR_B64
Inst_SOP2__S_NOR_B64::~Inst_SOP2__S_NOR_B64()
{
} // ~Inst_SOP2__S_NOR_B64
// --- description from .arch file ---
// D.u64 = ~(S0.u64 | S1.u64);
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_NOR_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = ~(src0.rawData() | src1.rawData());
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_XNOR_B32 class methods ---
Inst_SOP2__S_XNOR_B32::Inst_SOP2__S_XNOR_B32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_xnor_b32")
{
setFlag(ALU);
} // Inst_SOP2__S_XNOR_B32
Inst_SOP2__S_XNOR_B32::~Inst_SOP2__S_XNOR_B32()
{
} // ~Inst_SOP2__S_XNOR_B32
// --- description from .arch file ---
// D.u = ~(S0.u ^ S1.u);
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_XNOR_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = ~(src0.rawData() ^ src1.rawData());
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_XNOR_B64 class methods ---
Inst_SOP2__S_XNOR_B64::Inst_SOP2__S_XNOR_B64(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_xnor_b64")
{
setFlag(ALU);
} // Inst_SOP2__S_XNOR_B64
Inst_SOP2__S_XNOR_B64::~Inst_SOP2__S_XNOR_B64()
{
} // ~Inst_SOP2__S_XNOR_B64
// --- description from .arch file ---
// D.u64 = ~(S0.u64 ^ S1.u64);
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_XNOR_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = ~(src0.rawData() ^ src1.rawData());
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_LSHL_B32 class methods ---
Inst_SOP2__S_LSHL_B32::Inst_SOP2__S_LSHL_B32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_lshl_b32")
{
setFlag(ALU);
} // Inst_SOP2__S_LSHL_B32
Inst_SOP2__S_LSHL_B32::~Inst_SOP2__S_LSHL_B32()
{
} // ~Inst_SOP2__S_LSHL_B32
// --- description from .arch file ---
// D.u = S0.u << S1.u[4:0];
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_LSHL_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = (src0.rawData() << bits(src1.rawData(), 4, 0));
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_LSHL_B64 class methods ---
Inst_SOP2__S_LSHL_B64::Inst_SOP2__S_LSHL_B64(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_lshl_b64")
{
setFlag(ALU);
} // Inst_SOP2__S_LSHL_B64
Inst_SOP2__S_LSHL_B64::~Inst_SOP2__S_LSHL_B64()
{
} // ~Inst_SOP2__S_LSHL_B64
// --- description from .arch file ---
// D.u64 = S0.u64 << S1.u[5:0];
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_LSHL_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = (src0.rawData() << bits(src1.rawData(), 5, 0));
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_LSHR_B32 class methods ---
Inst_SOP2__S_LSHR_B32::Inst_SOP2__S_LSHR_B32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_lshr_b32")
{
setFlag(ALU);
} // Inst_SOP2__S_LSHR_B32
Inst_SOP2__S_LSHR_B32::~Inst_SOP2__S_LSHR_B32()
{
} // ~Inst_SOP2__S_LSHR_B32
// --- description from .arch file ---
// D.u = S0.u >> S1.u[4:0];
// SCC = 1 if result is non-zero.
// The vacated bits are set to zero.
void
Inst_SOP2__S_LSHR_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0));
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_LSHR_B64 class methods ---
Inst_SOP2__S_LSHR_B64::Inst_SOP2__S_LSHR_B64(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_lshr_b64")
{
setFlag(ALU);
} // Inst_SOP2__S_LSHR_B64
Inst_SOP2__S_LSHR_B64::~Inst_SOP2__S_LSHR_B64()
{
} // ~Inst_SOP2__S_LSHR_B64
// --- description from .arch file ---
// D.u64 = S0.u64 >> S1.u[5:0];
// SCC = 1 if result is non-zero.
// The vacated bits are set to zero.
void
Inst_SOP2__S_LSHR_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0));
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_ASHR_I32 class methods ---
Inst_SOP2__S_ASHR_I32::Inst_SOP2__S_ASHR_I32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_ashr_i32")
{
setFlag(ALU);
} // Inst_SOP2__S_ASHR_I32
Inst_SOP2__S_ASHR_I32::~Inst_SOP2__S_ASHR_I32()
{
} // ~Inst_SOP2__S_ASHR_I32
// --- description from .arch file ---
// D.i = signext(S0.i) >> S1.u[4:0];
// SCC = 1 if result is non-zero.
// The vacated bits are set to the sign bit of the input value.
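// Worked example (illustrative values): S0.i = 0xFFFFFF00 (-256)
// shifted right by S1.u[4:0] = 4 gives 0xFFFFFFF0 (-16); the vacated
// upper bits are filled with the sign bit, which the signed right
// shift in the implementation relies on (arithmetic shift on the
// simulation host).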
void
Inst_SOP2__S_ASHR_I32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0));
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_ASHR_I64 class methods ---
Inst_SOP2__S_ASHR_I64::Inst_SOP2__S_ASHR_I64(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_ashr_i64")
{
setFlag(ALU);
} // Inst_SOP2__S_ASHR_I64
Inst_SOP2__S_ASHR_I64::~Inst_SOP2__S_ASHR_I64()
{
} // ~Inst_SOP2__S_ASHR_I64
// --- description from .arch file ---
// D.i64 = signext(S0.i64) >> S1.u[5:0];
// SCC = 1 if result is non-zero.
// The vacated bits are set to the sign bit of the input value.
void
Inst_SOP2__S_ASHR_I64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandI64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0));
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_BFM_B32 class methods ---
Inst_SOP2__S_BFM_B32::Inst_SOP2__S_BFM_B32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_bfm_b32")
{
setFlag(ALU);
} // Inst_SOP2__S_BFM_B32
Inst_SOP2__S_BFM_B32::~Inst_SOP2__S_BFM_B32()
{
} // ~Inst_SOP2__S_BFM_B32
// --- description from .arch file ---
// D.u = ((1 << S0.u[4:0]) - 1) << S1.u[4:0] (bitfield mask).
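// Worked example (illustrative values): S0.u[4:0] = 4 (field size) and
// S1.u[4:0] = 8 (field offset) produce ((1 << 4) - 1) << 8 = 0x00000F00,
// i.e. a mask of four set bits starting at bit 8.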
void
Inst_SOP2__S_BFM_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
src0.read();
src1.read();
sdst = ((1 << bits(src0.rawData(), 4, 0)) - 1)
<< bits(src1.rawData(), 4, 0);
sdst.write();
} // execute
// --- Inst_SOP2__S_BFM_B64 class methods ---
Inst_SOP2__S_BFM_B64::Inst_SOP2__S_BFM_B64(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_bfm_b64")
{
setFlag(ALU);
} // Inst_SOP2__S_BFM_B64
Inst_SOP2__S_BFM_B64::~Inst_SOP2__S_BFM_B64()
{
} // ~Inst_SOP2__S_BFM_B64
// --- description from .arch file ---
// D.u64 = ((1ULL << S0.u[5:0]) - 1) << S1.u[5:0] (bitfield mask).
void
Inst_SOP2__S_BFM_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
src0.read();
src1.read();
sdst = ((1ULL << bits(src0.rawData(), 5, 0)) - 1)
<< bits(src1.rawData(), 5, 0);
sdst.write();
} // execute
// --- Inst_SOP2__S_MUL_I32 class methods ---
Inst_SOP2__S_MUL_I32::Inst_SOP2__S_MUL_I32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_mul_i32")
{
setFlag(ALU);
} // Inst_SOP2__S_MUL_I32
Inst_SOP2__S_MUL_I32::~Inst_SOP2__S_MUL_I32()
{
} // ~Inst_SOP2__S_MUL_I32
// --- description from .arch file ---
// D.i = S0.i * S1.i.
void
Inst_SOP2__S_MUL_I32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
src0.read();
src1.read();
sdst = src0.rawData() * src1.rawData();
sdst.write();
} // execute
// --- Inst_SOP2__S_BFE_U32 class methods ---
Inst_SOP2__S_BFE_U32::Inst_SOP2__S_BFE_U32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_bfe_u32")
{
setFlag(ALU);
} // Inst_SOP2__S_BFE_U32
Inst_SOP2__S_BFE_U32::~Inst_SOP2__S_BFE_U32()
{
} // ~Inst_SOP2__S_BFE_U32
// --- description from .arch file ---
// Bit field extract. S0 is Data, S1[4:0] is field offset, S1[22:16] is
// field width.
// D.u = (S0.u>>S1.u[4:0]) & ((1<<S1.u[22:16])-1);
// SCC = 1 if result is non-zero.
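// Worked example (illustrative values): S1.u = 0x00040008 packs
// offset = 8 (bits [4:0]) and width = 4 (bits [22:16]); with
// S0.u = 0x0000AB00 the extract is (0x0000AB00 >> 8) & 0xF = 0xB.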
void
Inst_SOP2__S_BFE_U32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0))
& ((1 << bits(src1.rawData(), 22, 16)) - 1);
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_BFE_I32 class methods ---
Inst_SOP2__S_BFE_I32::Inst_SOP2__S_BFE_I32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_bfe_i32")
{
setFlag(ALU);
} // Inst_SOP2__S_BFE_I32
Inst_SOP2__S_BFE_I32::~Inst_SOP2__S_BFE_I32()
{
} // ~Inst_SOP2__S_BFE_I32
// --- description from .arch file ---
// Bit field extract. S0 is Data, S1[4:0] is field offset, S1[22:16] is
// field width.
// D.i = (S0.i>>S1.u[4:0]) & ((1<<S1.u[22:16])-1);
// Sign-extend the result;
// SCC = 1 if result is non-zero.
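// Worked example (illustrative values): extracting a 4-bit field whose
// value is 0xC (binary 1100) must yield D.i = 0xFFFFFFFC (-4), not
// 0x0000000C; the explicit sign-extension step below restores the
// upper bits that the width mask clears.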
void
Inst_SOP2__S_BFE_I32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
ScalarRegU32 offset = bits(src1.rawData(), 4, 0);
ScalarRegU32 width = bits(src1.rawData(), 22, 16);
ScalarRegI32 result = (src0.rawData() >> offset) & ((1 << width) - 1);
// The .arch description calls for the extracted field to be
// sign-extended; the width mask clears the upper bits, so replicate
// the field's MSB back into them when it is set.
if (width > 0 && width < 32 && bits(result, width - 1)) {
result |= ~((1 << width) - 1);
}
sdst = result;
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_BFE_U64 class methods ---
Inst_SOP2__S_BFE_U64::Inst_SOP2__S_BFE_U64(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_bfe_u64")
{
setFlag(ALU);
} // Inst_SOP2__S_BFE_U64
Inst_SOP2__S_BFE_U64::~Inst_SOP2__S_BFE_U64()
{
} // ~Inst_SOP2__S_BFE_U64
// --- description from .arch file ---
// Bit field extract. S0 is Data, S1[5:0] is field offset, S1[22:16] is
// field width.
// D.u64 = (S0.u64>>S1.u[5:0]) & ((1<<S1.u[22:16])-1);
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_BFE_U64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0))
& ((1ULL << bits(src1.rawData(), 22, 16)) - 1);
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_BFE_I64 class methods ---
Inst_SOP2__S_BFE_I64::Inst_SOP2__S_BFE_I64(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_bfe_i64")
{
setFlag(ALU);
} // Inst_SOP2__S_BFE_I64
Inst_SOP2__S_BFE_I64::~Inst_SOP2__S_BFE_I64()
{
} // ~Inst_SOP2__S_BFE_I64
// --- description from .arch file ---
// Bit field extract. S0 is Data, S1[5:0] is field offset, S1[22:16] is
// field width.
// D.i64 = (S0.i64>>S1.u[5:0]) & ((1<<S1.u[22:16])-1);
// Sign-extend result;
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_BFE_I64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandI64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
ScalarRegU32 offset = bits(src1.rawData(), 5, 0);
ScalarRegU32 width = bits(src1.rawData(), 22, 16);
ScalarRegI64 result = (src0.rawData() >> offset) & ((1LL << width) - 1);
// Sign-extend the extracted field per the .arch description: the
// 64-bit width mask clears the upper bits, so replicate the field's
// MSB back into them when it is set.
if (width > 0 && width < 64 && bits(result, width - 1)) {
result |= ~((1LL << width) - 1);
}
sdst = result;
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_CBRANCH_G_FORK class methods ---
Inst_SOP2__S_CBRANCH_G_FORK::Inst_SOP2__S_CBRANCH_G_FORK(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_cbranch_g_fork")
{
setFlag(Branch);
} // Inst_SOP2__S_CBRANCH_G_FORK
Inst_SOP2__S_CBRANCH_G_FORK::~Inst_SOP2__S_CBRANCH_G_FORK()
{
} // ~Inst_SOP2__S_CBRANCH_G_FORK
// --- description from .arch file ---
// mask_pass = S0.u64 & EXEC;
// mask_fail = ~S0.u64 & EXEC;
// if(mask_pass == EXEC)
// PC = S1.u64;
// elsif(mask_fail == EXEC)
// PC += 4;
// elsif(bitcount(mask_fail) < bitcount(mask_pass))
// EXEC = mask_fail;
// SGPR[CSP*4] = { S1.u64, mask_pass };
// CSP++;
// PC += 4;
// else
// EXEC = mask_pass;
// SGPR[CSP*4] = { PC + 4, mask_fail };
// CSP++;
// PC = S1.u64;
// end.
// Conditional branch using branch-stack.
// S0 = compare mask(vcc or any sgpr) and
// S1 = 64-bit byte address of target instruction.
// See also S_CBRANCH_JOIN.
void
Inst_SOP2__S_CBRANCH_G_FORK::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOP2__S_ABSDIFF_I32 class methods ---
Inst_SOP2__S_ABSDIFF_I32::Inst_SOP2__S_ABSDIFF_I32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_absdiff_i32")
{
setFlag(ALU);
} // Inst_SOP2__S_ABSDIFF_I32
Inst_SOP2__S_ABSDIFF_I32::~Inst_SOP2__S_ABSDIFF_I32()
{
} // ~Inst_SOP2__S_ABSDIFF_I32
// --- description from .arch file ---
// D.i = S0.i - S1.i;
// if(D.i < 0) then D.i = -D.i;
// SCC = 1 if result is non-zero.
// Compute the absolute value of difference between two values.
void
Inst_SOP2__S_ABSDIFF_I32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = std::abs(src0.rawData() - src1.rawData());
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_RFE_RESTORE_B64 class methods ---
Inst_SOP2__S_RFE_RESTORE_B64::Inst_SOP2__S_RFE_RESTORE_B64(
InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_rfe_restore_b64")
{
} // Inst_SOP2__S_RFE_RESTORE_B64
Inst_SOP2__S_RFE_RESTORE_B64::~Inst_SOP2__S_RFE_RESTORE_B64()
{
} // ~Inst_SOP2__S_RFE_RESTORE_B64
// --- description from .arch file ---
// PRIV = 0;
// PC = S0.u64;
// INST_ATC = S1.u32[0].
// Return from exception handler and continue, possibly changing the
// instruction ATC mode.
// This instruction may only be used within a trap handler.
// Use this instruction when the main program may be in a different memory
// space than the trap handler.
void
Inst_SOP2__S_RFE_RESTORE_B64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOPK__S_MOVK_I32 class methods ---
Inst_SOPK__S_MOVK_I32::Inst_SOPK__S_MOVK_I32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_movk_i32")
{
setFlag(ALU);
} // Inst_SOPK__S_MOVK_I32
Inst_SOPK__S_MOVK_I32::~Inst_SOPK__S_MOVK_I32()
{
} // ~Inst_SOPK__S_MOVK_I32
// --- description from .arch file ---
// D.i = signext(SIMM16) (sign extension).
void
Inst_SOPK__S_MOVK_I32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
sdst = simm16;
sdst.write();
} // execute
// --- Inst_SOPK__S_CMOVK_I32 class methods ---
Inst_SOPK__S_CMOVK_I32::Inst_SOPK__S_CMOVK_I32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_cmovk_i32")
{
setFlag(ALU);
} // Inst_SOPK__S_CMOVK_I32
Inst_SOPK__S_CMOVK_I32::~Inst_SOPK__S_CMOVK_I32()
{
} // ~Inst_SOPK__S_CMOVK_I32
// --- description from .arch file ---
// if(SCC) then D.i = signext(SIMM16);
// else NOP.
// Conditional move with sign extension.
void
Inst_SOPK__S_CMOVK_I32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);
scc.read();
if (scc.rawData()) {
sdst = simm16;
sdst.write();
}
} // execute
// --- Inst_SOPK__S_CMPK_EQ_I32 class methods ---
Inst_SOPK__S_CMPK_EQ_I32::Inst_SOPK__S_CMPK_EQ_I32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_cmpk_eq_i32")
{
setFlag(ALU);
} // Inst_SOPK__S_CMPK_EQ_I32
Inst_SOPK__S_CMPK_EQ_I32::~Inst_SOPK__S_CMPK_EQ_I32()
{
} // ~Inst_SOPK__S_CMPK_EQ_I32
// --- description from .arch file ---
// SCC = (S0.i == signext(SIMM16)).
void
Inst_SOPK__S_CMPK_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
scc = (src.rawData() == simm16) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPK__S_CMPK_LG_I32 class methods ---
Inst_SOPK__S_CMPK_LG_I32::Inst_SOPK__S_CMPK_LG_I32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_cmpk_lg_i32")
{
setFlag(ALU);
} // Inst_SOPK__S_CMPK_LG_I32
Inst_SOPK__S_CMPK_LG_I32::~Inst_SOPK__S_CMPK_LG_I32()
{
} // ~Inst_SOPK__S_CMPK_LG_I32
// --- description from .arch file ---
// SCC = (S0.i != signext(SIMM16)).
void
Inst_SOPK__S_CMPK_LG_I32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
scc = (src.rawData() != simm16) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPK__S_CMPK_GT_I32 class methods ---
Inst_SOPK__S_CMPK_GT_I32::Inst_SOPK__S_CMPK_GT_I32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_cmpk_gt_i32")
{
setFlag(ALU);
} // Inst_SOPK__S_CMPK_GT_I32
Inst_SOPK__S_CMPK_GT_I32::~Inst_SOPK__S_CMPK_GT_I32()
{
} // ~Inst_SOPK__S_CMPK_GT_I32
// --- description from .arch file ---
// SCC = (S0.i > signext(SIMM16)).
void
Inst_SOPK__S_CMPK_GT_I32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
scc = (src.rawData() > simm16) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPK__S_CMPK_GE_I32 class methods ---
Inst_SOPK__S_CMPK_GE_I32::Inst_SOPK__S_CMPK_GE_I32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_cmpk_ge_i32")
{
setFlag(ALU);
} // Inst_SOPK__S_CMPK_GE_I32
Inst_SOPK__S_CMPK_GE_I32::~Inst_SOPK__S_CMPK_GE_I32()
{
} // ~Inst_SOPK__S_CMPK_GE_I32
// --- description from .arch file ---
// SCC = (S0.i >= signext(SIMM16)).
void
Inst_SOPK__S_CMPK_GE_I32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
scc = (src.rawData() >= simm16) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPK__S_CMPK_LT_I32 class methods ---
Inst_SOPK__S_CMPK_LT_I32::Inst_SOPK__S_CMPK_LT_I32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_cmpk_lt_i32")
{
setFlag(ALU);
} // Inst_SOPK__S_CMPK_LT_I32
Inst_SOPK__S_CMPK_LT_I32::~Inst_SOPK__S_CMPK_LT_I32()
{
} // ~Inst_SOPK__S_CMPK_LT_I32
// --- description from .arch file ---
// SCC = (S0.i < signext(SIMM16)).
void
Inst_SOPK__S_CMPK_LT_I32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
scc = (src.rawData() < simm16) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPK__S_CMPK_LE_I32 class methods ---
Inst_SOPK__S_CMPK_LE_I32::Inst_SOPK__S_CMPK_LE_I32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_cmpk_le_i32")
{
setFlag(ALU);
} // Inst_SOPK__S_CMPK_LE_I32
Inst_SOPK__S_CMPK_LE_I32::~Inst_SOPK__S_CMPK_LE_I32()
{
} // ~Inst_SOPK__S_CMPK_LE_I32
// --- description from .arch file ---
// SCC = (S0.i <= signext(SIMM16)).
void
Inst_SOPK__S_CMPK_LE_I32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
scc = (src.rawData() <= simm16) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPK__S_CMPK_EQ_U32 class methods ---
Inst_SOPK__S_CMPK_EQ_U32::Inst_SOPK__S_CMPK_EQ_U32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_cmpk_eq_u32")
{
setFlag(ALU);
} // Inst_SOPK__S_CMPK_EQ_U32
Inst_SOPK__S_CMPK_EQ_U32::~Inst_SOPK__S_CMPK_EQ_U32()
{
} // ~Inst_SOPK__S_CMPK_EQ_U32
// --- description from .arch file ---
// SCC = (S0.u == SIMM16).
void
Inst_SOPK__S_CMPK_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
scc = (src.rawData() == simm16) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPK__S_CMPK_LG_U32 class methods ---
Inst_SOPK__S_CMPK_LG_U32::Inst_SOPK__S_CMPK_LG_U32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_cmpk_lg_u32")
{
setFlag(ALU);
} // Inst_SOPK__S_CMPK_LG_U32
Inst_SOPK__S_CMPK_LG_U32::~Inst_SOPK__S_CMPK_LG_U32()
{
} // ~Inst_SOPK__S_CMPK_LG_U32
// --- description from .arch file ---
// SCC = (S0.u != SIMM16).
void
Inst_SOPK__S_CMPK_LG_U32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
scc = (src.rawData() != simm16) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPK__S_CMPK_GT_U32 class methods ---
Inst_SOPK__S_CMPK_GT_U32::Inst_SOPK__S_CMPK_GT_U32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_cmpk_gt_u32")
{
setFlag(ALU);
} // Inst_SOPK__S_CMPK_GT_U32
Inst_SOPK__S_CMPK_GT_U32::~Inst_SOPK__S_CMPK_GT_U32()
{
} // ~Inst_SOPK__S_CMPK_GT_U32
// --- description from .arch file ---
// SCC = (S0.u > SIMM16).
void
Inst_SOPK__S_CMPK_GT_U32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
scc = (src.rawData() > simm16) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPK__S_CMPK_GE_U32 class methods ---
Inst_SOPK__S_CMPK_GE_U32::Inst_SOPK__S_CMPK_GE_U32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_cmpk_ge_u32")
{
setFlag(ALU);
} // Inst_SOPK__S_CMPK_GE_U32
Inst_SOPK__S_CMPK_GE_U32::~Inst_SOPK__S_CMPK_GE_U32()
{
} // ~Inst_SOPK__S_CMPK_GE_U32
// --- description from .arch file ---
// SCC = (S0.u >= SIMM16).
void
Inst_SOPK__S_CMPK_GE_U32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
scc = (src.rawData() >= simm16) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPK__S_CMPK_LT_U32 class methods ---
Inst_SOPK__S_CMPK_LT_U32::Inst_SOPK__S_CMPK_LT_U32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_cmpk_lt_u32")
{
setFlag(ALU);
} // Inst_SOPK__S_CMPK_LT_U32
Inst_SOPK__S_CMPK_LT_U32::~Inst_SOPK__S_CMPK_LT_U32()
{
} // ~Inst_SOPK__S_CMPK_LT_U32
// --- description from .arch file ---
// SCC = (S0.u < SIMM16).
void
Inst_SOPK__S_CMPK_LT_U32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
scc = (src.rawData() < simm16) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPK__S_CMPK_LE_U32 class methods ---
Inst_SOPK__S_CMPK_LE_U32::Inst_SOPK__S_CMPK_LE_U32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_cmpk_le_u32")
{
setFlag(ALU);
} // Inst_SOPK__S_CMPK_LE_U32
Inst_SOPK__S_CMPK_LE_U32::~Inst_SOPK__S_CMPK_LE_U32()
{
} // ~Inst_SOPK__S_CMPK_LE_U32
// --- description from .arch file ---
// SCC = (S0.u <= SIMM16).
void
Inst_SOPK__S_CMPK_LE_U32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
scc = (src.rawData() <= simm16) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPK__S_ADDK_I32 class methods ---
Inst_SOPK__S_ADDK_I32::Inst_SOPK__S_ADDK_I32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_addk_i32")
{
setFlag(ALU);
} // Inst_SOPK__S_ADDK_I32
Inst_SOPK__S_ADDK_I32::~Inst_SOPK__S_ADDK_I32()
{
} // ~Inst_SOPK__S_ADDK_I32
// --- description from .arch file ---
// D.i = D.i + signext(SIMM16);
// SCC = overflow.
void
Inst_SOPK__S_ADDK_I32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegI16 simm16 = instData.SIMM16;
ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
sdst = src.rawData() + (ScalarRegI32)simm16;
scc = (bits(src.rawData(), 31) == bits(simm16, 15)
&& bits(src.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOPK__S_MULK_I32 class methods ---
Inst_SOPK__S_MULK_I32::Inst_SOPK__S_MULK_I32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_mulk_i32")
{
setFlag(ALU);
} // Inst_SOPK__S_MULK_I32
Inst_SOPK__S_MULK_I32::~Inst_SOPK__S_MULK_I32()
{
} // ~Inst_SOPK__S_MULK_I32
// --- description from .arch file ---
// D.i = D.i * signext(SIMM16).
void
Inst_SOPK__S_MULK_I32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegI16 simm16 = instData.SIMM16;
ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
src.read();
sdst = src.rawData() * (ScalarRegI32)simm16;
sdst.write();
} // execute
// --- Inst_SOPK__S_CBRANCH_I_FORK class methods ---
Inst_SOPK__S_CBRANCH_I_FORK::Inst_SOPK__S_CBRANCH_I_FORK(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_cbranch_i_fork")
{
setFlag(Branch);
} // Inst_SOPK__S_CBRANCH_I_FORK
Inst_SOPK__S_CBRANCH_I_FORK::~Inst_SOPK__S_CBRANCH_I_FORK()
{
} // ~Inst_SOPK__S_CBRANCH_I_FORK
// --- description from .arch file ---
// mask_pass = S0.u64 & EXEC;
// mask_fail = ~S0.u64 & EXEC;
// target_addr = PC + signext(SIMM16 * 4) + 4;
// if(mask_pass == EXEC)
// PC = target_addr;
// elsif(mask_fail == EXEC)
// PC += 4;
// elsif(bitcount(mask_fail) < bitcount(mask_pass))
// EXEC = mask_fail;
// SGPR[CSP*4] = { target_addr, mask_pass };
// CSP++;
// PC += 4;
// else
// EXEC = mask_pass;
// SGPR[CSP*4] = { PC + 4, mask_fail };
// CSP++;
// PC = target_addr;
// end.
// Conditional branch using branch-stack.
// S0 = compare mask(vcc or any sgpr), and
// SIMM16 = signed DWORD branch offset relative to next instruction.
// See also S_CBRANCH_JOIN.
void
Inst_SOPK__S_CBRANCH_I_FORK::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOPK__S_GETREG_B32 class methods ---
Inst_SOPK__S_GETREG_B32::Inst_SOPK__S_GETREG_B32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_getreg_b32")
{
} // Inst_SOPK__S_GETREG_B32
Inst_SOPK__S_GETREG_B32::~Inst_SOPK__S_GETREG_B32()
{
} // ~Inst_SOPK__S_GETREG_B32
// --- description from .arch file ---
// D.u = hardware-reg. Read some or all of a hardware register into the
// LSBs of D.
// SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size
// is 1..32.
void
Inst_SOPK__S_GETREG_B32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOPK__S_SETREG_B32 class methods ---
Inst_SOPK__S_SETREG_B32::Inst_SOPK__S_SETREG_B32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_setreg_b32")
{
setFlag(ALU);
} // Inst_SOPK__S_SETREG_B32
Inst_SOPK__S_SETREG_B32::~Inst_SOPK__S_SETREG_B32()
{
} // ~Inst_SOPK__S_SETREG_B32
// --- description from .arch file ---
// hardware-reg = S0.u. Write some or all of the LSBs of D into a hardware
// register.
// SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size
// is 1..32.
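// Decode sketch (illustrative value): SIMM16 = 0x1081 splits into
// hwRegId = 0x1081 & 0x3f = 1 (the MODE register),
// offset = (0x1081 >> 6) & 31 = 2, and
// size = ((0x1081 >> 11) & 31) + 1 = 3 bits, matching the field
// extraction in the implementation below.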
void
Inst_SOPK__S_SETREG_B32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegI16 simm16 = instData.SIMM16;
ScalarRegU32 hwregId = simm16 & 0x3f;
ScalarRegU32 offset = (simm16 >> 6) & 31;
ScalarRegU32 size = ((simm16 >> 11) & 31) + 1;
ScalarOperandU32 hwreg(gpuDynInst, hwregId);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
hwreg.read();
sdst.read();
// Store value from SDST to part of the hardware register.
ScalarRegU32 mask = (((1U << size) - 1U) << offset);
hwreg = ((hwreg.rawData() & ~mask)
| ((sdst.rawData() << offset) & mask));
hwreg.write();
// Writes to the MODE register (hwRegId 1) that target the
// single-precision FP round mode (offset 0) or denorm mode (offset 4)
// are accepted but currently have no effect on FP behavior.
if (hwregId == 1 && size == 2
&& (offset == 4 || offset == 0)) {
warn_once("s_setreg_b32 has no real effect on FP modes: %s\n",
gpuDynInst->disassemble());
return;
}
// Writes to any other hardware register are unimplemented.
panicUnimplemented();
} // execute
// --- Inst_SOPK__S_SETREG_IMM32_B32 class methods ---
Inst_SOPK__S_SETREG_IMM32_B32::Inst_SOPK__S_SETREG_IMM32_B32(
InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_setreg_imm32_b32")
{
setFlag(ALU);
} // Inst_SOPK__S_SETREG_IMM32_B32
Inst_SOPK__S_SETREG_IMM32_B32::~Inst_SOPK__S_SETREG_IMM32_B32()
{
} // ~Inst_SOPK__S_SETREG_IMM32_B32
// --- description from .arch file ---
// Write some or all of the LSBs of IMM32 into a hardware register; this
// instruction requires a 32-bit literal constant.
// SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size
// is 1..32.
void
Inst_SOPK__S_SETREG_IMM32_B32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegI16 simm16 = instData.SIMM16;
ScalarRegU32 hwregId = simm16 & 0x3f;
ScalarRegU32 offset = (simm16 >> 6) & 31;
ScalarRegU32 size = ((simm16 >> 11) & 31) + 1;
ScalarOperandU32 hwreg(gpuDynInst, hwregId);
ScalarRegU32 simm32 = extData.imm_u32;
hwreg.read();
// Store value from SIMM32 to part of the hardware register.
ScalarRegU32 mask = (((1U << size) - 1U) << offset);
hwreg = ((hwreg.rawData() & ~mask)
| ((simm32 << offset) & mask));
hwreg.write();
// Writes to the MODE register (hwRegId 1) that target the
// single-precision FP round mode (offset 0) or denorm mode (offset 4)
// are accepted but currently have no effect on FP behavior.
if (hwregId == 1 && size == 2
&& (offset == 4 || offset == 0)) {
warn_once("s_setreg_imm32_b32 has no real effect on FP modes: %s\n",
gpuDynInst->disassemble());
return;
}
// Writes to any other hardware register are unimplemented.
panicUnimplemented();
} // execute
// --- Inst_SOP1__S_MOV_B32 class methods ---
Inst_SOP1__S_MOV_B32::Inst_SOP1__S_MOV_B32(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_mov_b32")
{
setFlag(ALU);
} // Inst_SOP1__S_MOV_B32
Inst_SOP1__S_MOV_B32::~Inst_SOP1__S_MOV_B32()
{
} // ~Inst_SOP1__S_MOV_B32
// --- description from .arch file ---
// D.u = S0.u.
void
Inst_SOP1__S_MOV_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
src.read();
sdst = src.rawData();
sdst.write();
} // execute
// --- Inst_SOP1__S_MOV_B64 class methods ---
Inst_SOP1__S_MOV_B64::Inst_SOP1__S_MOV_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_mov_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_MOV_B64
Inst_SOP1__S_MOV_B64::~Inst_SOP1__S_MOV_B64()
{
} // ~Inst_SOP1__S_MOV_B64
// --- description from .arch file ---
// D.u64 = S0.u64.
void
Inst_SOP1__S_MOV_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
src.read();
sdst = src.rawData();
sdst.write();
} // execute
// --- Inst_SOP1__S_CMOV_B32 class methods ---
Inst_SOP1__S_CMOV_B32::Inst_SOP1__S_CMOV_B32(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_cmov_b32")
{
setFlag(ALU);
} // Inst_SOP1__S_CMOV_B32
Inst_SOP1__S_CMOV_B32::~Inst_SOP1__S_CMOV_B32()
{
} // ~Inst_SOP1__S_CMOV_B32
// --- description from .arch file ---
// if(SCC) then D.u = S0.u;
// else NOP.
// Conditional move.
void
Inst_SOP1__S_CMOV_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
scc.read();
if (scc.rawData()) {
sdst = src.rawData();
sdst.write();
}
} // execute
// --- Inst_SOP1__S_CMOV_B64 class methods ---
Inst_SOP1__S_CMOV_B64::Inst_SOP1__S_CMOV_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_cmov_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_CMOV_B64
Inst_SOP1__S_CMOV_B64::~Inst_SOP1__S_CMOV_B64()
{
} // ~Inst_SOP1__S_CMOV_B64
// --- description from .arch file ---
// if(SCC) then D.u64 = S0.u64;
// else NOP.
// Conditional move.
void
Inst_SOP1__S_CMOV_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
scc.read();
if (scc.rawData()) {
sdst = src.rawData();
sdst.write();
}
} // execute
// --- Inst_SOP1__S_NOT_B32 class methods ---
Inst_SOP1__S_NOT_B32::Inst_SOP1__S_NOT_B32(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_not_b32")
{
setFlag(ALU);
} // Inst_SOP1__S_NOT_B32
Inst_SOP1__S_NOT_B32::~Inst_SOP1__S_NOT_B32()
{
} // ~Inst_SOP1__S_NOT_B32
// --- description from .arch file ---
// D.u = ~S0.u;
// SCC = 1 if result is non-zero.
// Bitwise negation.
void
Inst_SOP1__S_NOT_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
sdst = ~src.rawData();
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP1__S_NOT_B64 class methods ---
Inst_SOP1__S_NOT_B64::Inst_SOP1__S_NOT_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_not_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_NOT_B64
Inst_SOP1__S_NOT_B64::~Inst_SOP1__S_NOT_B64()
{
} // ~Inst_SOP1__S_NOT_B64
// --- description from .arch file ---
// D.u64 = ~S0.u64;
// SCC = 1 if result is non-zero.
// Bitwise negation.
void
Inst_SOP1__S_NOT_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
sdst = ~src.rawData();
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP1__S_WQM_B32 class methods ---
Inst_SOP1__S_WQM_B32::Inst_SOP1__S_WQM_B32(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_wqm_b32")
{
setFlag(ALU);
} // Inst_SOP1__S_WQM_B32
Inst_SOP1__S_WQM_B32::~Inst_SOP1__S_WQM_B32()
{
} // ~Inst_SOP1__S_WQM_B32
// --- description from .arch file ---
// D[i] = (S0[(i & ~3):(i | 3)] != 0);
// Computes whole quad mode for an active/valid mask.
// SCC = 1 if result is non-zero.
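// --- worked example (illustrative, derived from the description
// --- above; not from the .arch file) ---
// wholeQuadMode(0x00000012) = 0x000000FF: quad 0 (bits 3:0 = 0x2) and
// quad 1 (bits 7:4 = 0x1) each contain a set bit, so both quads become
// 0xF and all other quads remain 0. SCC is then set to 1.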
void
Inst_SOP1__S_WQM_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
sdst = wholeQuadMode(src.rawData());
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP1__S_WQM_B64 class methods ---
Inst_SOP1__S_WQM_B64::Inst_SOP1__S_WQM_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_wqm_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_WQM_B64
Inst_SOP1__S_WQM_B64::~Inst_SOP1__S_WQM_B64()
{
} // ~Inst_SOP1__S_WQM_B64
// --- description from .arch file ---
// D[i] = (S0[(i & ~3):(i | 3)] != 0);
// Computes whole quad mode for an active/valid mask.
// SCC = 1 if result is non-zero.
void
Inst_SOP1__S_WQM_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
sdst = wholeQuadMode(src.rawData());
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP1__S_BREV_B32 class methods ---
Inst_SOP1__S_BREV_B32::Inst_SOP1__S_BREV_B32(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_brev_b32")
{
setFlag(ALU);
} // Inst_SOP1__S_BREV_B32
Inst_SOP1__S_BREV_B32::~Inst_SOP1__S_BREV_B32()
{
} // ~Inst_SOP1__S_BREV_B32
// --- description from .arch file ---
// D.u[31:0] = S0.u[0:31] (reverse bits).
void
Inst_SOP1__S_BREV_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
src.read();
sdst = reverseBits(src.rawData());
sdst.write();
} // execute
// --- Inst_SOP1__S_BREV_B64 class methods ---
Inst_SOP1__S_BREV_B64::Inst_SOP1__S_BREV_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_brev_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_BREV_B64
Inst_SOP1__S_BREV_B64::~Inst_SOP1__S_BREV_B64()
{
} // ~Inst_SOP1__S_BREV_B64
// --- description from .arch file ---
// D.u64[63:0] = S0.u64[0:63] (reverse bits).
void
Inst_SOP1__S_BREV_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
src.read();
sdst = reverseBits(src.rawData());
sdst.write();
} // execute
// --- Inst_SOP1__S_BCNT0_I32_B32 class methods ---
Inst_SOP1__S_BCNT0_I32_B32::Inst_SOP1__S_BCNT0_I32_B32(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_bcnt0_i32_b32")
{
setFlag(ALU);
} // Inst_SOP1__S_BCNT0_I32_B32
Inst_SOP1__S_BCNT0_I32_B32::~Inst_SOP1__S_BCNT0_I32_B32()
{
} // ~Inst_SOP1__S_BCNT0_I32_B32
// --- description from .arch file ---
// D.i = CountZeroBits(S0.u);
// SCC = 1 if result is non-zero.
void
Inst_SOP1__S_BCNT0_I32_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
sdst = countZeroBits(src.rawData());
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP1__S_BCNT0_I32_B64 class methods ---
Inst_SOP1__S_BCNT0_I32_B64::Inst_SOP1__S_BCNT0_I32_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_bcnt0_i32_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_BCNT0_I32_B64
Inst_SOP1__S_BCNT0_I32_B64::~Inst_SOP1__S_BCNT0_I32_B64()
{
} // ~Inst_SOP1__S_BCNT0_I32_B64
// --- description from .arch file ---
// D.i = CountZeroBits(S0.u64);
// SCC = 1 if result is non-zero.
void
Inst_SOP1__S_BCNT0_I32_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
sdst = countZeroBits(src.rawData());
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP1__S_BCNT1_I32_B32 class methods ---
Inst_SOP1__S_BCNT1_I32_B32::Inst_SOP1__S_BCNT1_I32_B32(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_bcnt1_i32_b32")
{
setFlag(ALU);
} // Inst_SOP1__S_BCNT1_I32_B32
Inst_SOP1__S_BCNT1_I32_B32::~Inst_SOP1__S_BCNT1_I32_B32()
{
} // ~Inst_SOP1__S_BCNT1_I32_B32
// --- description from .arch file ---
// D.i = CountOneBits(S0.u);
// SCC = 1 if result is non-zero.
void
Inst_SOP1__S_BCNT1_I32_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
sdst = popCount(src.rawData());
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP1__S_BCNT1_I32_B64 class methods ---
Inst_SOP1__S_BCNT1_I32_B64::Inst_SOP1__S_BCNT1_I32_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_bcnt1_i32_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_BCNT1_I32_B64
Inst_SOP1__S_BCNT1_I32_B64::~Inst_SOP1__S_BCNT1_I32_B64()
{
} // ~Inst_SOP1__S_BCNT1_I32_B64
// --- description from .arch file ---
// D.i = CountOneBits(S0.u64);
// SCC = 1 if result is non-zero.
void
Inst_SOP1__S_BCNT1_I32_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
sdst = popCount(src.rawData());
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP1__S_FF0_I32_B32 class methods ---
Inst_SOP1__S_FF0_I32_B32::Inst_SOP1__S_FF0_I32_B32(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_ff0_i32_b32")
{
setFlag(ALU);
} // Inst_SOP1__S_FF0_I32_B32
Inst_SOP1__S_FF0_I32_B32::~Inst_SOP1__S_FF0_I32_B32()
{
} // ~Inst_SOP1__S_FF0_I32_B32
// --- description from .arch file ---
// D.i = FindFirstZero(S0.u);
// If no zeros are found, return -1.
// Returns the bit position of the first zero from the LSB.
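// --- worked example (illustrative, derived from the description
// --- above; not from the .arch file) ---
// findFirstZero(0x0000FFFF) = 16: bits 15:0 are all ones, so the first
// zero from the LSB is bit 16. findFirstZero(0xFFFFFFFF) = -1.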
void
Inst_SOP1__S_FF0_I32_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
src.read();
sdst = findFirstZero(src.rawData());
sdst.write();
} // execute
// --- Inst_SOP1__S_FF0_I32_B64 class methods ---
Inst_SOP1__S_FF0_I32_B64::Inst_SOP1__S_FF0_I32_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_ff0_i32_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_FF0_I32_B64
Inst_SOP1__S_FF0_I32_B64::~Inst_SOP1__S_FF0_I32_B64()
{
} // ~Inst_SOP1__S_FF0_I32_B64
// --- description from .arch file ---
// D.i = FindFirstZero(S0.u64);
// If no zeros are found, return -1.
// Returns the bit position of the first zero from the LSB.
void
Inst_SOP1__S_FF0_I32_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
src.read();
sdst = findFirstZero(src.rawData());
sdst.write();
} // execute
// --- Inst_SOP1__S_FF1_I32_B32 class methods ---
Inst_SOP1__S_FF1_I32_B32::Inst_SOP1__S_FF1_I32_B32(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_ff1_i32_b32")
{
setFlag(ALU);
} // Inst_SOP1__S_FF1_I32_B32
Inst_SOP1__S_FF1_I32_B32::~Inst_SOP1__S_FF1_I32_B32()
{
} // ~Inst_SOP1__S_FF1_I32_B32
// --- description from .arch file ---
// D.i = FindFirstOne(S0.u);
// If no ones are found, return -1.
// Returns the bit position of the first one from the LSB.
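// --- worked example (illustrative, derived from the description
// --- above; not from the .arch file) ---
// findFirstOne(0x00010000) = 16: bit 16 is the lowest set bit.
// findFirstOne(0x0) = -1.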
void
Inst_SOP1__S_FF1_I32_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
src.read();
sdst = findFirstOne(src.rawData());
sdst.write();
} // execute
// --- Inst_SOP1__S_FF1_I32_B64 class methods ---
Inst_SOP1__S_FF1_I32_B64::Inst_SOP1__S_FF1_I32_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_ff1_i32_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_FF1_I32_B64
Inst_SOP1__S_FF1_I32_B64::~Inst_SOP1__S_FF1_I32_B64()
{
} // ~Inst_SOP1__S_FF1_I32_B64
// --- description from .arch file ---
// D.i = FindFirstOne(S0.u64);
// If no ones are found, return -1.
// Returns the bit position of the first one from the LSB.
void
Inst_SOP1__S_FF1_I32_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
src.read();
sdst = findFirstOne(src.rawData());
sdst.write();
} // execute
// --- Inst_SOP1__S_FLBIT_I32_B32 class methods ---
Inst_SOP1__S_FLBIT_I32_B32::Inst_SOP1__S_FLBIT_I32_B32(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_flbit_i32_b32")
{
setFlag(ALU);
} // Inst_SOP1__S_FLBIT_I32_B32
Inst_SOP1__S_FLBIT_I32_B32::~Inst_SOP1__S_FLBIT_I32_B32()
{
} // ~Inst_SOP1__S_FLBIT_I32_B32
// --- description from .arch file ---
// D.i = FindFirstOne(S0.u);
// If no ones are found, return -1.
// Counts how many zeros before the first one starting from the MSB.
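// --- worked example (illustrative, derived from the description
// --- above; not from the .arch file) ---
// countZeroBitsMsb(0x0000FFFF) = 16: there are 16 leading zeros before
// the first one at bit 15. countZeroBitsMsb(0x0) = -1.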
void
Inst_SOP1__S_FLBIT_I32_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
src.read();
sdst = countZeroBitsMsb(src.rawData());
sdst.write();
} // execute
// --- Inst_SOP1__S_FLBIT_I32_B64 class methods ---
Inst_SOP1__S_FLBIT_I32_B64::Inst_SOP1__S_FLBIT_I32_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_flbit_i32_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_FLBIT_I32_B64
Inst_SOP1__S_FLBIT_I32_B64::~Inst_SOP1__S_FLBIT_I32_B64()
{
} // ~Inst_SOP1__S_FLBIT_I32_B64
// --- description from .arch file ---
// D.i = FindFirstOne(S0.u64);
// If no ones are found, return -1.
// Counts how many zeros before the first one starting from the MSB.
void
Inst_SOP1__S_FLBIT_I32_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
src.read();
sdst = countZeroBitsMsb(src.rawData());
sdst.write();
} // execute
// --- Inst_SOP1__S_FLBIT_I32 class methods ---
Inst_SOP1__S_FLBIT_I32::Inst_SOP1__S_FLBIT_I32(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_flbit_i32")
{
setFlag(ALU);
} // Inst_SOP1__S_FLBIT_I32
Inst_SOP1__S_FLBIT_I32::~Inst_SOP1__S_FLBIT_I32()
{
} // ~Inst_SOP1__S_FLBIT_I32
// --- description from .arch file ---
// D.i = FirstOppositeSignBit(S0.i);
// If S0.i == 0 or S0.i == -1 (all bits are the same), return -1.
// Counts how many bits in a row (from MSB to LSB) are the same as the
// sign bit.
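// --- worked example (illustrative, derived from the description
// --- above; not from the .arch file) ---
// firstOppositeSignBit(0x0000FFFF) = 16: the sign bit is 0 and bits
// 31:16 all match it, so 16 bits in a row equal the sign bit before
// the first opposite bit at bit 15. For 0x0 or 0xFFFFFFFF every bit
// matches the sign bit, so the result is -1.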
void
Inst_SOP1__S_FLBIT_I32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
src.read();
sdst = firstOppositeSignBit(src.rawData());
sdst.write();
} // execute
// --- Inst_SOP1__S_FLBIT_I32_I64 class methods ---
Inst_SOP1__S_FLBIT_I32_I64::Inst_SOP1__S_FLBIT_I32_I64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_flbit_i32_i64")
{
setFlag(ALU);
} // Inst_SOP1__S_FLBIT_I32_I64
Inst_SOP1__S_FLBIT_I32_I64::~Inst_SOP1__S_FLBIT_I32_I64()
{
} // ~Inst_SOP1__S_FLBIT_I32_I64
// --- description from .arch file ---
// D.i = FirstOppositeSignBit(S0.i64);
// If S0.i == 0 or S0.i == -1 (all bits are the same), return -1.
// Counts how many bits in a row (from MSB to LSB) are the same as the
// sign bit.
void
Inst_SOP1__S_FLBIT_I32_I64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI64 src(gpuDynInst, instData.SSRC0);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
src.read();
sdst = firstOppositeSignBit(src.rawData());
sdst.write();
} // execute
// --- Inst_SOP1__S_SEXT_I32_I8 class methods ---
Inst_SOP1__S_SEXT_I32_I8::Inst_SOP1__S_SEXT_I32_I8(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_sext_i32_i8")
{
setFlag(ALU);
} // Inst_SOP1__S_SEXT_I32_I8
Inst_SOP1__S_SEXT_I32_I8::~Inst_SOP1__S_SEXT_I32_I8()
{
} // ~Inst_SOP1__S_SEXT_I32_I8
// --- description from .arch file ---
// D.i = signext(S0.i[7:0]) (sign extension).
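// --- worked example (illustrative, derived from the description
// --- above; not from the .arch file) ---
// signext(0x80) = 0xFFFFFF80 (-128), since bit 7 is the sign bit;
// signext(0x7F) = 0x0000007F (127).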
void
Inst_SOP1__S_SEXT_I32_I8::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
src.read();
sdst = sext<std::numeric_limits<ScalarRegI8>::digits>(
bits(src.rawData(), 7, 0));
sdst.write();
} // execute
// --- Inst_SOP1__S_SEXT_I32_I16 class methods ---
Inst_SOP1__S_SEXT_I32_I16::Inst_SOP1__S_SEXT_I32_I16(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_sext_i32_i16")
{
setFlag(ALU);
} // Inst_SOP1__S_SEXT_I32_I16
Inst_SOP1__S_SEXT_I32_I16::~Inst_SOP1__S_SEXT_I32_I16()
{
} // ~Inst_SOP1__S_SEXT_I32_I16
// --- description from .arch file ---
// D.i = signext(S0.i[15:0]) (sign extension).
void
Inst_SOP1__S_SEXT_I32_I16::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
src.read();
sdst = sext<std::numeric_limits<ScalarRegI16>::digits>(
bits(src.rawData(), 15, 0));
sdst.write();
} // execute
// --- Inst_SOP1__S_BITSET0_B32 class methods ---
Inst_SOP1__S_BITSET0_B32::Inst_SOP1__S_BITSET0_B32(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_bitset0_b32")
{
setFlag(ALU);
} // Inst_SOP1__S_BITSET0_B32
Inst_SOP1__S_BITSET0_B32::~Inst_SOP1__S_BITSET0_B32()
{
} // ~Inst_SOP1__S_BITSET0_B32
// --- description from .arch file ---
// D.u[S0.u[4:0]] = 0.
void
Inst_SOP1__S_BITSET0_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
src.read();
sdst.setBit(bits(src.rawData(), 4, 0), 0);
sdst.write();
} // execute
// --- Inst_SOP1__S_BITSET0_B64 class methods ---
Inst_SOP1__S_BITSET0_B64::Inst_SOP1__S_BITSET0_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_bitset0_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_BITSET0_B64
Inst_SOP1__S_BITSET0_B64::~Inst_SOP1__S_BITSET0_B64()
{
} // ~Inst_SOP1__S_BITSET0_B64
// --- description from .arch file ---
// D.u64[S0.u[5:0]] = 0.
void
Inst_SOP1__S_BITSET0_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
src.read();
sdst.setBit(bits(src.rawData(), 5, 0), 0);
sdst.write();
} // execute
// --- Inst_SOP1__S_BITSET1_B32 class methods ---
Inst_SOP1__S_BITSET1_B32::Inst_SOP1__S_BITSET1_B32(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_bitset1_b32")
{
setFlag(ALU);
} // Inst_SOP1__S_BITSET1_B32
Inst_SOP1__S_BITSET1_B32::~Inst_SOP1__S_BITSET1_B32()
{
} // ~Inst_SOP1__S_BITSET1_B32
// --- description from .arch file ---
// D.u[S0.u[4:0]] = 1.
void
Inst_SOP1__S_BITSET1_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
src.read();
sdst.setBit(bits(src.rawData(), 4, 0), 1);
sdst.write();
} // execute
// --- Inst_SOP1__S_BITSET1_B64 class methods ---
Inst_SOP1__S_BITSET1_B64::Inst_SOP1__S_BITSET1_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_bitset1_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_BITSET1_B64
Inst_SOP1__S_BITSET1_B64::~Inst_SOP1__S_BITSET1_B64()
{
} // ~Inst_SOP1__S_BITSET1_B64
// --- description from .arch file ---
// D.u64[S0.u[5:0]] = 1.
void
Inst_SOP1__S_BITSET1_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
src.read();
sdst.setBit(bits(src.rawData(), 5, 0), 1);
sdst.write();
} // execute
// --- Inst_SOP1__S_GETPC_B64 class methods ---
Inst_SOP1__S_GETPC_B64::Inst_SOP1__S_GETPC_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_getpc_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_GETPC_B64
Inst_SOP1__S_GETPC_B64::~Inst_SOP1__S_GETPC_B64()
{
} // ~Inst_SOP1__S_GETPC_B64
// --- description from .arch file ---
// D.u64 = PC + 4.
// Destination receives the byte address of the next instruction.
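// --- worked example (illustrative; the address is hypothetical) ---
// If this 4-byte s_getpc_b64 resides at byte address 0x1000, the
// destination SGPR pair receives 0x1004, the address of the next
// instruction.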
void
Inst_SOP1__S_GETPC_B64::execute(GPUDynInstPtr gpuDynInst)
{
Addr pc = gpuDynInst->pc();
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
sdst = pc + 4;
sdst.write();
} // execute
// --- Inst_SOP1__S_SETPC_B64 class methods ---
Inst_SOP1__S_SETPC_B64::Inst_SOP1__S_SETPC_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_setpc_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_SETPC_B64
Inst_SOP1__S_SETPC_B64::~Inst_SOP1__S_SETPC_B64()
{
} // ~Inst_SOP1__S_SETPC_B64
// --- description from .arch file ---
// PC = S0.u64.
// S0.u64 is a byte address of the instruction to jump to.
void
Inst_SOP1__S_SETPC_B64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
src.read();
wf->pc(src.rawData());
} // execute
// --- Inst_SOP1__S_SWAPPC_B64 class methods ---
Inst_SOP1__S_SWAPPC_B64::Inst_SOP1__S_SWAPPC_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_swappc_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_SWAPPC_B64
Inst_SOP1__S_SWAPPC_B64::~Inst_SOP1__S_SWAPPC_B64()
{
} // ~Inst_SOP1__S_SWAPPC_B64
// --- description from .arch file ---
// D.u64 = PC + 4; PC = S0.u64.
// S0.u64 is a byte address of the instruction to jump to.
void
Inst_SOP1__S_SWAPPC_B64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
Addr pc = gpuDynInst->pc();
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
src.read();
sdst = pc + 4;
wf->pc(src.rawData());
sdst.write();
} // execute
// --- Inst_SOP1__S_RFE_B64 class methods ---
Inst_SOP1__S_RFE_B64::Inst_SOP1__S_RFE_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_rfe_b64")
{
} // Inst_SOP1__S_RFE_B64
Inst_SOP1__S_RFE_B64::~Inst_SOP1__S_RFE_B64()
{
} // ~Inst_SOP1__S_RFE_B64
// --- description from .arch file ---
// PRIV = 0;
// PC = S0.u64.
// Return from exception handler and continue.
// This instruction may only be used within a trap handler.
void
Inst_SOP1__S_RFE_B64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOP1__S_AND_SAVEEXEC_B64 class methods ---
Inst_SOP1__S_AND_SAVEEXEC_B64::Inst_SOP1__S_AND_SAVEEXEC_B64(
InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_and_saveexec_b64")
{
setFlag(ALU);
setFlag(ReadsEXEC);
setFlag(WritesEXEC);
} // Inst_SOP1__S_AND_SAVEEXEC_B64
Inst_SOP1__S_AND_SAVEEXEC_B64::~Inst_SOP1__S_AND_SAVEEXEC_B64()
{
} // ~Inst_SOP1__S_AND_SAVEEXEC_B64
// --- description from .arch file ---
// D.u64 = EXEC;
// EXEC = S0.u64 & EXEC;
// SCC = 1 if the new value of EXEC is non-zero.
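// --- worked example (illustrative, derived from the description
// --- above; not from the .arch file) ---
// With EXEC = 0xF and S0.u64 = 0xC: D.u64 receives the old EXEC (0xF),
// EXEC becomes 0xF & 0xC = 0xC, and SCC = 1 because the new EXEC is
// non-zero.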
void
Inst_SOP1__S_AND_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
sdst = wf->execMask().to_ullong();
wf->execMask() = src.rawData() & wf->execMask().to_ullong();
scc = wf->execMask().any() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP1__S_OR_SAVEEXEC_B64 class methods ---
Inst_SOP1__S_OR_SAVEEXEC_B64::Inst_SOP1__S_OR_SAVEEXEC_B64(
InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_or_saveexec_b64")
{
setFlag(ALU);
setFlag(ReadsEXEC);
setFlag(WritesEXEC);
} // Inst_SOP1__S_OR_SAVEEXEC_B64
Inst_SOP1__S_OR_SAVEEXEC_B64::~Inst_SOP1__S_OR_SAVEEXEC_B64()
{
} // ~Inst_SOP1__S_OR_SAVEEXEC_B64
// --- description from .arch file ---
// D.u64 = EXEC;
// EXEC = S0.u64 | EXEC;
// SCC = 1 if the new value of EXEC is non-zero.
void
Inst_SOP1__S_OR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
sdst = wf->execMask().to_ullong();
wf->execMask() = src.rawData() | wf->execMask().to_ullong();
scc = wf->execMask().any() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP1__S_XOR_SAVEEXEC_B64 class methods ---
Inst_SOP1__S_XOR_SAVEEXEC_B64::Inst_SOP1__S_XOR_SAVEEXEC_B64(
InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_xor_saveexec_b64")
{
setFlag(ALU);
setFlag(ReadsEXEC);
setFlag(WritesEXEC);
} // Inst_SOP1__S_XOR_SAVEEXEC_B64
Inst_SOP1__S_XOR_SAVEEXEC_B64::~Inst_SOP1__S_XOR_SAVEEXEC_B64()
{
} // ~Inst_SOP1__S_XOR_SAVEEXEC_B64
// --- description from .arch file ---
// D.u64 = EXEC;
// EXEC = S0.u64 ^ EXEC;
// SCC = 1 if the new value of EXEC is non-zero.
void
Inst_SOP1__S_XOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
sdst = wf->execMask().to_ullong();
wf->execMask() = src.rawData() ^ wf->execMask().to_ullong();
scc = wf->execMask().any() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP1__S_ANDN2_SAVEEXEC_B64 class methods ---
Inst_SOP1__S_ANDN2_SAVEEXEC_B64::Inst_SOP1__S_ANDN2_SAVEEXEC_B64(
InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_andn2_saveexec_b64")
{
setFlag(ALU);
setFlag(ReadsEXEC);
setFlag(WritesEXEC);
} // Inst_SOP1__S_ANDN2_SAVEEXEC_B64
Inst_SOP1__S_ANDN2_SAVEEXEC_B64::~Inst_SOP1__S_ANDN2_SAVEEXEC_B64()
{
} // ~Inst_SOP1__S_ANDN2_SAVEEXEC_B64
// --- description from .arch file ---
// D.u64 = EXEC;
// EXEC = S0.u64 & ~EXEC;
// SCC = 1 if the new value of EXEC is non-zero.
void
Inst_SOP1__S_ANDN2_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
sdst = wf->execMask().to_ullong();
wf->execMask() = src.rawData() & ~wf->execMask().to_ullong();
scc = wf->execMask().any() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP1__S_ORN2_SAVEEXEC_B64 class methods ---
Inst_SOP1__S_ORN2_SAVEEXEC_B64::Inst_SOP1__S_ORN2_SAVEEXEC_B64(
InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_orn2_saveexec_b64")
{
setFlag(ALU);
setFlag(ReadsEXEC);
setFlag(WritesEXEC);
} // Inst_SOP1__S_ORN2_SAVEEXEC_B64
Inst_SOP1__S_ORN2_SAVEEXEC_B64::~Inst_SOP1__S_ORN2_SAVEEXEC_B64()
{
} // ~Inst_SOP1__S_ORN2_SAVEEXEC_B64
// --- description from .arch file ---
// D.u64 = EXEC;
// EXEC = S0.u64 | ~EXEC;
// SCC = 1 if the new value of EXEC is non-zero.
void
Inst_SOP1__S_ORN2_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
sdst = wf->execMask().to_ullong();
wf->execMask() = src.rawData() | ~wf->execMask().to_ullong();
scc = wf->execMask().any() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP1__S_NAND_SAVEEXEC_B64 class methods ---
Inst_SOP1__S_NAND_SAVEEXEC_B64::Inst_SOP1__S_NAND_SAVEEXEC_B64(
InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_nand_saveexec_b64")
{
setFlag(ALU);
setFlag(ReadsEXEC);
setFlag(WritesEXEC);
} // Inst_SOP1__S_NAND_SAVEEXEC_B64
Inst_SOP1__S_NAND_SAVEEXEC_B64::~Inst_SOP1__S_NAND_SAVEEXEC_B64()
{
} // ~Inst_SOP1__S_NAND_SAVEEXEC_B64
// --- description from .arch file ---
// D.u64 = EXEC;
// EXEC = ~(S0.u64 & EXEC);
// SCC = 1 if the new value of EXEC is non-zero.
void
Inst_SOP1__S_NAND_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
sdst = wf->execMask().to_ullong();
wf->execMask() = ~(src.rawData() & wf->execMask().to_ullong());
scc = wf->execMask().any() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP1__S_NOR_SAVEEXEC_B64 class methods ---
Inst_SOP1__S_NOR_SAVEEXEC_B64::Inst_SOP1__S_NOR_SAVEEXEC_B64(
InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_nor_saveexec_b64")
{
setFlag(ALU);
setFlag(ReadsEXEC);
setFlag(WritesEXEC);
} // Inst_SOP1__S_NOR_SAVEEXEC_B64
Inst_SOP1__S_NOR_SAVEEXEC_B64::~Inst_SOP1__S_NOR_SAVEEXEC_B64()
{
} // ~Inst_SOP1__S_NOR_SAVEEXEC_B64
// --- description from .arch file ---
// D.u64 = EXEC;
// EXEC = ~(S0.u64 | EXEC);
// SCC = 1 if the new value of EXEC is non-zero.
void
Inst_SOP1__S_NOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
sdst = wf->execMask().to_ullong();
wf->execMask() = ~(src.rawData() | wf->execMask().to_ullong());
scc = wf->execMask().any() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP1__S_XNOR_SAVEEXEC_B64 class methods ---
Inst_SOP1__S_XNOR_SAVEEXEC_B64::Inst_SOP1__S_XNOR_SAVEEXEC_B64(
InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_xnor_saveexec_b64")
{
setFlag(ALU);
setFlag(ReadsEXEC);
setFlag(WritesEXEC);
} // Inst_SOP1__S_XNOR_SAVEEXEC_B64
Inst_SOP1__S_XNOR_SAVEEXEC_B64::~Inst_SOP1__S_XNOR_SAVEEXEC_B64()
{
} // ~Inst_SOP1__S_XNOR_SAVEEXEC_B64
// --- description from .arch file ---
// D.u64 = EXEC;
// EXEC = ~(S0.u64 ^ EXEC);
// SCC = 1 if the new value of EXEC is non-zero.
void
Inst_SOP1__S_XNOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
sdst = wf->execMask().to_ullong();
wf->execMask() = ~(src.rawData() ^ wf->execMask().to_ullong());
scc = wf->execMask().any() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP1__S_QUADMASK_B32 class methods ---
Inst_SOP1__S_QUADMASK_B32::Inst_SOP1__S_QUADMASK_B32(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_quadmask_b32")
{
setFlag(ALU);
} // Inst_SOP1__S_QUADMASK_B32
Inst_SOP1__S_QUADMASK_B32::~Inst_SOP1__S_QUADMASK_B32()
{
} // ~Inst_SOP1__S_QUADMASK_B32
// --- description from .arch file ---
// D.u = QuadMask(S0.u):
// D[0] = OR(S0[3:0]), D[1] = OR(S0[7:4]) ... D[31:8] = 0;
// SCC = 1 if result is non-zero.
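// --- worked example (illustrative, derived from the description
// --- above; not from the .arch file) ---
// quadMask(0x000000F0) = 0x00000002: only S0[7:4] contains set bits,
// so only D[1] is 1. SCC = 1 because the result is non-zero.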
void
Inst_SOP1__S_QUADMASK_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
sdst = quadMask(src.rawData());
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP1__S_QUADMASK_B64 class methods ---
Inst_SOP1__S_QUADMASK_B64::Inst_SOP1__S_QUADMASK_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_quadmask_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_QUADMASK_B64
Inst_SOP1__S_QUADMASK_B64::~Inst_SOP1__S_QUADMASK_B64()
{
} // ~Inst_SOP1__S_QUADMASK_B64
// --- description from .arch file ---
// D.u64 = QuadMask(S0.u64):
// D[0] = OR(S0[3:0]), D[1] = OR(S0[7:4]) ... D[63:16] = 0;
// SCC = 1 if result is non-zero.
void
Inst_SOP1__S_QUADMASK_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
sdst = quadMask(src.rawData());
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP1__S_MOVRELS_B32 class methods ---
Inst_SOP1__S_MOVRELS_B32::Inst_SOP1__S_MOVRELS_B32(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_movrels_b32")
{
setFlag(ALU);
} // Inst_SOP1__S_MOVRELS_B32
Inst_SOP1__S_MOVRELS_B32::~Inst_SOP1__S_MOVRELS_B32()
{
} // ~Inst_SOP1__S_MOVRELS_B32
// --- description from .arch file ---
// D.u = SGPR[S0.u + M0.u].u (move from relative source).
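// --- worked example (illustrative; the register choices are
// --- hypothetical) ---
// If SSRC0 encodes s2 (operand index 2) and M0 holds 10, the value is
// read from SGPR s12 (index 2 + 10) and copied into the destination.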
void
Inst_SOP1__S_MOVRELS_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 m0(gpuDynInst, REG_M0);
m0.read();
ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0 + m0.rawData());
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
src.read();
sdst = src.rawData();
sdst.write();
} // execute
// --- Inst_SOP1__S_MOVRELS_B64 class methods ---
Inst_SOP1__S_MOVRELS_B64::Inst_SOP1__S_MOVRELS_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_movrels_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_MOVRELS_B64
Inst_SOP1__S_MOVRELS_B64::~Inst_SOP1__S_MOVRELS_B64()
{
} // ~Inst_SOP1__S_MOVRELS_B64
// --- description from .arch file ---
// D.u64 = SGPR[S0.u + M0.u].u64 (move from relative source).
// The index in M0.u must be even for this operation.
void
Inst_SOP1__S_MOVRELS_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 m0(gpuDynInst, REG_M0);
m0.read();
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0 + m0.rawData());
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
src.read();
sdst = src.rawData();
sdst.write();
} // execute
// --- Inst_SOP1__S_MOVRELD_B32 class methods ---
Inst_SOP1__S_MOVRELD_B32::Inst_SOP1__S_MOVRELD_B32(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_movreld_b32")
{
setFlag(ALU);
} // Inst_SOP1__S_MOVRELD_B32
Inst_SOP1__S_MOVRELD_B32::~Inst_SOP1__S_MOVRELD_B32()
{
} // ~Inst_SOP1__S_MOVRELD_B32
// --- description from .arch file ---
// SGPR[D.u + M0.u].u = S0.u (move to relative destination).
void
Inst_SOP1__S_MOVRELD_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 m0(gpuDynInst, REG_M0);
m0.read();
ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST + m0.rawData());
src.read();
sdst = src.rawData();
sdst.write();
} // execute
// --- Inst_SOP1__S_MOVRELD_B64 class methods ---
Inst_SOP1__S_MOVRELD_B64::Inst_SOP1__S_MOVRELD_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_movreld_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_MOVRELD_B64
Inst_SOP1__S_MOVRELD_B64::~Inst_SOP1__S_MOVRELD_B64()
{
} // ~Inst_SOP1__S_MOVRELD_B64
// --- description from .arch file ---
// SGPR[D.u + M0.u].u64 = S0.u64 (move to relative destination).
// The index in M0.u must be even for this operation.
void
Inst_SOP1__S_MOVRELD_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 m0(gpuDynInst, REG_M0);
m0.read();
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST + m0.rawData());
src.read();
sdst = src.rawData();
sdst.write();
} // execute
// --- Inst_SOP1__S_CBRANCH_JOIN class methods ---
Inst_SOP1__S_CBRANCH_JOIN::Inst_SOP1__S_CBRANCH_JOIN(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_cbranch_join")
{
setFlag(Branch);
setFlag(WritesEXEC);
} // Inst_SOP1__S_CBRANCH_JOIN
Inst_SOP1__S_CBRANCH_JOIN::~Inst_SOP1__S_CBRANCH_JOIN()
{
} // ~Inst_SOP1__S_CBRANCH_JOIN
// --- description from .arch file ---
// saved_csp = S0.u;
// if(CSP == saved_csp) then
// PC += 4; // Second time to JOIN: continue with program.
// else
// CSP -= 1; // First time to JOIN; jump to other FORK path.
// {PC, EXEC} = SGPR[CSP * 4]; // Read 128 bits from 4 consecutive
// SGPRs.
// end
// Conditional branch join point (end of conditional branch block). S0 is
// saved CSP value.
// See S_CBRANCH_G_FORK and S_CBRANCH_I_FORK for related instructions.
void
Inst_SOP1__S_CBRANCH_JOIN::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOP1__S_ABS_I32 class methods ---
Inst_SOP1__S_ABS_I32::Inst_SOP1__S_ABS_I32(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_abs_i32")
{
setFlag(ALU);
} // Inst_SOP1__S_ABS_I32
Inst_SOP1__S_ABS_I32::~Inst_SOP1__S_ABS_I32()
{
} // ~Inst_SOP1__S_ABS_I32
// --- description from .arch file ---
// if(S0.i < 0) then D.i = -S0.i;
// else D.i = S0.i;
// SCC = 1 if result is non-zero.
// Integer absolute value.
void
Inst_SOP1__S_ABS_I32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
sdst = std::abs(src.rawData());
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP1__S_MOV_FED_B32 class methods ---
Inst_SOP1__S_MOV_FED_B32::Inst_SOP1__S_MOV_FED_B32(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_mov_fed_b32")
{
setFlag(ALU);
} // Inst_SOP1__S_MOV_FED_B32
Inst_SOP1__S_MOV_FED_B32::~Inst_SOP1__S_MOV_FED_B32()
{
} // ~Inst_SOP1__S_MOV_FED_B32
// --- description from .arch file ---
// D.u = S0.u. Introduce an EDC double-detect error on write to the
// destination SGPR.
void
Inst_SOP1__S_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOP1__S_SET_GPR_IDX_IDX class methods ---
Inst_SOP1__S_SET_GPR_IDX_IDX::Inst_SOP1__S_SET_GPR_IDX_IDX(
InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_set_gpr_idx_idx")
{
} // Inst_SOP1__S_SET_GPR_IDX_IDX
Inst_SOP1__S_SET_GPR_IDX_IDX::~Inst_SOP1__S_SET_GPR_IDX_IDX()
{
} // ~Inst_SOP1__S_SET_GPR_IDX_IDX
// --- description from .arch file ---
// M0[7:0] = S0.u[7:0].
// Modify the index used in vector GPR indexing.
void
Inst_SOP1__S_SET_GPR_IDX_IDX::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOPC__S_CMP_EQ_I32 class methods ---
Inst_SOPC__S_CMP_EQ_I32::Inst_SOPC__S_CMP_EQ_I32(InFmt_SOPC *iFmt)
: Inst_SOPC(iFmt, "s_cmp_eq_i32")
{
setFlag(ALU);
} // Inst_SOPC__S_CMP_EQ_I32
Inst_SOPC__S_CMP_EQ_I32::~Inst_SOPC__S_CMP_EQ_I32()
{
} // ~Inst_SOPC__S_CMP_EQ_I32
// --- description from .arch file ---
// SCC = (S0.i == S1.i).
void
Inst_SOPC__S_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
scc = (src0.rawData() == src1.rawData()) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPC__S_CMP_LG_I32 class methods ---
Inst_SOPC__S_CMP_LG_I32::Inst_SOPC__S_CMP_LG_I32(InFmt_SOPC *iFmt)
: Inst_SOPC(iFmt, "s_cmp_lg_i32")
{
setFlag(ALU);
} // Inst_SOPC__S_CMP_LG_I32
Inst_SOPC__S_CMP_LG_I32::~Inst_SOPC__S_CMP_LG_I32()
{
} // ~Inst_SOPC__S_CMP_LG_I32
// --- description from .arch file ---
// SCC = (S0.i != S1.i).
void
Inst_SOPC__S_CMP_LG_I32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
scc = (src0.rawData() != src1.rawData()) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPC__S_CMP_GT_I32 class methods ---
Inst_SOPC__S_CMP_GT_I32::Inst_SOPC__S_CMP_GT_I32(InFmt_SOPC *iFmt)
: Inst_SOPC(iFmt, "s_cmp_gt_i32")
{
setFlag(ALU);
} // Inst_SOPC__S_CMP_GT_I32
Inst_SOPC__S_CMP_GT_I32::~Inst_SOPC__S_CMP_GT_I32()
{
} // ~Inst_SOPC__S_CMP_GT_I32
// --- description from .arch file ---
// SCC = (S0.i > S1.i).
void
Inst_SOPC__S_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
scc = (src0.rawData() > src1.rawData()) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPC__S_CMP_GE_I32 class methods ---
Inst_SOPC__S_CMP_GE_I32::Inst_SOPC__S_CMP_GE_I32(InFmt_SOPC *iFmt)
: Inst_SOPC(iFmt, "s_cmp_ge_i32")
{
setFlag(ALU);
} // Inst_SOPC__S_CMP_GE_I32
Inst_SOPC__S_CMP_GE_I32::~Inst_SOPC__S_CMP_GE_I32()
{
} // ~Inst_SOPC__S_CMP_GE_I32
// --- description from .arch file ---
// SCC = (S0.i >= S1.i).
void
Inst_SOPC__S_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
scc = (src0.rawData() >= src1.rawData()) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPC__S_CMP_LT_I32 class methods ---
Inst_SOPC__S_CMP_LT_I32::Inst_SOPC__S_CMP_LT_I32(InFmt_SOPC *iFmt)
: Inst_SOPC(iFmt, "s_cmp_lt_i32")
{
setFlag(ALU);
} // Inst_SOPC__S_CMP_LT_I32
Inst_SOPC__S_CMP_LT_I32::~Inst_SOPC__S_CMP_LT_I32()
{
} // ~Inst_SOPC__S_CMP_LT_I32
// --- description from .arch file ---
// SCC = (S0.i < S1.i).
void
Inst_SOPC__S_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
scc = (src0.rawData() < src1.rawData()) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPC__S_CMP_LE_I32 class methods ---
Inst_SOPC__S_CMP_LE_I32::Inst_SOPC__S_CMP_LE_I32(InFmt_SOPC *iFmt)
: Inst_SOPC(iFmt, "s_cmp_le_i32")
{
setFlag(ALU);
} // Inst_SOPC__S_CMP_LE_I32
Inst_SOPC__S_CMP_LE_I32::~Inst_SOPC__S_CMP_LE_I32()
{
} // ~Inst_SOPC__S_CMP_LE_I32
// --- description from .arch file ---
// SCC = (S0.i <= S1.i).
void
Inst_SOPC__S_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
scc = (src0.rawData() <= src1.rawData()) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPC__S_CMP_EQ_U32 class methods ---
Inst_SOPC__S_CMP_EQ_U32::Inst_SOPC__S_CMP_EQ_U32(InFmt_SOPC *iFmt)
: Inst_SOPC(iFmt, "s_cmp_eq_u32")
{
setFlag(ALU);
} // Inst_SOPC__S_CMP_EQ_U32
Inst_SOPC__S_CMP_EQ_U32::~Inst_SOPC__S_CMP_EQ_U32()
{
} // ~Inst_SOPC__S_CMP_EQ_U32
// --- description from .arch file ---
// SCC = (S0.u == S1.u).
void
Inst_SOPC__S_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
scc = (src0.rawData() == src1.rawData()) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPC__S_CMP_LG_U32 class methods ---
Inst_SOPC__S_CMP_LG_U32::Inst_SOPC__S_CMP_LG_U32(InFmt_SOPC *iFmt)
: Inst_SOPC(iFmt, "s_cmp_lg_u32")
{
setFlag(ALU);
} // Inst_SOPC__S_CMP_LG_U32
Inst_SOPC__S_CMP_LG_U32::~Inst_SOPC__S_CMP_LG_U32()
{
} // ~Inst_SOPC__S_CMP_LG_U32
// --- description from .arch file ---
// SCC = (S0.u != S1.u).
void
Inst_SOPC__S_CMP_LG_U32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
scc = (src0.rawData() != src1.rawData()) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPC__S_CMP_GT_U32 class methods ---
Inst_SOPC__S_CMP_GT_U32::Inst_SOPC__S_CMP_GT_U32(InFmt_SOPC *iFmt)
: Inst_SOPC(iFmt, "s_cmp_gt_u32")
{
setFlag(ALU);
} // Inst_SOPC__S_CMP_GT_U32
Inst_SOPC__S_CMP_GT_U32::~Inst_SOPC__S_CMP_GT_U32()
{
} // ~Inst_SOPC__S_CMP_GT_U32
// --- description from .arch file ---
// SCC = (S0.u > S1.u).
void
Inst_SOPC__S_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
scc = (src0.rawData() > src1.rawData()) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPC__S_CMP_GE_U32 class methods ---
Inst_SOPC__S_CMP_GE_U32::Inst_SOPC__S_CMP_GE_U32(InFmt_SOPC *iFmt)
: Inst_SOPC(iFmt, "s_cmp_ge_u32")
{
setFlag(ALU);
} // Inst_SOPC__S_CMP_GE_U32
Inst_SOPC__S_CMP_GE_U32::~Inst_SOPC__S_CMP_GE_U32()
{
} // ~Inst_SOPC__S_CMP_GE_U32
// --- description from .arch file ---
// SCC = (S0.u >= S1.u).
void
Inst_SOPC__S_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
scc = (src0.rawData() >= src1.rawData()) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPC__S_CMP_LT_U32 class methods ---
Inst_SOPC__S_CMP_LT_U32::Inst_SOPC__S_CMP_LT_U32(InFmt_SOPC *iFmt)
: Inst_SOPC(iFmt, "s_cmp_lt_u32")
{
setFlag(ALU);
} // Inst_SOPC__S_CMP_LT_U32
Inst_SOPC__S_CMP_LT_U32::~Inst_SOPC__S_CMP_LT_U32()
{
} // ~Inst_SOPC__S_CMP_LT_U32
// --- description from .arch file ---
// SCC = (S0.u < S1.u).
void
Inst_SOPC__S_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
scc = (src0.rawData() < src1.rawData()) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPC__S_CMP_LE_U32 class methods ---
Inst_SOPC__S_CMP_LE_U32::Inst_SOPC__S_CMP_LE_U32(InFmt_SOPC *iFmt)
: Inst_SOPC(iFmt, "s_cmp_le_u32")
{
setFlag(ALU);
} // Inst_SOPC__S_CMP_LE_U32
Inst_SOPC__S_CMP_LE_U32::~Inst_SOPC__S_CMP_LE_U32()
{
} // ~Inst_SOPC__S_CMP_LE_U32
// --- description from .arch file ---
// SCC = (S0.u <= S1.u).
void
Inst_SOPC__S_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
scc = (src0.rawData() <= src1.rawData()) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPC__S_BITCMP0_B32 class methods ---
Inst_SOPC__S_BITCMP0_B32::Inst_SOPC__S_BITCMP0_B32(InFmt_SOPC *iFmt)
: Inst_SOPC(iFmt, "s_bitcmp0_b32")
{
setFlag(ALU);
} // Inst_SOPC__S_BITCMP0_B32
Inst_SOPC__S_BITCMP0_B32::~Inst_SOPC__S_BITCMP0_B32()
{
} // ~Inst_SOPC__S_BITCMP0_B32
// --- description from .arch file ---
// SCC = (S0.u[S1.u[4:0]] == 0).
void
Inst_SOPC__S_BITCMP0_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
scc = !bits(src0.rawData(), bits(src1.rawData(), 4, 0)) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPC__S_BITCMP1_B32 class methods ---
Inst_SOPC__S_BITCMP1_B32::Inst_SOPC__S_BITCMP1_B32(InFmt_SOPC *iFmt)
: Inst_SOPC(iFmt, "s_bitcmp1_b32")
{
setFlag(ALU);
} // Inst_SOPC__S_BITCMP1_B32
Inst_SOPC__S_BITCMP1_B32::~Inst_SOPC__S_BITCMP1_B32()
{
} // ~Inst_SOPC__S_BITCMP1_B32
// --- description from .arch file ---
// SCC = (S0.u[S1.u[4:0]] == 1).
void
Inst_SOPC__S_BITCMP1_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
scc = bits(src0.rawData(), bits(src1.rawData(), 4, 0)) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPC__S_BITCMP0_B64 class methods ---
Inst_SOPC__S_BITCMP0_B64::Inst_SOPC__S_BITCMP0_B64(InFmt_SOPC *iFmt)
: Inst_SOPC(iFmt, "s_bitcmp0_b64")
{
setFlag(ALU);
} // Inst_SOPC__S_BITCMP0_B64
Inst_SOPC__S_BITCMP0_B64::~Inst_SOPC__S_BITCMP0_B64()
{
} // ~Inst_SOPC__S_BITCMP0_B64
// --- description from .arch file ---
// SCC = (S0.u64[S1.u[5:0]] == 0).
void
Inst_SOPC__S_BITCMP0_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
scc = !bits(src0.rawData(), bits(src1.rawData(), 5, 0)) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPC__S_BITCMP1_B64 class methods ---
Inst_SOPC__S_BITCMP1_B64::Inst_SOPC__S_BITCMP1_B64(InFmt_SOPC *iFmt)
: Inst_SOPC(iFmt, "s_bitcmp1_b64")
{
setFlag(ALU);
} // Inst_SOPC__S_BITCMP1_B64
Inst_SOPC__S_BITCMP1_B64::~Inst_SOPC__S_BITCMP1_B64()
{
} // ~Inst_SOPC__S_BITCMP1_B64
// --- description from .arch file ---
// SCC = (S0.u64[S1.u[5:0]] == 1).
void
Inst_SOPC__S_BITCMP1_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
scc = bits(src0.rawData(), bits(src1.rawData(), 5, 0)) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPC__S_SETVSKIP class methods ---
Inst_SOPC__S_SETVSKIP::Inst_SOPC__S_SETVSKIP(InFmt_SOPC *iFmt)
: Inst_SOPC(iFmt, "s_setvskip")
{
} // Inst_SOPC__S_SETVSKIP
Inst_SOPC__S_SETVSKIP::~Inst_SOPC__S_SETVSKIP()
{
} // ~Inst_SOPC__S_SETVSKIP
// --- description from .arch file ---
// VSKIP = S0.u[S1.u[4:0]].
// Enables and disables VSKIP mode.
// When VSKIP is enabled, no VOP*/M*BUF/MIMG/DS/FLAT/EXP instructions
// are issued.
// If any vector operations are outstanding, S_WAITCNT must be issued
// before executing.
// This instruction requires one waitstate after executing (e.g. S_NOP 0).
// Example:
// s_waitcnt 0
// s_setvskip 1, 0 // Enable vskip mode.
// s_nop 1
void
Inst_SOPC__S_SETVSKIP::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOPC__S_SET_GPR_IDX_ON class methods ---
Inst_SOPC__S_SET_GPR_IDX_ON::Inst_SOPC__S_SET_GPR_IDX_ON(InFmt_SOPC *iFmt)
: Inst_SOPC(iFmt, "s_set_gpr_idx_on")
{
} // Inst_SOPC__S_SET_GPR_IDX_ON
Inst_SOPC__S_SET_GPR_IDX_ON::~Inst_SOPC__S_SET_GPR_IDX_ON()
{
} // ~Inst_SOPC__S_SET_GPR_IDX_ON
// --- description from .arch file ---
// MODE.gpr_idx_en = 1;
// M0[7:0] = S0.u[7:0];
// M0[15:12] = SIMM4 (direct contents of S1 field);
// // Remaining bits of M0 are unmodified.
// Enable GPR indexing mode. Vector operations after this will perform
// relative GPR addressing based on the contents of M0. The structure
// SQ_M0_GPR_IDX_WORD may be used to decode M0.
// The raw contents of the S1 field are read and used to set the enable
// bits. S1[0] = VSRC0_REL, S1[1] = VSRC1_REL, S1[2] = VSRC2_REL and
// S1[3] = VDST_REL.
void
Inst_SOPC__S_SET_GPR_IDX_ON::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOPC__S_CMP_EQ_U64 class methods ---
Inst_SOPC__S_CMP_EQ_U64::Inst_SOPC__S_CMP_EQ_U64(InFmt_SOPC *iFmt)
: Inst_SOPC(iFmt, "s_cmp_eq_u64")
{
setFlag(ALU);
} // Inst_SOPC__S_CMP_EQ_U64
Inst_SOPC__S_CMP_EQ_U64::~Inst_SOPC__S_CMP_EQ_U64()
{
} // ~Inst_SOPC__S_CMP_EQ_U64
// --- description from .arch file ---
// SCC = (S0.i64 == S1.i64).
void
Inst_SOPC__S_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandI64 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
scc = (src0.rawData() == src1.rawData()) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPC__S_CMP_LG_U64 class methods ---
Inst_SOPC__S_CMP_LG_U64::Inst_SOPC__S_CMP_LG_U64(InFmt_SOPC *iFmt)
: Inst_SOPC(iFmt, "s_cmp_lg_u64")
{
setFlag(ALU);
} // Inst_SOPC__S_CMP_LG_U64
Inst_SOPC__S_CMP_LG_U64::~Inst_SOPC__S_CMP_LG_U64()
{
} // ~Inst_SOPC__S_CMP_LG_U64
// --- description from .arch file ---
// SCC = (S0.i64 != S1.i64).
void
Inst_SOPC__S_CMP_LG_U64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandI64 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
scc = (src0.rawData() != src1.rawData()) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPP__S_NOP class methods ---
Inst_SOPP__S_NOP::Inst_SOPP__S_NOP(InFmt_SOPP *iFmt)
: Inst_SOPP(iFmt, "s_nop")
{
setFlag(Nop);
} // Inst_SOPP__S_NOP
Inst_SOPP__S_NOP::~Inst_SOPP__S_NOP()
{
} // ~Inst_SOPP__S_NOP
// --- description from .arch file ---
// Do nothing. Repeat NOP 1..8 times based on SIMM16[2:0] -- 0 = 1 time,
// 7 = 8 times.
// This instruction may be used to introduce wait states to resolve
// hazards; see the shader programming guide for details. Compare with
// S_SLEEP.
void
Inst_SOPP__S_NOP::execute(GPUDynInstPtr gpuDynInst)
{
} // execute
// --- Inst_SOPP__S_ENDPGM class methods ---
Inst_SOPP__S_ENDPGM::Inst_SOPP__S_ENDPGM(InFmt_SOPP *iFmt)
: Inst_SOPP(iFmt, "s_endpgm")
{
setFlag(EndOfKernel);
} // Inst_SOPP__S_ENDPGM
Inst_SOPP__S_ENDPGM::~Inst_SOPP__S_ENDPGM()
{
} // ~Inst_SOPP__S_ENDPGM
// --- description from .arch file ---
// End of program; terminate wavefront.
// The hardware implicitly executes S_WAITCNT 0 before executing this
// --- instruction.
// See S_ENDPGM_SAVED for the context-switch version of this instruction.
void
Inst_SOPP__S_ENDPGM::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ComputeUnit *cu = gpuDynInst->computeUnit();
// delete extra instructions fetched for completed work-items
wf->instructionBuffer.erase(wf->instructionBuffer.begin() + 1,
wf->instructionBuffer.end());
if (wf->pendingFetch) {
wf->dropFetch = true;
}
wf->computeUnit->fetchStage.fetchUnit(wf->simdId)
.flushBuf(wf->wfSlotId);
wf->setStatus(Wavefront::S_STOPPED);
int refCount = wf->computeUnit->getLds()
.decreaseRefCounter(wf->dispatchId, wf->wgId);
/**
* The parent WF of this instruction is exiting, therefore
* it should not participate in this barrier any longer. This
* prevents possible deadlock issues if WFs exit early.
*/
int bar_id = WFBarrier::InvalidID;
if (wf->hasBarrier()) {
assert(wf->getStatus() != Wavefront::S_BARRIER);
bar_id = wf->barrierId();
assert(bar_id != WFBarrier::InvalidID);
wf->releaseBarrier();
cu->decMaxBarrierCnt(bar_id);
DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - Exiting the "
"program and decrementing max barrier count for "
"barrier Id%d. New max count: %d.\n", cu->cu_id,
wf->simdId, wf->wfSlotId, wf->wfDynId, bar_id,
cu->maxBarrierCnt(bar_id));
}
DPRINTF(GPUExec, "CU%d: decrease ref ctr WG[%d] to [%d]\n",
wf->computeUnit->cu_id, wf->wgId, refCount);
wf->computeUnit->registerManager->freeRegisters(wf);
wf->computeUnit->stats.completedWfs++;
wf->computeUnit->activeWaves--;
panic_if(wf->computeUnit->activeWaves < 0, "CU[%d] Active waves less "
"than zero\n", wf->computeUnit->cu_id);
DPRINTF(GPUExec, "Doing return for CU%d: WF[%d][%d][%d]\n",
wf->computeUnit->cu_id, wf->simdId, wf->wfSlotId, wf->wfDynId);
for (int i = 0; i < wf->vecReads.size(); i++) {
if (wf->rawDist.find(i) != wf->rawDist.end()) {
wf->stats.readsPerWrite.sample(wf->vecReads.at(i));
}
}
wf->vecReads.clear();
wf->rawDist.clear();
wf->lastInstExec = 0;
if (!refCount) {
/**
* If all WFs have finished, and hence the WG has finished,
* then we can free up the barrier belonging to the parent
* WG, but only if we actually used a barrier (i.e., more
* than one WF in the WG).
*/
if (bar_id != WFBarrier::InvalidID) {
DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - All waves are "
"now complete. Releasing barrier Id%d.\n", cu->cu_id,
wf->simdId, wf->wfSlotId, wf->wfDynId,
wf->barrierId());
cu->releaseBarrier(bar_id);
}
/**
 * The last wavefront of the workgroup has executed its return. If
 * the workgroup is not the final one in the kernel, then simply
 * retire it; however, if it is the final one, i.e., it indicates
 * the kernel end, then a release operation (i.e., a GL2 writeback)
 * is needed.
 */
// check whether the workgroup is indicating the kernel end, i.e.,
// the last workgroup in the kernel
bool kernelEnd =
wf->computeUnit->shader->dispatcher().isReachingKernelEnd(wf);
bool relNeeded =
wf->computeUnit->shader->impl_kern_end_rel;
// if it is not a kernel end, then retire the workgroup directly
if (!kernelEnd || !relNeeded) {
wf->computeUnit->shader->dispatcher().notifyWgCompl(wf);
wf->setStatus(Wavefront::S_STOPPED);
wf->computeUnit->stats.completedWGs++;
return;
}
/**
 * if it is a kernel end, inject a memory sync, i.e., a GL2 WB, and
 * retire the workgroup after receiving the response.
 * note that GL0V and GL1 are read only, and they just forward the
 * GL2 WB request. When forwarding, GL1 sends the request to all
 * GL2s in the complex
 */
setFlag(MemSync);
setFlag(GlobalSegment);
// Notify Memory System of Kernel Completion
// Kernel End = isKernel + isMemSync
wf->setStatus(Wavefront::S_RETURNING);
gpuDynInst->simdId = wf->simdId;
gpuDynInst->wfSlotId = wf->wfSlotId;
gpuDynInst->wfDynId = wf->wfDynId;
DPRINTF(GPUExec, "inject global memory fence for CU%d: "
"WF[%d][%d][%d]\n", wf->computeUnit->cu_id,
wf->simdId, wf->wfSlotId, wf->wfDynId);
// call shader to prepare the flush operations
wf->computeUnit->shader->prepareFlush(gpuDynInst);
wf->computeUnit->stats.completedWGs++;
} else {
wf->computeUnit->shader->dispatcher().scheduleDispatch();
}
} // execute
// --- Inst_SOPP__S_BRANCH class methods ---
Inst_SOPP__S_BRANCH::Inst_SOPP__S_BRANCH(InFmt_SOPP *iFmt)
: Inst_SOPP(iFmt, "s_branch")
{
setFlag(Branch);
} // Inst_SOPP__S_BRANCH
Inst_SOPP__S_BRANCH::~Inst_SOPP__S_BRANCH()
{
} // ~Inst_SOPP__S_BRANCH
// --- description from .arch file ---
// PC = PC + signext(SIMM16 * 4) + 4 (short jump).
// For a long jump, use S_SETPC.
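// --- worked example (illustrative, derived from the description
// --- above; not from the .arch file) ---
// SIMM16 = 16 gives PC = PC + 16*4 + 4 = PC + 68; SIMM16 = -1 gives
// PC = PC - 4 + 4 = PC, i.e. a branch back to this instruction.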
void
Inst_SOPP__S_BRANCH::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
Addr pc = gpuDynInst->pc();
ScalarRegI16 simm16 = instData.SIMM16;
pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
wf->pc(pc);
} // execute
// --- Inst_SOPP__S_WAKEUP class methods ---
Inst_SOPP__S_WAKEUP::Inst_SOPP__S_WAKEUP(InFmt_SOPP *iFmt)
: Inst_SOPP(iFmt, "s_wakeup")
{
} // Inst_SOPP__S_WAKEUP
Inst_SOPP__S_WAKEUP::~Inst_SOPP__S_WAKEUP()
{
} // ~Inst_SOPP__S_WAKEUP
// --- description from .arch file ---
// Allow a wave to 'ping' all the other waves in its threadgroup to force
// them to wake up immediately from an S_SLEEP instruction. The ping is
// ignored if the waves are not sleeping.
// This allows for more efficient polling on a memory location. The waves
// which are polling can sit in a long S_SLEEP between memory reads, but
// the wave which writes the value can tell them all to wake up early now
// that the data is available. This is useful for fBarrier implementations
// (speedup).
// This method is also safe from races because if any wave misses the ping,
// everything still works fine (whoever missed it just completes their
// normal S_SLEEP).
void
Inst_SOPP__S_WAKEUP::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOPP__S_CBRANCH_SCC0 class methods ---
Inst_SOPP__S_CBRANCH_SCC0::Inst_SOPP__S_CBRANCH_SCC0(InFmt_SOPP *iFmt)
: Inst_SOPP(iFmt, "s_cbranch_scc0")
{
setFlag(Branch);
} // Inst_SOPP__S_CBRANCH_SCC0
Inst_SOPP__S_CBRANCH_SCC0::~Inst_SOPP__S_CBRANCH_SCC0()
{
} // ~Inst_SOPP__S_CBRANCH_SCC0
// --- description from .arch file ---
// if(SCC == 0) then PC = PC + signext(SIMM16 * 4) + 4;
// else NOP.
void
Inst_SOPP__S_CBRANCH_SCC0::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
Addr pc = gpuDynInst->pc();
ScalarRegI16 simm16 = instData.SIMM16;
ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);
scc.read();
if (!scc.rawData()) {
pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
}
wf->pc(pc);
} // execute
// --- Inst_SOPP__S_CBRANCH_SCC1 class methods ---
Inst_SOPP__S_CBRANCH_SCC1::Inst_SOPP__S_CBRANCH_SCC1(InFmt_SOPP *iFmt)
: Inst_SOPP(iFmt, "s_cbranch_scc1")
{
setFlag(Branch);
} // Inst_SOPP__S_CBRANCH_SCC1
Inst_SOPP__S_CBRANCH_SCC1::~Inst_SOPP__S_CBRANCH_SCC1()
{
} // ~Inst_SOPP__S_CBRANCH_SCC1
// --- description from .arch file ---
// if(SCC == 1) then PC = PC + signext(SIMM16 * 4) + 4;
// else NOP.
void
Inst_SOPP__S_CBRANCH_SCC1::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
Addr pc = gpuDynInst->pc();
ScalarRegI16 simm16 = instData.SIMM16;
ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);
scc.read();
if (scc.rawData()) {
pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
}
wf->pc(pc);
} // execute
// --- Inst_SOPP__S_CBRANCH_VCCZ class methods ---
Inst_SOPP__S_CBRANCH_VCCZ::Inst_SOPP__S_CBRANCH_VCCZ(InFmt_SOPP *iFmt)
: Inst_SOPP(iFmt, "s_cbranch_vccz")
{
setFlag(Branch);
setFlag(ReadsVCC);
} // Inst_SOPP__S_CBRANCH_VCCZ
Inst_SOPP__S_CBRANCH_VCCZ::~Inst_SOPP__S_CBRANCH_VCCZ()
{
} // ~Inst_SOPP__S_CBRANCH_VCCZ
// --- description from .arch file ---
// if(VCC == 0) then PC = PC + signext(SIMM16 * 4) + 4;
// else NOP.
void
Inst_SOPP__S_CBRANCH_VCCZ::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
Addr pc = gpuDynInst->pc();
ScalarRegI16 simm16 = instData.SIMM16;
vcc.read();
if (!vcc.rawData()) {
pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
}
wf->pc(pc);
} // execute
// --- Inst_SOPP__S_CBRANCH_VCCNZ class methods ---
Inst_SOPP__S_CBRANCH_VCCNZ::Inst_SOPP__S_CBRANCH_VCCNZ(InFmt_SOPP *iFmt)
: Inst_SOPP(iFmt, "s_cbranch_vccnz")
{
setFlag(Branch);
setFlag(ReadsVCC);
} // Inst_SOPP__S_CBRANCH_VCCNZ
Inst_SOPP__S_CBRANCH_VCCNZ::~Inst_SOPP__S_CBRANCH_VCCNZ()
{
} // ~Inst_SOPP__S_CBRANCH_VCCNZ
// --- description from .arch file ---
// if(VCC != 0) then PC = PC + signext(SIMM16 * 4) + 4;
// else NOP.
void
Inst_SOPP__S_CBRANCH_VCCNZ::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
vcc.read();
if (vcc.rawData()) {
Addr pc = gpuDynInst->pc();
ScalarRegI16 simm16 = instData.SIMM16;
pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
wf->pc(pc);
}
} // execute
// --- Inst_SOPP__S_CBRANCH_EXECZ class methods ---
Inst_SOPP__S_CBRANCH_EXECZ::Inst_SOPP__S_CBRANCH_EXECZ(InFmt_SOPP *iFmt)
: Inst_SOPP(iFmt, "s_cbranch_execz")
{
setFlag(Branch);
setFlag(ReadsEXEC);
} // Inst_SOPP__S_CBRANCH_EXECZ
Inst_SOPP__S_CBRANCH_EXECZ::~Inst_SOPP__S_CBRANCH_EXECZ()
{
} // ~Inst_SOPP__S_CBRANCH_EXECZ
// --- description from .arch file ---
// if(EXEC == 0) then PC = PC + signext(SIMM16 * 4) + 4;
// else NOP.
void
Inst_SOPP__S_CBRANCH_EXECZ::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (wf->execMask().none()) {
Addr pc = gpuDynInst->pc();
ScalarRegI16 simm16 = instData.SIMM16;
pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
wf->pc(pc);
}
} // execute
// --- Inst_SOPP__S_CBRANCH_EXECNZ class methods ---
Inst_SOPP__S_CBRANCH_EXECNZ::Inst_SOPP__S_CBRANCH_EXECNZ(InFmt_SOPP *iFmt)
: Inst_SOPP(iFmt, "s_cbranch_execnz")
{
setFlag(Branch);
setFlag(ReadsEXEC);
} // Inst_SOPP__S_CBRANCH_EXECNZ
Inst_SOPP__S_CBRANCH_EXECNZ::~Inst_SOPP__S_CBRANCH_EXECNZ()
{
} // ~Inst_SOPP__S_CBRANCH_EXECNZ
// --- description from .arch file ---
// if(EXEC != 0) then PC = PC + signext(SIMM16 * 4) + 4;
// else NOP.
void
Inst_SOPP__S_CBRANCH_EXECNZ::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (wf->execMask().any()) {
Addr pc = gpuDynInst->pc();
ScalarRegI16 simm16 = instData.SIMM16;
pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
wf->pc(pc);
}
} // execute
// --- Inst_SOPP__S_BARRIER class methods ---
Inst_SOPP__S_BARRIER::Inst_SOPP__S_BARRIER(InFmt_SOPP *iFmt)
: Inst_SOPP(iFmt, "s_barrier")
{
setFlag(MemBarrier);
} // Inst_SOPP__S_BARRIER
Inst_SOPP__S_BARRIER::~Inst_SOPP__S_BARRIER()
{
} // ~Inst_SOPP__S_BARRIER
// --- description from .arch file ---
// Synchronize waves within a threadgroup.
// If not all waves of the threadgroup have been created yet, this
// waits for the entire group before proceeding.
// If some waves in the threadgroup have already terminated, this waits on
// only the surviving waves.
// Barriers are legal inside trap handlers.
void
Inst_SOPP__S_BARRIER::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ComputeUnit *cu = gpuDynInst->computeUnit();
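// Only waves that joined a barrier participate. This instruction
// just bumps the arrival count; releasing every wave at this
// barrier once the last participant arrives is handled by the CU's
// barrier bookkeeping, outside this instruction.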
if (wf->hasBarrier()) {
int bar_id = wf->barrierId();
assert(wf->getStatus() == Wavefront::S_BARRIER);
cu->incNumAtBarrier(bar_id);
DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - Stalling at "
"barrier Id%d. %d waves now at barrier, %d waves "
"remain.\n", cu->cu_id, wf->simdId, wf->wfSlotId,
wf->wfDynId, bar_id, cu->numAtBarrier(bar_id),
cu->numYetToReachBarrier(bar_id));
}
} // execute
// --- Inst_SOPP__S_SETKILL class methods ---
Inst_SOPP__S_SETKILL::Inst_SOPP__S_SETKILL(InFmt_SOPP *iFmt)
: Inst_SOPP(iFmt, "s_setkill")
{
} // Inst_SOPP__S_SETKILL
Inst_SOPP__S_SETKILL::~Inst_SOPP__S_SETKILL()
{
} // ~Inst_SOPP__S_SETKILL
// --- description from .arch file ---
// set KILL bit to value of SIMM16[0].
// Used primarily for debugging kill wave host command behavior.
void
Inst_SOPP__S_SETKILL::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOPP__S_WAITCNT class methods ---
Inst_SOPP__S_WAITCNT::Inst_SOPP__S_WAITCNT(InFmt_SOPP *iFmt)
: Inst_SOPP(iFmt, "s_waitcnt")
{
setFlag(ALU);
setFlag(Waitcnt);
} // Inst_SOPP__S_WAITCNT
Inst_SOPP__S_WAITCNT::~Inst_SOPP__S_WAITCNT()
{
} // ~Inst_SOPP__S_WAITCNT
// --- description from .arch file ---
// Wait for the counts of outstanding lds, vector-memory and
// --- export/vmem-write-data to be at or below the specified levels.
// SIMM16[3:0] = vmcount (vector memory operations),
// SIMM16[6:4] = export/mem-write-data count,
// SIMM16[12:8] = LGKM_cnt (scalar-mem/GDS/LDS count).
void
Inst_SOPP__S_WAITCNT::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegI32 vm_cnt = 0;
ScalarRegI32 exp_cnt = 0;
ScalarRegI32 lgkm_cnt = 0;
vm_cnt = bits<ScalarRegI16>(instData.SIMM16, 3, 0);
exp_cnt = bits<ScalarRegI16>(instData.SIMM16, 6, 4);
lgkm_cnt = bits<ScalarRegI16>(instData.SIMM16, 12, 8);
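// Worked example (illustrative values): SIMM16 = 0x0070 decodes to
// vm_cnt = 0, exp_cnt = 7, lgkm_cnt = 0, i.e. wait for all
// outstanding vector-memory and scalar/LDS/GDS operations while
// leaving the export count unwaited.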
gpuDynInst->wavefront()->setStatus(Wavefront::S_WAITCNT);
gpuDynInst->wavefront()->setWaitCnts(vm_cnt, exp_cnt, lgkm_cnt);
} // execute
// --- Inst_SOPP__S_SETHALT class methods ---
Inst_SOPP__S_SETHALT::Inst_SOPP__S_SETHALT(InFmt_SOPP *iFmt)
: Inst_SOPP(iFmt, "s_sethalt")
{
} // Inst_SOPP__S_SETHALT
Inst_SOPP__S_SETHALT::~Inst_SOPP__S_SETHALT()
{
} // ~Inst_SOPP__S_SETHALT
// --- description from .arch file ---
// Set HALT bit to value of SIMM16[0]; 1 = halt, 0 = resume.
// The halt flag is ignored while PRIV == 1 (inside trap handlers) but the
// shader will halt immediately after the handler returns if HALT is still
// set at that time.
void
Inst_SOPP__S_SETHALT::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOPP__S_SLEEP class methods ---
Inst_SOPP__S_SLEEP::Inst_SOPP__S_SLEEP(InFmt_SOPP *iFmt)
: Inst_SOPP(iFmt, "s_sleep")
{
} // Inst_SOPP__S_SLEEP
Inst_SOPP__S_SLEEP::~Inst_SOPP__S_SLEEP()
{
} // ~Inst_SOPP__S_SLEEP
// --- description from .arch file ---
// Cause a wave to sleep for (64 * SIMM16[2:0] + 1..64) clocks.
// The amount of delay is approximate. Compare with S_NOP.
void
Inst_SOPP__S_SLEEP::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOPP__S_SETPRIO class methods ---
Inst_SOPP__S_SETPRIO::Inst_SOPP__S_SETPRIO(InFmt_SOPP *iFmt)
: Inst_SOPP(iFmt, "s_setprio")
{
} // Inst_SOPP__S_SETPRIO
Inst_SOPP__S_SETPRIO::~Inst_SOPP__S_SETPRIO()
{
} // ~Inst_SOPP__S_SETPRIO
// --- description from .arch file ---
// User settable wave priority is set to SIMM16[1:0]. 0 = lowest,
// 3 = highest.
// The overall wave priority is {SPIPrio[1:0] + UserPrio[1:0],
// WaveAge[3:0]}.
void
Inst_SOPP__S_SETPRIO::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOPP__S_SENDMSG class methods ---
Inst_SOPP__S_SENDMSG::Inst_SOPP__S_SENDMSG(InFmt_SOPP *iFmt)
: Inst_SOPP(iFmt, "s_sendmsg")
{
} // Inst_SOPP__S_SENDMSG
Inst_SOPP__S_SENDMSG::~Inst_SOPP__S_SENDMSG()
{
} // ~Inst_SOPP__S_SENDMSG
// --- description from .arch file ---
// Send a message upstream to VGT or the interrupt handler.
// SIMM16[9:0] contains the message type and is documented in the shader
// --- programming guide.
void
Inst_SOPP__S_SENDMSG::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOPP__S_SENDMSGHALT class methods ---
Inst_SOPP__S_SENDMSGHALT::Inst_SOPP__S_SENDMSGHALT(InFmt_SOPP *iFmt)
: Inst_SOPP(iFmt, "s_sendmsghalt")
{
} // Inst_SOPP__S_SENDMSGHALT
Inst_SOPP__S_SENDMSGHALT::~Inst_SOPP__S_SENDMSGHALT()
{
} // ~Inst_SOPP__S_SENDMSGHALT
// --- description from .arch file ---
// Send a message and then HALT the wavefront; see S_SENDMSG for details.
void
Inst_SOPP__S_SENDMSGHALT::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOPP__S_TRAP class methods ---
Inst_SOPP__S_TRAP::Inst_SOPP__S_TRAP(InFmt_SOPP *iFmt)
: Inst_SOPP(iFmt, "s_trap")
{
} // Inst_SOPP__S_TRAP
Inst_SOPP__S_TRAP::~Inst_SOPP__S_TRAP()
{
} // ~Inst_SOPP__S_TRAP
// --- description from .arch file ---
// TrapID = SIMM16[7:0];
// Wait for all instructions to complete;
// set {TTMP1, TTMP0} = {3'h0, PCRewind[3:0], HT[0], TrapID[7:0],
// PC[47:0]};
// PC = TBA (trap base address);
// PRIV = 1.
// Enter the trap handler. This instruction may be generated internally as
// well in response to a host trap (HT = 1) or an exception.
// TrapID 0 is reserved for hardware use and should not be used in a
// shader-generated trap.
void
Inst_SOPP__S_TRAP::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOPP__S_ICACHE_INV class methods ---
Inst_SOPP__S_ICACHE_INV::Inst_SOPP__S_ICACHE_INV(InFmt_SOPP *iFmt)
: Inst_SOPP(iFmt, "s_icache_inv")
{
} // Inst_SOPP__S_ICACHE_INV
Inst_SOPP__S_ICACHE_INV::~Inst_SOPP__S_ICACHE_INV()
{
} // ~Inst_SOPP__S_ICACHE_INV
// --- description from .arch file ---
// Invalidate entire L1 instruction cache.
// You must have 12 separate S_NOP instructions or a jump/branch
// instruction after this instruction
// to ensure the SQ instruction buffer is purged.
void
Inst_SOPP__S_ICACHE_INV::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOPP__S_INCPERFLEVEL class methods ---
Inst_SOPP__S_INCPERFLEVEL::Inst_SOPP__S_INCPERFLEVEL(InFmt_SOPP *iFmt)
: Inst_SOPP(iFmt, "s_incperflevel")
{
} // Inst_SOPP__S_INCPERFLEVEL
Inst_SOPP__S_INCPERFLEVEL::~Inst_SOPP__S_INCPERFLEVEL()
{
} // ~Inst_SOPP__S_INCPERFLEVEL
// --- description from .arch file ---
// Increment performance counter specified in SIMM16[3:0] by 1.
void
Inst_SOPP__S_INCPERFLEVEL::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOPP__S_DECPERFLEVEL class methods ---
Inst_SOPP__S_DECPERFLEVEL::Inst_SOPP__S_DECPERFLEVEL(InFmt_SOPP *iFmt)
: Inst_SOPP(iFmt, "s_decperflevel")
{
} // Inst_SOPP__S_DECPERFLEVEL
Inst_SOPP__S_DECPERFLEVEL::~Inst_SOPP__S_DECPERFLEVEL()
{
} // ~Inst_SOPP__S_DECPERFLEVEL
// --- description from .arch file ---
// Decrement performance counter specified in SIMM16[3:0] by 1.
void
Inst_SOPP__S_DECPERFLEVEL::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOPP__S_TTRACEDATA class methods ---
Inst_SOPP__S_TTRACEDATA::Inst_SOPP__S_TTRACEDATA(InFmt_SOPP *iFmt)
: Inst_SOPP(iFmt, "s_ttracedata")
{
} // Inst_SOPP__S_TTRACEDATA
Inst_SOPP__S_TTRACEDATA::~Inst_SOPP__S_TTRACEDATA()
{
} // ~Inst_SOPP__S_TTRACEDATA
// --- description from .arch file ---
// Send M0 as user data to the thread trace stream.
void
Inst_SOPP__S_TTRACEDATA::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOPP__S_CBRANCH_CDBGSYS class methods ---
Inst_SOPP__S_CBRANCH_CDBGSYS::Inst_SOPP__S_CBRANCH_CDBGSYS(
InFmt_SOPP *iFmt)
: Inst_SOPP(iFmt, "s_cbranch_cdbgsys")
{
setFlag(Branch);
} // Inst_SOPP__S_CBRANCH_CDBGSYS
Inst_SOPP__S_CBRANCH_CDBGSYS::~Inst_SOPP__S_CBRANCH_CDBGSYS()
{
} // ~Inst_SOPP__S_CBRANCH_CDBGSYS
// --- description from .arch file ---
// if(conditional_debug_system != 0) then PC = PC + signext(SIMM16 * 4)
// + 4;
// else NOP.
void
Inst_SOPP__S_CBRANCH_CDBGSYS::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOPP__S_CBRANCH_CDBGUSER class methods ---
Inst_SOPP__S_CBRANCH_CDBGUSER::Inst_SOPP__S_CBRANCH_CDBGUSER(
InFmt_SOPP *iFmt)
: Inst_SOPP(iFmt, "s_cbranch_cdbguser")
{
setFlag(Branch);
} // Inst_SOPP__S_CBRANCH_CDBGUSER
Inst_SOPP__S_CBRANCH_CDBGUSER::~Inst_SOPP__S_CBRANCH_CDBGUSER()
{
} // ~Inst_SOPP__S_CBRANCH_CDBGUSER
// --- description from .arch file ---
// if(conditional_debug_user != 0) then PC = PC + signext(SIMM16 * 4) + 4;
// else NOP.
void
Inst_SOPP__S_CBRANCH_CDBGUSER::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER class methods ---
Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER(
InFmt_SOPP *iFmt)
: Inst_SOPP(iFmt, "s_cbranch_cdbgsys_or_user")
{
setFlag(Branch);
} // Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER
Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::
~Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER()
{
} // ~Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER
// --- description from .arch file ---
// if(conditional_debug_system || conditional_debug_user) then PC = PC +
// --- signext(SIMM16 * 4) + 4;
// else NOP.
void
Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER class methods ---
Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER::
Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER(InFmt_SOPP *iFmt)
: Inst_SOPP(iFmt, "s_cbranch_cdbgsys_and_user")
{
setFlag(Branch);
} // Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER
Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER::
~Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER()
{
} // ~Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER
// --- description from .arch file ---
// if(conditional_debug_system && conditional_debug_user) then PC = PC +
// --- signext(SIMM16 * 4) + 4;
// else NOP.
void
Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOPP__S_ENDPGM_SAVED class methods ---
Inst_SOPP__S_ENDPGM_SAVED::Inst_SOPP__S_ENDPGM_SAVED(InFmt_SOPP *iFmt)
: Inst_SOPP(iFmt, "s_endpgm_saved")
{
} // Inst_SOPP__S_ENDPGM_SAVED
Inst_SOPP__S_ENDPGM_SAVED::~Inst_SOPP__S_ENDPGM_SAVED()
{
} // ~Inst_SOPP__S_ENDPGM_SAVED
// --- description from .arch file ---
// End of program; signal that a wave has been saved by the context-switch
// trap handler and terminate wavefront.
// The hardware implicitly executes S_WAITCNT 0 before executing this
// instruction.
// Use S_ENDPGM in all cases unless you are executing the context-switch
// save handler.
void
Inst_SOPP__S_ENDPGM_SAVED::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOPP__S_SET_GPR_IDX_OFF class methods ---
Inst_SOPP__S_SET_GPR_IDX_OFF::Inst_SOPP__S_SET_GPR_IDX_OFF(
InFmt_SOPP *iFmt)
: Inst_SOPP(iFmt, "s_set_gpr_idx_off")
{
} // Inst_SOPP__S_SET_GPR_IDX_OFF
Inst_SOPP__S_SET_GPR_IDX_OFF::~Inst_SOPP__S_SET_GPR_IDX_OFF()
{
} // ~Inst_SOPP__S_SET_GPR_IDX_OFF
// --- description from .arch file ---
// MODE.gpr_idx_en = 0.
// Clear GPR indexing mode. Vector operations after this will not perform
// --- relative GPR addressing regardless of the contents of M0. This
// --- instruction does not modify M0.
void
Inst_SOPP__S_SET_GPR_IDX_OFF::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOPP__S_SET_GPR_IDX_MODE class methods ---
Inst_SOPP__S_SET_GPR_IDX_MODE::Inst_SOPP__S_SET_GPR_IDX_MODE(
InFmt_SOPP *iFmt)
: Inst_SOPP(iFmt, "s_set_gpr_idx_mode")
{
} // Inst_SOPP__S_SET_GPR_IDX_MODE
Inst_SOPP__S_SET_GPR_IDX_MODE::~Inst_SOPP__S_SET_GPR_IDX_MODE()
{
} // ~Inst_SOPP__S_SET_GPR_IDX_MODE
// --- description from .arch file ---
// M0[15:12] = SIMM4.
// Modify the mode used for vector GPR indexing.
// The raw contents of the source field are read and used to set the enable
// bits. SIMM4[0] = VSRC0_REL, SIMM4[1] = VSRC1_REL, SIMM4[2] = VSRC2_REL
// and SIMM4[3] = VDST_REL.
void
Inst_SOPP__S_SET_GPR_IDX_MODE::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SMEM__S_LOAD_DWORD class methods ---
Inst_SMEM__S_LOAD_DWORD::Inst_SMEM__S_LOAD_DWORD(InFmt_SMEM *iFmt)
: Inst_SMEM(iFmt, "s_load_dword")
{
setFlag(MemoryRef);
setFlag(Load);
} // Inst_SMEM__S_LOAD_DWORD
Inst_SMEM__S_LOAD_DWORD::~Inst_SMEM__S_LOAD_DWORD()
{
} // ~Inst_SMEM__S_LOAD_DWORD
/**
* Read 1 dword from scalar data cache. If the offset is specified as an
* sgpr, the sgpr contains an unsigned byte offset (the 2 LSBs are
* ignored). If the offset is specified as an immediate 20-bit constant,
* the constant is an unsigned byte offset.
*/
void
Inst_SMEM__S_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ScalarRegU32 offset(0);
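// SBASE names an aligned SGPR pair, so the actual register index is
// SBASE * 2 (hence the shift). The pair holds the 64-bit base
// address; calcAddr adds the byte offset selected below.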
ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);
addr.read();
if (instData.IMM) {
offset = extData.OFFSET;
} else {
ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
off_sgpr.read();
offset = off_sgpr.rawData();
}
calcAddr(gpuDynInst, addr, offset);
gpuDynInst->computeUnit()->scalarMemoryPipe
.issueRequest(gpuDynInst);
} // execute
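// SMEM accesses are split across three phases: execute() computes
// the address and issues to the scalar memory pipeline,
// initiateAcc() builds the N-dword memory request, and
// completeAcc() writes the returned data back to the SGPRs.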
void
Inst_SMEM__S_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemRead<1>(gpuDynInst);
} // initiateAcc
void
Inst_SMEM__S_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
{
ScalarOperandU32 sdst(gpuDynInst, instData.SDATA);
sdst.write();
} // completeAcc
// --- Inst_SMEM__S_LOAD_DWORDX2 class methods ---
Inst_SMEM__S_LOAD_DWORDX2::Inst_SMEM__S_LOAD_DWORDX2(InFmt_SMEM *iFmt)
: Inst_SMEM(iFmt, "s_load_dwordx2")
{
setFlag(MemoryRef);
setFlag(Load);
} // Inst_SMEM__S_LOAD_DWORDX2
Inst_SMEM__S_LOAD_DWORDX2::~Inst_SMEM__S_LOAD_DWORDX2()
{
} // ~Inst_SMEM__S_LOAD_DWORDX2
/**
* Read 2 dwords from scalar data cache. See s_load_dword for details on
* the offset input.
*/
void
Inst_SMEM__S_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ScalarRegU32 offset(0);
ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);
addr.read();
if (instData.IMM) {
offset = extData.OFFSET;
} else {
ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
off_sgpr.read();
offset = off_sgpr.rawData();
}
calcAddr(gpuDynInst, addr, offset);
gpuDynInst->computeUnit()->scalarMemoryPipe.
issueRequest(gpuDynInst);
} // execute
void
Inst_SMEM__S_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemRead<2>(gpuDynInst);
} // initiateAcc
void
Inst_SMEM__S_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
{
ScalarOperandU64 sdst(gpuDynInst, instData.SDATA);
sdst.write();
} // completeAcc
// --- Inst_SMEM__S_LOAD_DWORDX4 class methods ---
Inst_SMEM__S_LOAD_DWORDX4::Inst_SMEM__S_LOAD_DWORDX4(InFmt_SMEM *iFmt)
: Inst_SMEM(iFmt, "s_load_dwordx4")
{
setFlag(MemoryRef);
setFlag(Load);
} // Inst_SMEM__S_LOAD_DWORDX4
Inst_SMEM__S_LOAD_DWORDX4::~Inst_SMEM__S_LOAD_DWORDX4()
{
} // ~Inst_SMEM__S_LOAD_DWORDX4
// --- description from .arch file ---
// Read 4 dwords from scalar data cache. See S_LOAD_DWORD for details on
// the offset input.
void
Inst_SMEM__S_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ScalarRegU32 offset(0);
ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);
addr.read();
if (instData.IMM) {
offset = extData.OFFSET;
} else {
ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
off_sgpr.read();
offset = off_sgpr.rawData();
}
calcAddr(gpuDynInst, addr, offset);
gpuDynInst->computeUnit()->scalarMemoryPipe.
issueRequest(gpuDynInst);
} // execute
void
Inst_SMEM__S_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemRead<4>(gpuDynInst);
} // initiateAcc
void
Inst_SMEM__S_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
{
ScalarOperandU128 sdst(gpuDynInst, instData.SDATA);
sdst.write();
} // completeAcc
// --- Inst_SMEM__S_LOAD_DWORDX8 class methods ---
Inst_SMEM__S_LOAD_DWORDX8::Inst_SMEM__S_LOAD_DWORDX8(InFmt_SMEM *iFmt)
: Inst_SMEM(iFmt, "s_load_dwordx8")
{
setFlag(MemoryRef);
setFlag(Load);
} // Inst_SMEM__S_LOAD_DWORDX8
Inst_SMEM__S_LOAD_DWORDX8::~Inst_SMEM__S_LOAD_DWORDX8()
{
} // ~Inst_SMEM__S_LOAD_DWORDX8
// --- description from .arch file ---
// Read 8 dwords from scalar data cache. See S_LOAD_DWORD for details on
// the offset input.
void
Inst_SMEM__S_LOAD_DWORDX8::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ScalarRegU32 offset(0);
ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);
addr.read();
if (instData.IMM) {
offset = extData.OFFSET;
} else {
ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
off_sgpr.read();
offset = off_sgpr.rawData();
}
calcAddr(gpuDynInst, addr, offset);
gpuDynInst->computeUnit()->scalarMemoryPipe.
issueRequest(gpuDynInst);
} // execute
void
Inst_SMEM__S_LOAD_DWORDX8::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemRead<8>(gpuDynInst);
} // initiateAcc
void
Inst_SMEM__S_LOAD_DWORDX8::completeAcc(GPUDynInstPtr gpuDynInst)
{
ScalarOperandU256 sdst(gpuDynInst, instData.SDATA);
sdst.write();
} // completeAcc
// --- Inst_SMEM__S_LOAD_DWORDX16 class methods ---
Inst_SMEM__S_LOAD_DWORDX16::Inst_SMEM__S_LOAD_DWORDX16(InFmt_SMEM *iFmt)
: Inst_SMEM(iFmt, "s_load_dwordx16")
{
setFlag(MemoryRef);
setFlag(Load);
} // Inst_SMEM__S_LOAD_DWORDX16
Inst_SMEM__S_LOAD_DWORDX16::~Inst_SMEM__S_LOAD_DWORDX16()
{
} // ~Inst_SMEM__S_LOAD_DWORDX16
// --- description from .arch file ---
// Read 16 dwords from scalar data cache. See S_LOAD_DWORD for details on
// the offset input.
void
Inst_SMEM__S_LOAD_DWORDX16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ScalarRegU32 offset(0);
ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);
addr.read();
if (instData.IMM) {
offset = extData.OFFSET;
} else {
ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
off_sgpr.read();
offset = off_sgpr.rawData();
}
calcAddr(gpuDynInst, addr, offset);
gpuDynInst->computeUnit()->scalarMemoryPipe.
issueRequest(gpuDynInst);
} // execute
void
Inst_SMEM__S_LOAD_DWORDX16::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemRead<16>(gpuDynInst);
} // initiateAcc
void
Inst_SMEM__S_LOAD_DWORDX16::completeAcc(GPUDynInstPtr gpuDynInst)
{
ScalarOperandU512 sdst(gpuDynInst, instData.SDATA);
sdst.write();
} // completeAcc
// --- Inst_SMEM__S_BUFFER_LOAD_DWORD class methods ---
Inst_SMEM__S_BUFFER_LOAD_DWORD::Inst_SMEM__S_BUFFER_LOAD_DWORD(
InFmt_SMEM *iFmt)
: Inst_SMEM(iFmt, "s_buffer_load_dword")
{
setFlag(MemoryRef);
setFlag(Load);
} // Inst_SMEM__S_BUFFER_LOAD_DWORD
Inst_SMEM__S_BUFFER_LOAD_DWORD::~Inst_SMEM__S_BUFFER_LOAD_DWORD()
{
} // ~Inst_SMEM__S_BUFFER_LOAD_DWORD
// --- description from .arch file ---
// Read 1 dword from scalar data cache. See S_LOAD_DWORD for details on the
// --- offset input.
void
Inst_SMEM__S_BUFFER_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ScalarRegU32 offset(0);
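// For the buffer form, SBASE selects a 128-bit buffer resource
// descriptor spanning four consecutive SGPRs rather than a plain
// 64-bit address; calcAddr derives the base address from the
// descriptor.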
ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);
rsrcDesc.read();
if (instData.IMM) {
offset = extData.OFFSET;
} else {
ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
off_sgpr.read();
offset = off_sgpr.rawData();
}
calcAddr(gpuDynInst, rsrcDesc, offset);
gpuDynInst->computeUnit()->scalarMemoryPipe
.issueRequest(gpuDynInst);
} // execute
void
Inst_SMEM__S_BUFFER_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemRead<1>(gpuDynInst);
} // initiateAcc
void
Inst_SMEM__S_BUFFER_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
{
// 1 request, size 32
ScalarOperandU32 sdst(gpuDynInst, instData.SDATA);
sdst.write();
} // completeAcc
// --- Inst_SMEM__S_BUFFER_LOAD_DWORDX2 class methods ---
Inst_SMEM__S_BUFFER_LOAD_DWORDX2::Inst_SMEM__S_BUFFER_LOAD_DWORDX2(
InFmt_SMEM *iFmt)
: Inst_SMEM(iFmt, "s_buffer_load_dwordx2")
{
setFlag(MemoryRef);
setFlag(Load);
} // Inst_SMEM__S_BUFFER_LOAD_DWORDX2
Inst_SMEM__S_BUFFER_LOAD_DWORDX2::~Inst_SMEM__S_BUFFER_LOAD_DWORDX2()
{
} // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX2
// --- description from .arch file ---
// Read 2 dwords from scalar data cache. See S_LOAD_DWORD for details on
// the offset input.
void
Inst_SMEM__S_BUFFER_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ScalarRegU32 offset(0);
ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);
rsrcDesc.read();
if (instData.IMM) {
offset = extData.OFFSET;
} else {
ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
off_sgpr.read();
offset = off_sgpr.rawData();
}
calcAddr(gpuDynInst, rsrcDesc, offset);
gpuDynInst->computeUnit()->scalarMemoryPipe
.issueRequest(gpuDynInst);
} // execute
void
Inst_SMEM__S_BUFFER_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemRead<2>(gpuDynInst);
} // initiateAcc
void
Inst_SMEM__S_BUFFER_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
{
// use U64 because 2 requests, each size 32
ScalarOperandU64 sdst(gpuDynInst, instData.SDATA);
sdst.write();
} // completeAcc
// --- Inst_SMEM__S_BUFFER_LOAD_DWORDX4 class methods ---
Inst_SMEM__S_BUFFER_LOAD_DWORDX4::Inst_SMEM__S_BUFFER_LOAD_DWORDX4(
InFmt_SMEM *iFmt)
: Inst_SMEM(iFmt, "s_buffer_load_dwordx4")
{
setFlag(MemoryRef);
setFlag(Load);
} // Inst_SMEM__S_BUFFER_LOAD_DWORDX4
Inst_SMEM__S_BUFFER_LOAD_DWORDX4::~Inst_SMEM__S_BUFFER_LOAD_DWORDX4()
{
} // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX4
// --- description from .arch file ---
// Read 4 dwords from scalar data cache. See S_LOAD_DWORD for details on
// the offset input.
void
Inst_SMEM__S_BUFFER_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ScalarRegU32 offset(0);
ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);
rsrcDesc.read();
if (instData.IMM) {
offset = extData.OFFSET;
} else {
ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
off_sgpr.read();
offset = off_sgpr.rawData();
}
calcAddr(gpuDynInst, rsrcDesc, offset);
gpuDynInst->computeUnit()->scalarMemoryPipe
.issueRequest(gpuDynInst);
} // execute
void
Inst_SMEM__S_BUFFER_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemRead<4>(gpuDynInst);
} // initiateAcc
void
Inst_SMEM__S_BUFFER_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
{
// 4 requests, each size 32
ScalarOperandU128 sdst(gpuDynInst, instData.SDATA);
sdst.write();
} // completeAcc
// --- Inst_SMEM__S_BUFFER_LOAD_DWORDX8 class methods ---
Inst_SMEM__S_BUFFER_LOAD_DWORDX8::Inst_SMEM__S_BUFFER_LOAD_DWORDX8(
InFmt_SMEM *iFmt)
: Inst_SMEM(iFmt, "s_buffer_load_dwordx8")
{
setFlag(MemoryRef);
setFlag(Load);
} // Inst_SMEM__S_BUFFER_LOAD_DWORDX8
Inst_SMEM__S_BUFFER_LOAD_DWORDX8::~Inst_SMEM__S_BUFFER_LOAD_DWORDX8()
{
} // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX8
// --- description from .arch file ---
// Read 8 dwords from scalar data cache. See S_LOAD_DWORD for details on
// the offset input.
void
Inst_SMEM__S_BUFFER_LOAD_DWORDX8::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ScalarRegU32 offset(0);
ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);
rsrcDesc.read();
if (instData.IMM) {
offset = extData.OFFSET;
} else {
ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
off_sgpr.read();
offset = off_sgpr.rawData();
}
calcAddr(gpuDynInst, rsrcDesc, offset);
gpuDynInst->computeUnit()->scalarMemoryPipe
.issueRequest(gpuDynInst);
} // execute
void
Inst_SMEM__S_BUFFER_LOAD_DWORDX8::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemRead<8>(gpuDynInst);
} // initiateAcc
void
Inst_SMEM__S_BUFFER_LOAD_DWORDX8::completeAcc(GPUDynInstPtr gpuDynInst)
{
// 8 requests, each size 32
ScalarOperandU256 sdst(gpuDynInst, instData.SDATA);
sdst.write();
} // completeAcc
// --- Inst_SMEM__S_BUFFER_LOAD_DWORDX16 class methods ---
Inst_SMEM__S_BUFFER_LOAD_DWORDX16::Inst_SMEM__S_BUFFER_LOAD_DWORDX16(
InFmt_SMEM *iFmt)
: Inst_SMEM(iFmt, "s_buffer_load_dwordx16")
{
setFlag(MemoryRef);
setFlag(Load);
} // Inst_SMEM__S_BUFFER_LOAD_DWORDX16
Inst_SMEM__S_BUFFER_LOAD_DWORDX16::~Inst_SMEM__S_BUFFER_LOAD_DWORDX16()
{
} // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX16
// --- description from .arch file ---
// Read 16 dwords from scalar data cache. See S_LOAD_DWORD for details on
// the offset input.
void
Inst_SMEM__S_BUFFER_LOAD_DWORDX16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ScalarRegU32 offset(0);
ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);
rsrcDesc.read();
if (instData.IMM) {
offset = extData.OFFSET;
} else {
ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
off_sgpr.read();
offset = off_sgpr.rawData();
}
calcAddr(gpuDynInst, rsrcDesc, offset);
gpuDynInst->computeUnit()->scalarMemoryPipe
.issueRequest(gpuDynInst);
} // execute
void
Inst_SMEM__S_BUFFER_LOAD_DWORDX16::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemRead<16>(gpuDynInst);
} // initiateAcc
void
Inst_SMEM__S_BUFFER_LOAD_DWORDX16::completeAcc(GPUDynInstPtr gpuDynInst)
{
// 16 requests, each size 32
ScalarOperandU512 sdst(gpuDynInst, instData.SDATA);
sdst.write();
} // completeAcc
// --- Inst_SMEM__S_STORE_DWORD class methods ---
Inst_SMEM__S_STORE_DWORD::Inst_SMEM__S_STORE_DWORD(InFmt_SMEM *iFmt)
: Inst_SMEM(iFmt, "s_store_dword")
{
setFlag(MemoryRef);
setFlag(Store);
} // Inst_SMEM__S_STORE_DWORD
Inst_SMEM__S_STORE_DWORD::~Inst_SMEM__S_STORE_DWORD()
{
} // ~Inst_SMEM__S_STORE_DWORD
// --- description from .arch file ---
// Write 1 dword to scalar data cache.
// If the offset is specified as an SGPR, the SGPR contains an unsigned
// BYTE offset (the 2 LSBs are ignored).
// If the offset is specified as an immediate 20-bit constant, the
// constant is an unsigned BYTE offset.
void
Inst_SMEM__S_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ScalarRegU32 offset(0);
ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);
ConstScalarOperandU32 sdata(gpuDynInst, instData.SDATA);
addr.read();
sdata.read();
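// Stores stage their data into the instruction's scalar_data
// buffer here; initiateAcc() later copies it into the outgoing
// memory request.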
std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(),
sizeof(ScalarRegU32));
if (instData.IMM) {
offset = extData.OFFSET;
} else {
ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
off_sgpr.read();
offset = off_sgpr.rawData();
}
calcAddr(gpuDynInst, addr, offset);
gpuDynInst->computeUnit()->scalarMemoryPipe.
issueRequest(gpuDynInst);
} // execute
void
Inst_SMEM__S_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemWrite<1>(gpuDynInst);
} // initiateAcc
void
Inst_SMEM__S_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_SMEM__S_STORE_DWORDX2 class methods ---
Inst_SMEM__S_STORE_DWORDX2::Inst_SMEM__S_STORE_DWORDX2(InFmt_SMEM *iFmt)
: Inst_SMEM(iFmt, "s_store_dwordx2")
{
setFlag(MemoryRef);
setFlag(Store);
} // Inst_SMEM__S_STORE_DWORDX2
Inst_SMEM__S_STORE_DWORDX2::~Inst_SMEM__S_STORE_DWORDX2()
{
} // ~Inst_SMEM__S_STORE_DWORDX2
// --- description from .arch file ---
// Write 2 dwords to scalar data cache. See S_STORE_DWORD for details on
// the offset input.
void
Inst_SMEM__S_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ScalarRegU32 offset(0);
ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);
ConstScalarOperandU64 sdata(gpuDynInst, instData.SDATA);
addr.read();
sdata.read();
std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(),
sizeof(ScalarRegU64));
if (instData.IMM) {
offset = extData.OFFSET;
} else {
ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
off_sgpr.read();
offset = off_sgpr.rawData();
}
calcAddr(gpuDynInst, addr, offset);
gpuDynInst->computeUnit()->scalarMemoryPipe.
issueRequest(gpuDynInst);
} // execute
void
Inst_SMEM__S_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemWrite<2>(gpuDynInst);
} // initiateAcc
void
Inst_SMEM__S_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_SMEM__S_STORE_DWORDX4 class methods ---
Inst_SMEM__S_STORE_DWORDX4::Inst_SMEM__S_STORE_DWORDX4(InFmt_SMEM *iFmt)
: Inst_SMEM(iFmt, "s_store_dwordx4")
{
setFlag(MemoryRef);
setFlag(Store);
} // Inst_SMEM__S_STORE_DWORDX4
Inst_SMEM__S_STORE_DWORDX4::~Inst_SMEM__S_STORE_DWORDX4()
{
} // ~Inst_SMEM__S_STORE_DWORDX4
// --- description from .arch file ---
// Write 4 dwords to scalar data cache. See S_STORE_DWORD for details on
// the offset input.
void
Inst_SMEM__S_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ScalarRegU32 offset(0);
ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);
ConstScalarOperandU128 sdata(gpuDynInst, instData.SDATA);
addr.read();
sdata.read();
std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(),
4 * sizeof(ScalarRegU32));
if (instData.IMM) {
offset = extData.OFFSET;
} else {
ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
off_sgpr.read();
offset = off_sgpr.rawData();
}
calcAddr(gpuDynInst, addr, offset);
gpuDynInst->computeUnit()->scalarMemoryPipe.
issueRequest(gpuDynInst);
} // execute
void
Inst_SMEM__S_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemWrite<4>(gpuDynInst);
} // initiateAcc
void
Inst_SMEM__S_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_SMEM__S_BUFFER_STORE_DWORD class methods ---
Inst_SMEM__S_BUFFER_STORE_DWORD::Inst_SMEM__S_BUFFER_STORE_DWORD(
InFmt_SMEM *iFmt)
: Inst_SMEM(iFmt, "s_buffer_store_dword")
{
setFlag(MemoryRef);
setFlag(Store);
} // Inst_SMEM__S_BUFFER_STORE_DWORD
Inst_SMEM__S_BUFFER_STORE_DWORD::~Inst_SMEM__S_BUFFER_STORE_DWORD()
{
} // ~Inst_SMEM__S_BUFFER_STORE_DWORD
// --- description from .arch file ---
// Write 1 dword to scalar data cache. See S_STORE_DWORD for details on the
// --- offset input.
void
Inst_SMEM__S_BUFFER_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_SMEM__S_BUFFER_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_SMEM__S_BUFFER_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_SMEM__S_BUFFER_STORE_DWORDX2 class methods ---
Inst_SMEM__S_BUFFER_STORE_DWORDX2::Inst_SMEM__S_BUFFER_STORE_DWORDX2(
InFmt_SMEM *iFmt)
: Inst_SMEM(iFmt, "s_buffer_store_dwordx2")
{
setFlag(MemoryRef);
setFlag(Store);
} // Inst_SMEM__S_BUFFER_STORE_DWORDX2
Inst_SMEM__S_BUFFER_STORE_DWORDX2::~Inst_SMEM__S_BUFFER_STORE_DWORDX2()
{
} // ~Inst_SMEM__S_BUFFER_STORE_DWORDX2
// --- description from .arch file ---
// Write 2 dwords to scalar data cache. See S_STORE_DWORD for details on
// the offset input.
void
Inst_SMEM__S_BUFFER_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_SMEM__S_BUFFER_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_SMEM__S_BUFFER_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_SMEM__S_BUFFER_STORE_DWORDX4 class methods ---
Inst_SMEM__S_BUFFER_STORE_DWORDX4::Inst_SMEM__S_BUFFER_STORE_DWORDX4(
InFmt_SMEM *iFmt)
: Inst_SMEM(iFmt, "s_buffer_store_dwordx4")
{
setFlag(MemoryRef);
setFlag(Store);
} // Inst_SMEM__S_BUFFER_STORE_DWORDX4
Inst_SMEM__S_BUFFER_STORE_DWORDX4::~Inst_SMEM__S_BUFFER_STORE_DWORDX4()
{
} // ~Inst_SMEM__S_BUFFER_STORE_DWORDX4
// --- description from .arch file ---
// Write 4 dwords to scalar data cache. See S_STORE_DWORD for details on
// the offset input.
void
Inst_SMEM__S_BUFFER_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_SMEM__S_BUFFER_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_SMEM__S_BUFFER_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_SMEM__S_DCACHE_INV class methods ---
Inst_SMEM__S_DCACHE_INV::Inst_SMEM__S_DCACHE_INV(InFmt_SMEM *iFmt)
: Inst_SMEM(iFmt, "s_dcache_inv")
{
} // Inst_SMEM__S_DCACHE_INV
Inst_SMEM__S_DCACHE_INV::~Inst_SMEM__S_DCACHE_INV()
{
} // ~Inst_SMEM__S_DCACHE_INV
// --- description from .arch file ---
// Invalidate the scalar data cache.
void
Inst_SMEM__S_DCACHE_INV::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SMEM__S_DCACHE_WB class methods ---
Inst_SMEM__S_DCACHE_WB::Inst_SMEM__S_DCACHE_WB(InFmt_SMEM *iFmt)
: Inst_SMEM(iFmt, "s_dcache_wb")
{
} // Inst_SMEM__S_DCACHE_WB
Inst_SMEM__S_DCACHE_WB::~Inst_SMEM__S_DCACHE_WB()
{
} // ~Inst_SMEM__S_DCACHE_WB
// --- description from .arch file ---
// Write back dirty data in the scalar data cache.
void
Inst_SMEM__S_DCACHE_WB::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SMEM__S_DCACHE_INV_VOL class methods ---
Inst_SMEM__S_DCACHE_INV_VOL::Inst_SMEM__S_DCACHE_INV_VOL(InFmt_SMEM *iFmt)
: Inst_SMEM(iFmt, "s_dcache_inv_vol")
{
} // Inst_SMEM__S_DCACHE_INV_VOL
Inst_SMEM__S_DCACHE_INV_VOL::~Inst_SMEM__S_DCACHE_INV_VOL()
{
} // ~Inst_SMEM__S_DCACHE_INV_VOL
// --- description from .arch file ---
// Invalidate the scalar data cache volatile lines.
void
Inst_SMEM__S_DCACHE_INV_VOL::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SMEM__S_DCACHE_WB_VOL class methods ---
Inst_SMEM__S_DCACHE_WB_VOL::Inst_SMEM__S_DCACHE_WB_VOL(InFmt_SMEM *iFmt)
: Inst_SMEM(iFmt, "s_dcache_wb_vol")
{
} // Inst_SMEM__S_DCACHE_WB_VOL
Inst_SMEM__S_DCACHE_WB_VOL::~Inst_SMEM__S_DCACHE_WB_VOL()
{
} // ~Inst_SMEM__S_DCACHE_WB_VOL
// --- description from .arch file ---
// Write back dirty data in the scalar data cache volatile lines.
void
Inst_SMEM__S_DCACHE_WB_VOL::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SMEM__S_MEMTIME class methods ---
Inst_SMEM__S_MEMTIME::Inst_SMEM__S_MEMTIME(InFmt_SMEM *iFmt)
: Inst_SMEM(iFmt, "s_memtime")
{
} // Inst_SMEM__S_MEMTIME
Inst_SMEM__S_MEMTIME::~Inst_SMEM__S_MEMTIME()
{
} // ~Inst_SMEM__S_MEMTIME
// --- description from .arch file ---
// Return current 64-bit timestamp.
void
Inst_SMEM__S_MEMTIME::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SMEM__S_MEMREALTIME class methods ---
Inst_SMEM__S_MEMREALTIME::Inst_SMEM__S_MEMREALTIME(InFmt_SMEM *iFmt)
: Inst_SMEM(iFmt, "s_memrealtime")
{
} // Inst_SMEM__S_MEMREALTIME
Inst_SMEM__S_MEMREALTIME::~Inst_SMEM__S_MEMREALTIME()
{
} // ~Inst_SMEM__S_MEMREALTIME
// --- description from .arch file ---
// Return current 64-bit RTC.
void
Inst_SMEM__S_MEMREALTIME::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SMEM__S_ATC_PROBE class methods ---
Inst_SMEM__S_ATC_PROBE::Inst_SMEM__S_ATC_PROBE(InFmt_SMEM *iFmt)
: Inst_SMEM(iFmt, "s_atc_probe")
{
} // Inst_SMEM__S_ATC_PROBE
Inst_SMEM__S_ATC_PROBE::~Inst_SMEM__S_ATC_PROBE()
{
} // ~Inst_SMEM__S_ATC_PROBE
// --- description from .arch file ---
// Probe or prefetch an address into the SQC data cache.
void
Inst_SMEM__S_ATC_PROBE::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SMEM__S_ATC_PROBE_BUFFER class methods ---
Inst_SMEM__S_ATC_PROBE_BUFFER::Inst_SMEM__S_ATC_PROBE_BUFFER(
InFmt_SMEM *iFmt)
: Inst_SMEM(iFmt, "s_atc_probe_buffer")
{
} // Inst_SMEM__S_ATC_PROBE_BUFFER
Inst_SMEM__S_ATC_PROBE_BUFFER::~Inst_SMEM__S_ATC_PROBE_BUFFER()
{
} // ~Inst_SMEM__S_ATC_PROBE_BUFFER
// --- description from .arch file ---
// Probe or prefetch an address into the SQC data cache.
void
Inst_SMEM__S_ATC_PROBE_BUFFER::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP2__V_CNDMASK_B32 class methods ---
Inst_VOP2__V_CNDMASK_B32::Inst_VOP2__V_CNDMASK_B32(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_cndmask_b32")
{
setFlag(ALU);
setFlag(ReadsVCC);
} // Inst_VOP2__V_CNDMASK_B32
Inst_VOP2__V_CNDMASK_B32::~Inst_VOP2__V_CNDMASK_B32()
{
} // ~Inst_VOP2__V_CNDMASK_B32
// --- description from .arch file ---
// D.u = (VCC[i] ? S1.u : S0.u) (i = threadID in wave); VOP3: specify VCC
// as a scalar GPR in S2.
void
Inst_VOP2__V_CNDMASK_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
vcc.read();
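// Per-lane select: lane i reads bit i of the 64-bit VCC value,
// taking src1 when the bit is set and src0 otherwise.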
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane]
= bits(vcc.rawData(), lane) ? src1[lane] : src0[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_ADD_F32 class methods ---
Inst_VOP2__V_ADD_F32::Inst_VOP2__V_ADD_F32(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_add_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP2__V_ADD_F32
Inst_VOP2__V_ADD_F32::~Inst_VOP2__V_ADD_F32()
{
} // ~Inst_VOP2__V_ADD_F32
// --- description from .arch file ---
// D.f = S0.f + S1.f.
void
Inst_VOP2__V_ADD_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
VecOperandF32 src1(gpuDynInst, instData.VSRC1);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
if (isDPPInst()) {
VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
src0_dpp.read();
DPRINTF(VEGA, "Handling V_ADD_F32 SRC DPP. SRC0: register v[%d], "
"DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
"SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, "
"BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
extData.iFmt_VOP_DPP.DPP_CTRL,
extData.iFmt_VOP_DPP.SRC0_ABS,
extData.iFmt_VOP_DPP.SRC0_NEG,
extData.iFmt_VOP_DPP.SRC1_ABS,
extData.iFmt_VOP_DPP.SRC1_NEG,
extData.iFmt_VOP_DPP.BC,
extData.iFmt_VOP_DPP.BANK_MASK,
extData.iFmt_VOP_DPP.ROW_MASK);
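// processDPP applies the DPP_CTRL lane permutation (row shifts,
// rotates, mirrors, etc.) along with the bank/row masks and
// bound-ctrl to src0 before the per-lane add below.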
processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0_dpp[lane] + src1[lane];
}
}
} else {
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] + src1[lane];
}
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_SUB_F32 class methods ---
Inst_VOP2__V_SUB_F32::Inst_VOP2__V_SUB_F32(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_sub_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP2__V_SUB_F32
Inst_VOP2__V_SUB_F32::~Inst_VOP2__V_SUB_F32()
{
} // ~Inst_VOP2__V_SUB_F32
// --- description from .arch file ---
// D.f = S0.f - S1.f.
// SQ translates to V_ADD_F32.
void
Inst_VOP2__V_SUB_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] - src1[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_SUBREV_F32 class methods ---
Inst_VOP2__V_SUBREV_F32::Inst_VOP2__V_SUBREV_F32(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_subrev_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP2__V_SUBREV_F32
Inst_VOP2__V_SUBREV_F32::~Inst_VOP2__V_SUBREV_F32()
{
} // ~Inst_VOP2__V_SUBREV_F32
// --- description from .arch file ---
// D.f = S1.f - S0.f.
// SQ translates to V_ADD_F32.
void
Inst_VOP2__V_SUBREV_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] - src0[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_MUL_LEGACY_F32 class methods ---
Inst_VOP2__V_MUL_LEGACY_F32::Inst_VOP2__V_MUL_LEGACY_F32(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_mul_legacy_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP2__V_MUL_LEGACY_F32
Inst_VOP2__V_MUL_LEGACY_F32::~Inst_VOP2__V_MUL_LEGACY_F32()
{
} // ~Inst_VOP2__V_MUL_LEGACY_F32
// --- description from .arch file ---
// D.f = S0.f * S1.f (DX9 rules, 0.0*x = 0.0).
void
Inst_VOP2__V_MUL_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] * src1[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_MUL_F32 class methods ---
Inst_VOP2__V_MUL_F32::Inst_VOP2__V_MUL_F32(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_mul_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP2__V_MUL_F32
Inst_VOP2__V_MUL_F32::~Inst_VOP2__V_MUL_F32()
{
} // ~Inst_VOP2__V_MUL_F32
// --- description from .arch file ---
// D.f = S0.f * S1.f.
void
Inst_VOP2__V_MUL_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
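// The chain below spells out the IEEE corner cases explicitly:
// NaN propagation, 0 * inf = NaN, and correctly signed zero and
// infinity results; subnormal inputs are treated as zero here.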
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (std::isnan(src0[lane]) ||
std::isnan(src1[lane])) {
vdst[lane] = NAN;
} else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
std::fpclassify(src0[lane]) == FP_ZERO) &&
!std::signbit(src0[lane])) {
if (std::isinf(src1[lane])) {
vdst[lane] = NAN;
} else if (!std::signbit(src1[lane])) {
vdst[lane] = +0.0;
} else {
vdst[lane] = -0.0;
}
} else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
std::fpclassify(src0[lane]) == FP_ZERO) &&
std::signbit(src0[lane])) {
if (std::isinf(src1[lane])) {
vdst[lane] = NAN;
} else if (std::signbit(src1[lane])) {
vdst[lane] = +0.0;
} else {
vdst[lane] = -0.0;
}
} else if (std::isinf(src0[lane]) &&
!std::signbit(src0[lane])) {
if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
std::fpclassify(src1[lane]) == FP_ZERO) {
vdst[lane] = NAN;
} else if (!std::signbit(src1[lane])) {
vdst[lane] = +INFINITY;
} else {
vdst[lane] = -INFINITY;
}
} else if (std::isinf(src0[lane]) &&
std::signbit(src0[lane])) {
if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
std::fpclassify(src1[lane]) == FP_ZERO) {
vdst[lane] = NAN;
} else if (std::signbit(src1[lane])) {
vdst[lane] = +INFINITY;
} else {
vdst[lane] = -INFINITY;
}
} else {
vdst[lane] = src0[lane] * src1[lane];
}
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_MUL_I32_I24 class methods ---
Inst_VOP2__V_MUL_I32_I24::Inst_VOP2__V_MUL_I32_I24(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_mul_i32_i24")
{
setFlag(ALU);
} // Inst_VOP2__V_MUL_I32_I24
Inst_VOP2__V_MUL_I32_I24::~Inst_VOP2__V_MUL_I32_I24()
{
} // ~Inst_VOP2__V_MUL_I32_I24
// --- description from .arch file ---
// D.i = S0.i[23:0] * S1.i[23:0].
void
Inst_VOP2__V_MUL_I32_I24::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
VecOperandI32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
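// Each source is truncated to its low 24 bits and sign-extended
// (e.g. 0x00ffffff is treated as -1); the destination keeps the
// low 32 bits of the up-to-48-bit product.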
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = sext<24>(bits(src0[lane], 23, 0))
* sext<24>(bits(src1[lane], 23, 0));
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_MUL_HI_I32_I24 class methods ---
Inst_VOP2__V_MUL_HI_I32_I24::Inst_VOP2__V_MUL_HI_I32_I24(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_mul_hi_i32_i24")
{
setFlag(ALU);
} // Inst_VOP2__V_MUL_HI_I32_I24
Inst_VOP2__V_MUL_HI_I32_I24::~Inst_VOP2__V_MUL_HI_I32_I24()
{
} // ~Inst_VOP2__V_MUL_HI_I32_I24
// --- description from .arch file ---
// D.i = (S0.i[23:0] * S1.i[23:0])>>32.
void
Inst_VOP2__V_MUL_HI_I32_I24::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
VecOperandI32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
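// High half of the same signed 24-bit multiply: the product is
// formed in 64 bits and bits [63:32] are returned, i.e. at most
// the top 16 product bits plus sign extension.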
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
VecElemI64 tmp_src0
= (VecElemI64)sext<24>(bits(src0[lane], 23, 0));
VecElemI64 tmp_src1
= (VecElemI64)sext<24>(bits(src1[lane], 23, 0));
vdst[lane] = (VecElemI32)((tmp_src0 * tmp_src1) >> 32);
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_MUL_U32_U24 class methods ---
Inst_VOP2__V_MUL_U32_U24::Inst_VOP2__V_MUL_U32_U24(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_mul_u32_u24")
{
setFlag(ALU);
} // Inst_VOP2__V_MUL_U32_U24
Inst_VOP2__V_MUL_U32_U24::~Inst_VOP2__V_MUL_U32_U24()
{
} // ~Inst_VOP2__V_MUL_U32_U24
// --- description from .arch file ---
// D.u = S0.u[23:0] * S1.u[23:0].
void
Inst_VOP2__V_MUL_U32_U24::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
VecOperandU32 src1(gpuDynInst, instData.VSRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
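// SDWA lets each source select a sub-dword (byte or word) of its
// register; the orig* copies preserve the untouched register
// values so processSDWA_src/processSDWA_dst can splice the
// selected fields in and out without clobbering unselected bits.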
if (isSDWAInst()) {
VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
// use copies of original src0, src1, and dest during selecting
VecOperandU32 origSrc0_sdwa(gpuDynInst,
extData.iFmt_VOP_SDWA.SRC0);
VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
VecOperandU32 origVdst(gpuDynInst, instData.VDST);
src0_sdwa.read();
origSrc0_sdwa.read();
origSrc1.read();
DPRINTF(VEGA, "Handling V_MUL_U32_U24 SRC SDWA. SRC0: register "
"v[%d], DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: "
"%d, SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: "
"%d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
extData.iFmt_VOP_SDWA.DST_U,
extData.iFmt_VOP_SDWA.CLMP,
extData.iFmt_VOP_SDWA.SRC0_SEL,
extData.iFmt_VOP_SDWA.SRC0_SEXT,
extData.iFmt_VOP_SDWA.SRC0_NEG,
extData.iFmt_VOP_SDWA.SRC0_ABS,
extData.iFmt_VOP_SDWA.SRC1_SEL,
extData.iFmt_VOP_SDWA.SRC1_SEXT,
extData.iFmt_VOP_SDWA.SRC1_NEG,
extData.iFmt_VOP_SDWA.SRC1_ABS);
processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
src1, origSrc1);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = bits(src0_sdwa[lane], 23, 0) *
bits(src1[lane], 23, 0);
origVdst[lane] = vdst[lane]; // keep copy consistent
}
}
processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
} else {
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = bits(src0[lane], 23, 0) *
bits(src1[lane], 23, 0);
}
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_MUL_HI_U32_U24 class methods ---
Inst_VOP2__V_MUL_HI_U32_U24::Inst_VOP2__V_MUL_HI_U32_U24(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_mul_hi_u32_u24")
{
setFlag(ALU);
} // Inst_VOP2__V_MUL_HI_U32_U24
Inst_VOP2__V_MUL_HI_U32_U24::~Inst_VOP2__V_MUL_HI_U32_U24()
{
} // ~Inst_VOP2__V_MUL_HI_U32_U24
// --- description from .arch file ---
// D.i = (S0.u[23:0] * S1.u[23:0])>>32.
void
Inst_VOP2__V_MUL_HI_U32_U24::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0);
VecElemU64 tmp_src1 = (VecElemU64)bits(src1[lane], 23, 0);
vdst[lane] = (VecElemU32)((tmp_src0 * tmp_src1) >> 32);
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_MIN_F32 class methods ---
Inst_VOP2__V_MIN_F32::Inst_VOP2__V_MIN_F32(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_min_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP2__V_MIN_F32
Inst_VOP2__V_MIN_F32::~Inst_VOP2__V_MIN_F32()
{
} // ~Inst_VOP2__V_MIN_F32
// --- description from .arch file ---
// D.f = (S0.f < S1.f ? S0.f : S1.f).
void
Inst_VOP2__V_MIN_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
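// std::fmin returns the non-NaN operand when exactly one input is
// NaN (IEEE-754 minNum); any mode-dependent NaN behavior of the
// real hardware is not modeled here.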
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::fmin(src0[lane], src1[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_MAX_F32 class methods ---
Inst_VOP2__V_MAX_F32::Inst_VOP2__V_MAX_F32(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_max_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP2__V_MAX_F32
Inst_VOP2__V_MAX_F32::~Inst_VOP2__V_MAX_F32()
{
} // ~Inst_VOP2__V_MAX_F32
// --- description from .arch file ---
// D.f = (S0.f >= S1.f ? S0.f : S1.f).
void
Inst_VOP2__V_MAX_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::fmax(src0[lane], src1[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_MIN_I32 class methods ---
Inst_VOP2__V_MIN_I32::Inst_VOP2__V_MIN_I32(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_min_i32")
{
setFlag(ALU);
} // Inst_VOP2__V_MIN_I32
Inst_VOP2__V_MIN_I32::~Inst_VOP2__V_MIN_I32()
{
} // ~Inst_VOP2__V_MIN_I32
// --- description from .arch file ---
// D.i = min(S0.i, S1.i).
void
Inst_VOP2__V_MIN_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
VecOperandI32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::min(src0[lane], src1[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_MAX_I32 class methods ---
Inst_VOP2__V_MAX_I32::Inst_VOP2__V_MAX_I32(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_max_i32")
{
setFlag(ALU);
} // Inst_VOP2__V_MAX_I32
Inst_VOP2__V_MAX_I32::~Inst_VOP2__V_MAX_I32()
{
} // ~Inst_VOP2__V_MAX_I32
// --- description from .arch file ---
// D.i = max(S0.i, S1.i).
void
Inst_VOP2__V_MAX_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
VecOperandI32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::max(src0[lane], src1[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_MIN_U32 class methods ---
Inst_VOP2__V_MIN_U32::Inst_VOP2__V_MIN_U32(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_min_u32")
{
setFlag(ALU);
} // Inst_VOP2__V_MIN_U32
Inst_VOP2__V_MIN_U32::~Inst_VOP2__V_MIN_U32()
{
} // ~Inst_VOP2__V_MIN_U32
// --- description from .arch file ---
// D.u = min(S0.u, S1.u).
void
Inst_VOP2__V_MIN_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::min(src0[lane], src1[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_MAX_U32 class methods ---
Inst_VOP2__V_MAX_U32::Inst_VOP2__V_MAX_U32(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_max_u32")
{
setFlag(ALU);
} // Inst_VOP2__V_MAX_U32
Inst_VOP2__V_MAX_U32::~Inst_VOP2__V_MAX_U32()
{
} // ~Inst_VOP2__V_MAX_U32
// --- description from .arch file ---
// D.u = max(S0.u, S1.u).
void
Inst_VOP2__V_MAX_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::max(src0[lane], src1[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_LSHRREV_B32 class methods ---
Inst_VOP2__V_LSHRREV_B32::Inst_VOP2__V_LSHRREV_B32(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_lshrrev_b32")
{
setFlag(ALU);
} // Inst_VOP2__V_LSHRREV_B32
Inst_VOP2__V_LSHRREV_B32::~Inst_VOP2__V_LSHRREV_B32()
{
} // ~Inst_VOP2__V_LSHRREV_B32
// --- description from .arch file ---
// D.u = S1.u >> S0.u[4:0].
// The vacated bits are set to zero.
// SQ translates this to an internal SP opcode.
void
Inst_VOP2__V_LSHRREV_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
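// "rev" shift opcodes take the shift amount in src0 (which, as
// SRC0, may be an inline constant, SGPR, or VGPR) and the value to
// shift in src1; only the low 5 bits of the shift amount are used.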
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_ASHRREV_I32 class methods ---
Inst_VOP2__V_ASHRREV_I32::Inst_VOP2__V_ASHRREV_I32(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_ashrrev_i32")
{
setFlag(ALU);
} // Inst_VOP2__V_ASHRREV_I32
Inst_VOP2__V_ASHRREV_I32::~Inst_VOP2__V_ASHRREV_I32()
{
} // ~Inst_VOP2__V_ASHRREV_I32
// --- description from .arch file ---
// D.i = signext(S1.i) >> S0.i[4:0].
// The vacated bits are set to the sign bit of the input value.
// SQ translates this to an internal SP opcode.
void
Inst_VOP2__V_ASHRREV_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
VecOperandI32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_LSHLREV_B32 class methods ---
Inst_VOP2__V_LSHLREV_B32::Inst_VOP2__V_LSHLREV_B32(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_lshlrev_b32")
{
setFlag(ALU);
} // Inst_VOP2__V_LSHLREV_B32
Inst_VOP2__V_LSHLREV_B32::~Inst_VOP2__V_LSHLREV_B32()
{
} // ~Inst_VOP2__V_LSHLREV_B32
// --- description from .arch file ---
// D.u = S1.u << S0.u[4:0].
// SQ translates this to an internal SP opcode.
void
Inst_VOP2__V_LSHLREV_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
VecOperandU32 src1(gpuDynInst, instData.VSRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
if (isSDWAInst()) {
VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
// use copies of original src0, src1, and vdst during selecting
VecOperandU32 origSrc0_sdwa(gpuDynInst,
extData.iFmt_VOP_SDWA.SRC0);
VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
VecOperandU32 origVdst(gpuDynInst, instData.VDST);
src0_sdwa.read();
origSrc0_sdwa.read();
origSrc1.read();
DPRINTF(VEGA, "Handling V_LSHLREV_B32 SRC SDWA. SRC0: register "
"v[%d], DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: "
"%d, SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: "
"%d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
extData.iFmt_VOP_SDWA.DST_U,
extData.iFmt_VOP_SDWA.CLMP,
extData.iFmt_VOP_SDWA.SRC0_SEL,
extData.iFmt_VOP_SDWA.SRC0_SEXT,
extData.iFmt_VOP_SDWA.SRC0_NEG,
extData.iFmt_VOP_SDWA.SRC0_ABS,
extData.iFmt_VOP_SDWA.SRC1_SEL,
extData.iFmt_VOP_SDWA.SRC1_SEXT,
extData.iFmt_VOP_SDWA.SRC1_NEG,
extData.iFmt_VOP_SDWA.SRC1_ABS);
processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
src1, origSrc1);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] << bits(src0_sdwa[lane], 4, 0);
origVdst[lane] = vdst[lane]; // keep copy consistent
}
}
processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
} else {
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] << bits(src0[lane], 4, 0);
}
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_AND_B32 class methods ---
Inst_VOP2__V_AND_B32::Inst_VOP2__V_AND_B32(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_and_b32")
{
setFlag(ALU);
} // Inst_VOP2__V_AND_B32
Inst_VOP2__V_AND_B32::~Inst_VOP2__V_AND_B32()
{
} // ~Inst_VOP2__V_AND_B32
// --- description from .arch file ---
// D.u = S0.u & S1.u.
// Input and output modifiers not supported.
void
Inst_VOP2__V_AND_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] & src1[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_OR_B32 class methods ---
Inst_VOP2__V_OR_B32::Inst_VOP2__V_OR_B32(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_or_b32")
{
setFlag(ALU);
} // Inst_VOP2__V_OR_B32
Inst_VOP2__V_OR_B32::~Inst_VOP2__V_OR_B32()
{
} // ~Inst_VOP2__V_OR_B32
// --- description from .arch file ---
// D.u = S0.u | S1.u.
// Input and output modifiers not supported.
void
Inst_VOP2__V_OR_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
VecOperandU32 src1(gpuDynInst, instData.VSRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
if (isSDWAInst()) {
VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
// use copies of original src0, src1, and dest during selecting
VecOperandU32 origSrc0_sdwa(gpuDynInst,
extData.iFmt_VOP_SDWA.SRC0);
VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
VecOperandU32 origVdst(gpuDynInst, instData.VDST);
src0_sdwa.read();
origSrc0_sdwa.read();
origSrc1.read();
DPRINTF(VEGA, "Handling V_OR_B32 SRC SDWA. SRC0: register v[%d], "
"DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, "
"SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
"SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
extData.iFmt_VOP_SDWA.DST_U,
extData.iFmt_VOP_SDWA.CLMP,
extData.iFmt_VOP_SDWA.SRC0_SEL,
extData.iFmt_VOP_SDWA.SRC0_SEXT,
extData.iFmt_VOP_SDWA.SRC0_NEG,
extData.iFmt_VOP_SDWA.SRC0_ABS,
extData.iFmt_VOP_SDWA.SRC1_SEL,
extData.iFmt_VOP_SDWA.SRC1_SEXT,
extData.iFmt_VOP_SDWA.SRC1_NEG,
extData.iFmt_VOP_SDWA.SRC1_ABS);
processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
src1, origSrc1);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0_sdwa[lane] | src1[lane];
origVdst[lane] = vdst[lane]; // keep copy consistent
}
}
processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
} else {
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] | src1[lane];
}
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_XOR_B32 class methods ---
Inst_VOP2__V_XOR_B32::Inst_VOP2__V_XOR_B32(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_xor_b32")
{
setFlag(ALU);
} // Inst_VOP2__V_XOR_B32
Inst_VOP2__V_XOR_B32::~Inst_VOP2__V_XOR_B32()
{
} // ~Inst_VOP2__V_XOR_B32
// --- description from .arch file ---
// D.u = S0.u ^ S1.u.
// Input and output modifiers not supported.
void
Inst_VOP2__V_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] ^ src1[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_MAC_F32 class methods ---
Inst_VOP2__V_MAC_F32::Inst_VOP2__V_MAC_F32(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_mac_f32")
{
setFlag(ALU);
setFlag(F32);
setFlag(MAC);
} // Inst_VOP2__V_MAC_F32
Inst_VOP2__V_MAC_F32::~Inst_VOP2__V_MAC_F32()
{
} // ~Inst_VOP2__V_MAC_F32
// --- description from .arch file ---
// D.f = S0.f * S1.f + D.f.
// SQ translates to V_MAD_F32.
void
Inst_VOP2__V_MAC_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
VecOperandF32 src1(gpuDynInst, instData.VSRC1);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
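// MAC also reads the destination register as the accumulator input;
// std::fma below applies a single rounding, matching fused MAD semantics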
vdst.read();
if (isDPPInst()) {
VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
src0_dpp.read();
DPRINTF(VEGA, "Handling V_MAC_F32 SRC DPP. SRC0: register v[%d], "
"DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
"SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, "
"BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
extData.iFmt_VOP_DPP.DPP_CTRL,
extData.iFmt_VOP_DPP.SRC0_ABS,
extData.iFmt_VOP_DPP.SRC0_NEG,
extData.iFmt_VOP_DPP.SRC1_ABS,
extData.iFmt_VOP_DPP.SRC1_NEG,
extData.iFmt_VOP_DPP.BC,
extData.iFmt_VOP_DPP.BANK_MASK,
extData.iFmt_VOP_DPP.ROW_MASK);
processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::fma(src0_dpp[lane], src1[lane],
vdst[lane]);
}
}
} else {
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
}
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_MADMK_F32 class methods ---
Inst_VOP2__V_MADMK_F32::Inst_VOP2__V_MADMK_F32(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_madmk_f32")
{
setFlag(ALU);
setFlag(F32);
setFlag(MAD);
} // Inst_VOP2__V_MADMK_F32
Inst_VOP2__V_MADMK_F32::~Inst_VOP2__V_MADMK_F32()
{
} // ~Inst_VOP2__V_MADMK_F32
// --- description from .arch file ---
// D.f = S0.f * K + S1.f; K is a 32-bit inline constant.
// This opcode cannot use the VOP3 encoding and cannot use input/output
// --- modifiers.
// SQ translates to V_MAD_F32.
void
Inst_VOP2__V_MADMK_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
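// K is the 32-bit literal constant that follows the instruction in
// the instruction stream (decoded into extData.imm_f32)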
VecElemF32 k = extData.imm_f32;
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::fma(src0[lane], k, src1[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_MADAK_F32 class methods ---
Inst_VOP2__V_MADAK_F32::Inst_VOP2__V_MADAK_F32(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_madak_f32")
{
setFlag(ALU);
setFlag(F32);
setFlag(MAD);
} // Inst_VOP2__V_MADAK_F32
Inst_VOP2__V_MADAK_F32::~Inst_VOP2__V_MADAK_F32()
{
} // ~Inst_VOP2__V_MADAK_F32
// --- description from .arch file ---
// D.f = S0.f * S1.f + K; K is a 32-bit inline constant.
// This opcode cannot use the VOP3 encoding and cannot use input/output
// --- modifiers.
// SQ translates to V_MAD_F32.
void
Inst_VOP2__V_MADAK_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
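// as with V_MADMK_F32, K comes from the trailing 32-bit literal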
VecElemF32 k = extData.imm_f32;
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::fma(src0[lane], src1[lane], k);
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_ADD_CO_U32 class methods ---
Inst_VOP2__V_ADD_CO_U32::Inst_VOP2__V_ADD_CO_U32(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_add_co_u32")
{
setFlag(ALU);
setFlag(WritesVCC);
} // Inst_VOP2__V_ADD_CO_U32
Inst_VOP2__V_ADD_CO_U32::~Inst_VOP2__V_ADD_CO_U32()
{
} // ~Inst_VOP2__V_ADD_CO_U32
// --- description from .arch file ---
// D.u = S0.u + S1.u;
// VCC[threadId] = (S0.u + S1.u >= 0x100000000ULL ? 1 : 0) is an UNSIGNED
// --- overflow or carry-out for V_ADDC_U32.
// In VOP3 the VCC destination may be an arbitrary SGPR-pair.
void
Inst_VOP2__V_ADD_CO_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
VecOperandU32 src1(gpuDynInst, instData.VSRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
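// VCC is a 64-bit scalar (SGPR pair) holding one carry-out bit per lane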
src0.readSrc();
src1.read();
if (isSDWAInst()) {
VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
// use copies of original src0, src1, and dest during selecting
VecOperandU32 origSrc0_sdwa(gpuDynInst,
extData.iFmt_VOP_SDWA.SRC0);
VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
VecOperandU32 origVdst(gpuDynInst, instData.VDST);
src0_sdwa.read();
origSrc0_sdwa.read();
origSrc1.read();
DPRINTF(VEGA, "Handling V_ADD_CO_U32 SRC SDWA. SRC0: register "
"v[%d], DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, "
"SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
"SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
extData.iFmt_VOP_SDWA.DST_U,
extData.iFmt_VOP_SDWA.CLMP,
extData.iFmt_VOP_SDWA.SRC0_SEL,
extData.iFmt_VOP_SDWA.SRC0_SEXT,
extData.iFmt_VOP_SDWA.SRC0_NEG,
extData.iFmt_VOP_SDWA.SRC0_ABS,
extData.iFmt_VOP_SDWA.SRC1_SEL,
extData.iFmt_VOP_SDWA.SRC1_SEXT,
extData.iFmt_VOP_SDWA.SRC1_NEG,
extData.iFmt_VOP_SDWA.SRC1_ABS);
processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
src1, origSrc1);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0_sdwa[lane] + src1[lane];
origVdst[lane] = vdst[lane]; // keep copy consistent
vcc.setBit(lane, ((VecElemU64)src0_sdwa[lane]
+ (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0);
}
}
processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
} else {
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] + src1[lane];
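// widen to 64 bits so the carry-out of the 32-bit add is observable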
vcc.setBit(lane, ((VecElemU64)src0[lane]
+ (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0);
}
}
}
vcc.write();
vdst.write();
} // execute
// --- Inst_VOP2__V_SUB_CO_U32 class methods ---
Inst_VOP2__V_SUB_CO_U32::Inst_VOP2__V_SUB_CO_U32(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_sub_co_u32")
{
setFlag(ALU);
setFlag(WritesVCC);
} // Inst_VOP2__V_SUB_CO_U32
Inst_VOP2__V_SUB_CO_U32::~Inst_VOP2__V_SUB_CO_U32()
{
} // ~Inst_VOP2__V_SUB_CO_U32
// --- description from .arch file ---
// D.u = S0.u - S1.u;
// VCC[threadId] = (S1.u > S0.u ? 1 : 0) is an UNSIGNED overflow or
// carry-out for V_SUBB_U32.
// In VOP3 the VCC destination may be an arbitrary SGPR-pair.
void
Inst_VOP2__V_SUB_CO_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] - src1[lane];
vcc.setBit(lane, src1[lane] > src0[lane] ? 1 : 0);
}
}
vdst.write();
vcc.write();
} // execute
// --- Inst_VOP2__V_SUBREV_CO_U32 class methods ---
Inst_VOP2__V_SUBREV_CO_U32::Inst_VOP2__V_SUBREV_CO_U32(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_subrev_co_u32")
{
setFlag(ALU);
setFlag(WritesVCC);
} // Inst_VOP2__V_SUBREV_CO_U32
Inst_VOP2__V_SUBREV_CO_U32::~Inst_VOP2__V_SUBREV_CO_U32()
{
} // ~Inst_VOP2__V_SUBREV_CO_U32
// --- description from .arch file ---
// D.u = S1.u - S0.u;
// VCC[threadId] = (S0.u > S1.u ? 1 : 0) is an UNSIGNED overflow or
// carry-out for V_SUBB_U32.
// In VOP3 the VCC destination may be an arbitrary SGPR-pair.
void
Inst_VOP2__V_SUBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] - src0[lane];
vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
vdst.write();
vcc.write();
} // execute
// --- Inst_VOP2__V_ADDC_CO_U32 class methods ---
Inst_VOP2__V_ADDC_CO_U32::Inst_VOP2__V_ADDC_CO_U32(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_addc_co_u32")
{
setFlag(ALU);
setFlag(WritesVCC);
setFlag(ReadsVCC);
} // Inst_VOP2__V_ADDC_CO_U32
Inst_VOP2__V_ADDC_CO_U32::~Inst_VOP2__V_ADDC_CO_U32()
{
} // ~Inst_VOP2__V_ADDC_CO_U32
// --- description from .arch file ---
// D.u = S0.u + S1.u + VCC[threadId];
// VCC[threadId] = (S0.u + S1.u + VCC[threadId] >= 0x100000000ULL ? 1 : 0)
// is an UNSIGNED overflow.
// In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
// source comes from the SGPR-pair at S2.u.
void
Inst_VOP2__V_ADDC_CO_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
vcc.read();
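// each lane's carry-in is the corresponding bit of VCC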
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] + src1[lane]
+ bits(vcc.rawData(), lane);
vcc.setBit(lane, ((VecElemU64)src0[lane]
+ (VecElemU64)src1[lane]
+ (VecElemU64)bits(vcc.rawData(), lane, lane))
>= 0x100000000ULL ? 1 : 0);
}
}
vdst.write();
vcc.write();
} // execute
// --- Inst_VOP2__V_SUBB_CO_U32 class methods ---
Inst_VOP2__V_SUBB_CO_U32::Inst_VOP2__V_SUBB_CO_U32(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_subb_co_u32")
{
setFlag(ALU);
setFlag(WritesVCC);
setFlag(ReadsVCC);
} // Inst_VOP2__V_SUBB_CO_U32
Inst_VOP2__V_SUBB_CO_U32::~Inst_VOP2__V_SUBB_CO_U32()
{
} // ~Inst_VOP2__V_SUBB_CO_U32
// --- description from .arch file ---
// D.u = S0.u - S1.u - VCC[threadId];
// VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED
// --- overflow.
// In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
// --- source comes from the SGPR-pair at S2.u.
void
Inst_VOP2__V_SUBB_CO_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
vcc.read();
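// per-lane borrow-in comes from VCC; borrow-out is set when
// S1 plus the borrow-in exceeds S0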
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane]
= src0[lane] - src1[lane] - bits(vcc.rawData(), lane);
vcc.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane))
> src0[lane] ? 1 : 0);
}
}
vdst.write();
vcc.write();
} // execute
// --- Inst_VOP2__V_SUBBREV_CO_U32 class methods ---
Inst_VOP2__V_SUBBREV_CO_U32::Inst_VOP2__V_SUBBREV_CO_U32(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_subbrev_co_u32")
{
setFlag(ALU);
setFlag(WritesVCC);
setFlag(ReadsVCC);
} // Inst_VOP2__V_SUBBREV_CO_U32
Inst_VOP2__V_SUBBREV_CO_U32::~Inst_VOP2__V_SUBBREV_CO_U32()
{
} // ~Inst_VOP2__V_SUBBREV_CO_U32
// --- description from .arch file ---
// D.u = S1.u - S0.u - VCC[threadId];
// VCC[threadId] = (S0.u + VCC[threadId] > S1.u ? 1 : 0) is an UNSIGNED
// overflow.
// In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
// source comes from the SGPR-pair at S2.u. SQ translates this to
// V_SUBB_U32 with reversed operands.
void
Inst_VOP2__V_SUBBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
vcc.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane]
= src1[lane] - src0[lane] - bits(vcc.rawData(), lane);
vcc.setBit(lane, (src0[lane] + bits(vcc.rawData(), lane))
> src1[lane] ? 1 : 0);
}
}
vdst.write();
vcc.write();
} // execute
// --- Inst_VOP2__V_ADD_F16 class methods ---
Inst_VOP2__V_ADD_F16::Inst_VOP2__V_ADD_F16(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_add_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP2__V_ADD_F16
Inst_VOP2__V_ADD_F16::~Inst_VOP2__V_ADD_F16()
{
} // ~Inst_VOP2__V_ADD_F16
// --- description from .arch file ---
// D.f16 = S0.f16 + S1.f16.
// Supports denormals, round mode, exception flags, saturation.
void
Inst_VOP2__V_ADD_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP2__V_SUB_F16 class methods ---
Inst_VOP2__V_SUB_F16::Inst_VOP2__V_SUB_F16(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_sub_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP2__V_SUB_F16
Inst_VOP2__V_SUB_F16::~Inst_VOP2__V_SUB_F16()
{
} // ~Inst_VOP2__V_SUB_F16
// --- description from .arch file ---
// D.f16 = S0.f16 - S1.f16.
// Supports denormals, round mode, exception flags, saturation.
// SQ translates to V_ADD_F16.
void
Inst_VOP2__V_SUB_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP2__V_SUBREV_F16 class methods ---
Inst_VOP2__V_SUBREV_F16::Inst_VOP2__V_SUBREV_F16(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_subrev_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP2__V_SUBREV_F16
Inst_VOP2__V_SUBREV_F16::~Inst_VOP2__V_SUBREV_F16()
{
} // ~Inst_VOP2__V_SUBREV_F16
// --- description from .arch file ---
// D.f16 = S1.f16 - S0.f16.
// Supports denormals, round mode, exception flags, saturation.
// SQ translates to V_ADD_F16.
void
Inst_VOP2__V_SUBREV_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP2__V_MUL_F16 class methods ---
Inst_VOP2__V_MUL_F16::Inst_VOP2__V_MUL_F16(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_mul_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP2__V_MUL_F16
Inst_VOP2__V_MUL_F16::~Inst_VOP2__V_MUL_F16()
{
} // ~Inst_VOP2__V_MUL_F16
// --- description from .arch file ---
// D.f16 = S0.f16 * S1.f16.
// Supports denormals, round mode, exception flags, saturation.
void
Inst_VOP2__V_MUL_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP2__V_MAC_F16 class methods ---
Inst_VOP2__V_MAC_F16::Inst_VOP2__V_MAC_F16(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_mac_f16")
{
setFlag(ALU);
setFlag(F16);
setFlag(MAC);
} // Inst_VOP2__V_MAC_F16
Inst_VOP2__V_MAC_F16::~Inst_VOP2__V_MAC_F16()
{
} // ~Inst_VOP2__V_MAC_F16
// --- description from .arch file ---
// D.f16 = S0.f16 * S1.f16 + D.f16.
// Supports round mode, exception flags, saturation.
// SQ translates this to V_MAD_F16.
void
Inst_VOP2__V_MAC_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP2__V_MADMK_F16 class methods ---
Inst_VOP2__V_MADMK_F16::Inst_VOP2__V_MADMK_F16(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_madmk_f16")
{
setFlag(ALU);
setFlag(F16);
setFlag(MAD);
} // Inst_VOP2__V_MADMK_F16
Inst_VOP2__V_MADMK_F16::~Inst_VOP2__V_MADMK_F16()
{
} // ~Inst_VOP2__V_MADMK_F16
// --- description from .arch file ---
// D.f16 = S0.f16 * K.f16 + S1.f16; K is a 16-bit inline constant stored
// in the following literal DWORD.
// This opcode cannot use the VOP3 encoding and cannot use input/output
// modifiers. Supports round mode, exception flags, saturation.
// SQ translates this to V_MAD_F16.
void
Inst_VOP2__V_MADMK_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP2__V_MADAK_F16 class methods ---
Inst_VOP2__V_MADAK_F16::Inst_VOP2__V_MADAK_F16(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_madak_f16")
{
setFlag(ALU);
setFlag(F16);
setFlag(MAD);
} // Inst_VOP2__V_MADAK_F16
Inst_VOP2__V_MADAK_F16::~Inst_VOP2__V_MADAK_F16()
{
} // ~Inst_VOP2__V_MADAK_F16
// --- description from .arch file ---
// D.f16 = S0.f16 * S1.f16 + K.f16; K is a 16-bit inline constant stored
// in the following literal DWORD.
// This opcode cannot use the VOP3 encoding and cannot use input/output
// modifiers. Supports round mode, exception flags, saturation.
// SQ translates this to V_MAD_F16.
void
Inst_VOP2__V_MADAK_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP2__V_ADD_U16 class methods ---
Inst_VOP2__V_ADD_U16::Inst_VOP2__V_ADD_U16(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_add_u16")
{
setFlag(ALU);
} // Inst_VOP2__V_ADD_U16
Inst_VOP2__V_ADD_U16::~Inst_VOP2__V_ADD_U16()
{
} // ~Inst_VOP2__V_ADD_U16
// --- description from .arch file ---
// D.u16 = S0.u16 + S1.u16.
// Supports saturation (unsigned 16-bit integer domain).
void
Inst_VOP2__V_ADD_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
VecOperandU16 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] + src1[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_SUB_U16 class methods ---
Inst_VOP2__V_SUB_U16::Inst_VOP2__V_SUB_U16(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_sub_u16")
{
setFlag(ALU);
} // Inst_VOP2__V_SUB_U16
Inst_VOP2__V_SUB_U16::~Inst_VOP2__V_SUB_U16()
{
} // ~Inst_VOP2__V_SUB_U16
// --- description from .arch file ---
// D.u16 = S0.u16 - S1.u16.
// Supports saturation (unsigned 16-bit integer domain).
void
Inst_VOP2__V_SUB_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
VecOperandU16 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] - src1[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_SUBREV_U16 class methods ---
Inst_VOP2__V_SUBREV_U16::Inst_VOP2__V_SUBREV_U16(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_subrev_u16")
{
setFlag(ALU);
} // Inst_VOP2__V_SUBREV_U16
Inst_VOP2__V_SUBREV_U16::~Inst_VOP2__V_SUBREV_U16()
{
} // ~Inst_VOP2__V_SUBREV_U16
// --- description from .arch file ---
// D.u16 = S1.u16 - S0.u16.
// Supports saturation (unsigned 16-bit integer domain).
// SQ translates this to V_SUB_U16 with reversed operands.
void
Inst_VOP2__V_SUBREV_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
VecOperandU16 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] - src0[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_MUL_LO_U16 class methods ---
Inst_VOP2__V_MUL_LO_U16::Inst_VOP2__V_MUL_LO_U16(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_mul_lo_u16")
{
setFlag(ALU);
} // Inst_VOP2__V_MUL_LO_U16
Inst_VOP2__V_MUL_LO_U16::~Inst_VOP2__V_MUL_LO_U16()
{
} // ~Inst_VOP2__V_MUL_LO_U16
// --- description from .arch file ---
// D.u16 = S0.u16 * S1.u16.
// Supports saturation (unsigned 16-bit integer domain).
void
Inst_VOP2__V_MUL_LO_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
VecOperandU16 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] * src1[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_LSHLREV_B16 class methods ---
Inst_VOP2__V_LSHLREV_B16::Inst_VOP2__V_LSHLREV_B16(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_lshlrev_b16")
{
setFlag(ALU);
} // Inst_VOP2__V_LSHLREV_B16
Inst_VOP2__V_LSHLREV_B16::~Inst_VOP2__V_LSHLREV_B16()
{
} // ~Inst_VOP2__V_LSHLREV_B16
// --- description from .arch file ---
// D.u[15:0] = S1.u[15:0] << S0.u[3:0].
// SQ translates this to an internal SP opcode.
void
Inst_VOP2__V_LSHLREV_B16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
VecOperandU16 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] << bits(src0[lane], 3, 0);
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_LSHRREV_B16 class methods ---
Inst_VOP2__V_LSHRREV_B16::Inst_VOP2__V_LSHRREV_B16(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_lshrrev_b16")
{
setFlag(ALU);
} // Inst_VOP2__V_LSHRREV_B16
Inst_VOP2__V_LSHRREV_B16::~Inst_VOP2__V_LSHRREV_B16()
{
} // ~Inst_VOP2__V_LSHRREV_B16
// --- description from .arch file ---
// D.u[15:0] = S1.u[15:0] >> S0.u[3:0].
// The vacated bits are set to zero.
// SQ translates this to an internal SP opcode.
void
Inst_VOP2__V_LSHRREV_B16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
VecOperandU16 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0);
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_ASHRREV_I16 class methods ---
Inst_VOP2__V_ASHRREV_I16::Inst_VOP2__V_ASHRREV_I16(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_ashrrev_i16")
{
setFlag(ALU);
} // Inst_VOP2__V_ASHRREV_I16
Inst_VOP2__V_ASHRREV_I16::~Inst_VOP2__V_ASHRREV_I16()
{
} // ~Inst_VOP2__V_ASHRREV_I16
// --- description from .arch file ---
// D.i[15:0] = signext(S1.i[15:0]) >> S0.i[3:0].
// The vacated bits are set to the sign bit of the input value.
// SQ translates this to an internal SP opcode.
void
Inst_VOP2__V_ASHRREV_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
VecOperandI16 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0);
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_MAX_F16 class methods ---
Inst_VOP2__V_MAX_F16::Inst_VOP2__V_MAX_F16(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_max_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP2__V_MAX_F16
Inst_VOP2__V_MAX_F16::~Inst_VOP2__V_MAX_F16()
{
} // ~Inst_VOP2__V_MAX_F16
// --- description from .arch file ---
// D.f16 = max(S0.f16, S1.f16).
// IEEE compliant. Supports denormals, round mode, exception flags,
// saturation.
void
Inst_VOP2__V_MAX_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP2__V_MIN_F16 class methods ---
Inst_VOP2__V_MIN_F16::Inst_VOP2__V_MIN_F16(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_min_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP2__V_MIN_F16
Inst_VOP2__V_MIN_F16::~Inst_VOP2__V_MIN_F16()
{
} // ~Inst_VOP2__V_MIN_F16
// --- description from .arch file ---
// D.f16 = min(S0.f16, S1.f16).
// IEEE compliant. Supports denormals, round mode, exception flags,
// saturation.
void
Inst_VOP2__V_MIN_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP2__V_MAX_U16 class methods ---
Inst_VOP2__V_MAX_U16::Inst_VOP2__V_MAX_U16(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_max_u16")
{
setFlag(ALU);
} // Inst_VOP2__V_MAX_U16
Inst_VOP2__V_MAX_U16::~Inst_VOP2__V_MAX_U16()
{
} // ~Inst_VOP2__V_MAX_U16
// --- description from .arch file ---
// D.u[15:0] = max(S0.u[15:0], S1.u[15:0]).
void
Inst_VOP2__V_MAX_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
VecOperandU16 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::max(src0[lane], src1[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_MAX_I16 class methods ---
Inst_VOP2__V_MAX_I16::Inst_VOP2__V_MAX_I16(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_max_i16")
{
setFlag(ALU);
} // Inst_VOP2__V_MAX_I16
Inst_VOP2__V_MAX_I16::~Inst_VOP2__V_MAX_I16()
{
} // ~Inst_VOP2__V_MAX_I16
// --- description from .arch file ---
// D.i[15:0] = max(S0.i[15:0], S1.i[15:0]).
void
Inst_VOP2__V_MAX_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
VecOperandI16 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::max(src0[lane], src1[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_MIN_U16 class methods ---
Inst_VOP2__V_MIN_U16::Inst_VOP2__V_MIN_U16(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_min_u16")
{
setFlag(ALU);
} // Inst_VOP2__V_MIN_U16
Inst_VOP2__V_MIN_U16::~Inst_VOP2__V_MIN_U16()
{
} // ~Inst_VOP2__V_MIN_U16
// --- description from .arch file ---
// D.u[15:0] = min(S0.u[15:0], S1.u[15:0]).
void
Inst_VOP2__V_MIN_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
VecOperandU16 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::min(src0[lane], src1[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_MIN_I16 class methods ---
Inst_VOP2__V_MIN_I16::Inst_VOP2__V_MIN_I16(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_min_i16")
{
setFlag(ALU);
} // Inst_VOP2__V_MIN_I16
Inst_VOP2__V_MIN_I16::~Inst_VOP2__V_MIN_I16()
{
} // ~Inst_VOP2__V_MIN_I16
// --- description from .arch file ---
// D.i[15:0] = min(S0.i[15:0], S1.i[15:0]).
void
Inst_VOP2__V_MIN_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
VecOperandI16 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::min(src0[lane], src1[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_LDEXP_F16 class methods ---
Inst_VOP2__V_LDEXP_F16::Inst_VOP2__V_LDEXP_F16(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_ldexp_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP2__V_LDEXP_F16
Inst_VOP2__V_LDEXP_F16::~Inst_VOP2__V_LDEXP_F16()
{
} // ~Inst_VOP2__V_LDEXP_F16
// --- description from .arch file ---
// D.f16 = S0.f16 * (2 ** S1.i16).
void
Inst_VOP2__V_LDEXP_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP2__V_ADD_U32 class methods ---
Inst_VOP2__V_ADD_U32::Inst_VOP2__V_ADD_U32(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_add_u32")
{
setFlag(ALU);
} // Inst_VOP2__V_ADD_U32
Inst_VOP2__V_ADD_U32::~Inst_VOP2__V_ADD_U32()
{
} // ~Inst_VOP2__V_ADD_U32
// --- description from .arch file ---
// D.u = S0.u + S1.u;
void
Inst_VOP2__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
VecOperandU32 src1(gpuDynInst, instData.VSRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
if (isSDWAInst()) {
VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
// use copies of original src0, src1, and dest during selecting
VecOperandU32 origSrc0_sdwa(gpuDynInst,
extData.iFmt_VOP_SDWA.SRC0);
VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
VecOperandU32 origVdst(gpuDynInst, instData.VDST);
src0_sdwa.read();
origSrc0_sdwa.read();
origSrc1.read();
DPRINTF(VEGA, "Handling V_ADD_U32 SRC SDWA. SRC0: register v[%d], "
"DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, "
"SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
"SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
extData.iFmt_VOP_SDWA.DST_U,
extData.iFmt_VOP_SDWA.CLMP,
extData.iFmt_VOP_SDWA.SRC0_SEL,
extData.iFmt_VOP_SDWA.SRC0_SEXT,
extData.iFmt_VOP_SDWA.SRC0_NEG,
extData.iFmt_VOP_SDWA.SRC0_ABS,
extData.iFmt_VOP_SDWA.SRC1_SEL,
extData.iFmt_VOP_SDWA.SRC1_SEXT,
extData.iFmt_VOP_SDWA.SRC1_NEG,
extData.iFmt_VOP_SDWA.SRC1_ABS);
processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
src1, origSrc1);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0_sdwa[lane] + src1[lane];
origVdst[lane] = vdst[lane]; // keep copy consistent
}
}
processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
} else {
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] + src1[lane];
}
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_SUB_U32 class methods ---
Inst_VOP2__V_SUB_U32::Inst_VOP2__V_SUB_U32(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_sub_u32")
{
setFlag(ALU);
} // Inst_VOP2__V_SUB_U32
Inst_VOP2__V_SUB_U32::~Inst_VOP2__V_SUB_U32()
{
} // ~Inst_VOP2__V_SUB_U32
// --- description from .arch file ---
// D.u = S0.u - S1.u;
void
Inst_VOP2__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] - src1[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP2__V_SUBREV_U32 class methods ---
Inst_VOP2__V_SUBREV_U32::Inst_VOP2__V_SUBREV_U32(InFmt_VOP2 *iFmt)
: Inst_VOP2(iFmt, "v_subrev_u32")
{
setFlag(ALU);
} // Inst_VOP2__V_SUBREV_U32
Inst_VOP2__V_SUBREV_U32::~Inst_VOP2__V_SUBREV_U32()
{
} // ~Inst_VOP2__V_SUBREV_U32
// --- description from .arch file ---
// D.u = S1.u - S0.u;
void
Inst_VOP2__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] - src0[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_NOP class methods ---
Inst_VOP1__V_NOP::Inst_VOP1__V_NOP(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_nop")
{
setFlag(Nop);
setFlag(ALU);
} // Inst_VOP1__V_NOP
Inst_VOP1__V_NOP::~Inst_VOP1__V_NOP()
{
} // ~Inst_VOP1__V_NOP
// --- description from .arch file ---
// Do nothing.
void
Inst_VOP1__V_NOP::execute(GPUDynInstPtr gpuDynInst)
{
} // execute
// --- Inst_VOP1__V_MOV_B32 class methods ---
Inst_VOP1__V_MOV_B32::Inst_VOP1__V_MOV_B32(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_mov_b32")
{
setFlag(ALU);
} // Inst_VOP1__V_MOV_B32
Inst_VOP1__V_MOV_B32::~Inst_VOP1__V_MOV_B32()
{
} // ~Inst_VOP1__V_MOV_B32
// --- description from .arch file ---
// D.u = S0.u.
// Input and output modifiers not supported; this is an untyped operation.
void
Inst_VOP1__V_MOV_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (isDPPInst()) {
VecOperandU32 src_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
src_dpp.read();
DPRINTF(VEGA, "Handling V_MOV_B32 SRC DPP. SRC0: register v[%d], "
"DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
"SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, "
"BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
extData.iFmt_VOP_DPP.DPP_CTRL,
extData.iFmt_VOP_DPP.SRC0_ABS,
extData.iFmt_VOP_DPP.SRC0_NEG,
extData.iFmt_VOP_DPP.SRC1_ABS,
extData.iFmt_VOP_DPP.SRC1_NEG,
extData.iFmt_VOP_DPP.BC,
extData.iFmt_VOP_DPP.BANK_MASK,
extData.iFmt_VOP_DPP.ROW_MASK);
// NOTE: For VOP1, there is no SRC1, so make sure we're not trying
// to negate it or take the absolute value of it
assert(!extData.iFmt_VOP_DPP.SRC1_ABS);
assert(!extData.iFmt_VOP_DPP.SRC1_NEG);
processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src_dpp);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src_dpp[lane];
}
}
} else {
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src[lane];
}
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_READFIRSTLANE_B32 class methods ---
Inst_VOP1__V_READFIRSTLANE_B32::Inst_VOP1__V_READFIRSTLANE_B32(
InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_readfirstlane_b32")
{
setFlag(ALU);
} // Inst_VOP1__V_READFIRSTLANE_B32
Inst_VOP1__V_READFIRSTLANE_B32::~Inst_VOP1__V_READFIRSTLANE_B32()
{
} // ~Inst_VOP1__V_READFIRSTLANE_B32
// --- description from .arch file ---
// Copy one VGPR value to one SGPR. D = SGPR destination, S0 = source data
// (VGPR# or M0 for lds direct access), Lane# = FindFirst1fromLSB(exec)
// (Lane# = 0 if exec is zero). Ignores exec mask for the access. SQ
// translates to V_READLANE_B32.
// Input and output modifiers not supported; this is an untyped operation.
void
Inst_VOP1__V_READFIRSTLANE_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarRegI32 src_lane(0);
ScalarRegU64 exec_mask = wf->execMask().to_ullong();
ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
ScalarOperandU32 sdst(gpuDynInst, instData.VDST);
src.readSrc();
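// choose the lowest-numbered active lane, or lane 0 when EXEC is zero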
if (exec_mask) {
src_lane = findLsbSet(exec_mask);
}
sdst = src[src_lane];
sdst.write();
} // execute
// --- Inst_VOP1__V_CVT_I32_F64 class methods ---
Inst_VOP1__V_CVT_I32_F64::Inst_VOP1__V_CVT_I32_F64(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_cvt_i32_f64")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP1__V_CVT_I32_F64
Inst_VOP1__V_CVT_I32_F64::~Inst_VOP1__V_CVT_I32_F64()
{
} // ~Inst_VOP1__V_CVT_I32_F64
// --- description from .arch file ---
// D.i = (int)S0.d.
// Out-of-range floating point values (including infinity) saturate. NaN is
// --- converted to 0.
void
Inst_VOP1__V_CVT_I32_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
VecOperandI32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
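// frexp() returns exp such that |src| < 2^exp, so exp > 31
// identifies magnitudes outside the signed 32-bit range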
int exp;
std::frexp(src[lane], &exp);
if (std::isnan(src[lane])) {
vdst[lane] = 0;
} else if (std::isinf(src[lane]) || exp > 31) {
if (std::signbit(src[lane])) {
vdst[lane] = INT_MIN;
} else {
vdst[lane] = INT_MAX;
}
} else {
vdst[lane] = (VecElemI32)src[lane];
}
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_CVT_F64_I32 class methods ---
Inst_VOP1__V_CVT_F64_I32::Inst_VOP1__V_CVT_F64_I32(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_cvt_f64_i32")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP1__V_CVT_F64_I32
Inst_VOP1__V_CVT_F64_I32::~Inst_VOP1__V_CVT_F64_I32()
{
} // ~Inst_VOP1__V_CVT_F64_I32
// --- description from .arch file ---
// D.d = (double)S0.i.
void
Inst_VOP1__V_CVT_F64_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src(gpuDynInst, instData.SRC0);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemF64)src[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_CVT_F32_I32 class methods ---
Inst_VOP1__V_CVT_F32_I32::Inst_VOP1__V_CVT_F32_I32(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_cvt_f32_i32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP1__V_CVT_F32_I32
Inst_VOP1__V_CVT_F32_I32::~Inst_VOP1__V_CVT_F32_I32()
{
} // ~Inst_VOP1__V_CVT_F32_I32
// --- description from .arch file ---
// D.f = (float)S0.i.
void
Inst_VOP1__V_CVT_F32_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src(gpuDynInst, instData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemF32)src[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_CVT_F32_U32 class methods ---
Inst_VOP1__V_CVT_F32_U32::Inst_VOP1__V_CVT_F32_U32(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_cvt_f32_u32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP1__V_CVT_F32_U32
Inst_VOP1__V_CVT_F32_U32::~Inst_VOP1__V_CVT_F32_U32()
{
} // ~Inst_VOP1__V_CVT_F32_U32
// --- description from .arch file ---
// D.f = (float)S0.u.
void
Inst_VOP1__V_CVT_F32_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemF32)src[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_CVT_U32_F32 class methods ---
Inst_VOP1__V_CVT_U32_F32::Inst_VOP1__V_CVT_U32_F32(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_cvt_u32_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP1__V_CVT_U32_F32
Inst_VOP1__V_CVT_U32_F32::~Inst_VOP1__V_CVT_U32_F32()
{
} // ~Inst_VOP1__V_CVT_U32_F32
// --- description from .arch file ---
// D.u = (unsigned)S0.f.
// Out-of-range floating point values (including infinity) saturate. NaN is
// --- converted to 0.
void
Inst_VOP1__V_CVT_U32_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
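// frexp() returns exp such that |src| < 2^exp, so exp > 32
// identifies magnitudes outside the unsigned 32-bit range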
int exp;
std::frexp(src[lane], &exp);
if (std::isnan(src[lane])) {
vdst[lane] = 0;
} else if (std::isinf(src[lane])) {
if (std::signbit(src[lane])) {
vdst[lane] = 0;
} else {
vdst[lane] = UINT_MAX;
}
} else if (exp > 32) {
// finite but out of range: negatives clamp to 0
vdst[lane] = std::signbit(src[lane]) ? 0 : UINT_MAX;
} else {
vdst[lane] = (VecElemU32)src[lane];
}
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_CVT_I32_F32 class methods ---
Inst_VOP1__V_CVT_I32_F32::Inst_VOP1__V_CVT_I32_F32(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_cvt_i32_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP1__V_CVT_I32_F32
Inst_VOP1__V_CVT_I32_F32::~Inst_VOP1__V_CVT_I32_F32()
{
} // ~Inst_VOP1__V_CVT_I32_F32
// --- description from .arch file ---
// D.i = (int)S0.f.
// Out-of-range floating point values (including infinity) saturate. NaN is
// --- converted to 0.
void
Inst_VOP1__V_CVT_I32_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
VecOperandI32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
int exp;
std::frexp(src[lane], &exp);
if (std::isnan(src[lane])) {
vdst[lane] = 0;
} else if (std::isinf(src[lane]) || exp > 31) {
if (std::signbit(src[lane])) {
vdst[lane] = INT_MIN;
} else {
vdst[lane] = INT_MAX;
}
} else {
vdst[lane] = (VecElemI32)src[lane];
}
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_MOV_FED_B32 class methods ---
Inst_VOP1__V_MOV_FED_B32::Inst_VOP1__V_MOV_FED_B32(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_mov_fed_b32")
{
setFlag(ALU);
} // Inst_VOP1__V_MOV_FED_B32
Inst_VOP1__V_MOV_FED_B32::~Inst_VOP1__V_MOV_FED_B32()
{
} // ~Inst_VOP1__V_MOV_FED_B32
// --- description from .arch file ---
// D.u = S0.u;
// Introduce EDC double error upon write to dest vgpr without causing an
// --- exception.
// Input and output modifiers not supported; this is an untyped operation.
void
Inst_VOP1__V_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP1__V_CVT_F16_F32 class methods ---
Inst_VOP1__V_CVT_F16_F32::Inst_VOP1__V_CVT_F16_F32(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_cvt_f16_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP1__V_CVT_F16_F32
Inst_VOP1__V_CVT_F16_F32::~Inst_VOP1__V_CVT_F16_F32()
{
} // ~Inst_VOP1__V_CVT_F16_F32
// --- description from .arch file ---
// D.f16 = flt32_to_flt16(S0.f).
// Supports input modifiers and creates FP16 denormals when appropriate.
void
Inst_VOP1__V_CVT_F16_F32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP1__V_CVT_F32_F16 class methods ---
Inst_VOP1__V_CVT_F32_F16::Inst_VOP1__V_CVT_F32_F16(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_cvt_f32_f16")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP1__V_CVT_F32_F16
Inst_VOP1__V_CVT_F32_F16::~Inst_VOP1__V_CVT_F32_F16()
{
} // ~Inst_VOP1__V_CVT_F32_F16
// --- description from .arch file ---
// D.f = flt16_to_flt32(S0.f16).
// FP16 denormal inputs are always accepted.
void
Inst_VOP1__V_CVT_F32_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP1__V_CVT_RPI_I32_F32 class methods ---
Inst_VOP1__V_CVT_RPI_I32_F32::Inst_VOP1__V_CVT_RPI_I32_F32(
InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_cvt_rpi_i32_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP1__V_CVT_RPI_I32_F32
Inst_VOP1__V_CVT_RPI_I32_F32::~Inst_VOP1__V_CVT_RPI_I32_F32()
{
} // ~Inst_VOP1__V_CVT_RPI_I32_F32
// --- description from .arch file ---
// D.i = (int)floor(S0.f + 0.5).
void
Inst_VOP1__V_CVT_RPI_I32_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
VecOperandI32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
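// 'RPI' rounding: add 0.5, then floor (half-way cases go toward +infinity)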
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5);
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_CVT_FLR_I32_F32 class methods ---
Inst_VOP1__V_CVT_FLR_I32_F32::Inst_VOP1__V_CVT_FLR_I32_F32(
InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_cvt_flr_i32_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP1__V_CVT_FLR_I32_F32
Inst_VOP1__V_CVT_FLR_I32_F32::~Inst_VOP1__V_CVT_FLR_I32_F32()
{
} // ~Inst_VOP1__V_CVT_FLR_I32_F32
// --- description from .arch file ---
// D.i = (int)floor(S0.f).
void
Inst_VOP1__V_CVT_FLR_I32_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
VecOperandI32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemI32)std::floor(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_CVT_OFF_F32_I4 class methods ---
Inst_VOP1__V_CVT_OFF_F32_I4::Inst_VOP1__V_CVT_OFF_F32_I4(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_cvt_off_f32_i4")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP1__V_CVT_OFF_F32_I4
Inst_VOP1__V_CVT_OFF_F32_I4::~Inst_VOP1__V_CVT_OFF_F32_I4()
{
} // ~Inst_VOP1__V_CVT_OFF_F32_I4
// --- description from .arch file ---
// 4-bit signed int to 32-bit float. Used for interpolation in shader.
void
Inst_VOP1__V_CVT_OFF_F32_I4::execute(GPUDynInstPtr gpuDynInst)
{
// Could not parse sq_uc.arch desc field
panicUnimplemented();
} // execute
// --- Inst_VOP1__V_CVT_F32_F64 class methods ---
Inst_VOP1__V_CVT_F32_F64::Inst_VOP1__V_CVT_F32_F64(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_cvt_f32_f64")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP1__V_CVT_F32_F64
Inst_VOP1__V_CVT_F32_F64::~Inst_VOP1__V_CVT_F32_F64()
{
} // ~Inst_VOP1__V_CVT_F32_F64
// --- description from .arch file ---
// D.f = (float)S0.d.
void
Inst_VOP1__V_CVT_F32_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemF32)src[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_CVT_F64_F32 class methods ---
Inst_VOP1__V_CVT_F64_F32::Inst_VOP1__V_CVT_F64_F32(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_cvt_f64_f32")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP1__V_CVT_F64_F32
Inst_VOP1__V_CVT_F64_F32::~Inst_VOP1__V_CVT_F64_F32()
{
} // ~Inst_VOP1__V_CVT_F64_F32
// --- description from .arch file ---
// D.d = (double)S0.f.
void
Inst_VOP1__V_CVT_F64_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemF64)src[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_CVT_F32_UBYTE0 class methods ---
Inst_VOP1__V_CVT_F32_UBYTE0::Inst_VOP1__V_CVT_F32_UBYTE0(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_cvt_f32_ubyte0")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP1__V_CVT_F32_UBYTE0
Inst_VOP1__V_CVT_F32_UBYTE0::~Inst_VOP1__V_CVT_F32_UBYTE0()
{
} // ~Inst_VOP1__V_CVT_F32_UBYTE0
// --- description from .arch file ---
// D.f = (float)(S0.u[7:0]).
void
Inst_VOP1__V_CVT_F32_UBYTE0::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemF32)(bits(src[lane], 7, 0));
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_CVT_F32_UBYTE1 class methods ---
Inst_VOP1__V_CVT_F32_UBYTE1::Inst_VOP1__V_CVT_F32_UBYTE1(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_cvt_f32_ubyte1")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP1__V_CVT_F32_UBYTE1
Inst_VOP1__V_CVT_F32_UBYTE1::~Inst_VOP1__V_CVT_F32_UBYTE1()
{
} // ~Inst_VOP1__V_CVT_F32_UBYTE1
// --- description from .arch file ---
// D.f = (float)(S0.u[15:8]).
void
Inst_VOP1__V_CVT_F32_UBYTE1::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemF32)(bits(src[lane], 15, 8));
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_CVT_F32_UBYTE2 class methods ---
Inst_VOP1__V_CVT_F32_UBYTE2::Inst_VOP1__V_CVT_F32_UBYTE2(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_cvt_f32_ubyte2")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP1__V_CVT_F32_UBYTE2
Inst_VOP1__V_CVT_F32_UBYTE2::~Inst_VOP1__V_CVT_F32_UBYTE2()
{
} // ~Inst_VOP1__V_CVT_F32_UBYTE2
// --- description from .arch file ---
// D.f = (float)(S0.u[23:16]).
void
Inst_VOP1__V_CVT_F32_UBYTE2::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemF32)(bits(src[lane], 23, 16));
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_CVT_F32_UBYTE3 class methods ---
Inst_VOP1__V_CVT_F32_UBYTE3::Inst_VOP1__V_CVT_F32_UBYTE3(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_cvt_f32_ubyte3")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP1__V_CVT_F32_UBYTE3
Inst_VOP1__V_CVT_F32_UBYTE3::~Inst_VOP1__V_CVT_F32_UBYTE3()
{
} // ~Inst_VOP1__V_CVT_F32_UBYTE3
// --- description from .arch file ---
// D.f = (float)(S0.u[31:24]).
void
Inst_VOP1__V_CVT_F32_UBYTE3::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemF32)(bits(src[lane], 31, 24));
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_CVT_U32_F64 class methods ---
Inst_VOP1__V_CVT_U32_F64::Inst_VOP1__V_CVT_U32_F64(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_cvt_u32_f64")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP1__V_CVT_U32_F64
Inst_VOP1__V_CVT_U32_F64::~Inst_VOP1__V_CVT_U32_F64()
{
} // ~Inst_VOP1__V_CVT_U32_F64
// --- description from .arch file ---
// D.u = (unsigned)S0.d.
// Out-of-range floating point values (including infinity) saturate. NaN is
// --- converted to 0.
void
Inst_VOP1__V_CVT_U32_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
int exp;
std::frexp(src[lane], &exp);
if (std::isnan(src[lane])) {
vdst[lane] = 0;
} else if (std::isinf(src[lane])) {
if (std::signbit(src[lane])) {
vdst[lane] = 0;
} else {
vdst[lane] = UINT_MAX;
}
} else if (exp > 32) {
vdst[lane] = std::signbit(src[lane]) ? 0 : UINT_MAX;
} else {
vdst[lane] = (VecElemU32)src[lane];
}
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_CVT_F64_U32 class methods ---
Inst_VOP1__V_CVT_F64_U32::Inst_VOP1__V_CVT_F64_U32(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_cvt_f64_u32")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP1__V_CVT_F64_U32
Inst_VOP1__V_CVT_F64_U32::~Inst_VOP1__V_CVT_F64_U32()
{
} // ~Inst_VOP1__V_CVT_F64_U32
// --- description from .arch file ---
// D.d = (double)S0.u.
void
Inst_VOP1__V_CVT_F64_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemF64)src[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_TRUNC_F64 class methods ---
Inst_VOP1__V_TRUNC_F64::Inst_VOP1__V_TRUNC_F64(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_trunc_f64")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP1__V_TRUNC_F64
Inst_VOP1__V_TRUNC_F64::~Inst_VOP1__V_TRUNC_F64()
{
} // ~Inst_VOP1__V_TRUNC_F64
// --- description from .arch file ---
// D.d = trunc(S0.d), return integer part of S0.d.
void
Inst_VOP1__V_TRUNC_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::trunc(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_CEIL_F64 class methods ---
Inst_VOP1__V_CEIL_F64::Inst_VOP1__V_CEIL_F64(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_ceil_f64")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP1__V_CEIL_F64
Inst_VOP1__V_CEIL_F64::~Inst_VOP1__V_CEIL_F64()
{
} // ~Inst_VOP1__V_CEIL_F64
// --- description from .arch file ---
// D.d = trunc(S0.d);
// if(S0.d > 0.0 && S0.d != D.d) then D.d += 1.0.
void
Inst_VOP1__V_CEIL_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::ceil(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_RNDNE_F64 class methods ---
Inst_VOP1__V_RNDNE_F64::Inst_VOP1__V_RNDNE_F64(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_rndne_f64")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP1__V_RNDNE_F64
Inst_VOP1__V_RNDNE_F64::~Inst_VOP1__V_RNDNE_F64()
{
} // ~Inst_VOP1__V_RNDNE_F64
// --- description from .arch file ---
// D.d = round_nearest_even(S0.d).
void
Inst_VOP1__V_RNDNE_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = roundNearestEven(src[lane]);
}
}
vdst.write();
} // execute
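// A minimal sketch of the tie-to-even rounding that
// roundNearestEven() (declared in inst_util.hh) is assumed to
// implement; shown only to illustrate the semantics, not called by
// the simulator.
inline VecElemF64
rndneSketch(VecElemF64 x)
{
VecElemF64 fl = std::floor(x);
VecElemF64 frac = x - fl;
if (frac < 0.5)
return fl;
if (frac > 0.5)
return fl + 1.0;
// exact halfway case: pick the even neighbor, e.g. 2.5 -> 2.0
// and 3.5 -> 4.0
return std::fmod(fl, 2.0) == 0.0 ? fl : fl + 1.0;
}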
// --- Inst_VOP1__V_FLOOR_F64 class methods ---
Inst_VOP1__V_FLOOR_F64::Inst_VOP1__V_FLOOR_F64(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_floor_f64")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP1__V_FLOOR_F64
Inst_VOP1__V_FLOOR_F64::~Inst_VOP1__V_FLOOR_F64()
{
} // ~Inst_VOP1__V_FLOOR_F64
// --- description from .arch file ---
// D.d = trunc(S0.d);
// if(S0.d < 0.0 && S0.d != D.d) then D.d += -1.0.
void
Inst_VOP1__V_FLOOR_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::floor(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_FRACT_F32 class methods ---
Inst_VOP1__V_FRACT_F32::Inst_VOP1__V_FRACT_F32(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_fract_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP1__V_FRACT_F32
Inst_VOP1__V_FRACT_F32::~Inst_VOP1__V_FRACT_F32()
{
} // ~Inst_VOP1__V_FRACT_F32
// --- description from .arch file ---
// D.f = S0.f - floor(S0.f).
void
Inst_VOP1__V_FRACT_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
// fract is defined as S0.f - floor(S0.f), which stays in
// [0.0, 1.0) even for negative inputs
vdst[lane] = src[lane] - std::floor(src[lane]);
}
}
vdst.write();
} // execute
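// Worked example of the definition above: for src = -1.25,
// floor(-1.25) = -2.0, so D.f = -1.25 - (-2.0) = 0.75. A modf-based
// split would return -0.25 instead, because modf separates the
// fraction relative to truncation toward zero rather than floor.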
// --- Inst_VOP1__V_TRUNC_F32 class methods ---
Inst_VOP1__V_TRUNC_F32::Inst_VOP1__V_TRUNC_F32(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_trunc_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP1__V_TRUNC_F32
Inst_VOP1__V_TRUNC_F32::~Inst_VOP1__V_TRUNC_F32()
{
} // ~Inst_VOP1__V_TRUNC_F32
// --- description from .arch file ---
// D.f = trunc(S0.f), return integer part of S0.f.
void
Inst_VOP1__V_TRUNC_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::trunc(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_CEIL_F32 class methods ---
Inst_VOP1__V_CEIL_F32::Inst_VOP1__V_CEIL_F32(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_ceil_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP1__V_CEIL_F32
Inst_VOP1__V_CEIL_F32::~Inst_VOP1__V_CEIL_F32()
{
} // ~Inst_VOP1__V_CEIL_F32
// --- description from .arch file ---
// D.f = trunc(S0.f);
// if(S0.f > 0.0 && S0.f != D.f) then D.f += 1.0.
void
Inst_VOP1__V_CEIL_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::ceil(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_RNDNE_F32 class methods ---
Inst_VOP1__V_RNDNE_F32::Inst_VOP1__V_RNDNE_F32(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_rndne_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP1__V_RNDNE_F32
Inst_VOP1__V_RNDNE_F32::~Inst_VOP1__V_RNDNE_F32()
{
} // ~Inst_VOP1__V_RNDNE_F32
// --- description from .arch file ---
// D.f = round_nearest_even(S0.f).
void
Inst_VOP1__V_RNDNE_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = roundNearestEven(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_FLOOR_F32 class methods ---
Inst_VOP1__V_FLOOR_F32::Inst_VOP1__V_FLOOR_F32(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_floor_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP1__V_FLOOR_F32
Inst_VOP1__V_FLOOR_F32::~Inst_VOP1__V_FLOOR_F32()
{
} // ~Inst_VOP1__V_FLOOR_F32
// --- description from .arch file ---
// D.f = trunc(S0.f);
// if(S0.f < 0.0 && S0.f != D.f) then D.f += -1.0.
void
Inst_VOP1__V_FLOOR_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::floor(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_EXP_F32 class methods ---
Inst_VOP1__V_EXP_F32::Inst_VOP1__V_EXP_F32(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_exp_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP1__V_EXP_F32
Inst_VOP1__V_EXP_F32::~Inst_VOP1__V_EXP_F32()
{
} // ~Inst_VOP1__V_EXP_F32
// --- description from .arch file ---
// D.f = pow(2.0, S0.f).
void
Inst_VOP1__V_EXP_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::pow(2.0, src[lane]);
}
}
vdst.write();
} // execute
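// Worked example of the definition above: src = 10.0 gives
// pow(2.0, 10.0) = 1024.0; the double result narrows to F32 when
// assigned to vdst.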
// --- Inst_VOP1__V_LOG_F32 class methods ---
Inst_VOP1__V_LOG_F32::Inst_VOP1__V_LOG_F32(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_log_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP1__V_LOG_F32
Inst_VOP1__V_LOG_F32::~Inst_VOP1__V_LOG_F32()
{
} // ~Inst_VOP1__V_LOG_F32
// --- description from .arch file ---
// D.f = log2(S0.f). Base 2 logarithm.
void
Inst_VOP1__V_LOG_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::log2(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_RCP_F32 class methods ---
Inst_VOP1__V_RCP_F32::Inst_VOP1__V_RCP_F32(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_rcp_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP1__V_RCP_F32
Inst_VOP1__V_RCP_F32::~Inst_VOP1__V_RCP_F32()
{
} // ~Inst_VOP1__V_RCP_F32
// --- description from .arch file ---
// D.f = 1.0 / S0.f. Reciprocal with IEEE rules and < 1ulp error.
void
Inst_VOP1__V_RCP_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = 1.0 / src[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_RCP_IFLAG_F32 class methods ---
Inst_VOP1__V_RCP_IFLAG_F32::Inst_VOP1__V_RCP_IFLAG_F32(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_rcp_iflag_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP1__V_RCP_IFLAG_F32
Inst_VOP1__V_RCP_IFLAG_F32::~Inst_VOP1__V_RCP_IFLAG_F32()
{
} // ~Inst_VOP1__V_RCP_IFLAG_F32
// --- description from .arch file ---
// D.f = 1.0 / S0.f. Reciprocal intended for integer division, can raise
// --- integer DIV_BY_ZERO exception but cannot raise floating-point
// --- exceptions.
void
Inst_VOP1__V_RCP_IFLAG_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = 1.0 / src[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_RSQ_F32 class methods ---
Inst_VOP1__V_RSQ_F32::Inst_VOP1__V_RSQ_F32(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_rsq_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP1__V_RSQ_F32
Inst_VOP1__V_RSQ_F32::~Inst_VOP1__V_RSQ_F32()
{
} // ~Inst_VOP1__V_RSQ_F32
// --- description from .arch file ---
// D.f = 1.0 / sqrt(S0.f). Reciprocal square root with IEEE rules.
void
Inst_VOP1__V_RSQ_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = 1.0 / std::sqrt(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_RCP_F64 class methods ---
Inst_VOP1__V_RCP_F64::Inst_VOP1__V_RCP_F64(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_rcp_f64")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP1__V_RCP_F64
Inst_VOP1__V_RCP_F64::~Inst_VOP1__V_RCP_F64()
{
} // ~Inst_VOP1__V_RCP_F64
// --- description from .arch file ---
// D.d = 1.0 / S0.d.
void
Inst_VOP1__V_RCP_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (std::fpclassify(src[lane]) == FP_ZERO) {
// 1.0 / ±0.0 is ±infinity under IEEE rules
vdst[lane] = std::signbit(src[lane]) ? -INFINITY : +INFINITY;
} else if (std::isnan(src[lane])) {
vdst[lane] = NAN;
} else if (std::isinf(src[lane])) {
if (std::signbit(src[lane])) {
vdst[lane] = -0.0;
} else {
vdst[lane] = 0.0;
}
} else {
vdst[lane] = 1.0 / src[lane];
}
}
}
vdst.write();
} // execute
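// Worked special cases for the branches above, following IEEE
// conventions: 1.0 / +0.0 -> +inf, 1.0 / -0.0 -> -inf,
// 1.0 / +inf -> +0.0, 1.0 / -inf -> -0.0, and a NaN input yields
// NaN.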
// --- Inst_VOP1__V_RSQ_F64 class methods ---
Inst_VOP1__V_RSQ_F64::Inst_VOP1__V_RSQ_F64(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_rsq_f64")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP1__V_RSQ_F64
Inst_VOP1__V_RSQ_F64::~Inst_VOP1__V_RSQ_F64()
{
} // ~Inst_VOP1__V_RSQ_F64
// --- description from .arch file ---
// D.d = 1.0 / sqrt(S0.d). See V_RSQ_F32.
void
Inst_VOP1__V_RSQ_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (std::fpclassify(src[lane]) == FP_ZERO) {
// 1.0 / sqrt(±0.0) is ±infinity under IEEE rules
vdst[lane] = std::signbit(src[lane]) ? -INFINITY : +INFINITY;
} else if (std::isnan(src[lane])) {
vdst[lane] = NAN;
} else if (std::isinf(src[lane])
&& !std::signbit(src[lane])) {
vdst[lane] = 0.0;
} else if (std::signbit(src[lane])) {
vdst[lane] = NAN;
} else {
vdst[lane] = 1.0 / std::sqrt(src[lane]);
}
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_SQRT_F32 class methods ---
Inst_VOP1__V_SQRT_F32::Inst_VOP1__V_SQRT_F32(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_sqrt_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP1__V_SQRT_F32
Inst_VOP1__V_SQRT_F32::~Inst_VOP1__V_SQRT_F32()
{
} // ~Inst_VOP1__V_SQRT_F32
// --- description from .arch file ---
// D.f = sqrt(S0.f).
void
Inst_VOP1__V_SQRT_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::sqrt(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_SQRT_F64 class methods ---
Inst_VOP1__V_SQRT_F64::Inst_VOP1__V_SQRT_F64(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_sqrt_f64")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP1__V_SQRT_F64
Inst_VOP1__V_SQRT_F64::~Inst_VOP1__V_SQRT_F64()
{
} // ~Inst_VOP1__V_SQRT_F64
// --- description from .arch file ---
// D.d = sqrt(S0.d).
void
Inst_VOP1__V_SQRT_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::sqrt(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_SIN_F32 class methods ---
Inst_VOP1__V_SIN_F32::Inst_VOP1__V_SIN_F32(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_sin_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP1__V_SIN_F32
Inst_VOP1__V_SIN_F32::~Inst_VOP1__V_SIN_F32()
{
} // ~Inst_VOP1__V_SIN_F32
// --- description from .arch file ---
// D.f = sin(S0.f * 2 * PI).
// Valid range of S0.f is [-256.0, +256.0]. Out of range input results in
// float 0.0.
void
Inst_VOP1__V_SIN_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
pi.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (src[lane] < -256.0 || src[lane] > 256.0) {
vdst[lane] = 0.0;
} else {
vdst[lane] = std::sin(src[lane] * 2.0 * pi.rawData());
}
}
}
vdst.write();
} // execute
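// Worked example of the scaling above: the operand is measured in
// rotations, so src = 0.25 gives sin(0.25 * 2 * PI) = 1.0, while
// any src outside [-256.0, 256.0] produces 0.0.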
// --- Inst_VOP1__V_COS_F32 class methods ---
Inst_VOP1__V_COS_F32::Inst_VOP1__V_COS_F32(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_cos_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP1__V_COS_F32
Inst_VOP1__V_COS_F32::~Inst_VOP1__V_COS_F32()
{
} // ~Inst_VOP1__V_COS_F32
// --- description from .arch file ---
// D.f = cos(S0.f * 2 * PI).
// Valid range of S0.f is [-256.0, +256.0]. Out of range input results in
// float 1.0.
void
Inst_VOP1__V_COS_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
pi.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (src[lane] < -256.0 || src[lane] > 256.0) {
// out-of-range input produces 1.0, per the
// description above
vdst[lane] = 1.0;
} else {
vdst[lane] = std::cos(src[lane] * 2.0 * pi.rawData());
}
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_NOT_B32 class methods ---
Inst_VOP1__V_NOT_B32::Inst_VOP1__V_NOT_B32(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_not_b32")
{
setFlag(ALU);
} // Inst_VOP1__V_NOT_B32
Inst_VOP1__V_NOT_B32::~Inst_VOP1__V_NOT_B32()
{
} // ~Inst_VOP1__V_NOT_B32
// --- description from .arch file ---
// D.u = ~S0.u.
// Input and output modifiers not supported.
void
Inst_VOP1__V_NOT_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = ~src[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_BFREV_B32 class methods ---
Inst_VOP1__V_BFREV_B32::Inst_VOP1__V_BFREV_B32(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_bfrev_b32")
{
setFlag(ALU);
} // Inst_VOP1__V_BFREV_B32
Inst_VOP1__V_BFREV_B32::~Inst_VOP1__V_BFREV_B32()
{
} // ~Inst_VOP1__V_BFREV_B32
// --- description from .arch file ---
// D.u[31:0] = S0.u[0:31], bitfield reverse.
// Input and output modifiers not supported.
void
Inst_VOP1__V_BFREV_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = reverseBits(src[lane]);
}
}
vdst.write();
} // execute
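// A minimal sketch of the 32-bit reversal that reverseBits() (from
// inst_util.hh) is assumed to perform, e.g. 0x00000001 maps to
// 0x80000000; illustrative only, not called by the simulator.
inline VecElemU32
bfrevSketch(VecElemU32 x)
{
VecElemU32 r = 0;
for (int i = 0; i < 32; ++i) {
r = (r << 1) | (x & 1); // shift the current LSB of x into r
x >>= 1;
}
return r;
}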
// --- Inst_VOP1__V_FFBH_U32 class methods ---
Inst_VOP1__V_FFBH_U32::Inst_VOP1__V_FFBH_U32(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_ffbh_u32")
{
setFlag(ALU);
} // Inst_VOP1__V_FFBH_U32
Inst_VOP1__V_FFBH_U32::~Inst_VOP1__V_FFBH_U32()
{
} // ~Inst_VOP1__V_FFBH_U32
// --- description from .arch file ---
// D.u = position of first 1 in S0.u from MSB;
// D.u = 0xffffffff if S0.u == 0.
void
Inst_VOP1__V_FFBH_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = findFirstOneMsb(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_FFBL_B32 class methods ---
Inst_VOP1__V_FFBL_B32::Inst_VOP1__V_FFBL_B32(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_ffbl_b32")
{
setFlag(ALU);
} // Inst_VOP1__V_FFBL_B32
Inst_VOP1__V_FFBL_B32::~Inst_VOP1__V_FFBL_B32()
{
} // ~Inst_VOP1__V_FFBL_B32
// --- description from .arch file ---
// D.u = position of first 1 in S0.u from LSB;
// D.u = 0xffffffff if S0.u == 0.
void
Inst_VOP1__V_FFBL_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = findFirstOne(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_FFBH_I32 class methods ---
Inst_VOP1__V_FFBH_I32::Inst_VOP1__V_FFBH_I32(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_ffbh_i32")
{
setFlag(ALU);
} // Inst_VOP1__V_FFBH_I32
Inst_VOP1__V_FFBH_I32::~Inst_VOP1__V_FFBH_I32()
{
} // ~Inst_VOP1__V_FFBH_I32
// --- description from .arch file ---
// D.u = position of first bit different from sign bit in S0.i from MSB;
// D.u = 0xffffffff if S0.i == 0 or S0.i == 0xffffffff.
void
Inst_VOP1__V_FFBH_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src(gpuDynInst, instData.SRC0);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = firstOppositeSignBit(src[lane]);
}
}
vdst.write();
} // execute
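// Minimal sketches of the bit-search semantics used by the three
// instructions above. findFirstOneMsb(), findFirstOne() and
// firstOppositeSignBit() live in inst_util.hh; the bodies below are
// assumptions based on the .arch descriptions, not the simulator's
// definitions.
inline VecElemU32
ffbhU32Sketch(VecElemU32 v)
{
// count from the MSB side; all-zero input yields 0xffffffff
for (int i = 31; i >= 0; --i)
if (v & (1U << i))
return 31 - i;
return 0xffffffff;
}
inline VecElemU32
ffblB32Sketch(VecElemU32 v)
{
// index of the lowest set bit; all-zero input yields 0xffffffff
for (int i = 0; i < 32; ++i)
if (v & (1U << i))
return i;
return 0xffffffff;
}
// V_FFBH_I32 performs the same MSB-side scan but looks for the
// first bit that differs from the sign bit, reporting 0xffffffff
// for the inputs 0 and -1.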
// --- Inst_VOP1__V_FREXP_EXP_I32_F64 class methods ---
Inst_VOP1__V_FREXP_EXP_I32_F64::Inst_VOP1__V_FREXP_EXP_I32_F64(
InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_frexp_exp_i32_f64")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP1__V_FREXP_EXP_I32_F64
Inst_VOP1__V_FREXP_EXP_I32_F64::~Inst_VOP1__V_FREXP_EXP_I32_F64()
{
} // ~Inst_VOP1__V_FREXP_EXP_I32_F64
// --- description from .arch file ---
// See V_FREXP_EXP_I32_F32.
void
Inst_VOP1__V_FREXP_EXP_I32_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
VecOperandI32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (std::isinf(src[lane]) || std::isnan(src[lane])) {
vdst[lane] = 0;
} else {
VecElemI32 exp = 0;
std::frexp(src[lane], &exp);
vdst[lane] = exp;
}
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_FREXP_MANT_F64 class methods ---
Inst_VOP1__V_FREXP_MANT_F64::Inst_VOP1__V_FREXP_MANT_F64(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_frexp_mant_f64")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP1__V_FREXP_MANT_F64
Inst_VOP1__V_FREXP_MANT_F64::~Inst_VOP1__V_FREXP_MANT_F64()
{
} // ~Inst_VOP1__V_FREXP_MANT_F64
// --- description from .arch file ---
// See V_FREXP_MANT_F32.
void
Inst_VOP1__V_FREXP_MANT_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (std::isinf(src[lane]) || std::isnan(src[lane])) {
vdst[lane] = src[lane];
} else {
VecElemI32 exp(0);
vdst[lane] = std::frexp(src[lane], &exp);
}
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_FRACT_F64 class methods ---
Inst_VOP1__V_FRACT_F64::Inst_VOP1__V_FRACT_F64(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_fract_f64")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP1__V_FRACT_F64
Inst_VOP1__V_FRACT_F64::~Inst_VOP1__V_FRACT_F64()
{
} // ~Inst_VOP1__V_FRACT_F64
// --- description from .arch file ---
// See V_FRACT_F32.
void
Inst_VOP1__V_FRACT_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
// as in V_FRACT_F32, fract is S0.d - floor(S0.d)
vdst[lane] = src[lane] - std::floor(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_FREXP_EXP_I32_F32 class methods ---
Inst_VOP1__V_FREXP_EXP_I32_F32::Inst_VOP1__V_FREXP_EXP_I32_F32(
InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_frexp_exp_i32_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP1__V_FREXP_EXP_I32_F32
Inst_VOP1__V_FREXP_EXP_I32_F32::~Inst_VOP1__V_FREXP_EXP_I32_F32()
{
} // ~Inst_VOP1__V_FREXP_EXP_I32_F32
// --- description from .arch file ---
// if(S0.f == INF || S0.f == NAN) then D.i = 0;
// else D.i = TwosComplement(Exponent(S0.f) - 127 + 1).
// Returns exponent of single precision float input, such that S0.f =
// significand * (2 ** exponent). See also FREXP_MANT_F32, which returns
// the significand.
void
Inst_VOP1__V_FREXP_EXP_I32_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
VecOperandI32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (std::isinf(src[lane]) || std::isnan(src[lane])) {
vdst[lane] = 0;
} else {
VecElemI32 exp(0);
std::frexp(src[lane], &exp);
vdst[lane] = exp;
}
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_FREXP_MANT_F32 class methods ---
Inst_VOP1__V_FREXP_MANT_F32::Inst_VOP1__V_FREXP_MANT_F32(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_frexp_mant_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP1__V_FREXP_MANT_F32
Inst_VOP1__V_FREXP_MANT_F32::~Inst_VOP1__V_FREXP_MANT_F32()
{
} // ~Inst_VOP1__V_FREXP_MANT_F32
// --- description from .arch file ---
// if(S0.f == INF || S0.f == NAN) then D.f = S0.f;
// else D.f = Mantissa(S0.f).
// Result range is in (-1.0,-0.5][0.5,1.0) in normal cases. Returns binary
// --- significand of single precision float input, such that S0.f =
// --- significand * (2 ** exponent). See also FREXP_EXP_I32_F32, which
// --- returns integer exponent.
void
Inst_VOP1__V_FREXP_MANT_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (std::isinf(src[lane]) || std::isnan(src[lane])) {
vdst[lane] = src[lane];
} else {
VecElemI32 exp(0);
vdst[lane] = std::frexp(src[lane], &exp);
}
}
}
vdst.write();
} // execute
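// Worked example tying the two FREXP halves together: for
// src = 8.0f, std::frexp returns 0.5f with exp = 4, so
// V_FREXP_MANT_F32 yields 0.5f, V_FREXP_EXP_I32_F32 yields 4, and
// 0.5f * (2 ** 4) reconstructs the original 8.0f.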
// --- Inst_VOP1__V_CLREXCP class methods ---
Inst_VOP1__V_CLREXCP::Inst_VOP1__V_CLREXCP(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_clrexcp")
{
setFlag(ALU);
} // Inst_VOP1__V_CLREXCP
Inst_VOP1__V_CLREXCP::~Inst_VOP1__V_CLREXCP()
{
} // ~Inst_VOP1__V_CLREXCP
// --- description from .arch file ---
// Clear wave's exception state in SIMD (SP).
void
Inst_VOP1__V_CLREXCP::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP1__V_CVT_F16_U16 class methods ---
Inst_VOP1__V_CVT_F16_U16::Inst_VOP1__V_CVT_F16_U16(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_cvt_f16_u16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP1__V_CVT_F16_U16
Inst_VOP1__V_CVT_F16_U16::~Inst_VOP1__V_CVT_F16_U16()
{
} // ~Inst_VOP1__V_CVT_F16_U16
// --- description from .arch file ---
// D.f16 = uint16_to_flt16(S.u16).
// Supports denormals, rounding, exception flags and saturation.
void
Inst_VOP1__V_CVT_F16_U16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP1__V_CVT_F16_I16 class methods ---
Inst_VOP1__V_CVT_F16_I16::Inst_VOP1__V_CVT_F16_I16(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_cvt_f16_i16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP1__V_CVT_F16_I16
Inst_VOP1__V_CVT_F16_I16::~Inst_VOP1__V_CVT_F16_I16()
{
} // ~Inst_VOP1__V_CVT_F16_I16
// --- description from .arch file ---
// D.f16 = int16_to_flt16(S.i16).
// Supports denormals, rounding, exception flags and saturation.
void
Inst_VOP1__V_CVT_F16_I16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP1__V_CVT_U16_F16 class methods ---
Inst_VOP1__V_CVT_U16_F16::Inst_VOP1__V_CVT_U16_F16(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_cvt_u16_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP1__V_CVT_U16_F16
Inst_VOP1__V_CVT_U16_F16::~Inst_VOP1__V_CVT_U16_F16()
{
} // ~Inst_VOP1__V_CVT_U16_F16
// --- description from .arch file ---
// D.u16 = flt16_to_uint16(S.f16).
// Supports rounding, exception flags and saturation.
void
Inst_VOP1__V_CVT_U16_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP1__V_CVT_I16_F16 class methods ---
Inst_VOP1__V_CVT_I16_F16::Inst_VOP1__V_CVT_I16_F16(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_cvt_i16_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP1__V_CVT_I16_F16
Inst_VOP1__V_CVT_I16_F16::~Inst_VOP1__V_CVT_I16_F16()
{
} // ~Inst_VOP1__V_CVT_I16_F16
// --- description from .arch file ---
// D.i16 = flt16_to_int16(S.f16).
// Supports rounding, exception flags and saturation.
void
Inst_VOP1__V_CVT_I16_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP1__V_RCP_F16 class methods ---
Inst_VOP1__V_RCP_F16::Inst_VOP1__V_RCP_F16(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_rcp_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP1__V_RCP_F16
Inst_VOP1__V_RCP_F16::~Inst_VOP1__V_RCP_F16()
{
} // ~Inst_VOP1__V_RCP_F16
// --- description from .arch file ---
// if(S0.f16 == 1.0f)
// D.f16 = 1.0f;
// else
// D.f16 = ApproximateRecip(S0.f16).
void
Inst_VOP1__V_RCP_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP1__V_SQRT_F16 class methods ---
Inst_VOP1__V_SQRT_F16::Inst_VOP1__V_SQRT_F16(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_sqrt_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP1__V_SQRT_F16
Inst_VOP1__V_SQRT_F16::~Inst_VOP1__V_SQRT_F16()
{
} // ~Inst_VOP1__V_SQRT_F16
// --- description from .arch file ---
// if(S0.f16 == 1.0f)
// D.f16 = 1.0f;
// else
// D.f16 = ApproximateSqrt(S0.f16).
void
Inst_VOP1__V_SQRT_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP1__V_RSQ_F16 class methods ---
Inst_VOP1__V_RSQ_F16::Inst_VOP1__V_RSQ_F16(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_rsq_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP1__V_RSQ_F16
Inst_VOP1__V_RSQ_F16::~Inst_VOP1__V_RSQ_F16()
{
} // ~Inst_VOP1__V_RSQ_F16
// --- description from .arch file ---
// if(S0.f16 == 1.0f)
// D.f16 = 1.0f;
// else
// D.f16 = ApproximateRecipSqrt(S0.f16).
void
Inst_VOP1__V_RSQ_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP1__V_LOG_F16 class methods ---
Inst_VOP1__V_LOG_F16::Inst_VOP1__V_LOG_F16(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_log_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP1__V_LOG_F16
Inst_VOP1__V_LOG_F16::~Inst_VOP1__V_LOG_F16()
{
} // ~Inst_VOP1__V_LOG_F16
// --- description from .arch file ---
// if(S0.f16 == 1.0f)
// D.f16 = 0.0f;
// else
// D.f16 = ApproximateLog2(S0.f16).
void
Inst_VOP1__V_LOG_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP1__V_EXP_F16 class methods ---
Inst_VOP1__V_EXP_F16::Inst_VOP1__V_EXP_F16(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_exp_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP1__V_EXP_F16
Inst_VOP1__V_EXP_F16::~Inst_VOP1__V_EXP_F16()
{
} // ~Inst_VOP1__V_EXP_F16
// --- description from .arch file ---
// if(S0.f16 == 0.0f)
// D.f16 = 1.0f;
// else
// D.f16 = Approximate2ToX(S0.f16).
void
Inst_VOP1__V_EXP_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP1__V_FREXP_MANT_F16 class methods ---
Inst_VOP1__V_FREXP_MANT_F16::Inst_VOP1__V_FREXP_MANT_F16(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_frexp_mant_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP1__V_FREXP_MANT_F16
Inst_VOP1__V_FREXP_MANT_F16::~Inst_VOP1__V_FREXP_MANT_F16()
{
} // ~Inst_VOP1__V_FREXP_MANT_F16
// --- description from .arch file ---
// if(S0.f16 == +-INF || S0.f16 == NAN)
// D.f16 = S0.f16;
// else
// D.f16 = mantissa(S0.f16).
// Result range is (-1.0,-0.5][0.5,1.0).
// C math library frexp function.
// Returns binary significand of half precision float input, such that the
// original single float = significand * (2 ** exponent).
void
Inst_VOP1__V_FREXP_MANT_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP1__V_FREXP_EXP_I16_F16 class methods ---
Inst_VOP1__V_FREXP_EXP_I16_F16::Inst_VOP1__V_FREXP_EXP_I16_F16(
InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_frexp_exp_i16_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP1__V_FREXP_EXP_I16_F16
Inst_VOP1__V_FREXP_EXP_I16_F16::~Inst_VOP1__V_FREXP_EXP_I16_F16()
{
} // ~Inst_VOP1__V_FREXP_EXP_I16_F16
// --- description from .arch file ---
// if(S0.f16 == +-INF || S0.f16 == NAN)
// D.i16 = 0;
// else
// D.i16 = 2s_complement(exponent(S0.f16) - 15 + 1).
// C math library frexp function.
// Returns exponent of half precision float input, such that the
// original single float = significand * (2 ** exponent).
void
Inst_VOP1__V_FREXP_EXP_I16_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP1__V_FLOOR_F16 class methods ---
Inst_VOP1__V_FLOOR_F16::Inst_VOP1__V_FLOOR_F16(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_floor_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP1__V_FLOOR_F16
Inst_VOP1__V_FLOOR_F16::~Inst_VOP1__V_FLOOR_F16()
{
} // ~Inst_VOP1__V_FLOOR_F16
// --- description from .arch file ---
// D.f16 = trunc(S0.f16);
// if(S0.f16 < 0.0f && S0.f16 != D.f16) then D.f16 -= 1.0f.
void
Inst_VOP1__V_FLOOR_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP1__V_CEIL_F16 class methods ---
Inst_VOP1__V_CEIL_F16::Inst_VOP1__V_CEIL_F16(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_ceil_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP1__V_CEIL_F16
Inst_VOP1__V_CEIL_F16::~Inst_VOP1__V_CEIL_F16()
{
} // ~Inst_VOP1__V_CEIL_F16
// --- description from .arch file ---
// D.f16 = trunc(S0.f16);
// if(S0.f16 > 0.0f && S0.f16 != D.f16) then D.f16 += 1.0f.
void
Inst_VOP1__V_CEIL_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP1__V_TRUNC_F16 class methods ---
Inst_VOP1__V_TRUNC_F16::Inst_VOP1__V_TRUNC_F16(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_trunc_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP1__V_TRUNC_F16
Inst_VOP1__V_TRUNC_F16::~Inst_VOP1__V_TRUNC_F16()
{
} // ~Inst_VOP1__V_TRUNC_F16
// --- description from .arch file ---
// D.f16 = trunc(S0.f16).
// Round-to-zero semantics.
void
Inst_VOP1__V_TRUNC_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP1__V_RNDNE_F16 class methods ---
Inst_VOP1__V_RNDNE_F16::Inst_VOP1__V_RNDNE_F16(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_rndne_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP1__V_RNDNE_F16
Inst_VOP1__V_RNDNE_F16::~Inst_VOP1__V_RNDNE_F16()
{
} // ~Inst_VOP1__V_RNDNE_F16
// --- description from .arch file ---
// D.f16 = FLOOR(S0.f16 + 0.5f);
// if(floor(S0.f16) is even && fract(S0.f16) == 0.5f) then D.f16 -= 1.0f.
// Round-to-nearest-even semantics.
void
Inst_VOP1__V_RNDNE_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP1__V_FRACT_F16 class methods ---
Inst_VOP1__V_FRACT_F16::Inst_VOP1__V_FRACT_F16(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_fract_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP1__V_FRACT_F16
Inst_VOP1__V_FRACT_F16::~Inst_VOP1__V_FRACT_F16()
{
} // ~Inst_VOP1__V_FRACT_F16
// --- description from .arch file ---
// D.f16 = S0.f16 + -floor(S0.f16).
void
Inst_VOP1__V_FRACT_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP1__V_SIN_F16 class methods ---
Inst_VOP1__V_SIN_F16::Inst_VOP1__V_SIN_F16(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_sin_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP1__V_SIN_F16
Inst_VOP1__V_SIN_F16::~Inst_VOP1__V_SIN_F16()
{
} // ~Inst_VOP1__V_SIN_F16
// --- description from .arch file ---
// D.f16 = sin(S0.f16 * 2 * PI).
void
Inst_VOP1__V_SIN_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP1__V_COS_F16 class methods ---
Inst_VOP1__V_COS_F16::Inst_VOP1__V_COS_F16(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_cos_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP1__V_COS_F16
Inst_VOP1__V_COS_F16::~Inst_VOP1__V_COS_F16()
{
} // ~Inst_VOP1__V_COS_F16
// --- description from .arch file ---
// D.f16 = cos(S0.f16 * 2 * PI).
void
Inst_VOP1__V_COS_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP1__V_EXP_LEGACY_F32 class methods ---
Inst_VOP1__V_EXP_LEGACY_F32::Inst_VOP1__V_EXP_LEGACY_F32(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_exp_legacy_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP1__V_EXP_LEGACY_F32
Inst_VOP1__V_EXP_LEGACY_F32::~Inst_VOP1__V_EXP_LEGACY_F32()
{
} // ~Inst_VOP1__V_EXP_LEGACY_F32
// --- description from .arch file ---
// D.f = pow(2.0, S0.f) with legacy semantics.
void
Inst_VOP1__V_EXP_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::pow(2.0, src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP1__V_LOG_LEGACY_F32 class methods ---
Inst_VOP1__V_LOG_LEGACY_F32::Inst_VOP1__V_LOG_LEGACY_F32(InFmt_VOP1 *iFmt)
: Inst_VOP1(iFmt, "v_log_legacy_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP1__V_LOG_LEGACY_F32
Inst_VOP1__V_LOG_LEGACY_F32::~Inst_VOP1__V_LOG_LEGACY_F32()
{
} // ~Inst_VOP1__V_LOG_LEGACY_F32
// --- description from .arch file ---
// D.f = log2(S0.f). Base 2 logarithm with legacy semantics.
void
Inst_VOP1__V_LOG_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::log2(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOPC__V_CMP_CLASS_F32 class methods ---
Inst_VOPC__V_CMP_CLASS_F32::Inst_VOPC__V_CMP_CLASS_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_class_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOPC__V_CMP_CLASS_F32
Inst_VOPC__V_CMP_CLASS_F32::~Inst_VOPC__V_CMP_CLASS_F32()
{
} // ~Inst_VOPC__V_CMP_CLASS_F32
// --- description from .arch file ---
// VCC = IEEE numeric class function specified in S1.u, performed on S0.f
// The function reports true if the floating point value is *any* of the
// --- numeric types selected in S1.u according to the following list:
// S1.u[0] -- value is a signaling NaN.
// S1.u[1] -- value is a quiet NaN.
// S1.u[2] -- value is negative infinity.
// S1.u[3] -- value is a negative normal value.
// S1.u[4] -- value is a negative denormal value.
// S1.u[5] -- value is negative zero.
// S1.u[6] -- value is positive zero.
// S1.u[7] -- value is a positive denormal value.
// S1.u[8] -- value is a positive normal value.
// S1.u[9] -- value is positive infinity.
void
Inst_VOPC__V_CMP_CLASS_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
// default the lane's result bit to 0; it is set to 1
// below if the value matches any selected class
vcc.setBit(lane, 0);
if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
// is NaN
if (std::isnan(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 2)) {
// is -infinity
if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 3)) {
// is -normal
if (std::isnormal(src0[lane])
&& std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 4)) {
// is -denormal
if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
&& std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 5)) {
// is -zero
if (std::fpclassify(src0[lane]) == FP_ZERO
&& std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 6)) {
// is +zero
if (std::fpclassify(src0[lane]) == FP_ZERO
&& !std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 7)) {
// is +denormal
if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
&& !std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 8)) {
// is +normal
if (std::isnormal(src0[lane])
&& !std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 9)) {
// is +infinity
if (std::isinf(src0[lane]) && !std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
}
}
vcc.write();
} // execute
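// A small sketch of how a caller could assemble the S1.u class
// mask described above; the enumerator names are illustrative,
// only the bit positions come from the .arch text.
enum ClassMaskSketch : VecElemU32
{
SNanSketch = 1 << 0, // signaling NaN
QNanSketch = 1 << 1, // quiet NaN
NegInfSketch = 1 << 2, // negative infinity
PosInfSketch = 1 << 9 // positive infinity
};
// e.g. S1.u = SNanSketch | QNanSketch | NegInfSketch | PosInfSketch
// reports true for any NaN or any infinity.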
// --- Inst_VOPC__V_CMPX_CLASS_F32 class methods ---
Inst_VOPC__V_CMPX_CLASS_F32::Inst_VOPC__V_CMPX_CLASS_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_class_f32")
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_CLASS_F32
Inst_VOPC__V_CMPX_CLASS_F32::~Inst_VOPC__V_CMPX_CLASS_F32()
{
} // ~Inst_VOPC__V_CMPX_CLASS_F32
// --- description from .arch file ---
// EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
// S0.f The function reports true if the floating point value is *any* of
// the numeric types selected in S1.u according to the following list:
// S1.u[0] -- value is a signaling NaN.
// S1.u[1] -- value is a quiet NaN.
// S1.u[2] -- value is negative infinity.
// S1.u[3] -- value is a negative normal value.
// S1.u[4] -- value is a negative denormal value.
// S1.u[5] -- value is negative zero.
// S1.u[6] -- value is positive zero.
// S1.u[7] -- value is a positive denormal value.
// S1.u[8] -- value is a positive normal value.
// S1.u[9] -- value is positive infinity.
void
Inst_VOPC__V_CMPX_CLASS_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
// default the lane's result bit to 0; it is set to 1
// below if the value matches any selected class
vcc.setBit(lane, 0);
if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
// is NaN
if (std::isnan(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 2)) {
// is -infinity
if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 3)) {
// is -normal
if (std::isnormal(src0[lane])
&& std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 4)) {
// is -denormal
if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
&& std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 5)) {
// is -zero
if (std::fpclassify(src0[lane]) == FP_ZERO
&& std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 6)) {
// is +zero
if (std::fpclassify(src0[lane]) == FP_ZERO
&& !std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 7)) {
// is +denormal
if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
&& !std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 8)) {
// is +normal
if (std::isnormal(src0[lane])
&& !std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 9)) {
// is +infinity
if (std::isinf(src0[lane]) && !std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
}
}
vcc.write();
wf->execMask() = vcc.rawData();
} // execute
// --- Inst_VOPC__V_CMP_CLASS_F64 class methods ---
Inst_VOPC__V_CMP_CLASS_F64::Inst_VOPC__V_CMP_CLASS_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_class_f64")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOPC__V_CMP_CLASS_F64
Inst_VOPC__V_CMP_CLASS_F64::~Inst_VOPC__V_CMP_CLASS_F64()
{
} // ~Inst_VOPC__V_CMP_CLASS_F64
// --- description from .arch file ---
// VCC = IEEE numeric class function specified in S1.u, performed on S0.d
// The function reports true if the floating point value is *any* of the
// --- numeric types selected in S1.u according to the following list:
// S1.u[0] -- value is a signaling NaN.
// S1.u[1] -- value is a quiet NaN.
// S1.u[2] -- value is negative infinity.
// S1.u[3] -- value is a negative normal value.
// S1.u[4] -- value is a negative denormal value.
// S1.u[5] -- value is negative zero.
// S1.u[6] -- value is positive zero.
// S1.u[7] -- value is a positive denormal value.
// S1.u[8] -- value is a positive normal value.
// S1.u[9] -- value is positive infinity.
void
Inst_VOPC__V_CMP_CLASS_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
// default the lane's result bit to 0; it is set to 1
// below if the value matches any selected class
vcc.setBit(lane, 0);
if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
// is NaN
if (std::isnan(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 2)) {
// is -infinity
if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 3)) {
// is -normal
if (std::isnormal(src0[lane])
&& std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 4)) {
// is -denormal
if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
&& std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 5)) {
// is -zero
if (std::fpclassify(src0[lane]) == FP_ZERO
&& std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 6)) {
// is +zero
if (std::fpclassify(src0[lane]) == FP_ZERO
&& !std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 7)) {
// is +denormal
if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
&& !std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 8)) {
// is +normal
if (std::isnormal(src0[lane])
&& !std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 9)) {
// is +infinity
if (std::isinf(src0[lane])
&& !std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_CLASS_F64 class methods ---
Inst_VOPC__V_CMPX_CLASS_F64::Inst_VOPC__V_CMPX_CLASS_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_class_f64")
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_CLASS_F64
Inst_VOPC__V_CMPX_CLASS_F64::~Inst_VOPC__V_CMPX_CLASS_F64()
{
} // ~Inst_VOPC__V_CMPX_CLASS_F64
// --- description from .arch file ---
// EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
// S0.d The function reports true if the floating point value is *any* of
// the numeric types selected in S1.u according to the following list:
// S1.u[0] -- value is a signaling NaN.
// S1.u[1] -- value is a quiet NaN.
// S1.u[2] -- value is negative infinity.
// S1.u[3] -- value is a negative normal value.
// S1.u[4] -- value is a negative denormal value.
// S1.u[5] -- value is negative zero.
// S1.u[6] -- value is positive zero.
// S1.u[7] -- value is a positive denormal value.
// S1.u[8] -- value is a positive normal value.
// S1.u[9] -- value is positive infinity.
void
Inst_VOPC__V_CMPX_CLASS_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
// default the lane's result bit to 0; it is set to 1
// below if the value matches any selected class
vcc.setBit(lane, 0);
if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
// is NaN
if (std::isnan(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 2)) {
// is -infinity
if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 3)) {
// is -normal
if (std::isnormal(src0[lane])
&& std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 4)) {
// is -denormal
if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
&& std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 5)) {
// is -zero
if (std::fpclassify(src0[lane]) == FP_ZERO
&& std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 6)) {
// is +zero
if (std::fpclassify(src0[lane]) == FP_ZERO
&& !std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 7)) {
// is +denormal
if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
&& !std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 8)) {
// is +normal
if (std::isnormal(src0[lane])
&& !std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 9)) {
// is +infinity
if (std::isinf(src0[lane])
&& !std::signbit(src0[lane])) {
vcc.setBit(lane, 1);
continue;
}
}
}
}
vcc.write();
wf->execMask() = vcc.rawData();
} // execute
// --- Inst_VOPC__V_CMP_CLASS_F16 class methods ---
Inst_VOPC__V_CMP_CLASS_F16::Inst_VOPC__V_CMP_CLASS_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_class_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOPC__V_CMP_CLASS_F16
Inst_VOPC__V_CMP_CLASS_F16::~Inst_VOPC__V_CMP_CLASS_F16()
{
} // ~Inst_VOPC__V_CMP_CLASS_F16
// --- description from .arch file ---
// VCC = IEEE numeric class function specified in S1.u, performed on S0.f16
// The function reports true if the floating point value is *any* of the
// --- numeric types selected in S1.u according to the following list:
// S1.u[0] -- value is a signaling NaN.
// S1.u[1] -- value is a quiet NaN.
// S1.u[2] -- value is negative infinity.
// S1.u[3] -- value is a negative normal value.
// S1.u[4] -- value is a negative denormal value.
// S1.u[5] -- value is negative zero.
// S1.u[6] -- value is positive zero.
// S1.u[7] -- value is a positive denormal value.
// S1.u[8] -- value is a positive normal value.
// S1.u[9] -- value is positive infinity.
void
Inst_VOPC__V_CMP_CLASS_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMPX_CLASS_F16 class methods ---
Inst_VOPC__V_CMPX_CLASS_F16::Inst_VOPC__V_CMPX_CLASS_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_class_f16")
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_CLASS_F16
Inst_VOPC__V_CMPX_CLASS_F16::~Inst_VOPC__V_CMPX_CLASS_F16()
{
} // ~Inst_VOPC__V_CMPX_CLASS_F16
// --- description from .arch file ---
// EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
// --- S0.f16
// The function reports true if the floating point value is *any* of the
// --- numeric types selected in S1.u according to the following list:
// S1.u[0] -- value is a signaling NaN.
// S1.u[1] -- value is a quiet NaN.
// S1.u[2] -- value is negative infinity.
// S1.u[3] -- value is a negative normal value.
// S1.u[4] -- value is a negative denormal value.
// S1.u[5] -- value is negative zero.
// S1.u[6] -- value is positive zero.
// S1.u[7] -- value is a positive denormal value.
// S1.u[8] -- value is a positive normal value.
// S1.u[9] -- value is positive infinity.
void
Inst_VOPC__V_CMPX_CLASS_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMP_F_F16 class methods ---
Inst_VOPC__V_CMP_F_F16::Inst_VOPC__V_CMP_F_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_f_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOPC__V_CMP_F_F16
Inst_VOPC__V_CMP_F_F16::~Inst_VOPC__V_CMP_F_F16()
{
} // ~Inst_VOPC__V_CMP_F_F16
// --- description from .arch file ---
// D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_F_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMP_LT_F16 class methods ---
Inst_VOPC__V_CMP_LT_F16::Inst_VOPC__V_CMP_LT_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_lt_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOPC__V_CMP_LT_F16
Inst_VOPC__V_CMP_LT_F16::~Inst_VOPC__V_CMP_LT_F16()
{
} // ~Inst_VOPC__V_CMP_LT_F16
// --- description from .arch file ---
// D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_LT_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMP_EQ_F16 class methods ---
Inst_VOPC__V_CMP_EQ_F16::Inst_VOPC__V_CMP_EQ_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_eq_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOPC__V_CMP_EQ_F16
Inst_VOPC__V_CMP_EQ_F16::~Inst_VOPC__V_CMP_EQ_F16()
{
} // ~Inst_VOPC__V_CMP_EQ_F16
// --- description from .arch file ---
// D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_EQ_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMP_LE_F16 class methods ---
Inst_VOPC__V_CMP_LE_F16::Inst_VOPC__V_CMP_LE_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_le_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOPC__V_CMP_LE_F16
Inst_VOPC__V_CMP_LE_F16::~Inst_VOPC__V_CMP_LE_F16()
{
} // ~Inst_VOPC__V_CMP_LE_F16
// --- description from .arch file ---
// D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_LE_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMP_GT_F16 class methods ---
Inst_VOPC__V_CMP_GT_F16::Inst_VOPC__V_CMP_GT_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_gt_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOPC__V_CMP_GT_F16
Inst_VOPC__V_CMP_GT_F16::~Inst_VOPC__V_CMP_GT_F16()
{
} // ~Inst_VOPC__V_CMP_GT_F16
// --- description from .arch file ---
// D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_GT_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMP_LG_F16 class methods ---
Inst_VOPC__V_CMP_LG_F16::Inst_VOPC__V_CMP_LG_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_lg_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOPC__V_CMP_LG_F16
Inst_VOPC__V_CMP_LG_F16::~Inst_VOPC__V_CMP_LG_F16()
{
} // ~Inst_VOPC__V_CMP_LG_F16
// --- description from .arch file ---
// D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_LG_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMP_GE_F16 class methods ---
Inst_VOPC__V_CMP_GE_F16::Inst_VOPC__V_CMP_GE_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_ge_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOPC__V_CMP_GE_F16
Inst_VOPC__V_CMP_GE_F16::~Inst_VOPC__V_CMP_GE_F16()
{
} // ~Inst_VOPC__V_CMP_GE_F16
// --- description from .arch file ---
// D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_GE_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMP_O_F16 class methods ---
Inst_VOPC__V_CMP_O_F16::Inst_VOPC__V_CMP_O_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_o_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOPC__V_CMP_O_F16
Inst_VOPC__V_CMP_O_F16::~Inst_VOPC__V_CMP_O_F16()
{
} // ~Inst_VOPC__V_CMP_O_F16
// --- description from .arch file ---
// D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_O_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMP_U_F16 class methods ---
Inst_VOPC__V_CMP_U_F16::Inst_VOPC__V_CMP_U_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_u_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOPC__V_CMP_U_F16
Inst_VOPC__V_CMP_U_F16::~Inst_VOPC__V_CMP_U_F16()
{
} // ~Inst_VOPC__V_CMP_U_F16
// --- description from .arch file ---
// D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_U_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMP_NGE_F16 class methods ---
Inst_VOPC__V_CMP_NGE_F16::Inst_VOPC__V_CMP_NGE_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_nge_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOPC__V_CMP_NGE_F16
Inst_VOPC__V_CMP_NGE_F16::~Inst_VOPC__V_CMP_NGE_F16()
{
} // ~Inst_VOPC__V_CMP_NGE_F16
// --- description from .arch file ---
// D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_NGE_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMP_NLG_F16 class methods ---
Inst_VOPC__V_CMP_NLG_F16::Inst_VOPC__V_CMP_NLG_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_nlg_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOPC__V_CMP_NLG_F16
Inst_VOPC__V_CMP_NLG_F16::~Inst_VOPC__V_CMP_NLG_F16()
{
} // ~Inst_VOPC__V_CMP_NLG_F16
// --- description from .arch file ---
// D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_NLG_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMP_NGT_F16 class methods ---
Inst_VOPC__V_CMP_NGT_F16::Inst_VOPC__V_CMP_NGT_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_ngt_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOPC__V_CMP_NGT_F16
Inst_VOPC__V_CMP_NGT_F16::~Inst_VOPC__V_CMP_NGT_F16()
{
} // ~Inst_VOPC__V_CMP_NGT_F16
// --- description from .arch file ---
// D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_NGT_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMP_NLE_F16 class methods ---
Inst_VOPC__V_CMP_NLE_F16::Inst_VOPC__V_CMP_NLE_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_nle_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOPC__V_CMP_NLE_F16
Inst_VOPC__V_CMP_NLE_F16::~Inst_VOPC__V_CMP_NLE_F16()
{
} // ~Inst_VOPC__V_CMP_NLE_F16
// --- description from .arch file ---
// D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_NLE_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMP_NEQ_F16 class methods ---
Inst_VOPC__V_CMP_NEQ_F16::Inst_VOPC__V_CMP_NEQ_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_neq_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOPC__V_CMP_NEQ_F16
Inst_VOPC__V_CMP_NEQ_F16::~Inst_VOPC__V_CMP_NEQ_F16()
{
} // ~Inst_VOPC__V_CMP_NEQ_F16
// --- description from .arch file ---
// D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_NEQ_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMP_NLT_F16 class methods ---
Inst_VOPC__V_CMP_NLT_F16::Inst_VOPC__V_CMP_NLT_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_nlt_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOPC__V_CMP_NLT_F16
Inst_VOPC__V_CMP_NLT_F16::~Inst_VOPC__V_CMP_NLT_F16()
{
} // ~Inst_VOPC__V_CMP_NLT_F16
// --- description from .arch file ---
// D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_NLT_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMP_TRU_F16 class methods ---
Inst_VOPC__V_CMP_TRU_F16::Inst_VOPC__V_CMP_TRU_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_tru_f16")
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOPC__V_CMP_TRU_F16
Inst_VOPC__V_CMP_TRU_F16::~Inst_VOPC__V_CMP_TRU_F16()
{
} // ~Inst_VOPC__V_CMP_TRU_F16
// --- description from .arch file ---
// D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_TRU_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMPX_F_F16 class methods ---
Inst_VOPC__V_CMPX_F_F16::Inst_VOPC__V_CMPX_F_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_f_f16")
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_F_F16
Inst_VOPC__V_CMPX_F_F16::~Inst_VOPC__V_CMPX_F_F16()
{
} // ~Inst_VOPC__V_CMPX_F_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_F_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMPX_LT_F16 class methods ---
Inst_VOPC__V_CMPX_LT_F16::Inst_VOPC__V_CMPX_LT_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_lt_f16")
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_LT_F16
Inst_VOPC__V_CMPX_LT_F16::~Inst_VOPC__V_CMPX_LT_F16()
{
} // ~Inst_VOPC__V_CMPX_LT_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_LT_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMPX_EQ_F16 class methods ---
Inst_VOPC__V_CMPX_EQ_F16::Inst_VOPC__V_CMPX_EQ_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_eq_f16")
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_EQ_F16
Inst_VOPC__V_CMPX_EQ_F16::~Inst_VOPC__V_CMPX_EQ_F16()
{
} // ~Inst_VOPC__V_CMPX_EQ_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_EQ_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMPX_LE_F16 class methods ---
Inst_VOPC__V_CMPX_LE_F16::Inst_VOPC__V_CMPX_LE_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_le_f16")
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_LE_F16
Inst_VOPC__V_CMPX_LE_F16::~Inst_VOPC__V_CMPX_LE_F16()
{
} // ~Inst_VOPC__V_CMPX_LE_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_LE_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMPX_GT_F16 class methods ---
Inst_VOPC__V_CMPX_GT_F16::Inst_VOPC__V_CMPX_GT_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_gt_f16")
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_GT_F16
Inst_VOPC__V_CMPX_GT_F16::~Inst_VOPC__V_CMPX_GT_F16()
{
} // ~Inst_VOPC__V_CMPX_GT_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_GT_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMPX_LG_F16 class methods ---
Inst_VOPC__V_CMPX_LG_F16::Inst_VOPC__V_CMPX_LG_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_lg_f16")
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_LG_F16
Inst_VOPC__V_CMPX_LG_F16::~Inst_VOPC__V_CMPX_LG_F16()
{
} // ~Inst_VOPC__V_CMPX_LG_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_LG_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMPX_GE_F16 class methods ---
Inst_VOPC__V_CMPX_GE_F16::Inst_VOPC__V_CMPX_GE_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_ge_f16")
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_GE_F16
Inst_VOPC__V_CMPX_GE_F16::~Inst_VOPC__V_CMPX_GE_F16()
{
} // ~Inst_VOPC__V_CMPX_GE_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_GE_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMPX_O_F16 class methods ---
Inst_VOPC__V_CMPX_O_F16::Inst_VOPC__V_CMPX_O_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_o_f16")
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_O_F16
Inst_VOPC__V_CMPX_O_F16::~Inst_VOPC__V_CMPX_O_F16()
{
} // ~Inst_VOPC__V_CMPX_O_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
// encoding.
void
Inst_VOPC__V_CMPX_O_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMPX_U_F16 class methods ---
Inst_VOPC__V_CMPX_U_F16::Inst_VOPC__V_CMPX_U_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_u_f16")
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_U_F16
Inst_VOPC__V_CMPX_U_F16::~Inst_VOPC__V_CMPX_U_F16()
{
} // ~Inst_VOPC__V_CMPX_U_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
// encoding.
void
Inst_VOPC__V_CMPX_U_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMPX_NGE_F16 class methods ---
Inst_VOPC__V_CMPX_NGE_F16::Inst_VOPC__V_CMPX_NGE_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_nge_f16")
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_NGE_F16
Inst_VOPC__V_CMPX_NGE_F16::~Inst_VOPC__V_CMPX_NGE_F16()
{
} // ~Inst_VOPC__V_CMPX_NGE_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_NGE_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMPX_NLG_F16 class methods ---
Inst_VOPC__V_CMPX_NLG_F16::Inst_VOPC__V_CMPX_NLG_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_nlg_f16")
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_NLG_F16
Inst_VOPC__V_CMPX_NLG_F16::~Inst_VOPC__V_CMPX_NLG_F16()
{
} // ~Inst_VOPC__V_CMPX_NLG_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_NLG_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMPX_NGT_F16 class methods ---
Inst_VOPC__V_CMPX_NGT_F16::Inst_VOPC__V_CMPX_NGT_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_ngt_f16")
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_NGT_F16
Inst_VOPC__V_CMPX_NGT_F16::~Inst_VOPC__V_CMPX_NGT_F16()
{
} // ~Inst_VOPC__V_CMPX_NGT_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_NGT_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMPX_NLE_F16 class methods ---
Inst_VOPC__V_CMPX_NLE_F16::Inst_VOPC__V_CMPX_NLE_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_nle_f16")
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_NLE_F16
Inst_VOPC__V_CMPX_NLE_F16::~Inst_VOPC__V_CMPX_NLE_F16()
{
} // ~Inst_VOPC__V_CMPX_NLE_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_NLE_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMPX_NEQ_F16 class methods ---
Inst_VOPC__V_CMPX_NEQ_F16::Inst_VOPC__V_CMPX_NEQ_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_neq_f16")
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_NEQ_F16
Inst_VOPC__V_CMPX_NEQ_F16::~Inst_VOPC__V_CMPX_NEQ_F16()
{
} // ~Inst_VOPC__V_CMPX_NEQ_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_NEQ_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMPX_NLT_F16 class methods ---
Inst_VOPC__V_CMPX_NLT_F16::Inst_VOPC__V_CMPX_NLT_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_nlt_f16")
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_NLT_F16
Inst_VOPC__V_CMPX_NLT_F16::~Inst_VOPC__V_CMPX_NLT_F16()
{
} // ~Inst_VOPC__V_CMPX_NLT_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_NLT_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOPC__V_CMPX_TRU_F16 class methods ---
Inst_VOPC__V_CMPX_TRU_F16::Inst_VOPC__V_CMPX_TRU_F16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_tru_f16")
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_TRU_F16
Inst_VOPC__V_CMPX_TRU_F16::~Inst_VOPC__V_CMPX_TRU_F16()
{
} // ~Inst_VOPC__V_CMPX_TRU_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_TRU_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
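// NOTE: every 16-bit VOPC compare above currently calls
// panicUnimplemented(), so executing one of these ends the simulation.
// A minimal sketch of how v_cmp_lt_f16 could be filled in, assuming a
// hypothetical half-to-single conversion helper (halfToFloat below is
// illustrative only, not an existing API in this file):
//
//     Wavefront *wf = gpuDynInst->wavefront();
//     ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
//     ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
//     ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
//     src0.readSrc();
//     src1.read();
//     for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
//         if (wf->execMask(lane)) {
//             float s0 = halfToFloat(bits(src0[lane], 15, 0));
//             float s1 = halfToFloat(bits(src1[lane], 15, 0));
//             vcc.setBit(lane, s0 < s1 ? 1 : 0);
//         }
//     }
//     vcc.write();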
// --- Inst_VOPC__V_CMP_F_F32 class methods ---
Inst_VOPC__V_CMP_F_F32::Inst_VOPC__V_CMP_F_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_f_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOPC__V_CMP_F_F32
Inst_VOPC__V_CMP_F_F32::~Inst_VOPC__V_CMP_F_F32()
{
} // ~Inst_VOPC__V_CMP_F_F32
// --- description from .arch file ---
// D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_F_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 0);
}
}
vcc.write();
} // execute
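// This is the first implemented compare and shows the VOPC pattern used
// throughout this section: evaluate a per-lane predicate, record it in
// the matching VCC bit for each active lane, then commit VCC. As a
// sketch, the shared loop could be factored into a helper like the one
// below (cmpF32IntoVcc is an illustrative name, not part of this file):
//
//     template<typename Pred>
//     void
//     cmpF32IntoVcc(GPUDynInstPtr gpuDynInst, ConstVecOperandF32 &src0,
//                   ConstVecOperandF32 &src1, Pred pred)
//     {
//         Wavefront *wf = gpuDynInst->wavefront();
//         ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
//         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
//             if (wf->execMask(lane)) {
//                 vcc.setBit(lane, pred(src0[lane], src1[lane]) ? 1 : 0);
//             }
//         }
//         vcc.write();
//     }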
// --- Inst_VOPC__V_CMP_LT_F32 class methods ---
Inst_VOPC__V_CMP_LT_F32::Inst_VOPC__V_CMP_LT_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_lt_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOPC__V_CMP_LT_F32
Inst_VOPC__V_CMP_LT_F32::~Inst_VOPC__V_CMP_LT_F32()
{
} // ~Inst_VOPC__V_CMP_LT_F32
// --- description from .arch file ---
// D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_LT_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_EQ_F32 class methods ---
Inst_VOPC__V_CMP_EQ_F32::Inst_VOPC__V_CMP_EQ_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_eq_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOPC__V_CMP_EQ_F32
Inst_VOPC__V_CMP_EQ_F32::~Inst_VOPC__V_CMP_EQ_F32()
{
} // ~Inst_VOPC__V_CMP_EQ_F32
// --- description from .arch file ---
// D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_EQ_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_LE_F32 class methods ---
Inst_VOPC__V_CMP_LE_F32::Inst_VOPC__V_CMP_LE_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_le_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOPC__V_CMP_LE_F32
Inst_VOPC__V_CMP_LE_F32::~Inst_VOPC__V_CMP_LE_F32()
{
} // ~Inst_VOPC__V_CMP_LE_F32
// --- description from .arch file ---
// D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_LE_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_GT_F32 class methods ---
Inst_VOPC__V_CMP_GT_F32::Inst_VOPC__V_CMP_GT_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_gt_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOPC__V_CMP_GT_F32
Inst_VOPC__V_CMP_GT_F32::~Inst_VOPC__V_CMP_GT_F32()
{
} // ~Inst_VOPC__V_CMP_GT_F32
// --- description from .arch file ---
// D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_GT_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_LG_F32 class methods ---
Inst_VOPC__V_CMP_LG_F32::Inst_VOPC__V_CMP_LG_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_lg_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOPC__V_CMP_LG_F32
Inst_VOPC__V_CMP_LG_F32::~Inst_VOPC__V_CMP_LG_F32()
{
} // ~Inst_VOPC__V_CMP_LG_F32
// --- description from .arch file ---
// D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_LG_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, (src0[lane] < src1[lane]
|| src0[lane] > src1[lane]) ? 1 : 0);
}
}
vcc.write();
} // execute
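// Note on NaN behavior: LG ("less than or greater than") is an ordered
// inequality, so writing it as (a < b || a > b) correctly yields 0 when
// either operand is NaN; both component comparisons are false for NaN
// inputs.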
// --- Inst_VOPC__V_CMP_GE_F32 class methods ---
Inst_VOPC__V_CMP_GE_F32::Inst_VOPC__V_CMP_GE_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_ge_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOPC__V_CMP_GE_F32
Inst_VOPC__V_CMP_GE_F32::~Inst_VOPC__V_CMP_GE_F32()
{
} // ~Inst_VOPC__V_CMP_GE_F32
// --- description from .arch file ---
// D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_GE_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_O_F32 class methods ---
Inst_VOPC__V_CMP_O_F32::Inst_VOPC__V_CMP_O_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_o_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOPC__V_CMP_O_F32
Inst_VOPC__V_CMP_O_F32::~Inst_VOPC__V_CMP_O_F32()
{
} // ~Inst_VOPC__V_CMP_O_F32
// --- description from .arch file ---
// D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_O_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, (!std::isnan(src0[lane])
&& !std::isnan(src1[lane])) ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_U_F32 class methods ---
Inst_VOPC__V_CMP_U_F32::Inst_VOPC__V_CMP_U_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_u_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOPC__V_CMP_U_F32
Inst_VOPC__V_CMP_U_F32::~Inst_VOPC__V_CMP_U_F32()
{
} // ~Inst_VOPC__V_CMP_U_F32
// --- description from .arch file ---
// D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_U_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, (std::isnan(src0[lane])
|| std::isnan(src1[lane])) ? 1 : 0);
}
}
vcc.write();
} // execute
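// V_CMP_O (ordered) and V_CMP_U (unordered) are exact complements: O
// sets a lane's bit when neither input is NaN, U when at least one is.
// std::isnan applies directly here because the F32 vector elements are
// ordinary IEEE-754 host floats in this model.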
// --- Inst_VOPC__V_CMP_NGE_F32 class methods ---
Inst_VOPC__V_CMP_NGE_F32::Inst_VOPC__V_CMP_NGE_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_nge_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOPC__V_CMP_NGE_F32
Inst_VOPC__V_CMP_NGE_F32::~Inst_VOPC__V_CMP_NGE_F32()
{
} // ~Inst_VOPC__V_CMP_NGE_F32
// --- description from .arch file ---
// D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_NGE_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_NLG_F32 class methods ---
Inst_VOPC__V_CMP_NLG_F32::Inst_VOPC__V_CMP_NLG_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_nlg_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOPC__V_CMP_NLG_F32
Inst_VOPC__V_CMP_NLG_F32::~Inst_VOPC__V_CMP_NLG_F32()
{
} // ~Inst_VOPC__V_CMP_NLG_F32
// --- description from .arch file ---
// D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_NLG_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, !(src0[lane] < src1[lane]
|| src0[lane] > src1[lane]) ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_NGT_F32 class methods ---
Inst_VOPC__V_CMP_NGT_F32::Inst_VOPC__V_CMP_NGT_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_ngt_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOPC__V_CMP_NGT_F32
Inst_VOPC__V_CMP_NGT_F32::~Inst_VOPC__V_CMP_NGT_F32()
{
} // ~Inst_VOPC__V_CMP_NGT_F32
// --- description from .arch file ---
// D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_NGT_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_NLE_F32 class methods ---
Inst_VOPC__V_CMP_NLE_F32::Inst_VOPC__V_CMP_NLE_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_nle_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOPC__V_CMP_NLE_F32
Inst_VOPC__V_CMP_NLE_F32::~Inst_VOPC__V_CMP_NLE_F32()
{
} // ~Inst_VOPC__V_CMP_NLE_F32
// --- description from .arch file ---
// D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_NLE_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_NEQ_F32 class methods ---
Inst_VOPC__V_CMP_NEQ_F32::Inst_VOPC__V_CMP_NEQ_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_neq_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOPC__V_CMP_NEQ_F32
Inst_VOPC__V_CMP_NEQ_F32::~Inst_VOPC__V_CMP_NEQ_F32()
{
} // ~Inst_VOPC__V_CMP_NEQ_F32
// --- description from .arch file ---
// D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_NEQ_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
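// Unlike LG above, NEQ is an unordered inequality: operator!= returns
// true when either operand is NaN, which matches the .arch definition
// !(S0 == S1), since == is always false for NaN inputs.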
// --- Inst_VOPC__V_CMP_NLT_F32 class methods ---
Inst_VOPC__V_CMP_NLT_F32::Inst_VOPC__V_CMP_NLT_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_nlt_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOPC__V_CMP_NLT_F32
Inst_VOPC__V_CMP_NLT_F32::~Inst_VOPC__V_CMP_NLT_F32()
{
} // ~Inst_VOPC__V_CMP_NLT_F32
// --- description from .arch file ---
// D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_NLT_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_TRU_F32 class methods ---
Inst_VOPC__V_CMP_TRU_F32::Inst_VOPC__V_CMP_TRU_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_tru_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOPC__V_CMP_TRU_F32
Inst_VOPC__V_CMP_TRU_F32::~Inst_VOPC__V_CMP_TRU_F32()
{
} // ~Inst_VOPC__V_CMP_TRU_F32
// --- description from .arch file ---
// D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_TRU_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 1);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_F_F32 class methods ---
Inst_VOPC__V_CMPX_F_F32::Inst_VOPC__V_CMPX_F_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_f_f32")
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_F_F32
Inst_VOPC__V_CMPX_F_F32::~Inst_VOPC__V_CMPX_F_F32()
{
} // ~Inst_VOPC__V_CMPX_F_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_F_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 0);
}
}
vcc.write();
wf->execMask() = vcc.rawData();
} // execute
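// The CMPX variants repeat the corresponding compare and additionally
// copy the result into EXEC, deactivating every lane whose predicate
// was false (v_cmpx_f_f32 therefore clears EXEC entirely). The relative
// order of vcc.write() and the EXEC update should not matter here:
// rawData() returns the operand's locally staged value rather than the
// committed register contents.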
// --- Inst_VOPC__V_CMPX_LT_F32 class methods ---
Inst_VOPC__V_CMPX_LT_F32::Inst_VOPC__V_CMPX_LT_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_lt_f32")
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_LT_F32
Inst_VOPC__V_CMPX_LT_F32::~Inst_VOPC__V_CMPX_LT_F32()
{
} // ~Inst_VOPC__V_CMPX_LT_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_LT_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
vcc.write();
wf->execMask() = vcc.rawData();
} // execute
// --- Inst_VOPC__V_CMPX_EQ_F32 class methods ---
Inst_VOPC__V_CMPX_EQ_F32::Inst_VOPC__V_CMPX_EQ_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_eq_f32")
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_EQ_F32
Inst_VOPC__V_CMPX_EQ_F32::~Inst_VOPC__V_CMPX_EQ_F32()
{
} // ~Inst_VOPC__V_CMPX_EQ_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_EQ_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
vcc.write();
wf->execMask() = vcc.rawData();
} // execute
// --- Inst_VOPC__V_CMPX_LE_F32 class methods ---
Inst_VOPC__V_CMPX_LE_F32::Inst_VOPC__V_CMPX_LE_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_le_f32")
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_LE_F32
Inst_VOPC__V_CMPX_LE_F32::~Inst_VOPC__V_CMPX_LE_F32()
{
} // ~Inst_VOPC__V_CMPX_LE_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_LE_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
vcc.write();
wf->execMask() = vcc.rawData();
} // execute
// --- Inst_VOPC__V_CMPX_GT_F32 class methods ---
Inst_VOPC__V_CMPX_GT_F32::Inst_VOPC__V_CMPX_GT_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_gt_f32")
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_GT_F32
Inst_VOPC__V_CMPX_GT_F32::~Inst_VOPC__V_CMPX_GT_F32()
{
} // ~Inst_VOPC__V_CMPX_GT_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_GT_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
vcc.write();
wf->execMask() = vcc.rawData();
} // execute
// --- Inst_VOPC__V_CMPX_LG_F32 class methods ---
Inst_VOPC__V_CMPX_LG_F32::Inst_VOPC__V_CMPX_LG_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_lg_f32")
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_LG_F32
Inst_VOPC__V_CMPX_LG_F32::~Inst_VOPC__V_CMPX_LG_F32()
{
} // ~Inst_VOPC__V_CMPX_LG_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_LG_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, (src0[lane] < src1[lane]
|| src0[lane] > src1[lane]) ? 1 : 0);
}
}
vcc.write();
wf->execMask() = vcc.rawData();
} // execute
// --- Inst_VOPC__V_CMPX_GE_F32 class methods ---
Inst_VOPC__V_CMPX_GE_F32::Inst_VOPC__V_CMPX_GE_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_ge_f32")
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_GE_F32
Inst_VOPC__V_CMPX_GE_F32::~Inst_VOPC__V_CMPX_GE_F32()
{
} // ~Inst_VOPC__V_CMPX_GE_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_GE_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
vcc.write();
wf->execMask() = vcc.rawData();
} // execute
// --- Inst_VOPC__V_CMPX_O_F32 class methods ---
Inst_VOPC__V_CMPX_O_F32::Inst_VOPC__V_CMPX_O_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_o_f32")
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_O_F32
Inst_VOPC__V_CMPX_O_F32::~Inst_VOPC__V_CMPX_O_F32()
{
} // ~Inst_VOPC__V_CMPX_O_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
// encoding.
void
Inst_VOPC__V_CMPX_O_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, (!std::isnan(src0[lane])
&& !std::isnan(src1[lane])) ? 1 : 0);
}
}
vcc.write();
wf->execMask() = vcc.rawData();
} // execute
// --- Inst_VOPC__V_CMPX_U_F32 class methods ---
Inst_VOPC__V_CMPX_U_F32::Inst_VOPC__V_CMPX_U_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_u_f32")
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_U_F32
Inst_VOPC__V_CMPX_U_F32::~Inst_VOPC__V_CMPX_U_F32()
{
} // ~Inst_VOPC__V_CMPX_U_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
// encoding.
void
Inst_VOPC__V_CMPX_U_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, (std::isnan(src0[lane])
|| std::isnan(src1[lane])) ? 1 : 0);
}
}
vcc.write();
wf->execMask() = vcc.rawData();
} // execute
// --- Inst_VOPC__V_CMPX_NGE_F32 class methods ---
Inst_VOPC__V_CMPX_NGE_F32::Inst_VOPC__V_CMPX_NGE_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_nge_f32")
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_NGE_F32
Inst_VOPC__V_CMPX_NGE_F32::~Inst_VOPC__V_CMPX_NGE_F32()
{
} // ~Inst_VOPC__V_CMPX_NGE_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_NGE_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
}
}
vcc.write();
wf->execMask() = vcc.rawData();
} // execute
// --- Inst_VOPC__V_CMPX_NLG_F32 class methods ---
Inst_VOPC__V_CMPX_NLG_F32::Inst_VOPC__V_CMPX_NLG_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_nlg_f32")
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_NLG_F32
Inst_VOPC__V_CMPX_NLG_F32::~Inst_VOPC__V_CMPX_NLG_F32()
{
} // ~Inst_VOPC__V_CMPX_NLG_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_NLG_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, !(src0[lane] < src1[lane]
|| src0[lane] > src1[lane]) ? 1 : 0);
}
}
vcc.write();
wf->execMask() = vcc.rawData();
} // execute
// --- Inst_VOPC__V_CMPX_NGT_F32 class methods ---
Inst_VOPC__V_CMPX_NGT_F32::Inst_VOPC__V_CMPX_NGT_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_ngt_f32")
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_NGT_F32
Inst_VOPC__V_CMPX_NGT_F32::~Inst_VOPC__V_CMPX_NGT_F32()
{
} // ~Inst_VOPC__V_CMPX_NGT_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_NGT_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
}
}
vcc.write();
wf->execMask() = vcc.rawData();
} // execute
// --- Inst_VOPC__V_CMPX_NLE_F32 class methods ---
Inst_VOPC__V_CMPX_NLE_F32::Inst_VOPC__V_CMPX_NLE_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_nle_f32")
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_NLE_F32
Inst_VOPC__V_CMPX_NLE_F32::~Inst_VOPC__V_CMPX_NLE_F32()
{
} // ~Inst_VOPC__V_CMPX_NLE_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_NLE_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
}
}
vcc.write();
wf->execMask() = vcc.rawData();
} // execute
// --- Inst_VOPC__V_CMPX_NEQ_F32 class methods ---
Inst_VOPC__V_CMPX_NEQ_F32::Inst_VOPC__V_CMPX_NEQ_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_neq_f32")
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_NEQ_F32
Inst_VOPC__V_CMPX_NEQ_F32::~Inst_VOPC__V_CMPX_NEQ_F32()
{
} // ~Inst_VOPC__V_CMPX_NEQ_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_NEQ_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, !(src0[lane] == src1[lane]) ? 1 : 0);
}
}
vcc.write();
wf->execMask() = vcc.rawData();
} // execute
// --- Inst_VOPC__V_CMPX_NLT_F32 class methods ---
Inst_VOPC__V_CMPX_NLT_F32::Inst_VOPC__V_CMPX_NLT_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_nlt_f32")
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_NLT_F32
Inst_VOPC__V_CMPX_NLT_F32::~Inst_VOPC__V_CMPX_NLT_F32()
{
} // ~Inst_VOPC__V_CMPX_NLT_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_NLT_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
}
}
vcc.write();
wf->execMask() = vcc.rawData();
} // execute
// --- Inst_VOPC__V_CMPX_TRU_F32 class methods ---
Inst_VOPC__V_CMPX_TRU_F32::Inst_VOPC__V_CMPX_TRU_F32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_tru_f32")
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_TRU_F32
Inst_VOPC__V_CMPX_TRU_F32::~Inst_VOPC__V_CMPX_TRU_F32()
{
} // ~Inst_VOPC__V_CMPX_TRU_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_TRU_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 1);
}
}
vcc.write();
wf->execMask() = vcc.rawData();
} // execute
// --- Inst_VOPC__V_CMP_F_F64 class methods ---
Inst_VOPC__V_CMP_F_F64::Inst_VOPC__V_CMP_F_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_f_f64")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOPC__V_CMP_F_F64
Inst_VOPC__V_CMP_F_F64::~Inst_VOPC__V_CMP_F_F64()
{
} // ~Inst_VOPC__V_CMP_F_F64
// --- description from .arch file ---
// D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_F_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 0);
}
}
vcc.write();
} // execute
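// The F64 compares below mirror the F32 group; each source element is a
// 64-bit double read from a VGPR pair, but the result is still a single
// VCC bit per lane.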
// --- Inst_VOPC__V_CMP_LT_F64 class methods ---
Inst_VOPC__V_CMP_LT_F64::Inst_VOPC__V_CMP_LT_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_lt_f64")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOPC__V_CMP_LT_F64
Inst_VOPC__V_CMP_LT_F64::~Inst_VOPC__V_CMP_LT_F64()
{
} // ~Inst_VOPC__V_CMP_LT_F64
// --- description from .arch file ---
// D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_LT_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_EQ_F64 class methods ---
Inst_VOPC__V_CMP_EQ_F64::Inst_VOPC__V_CMP_EQ_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_eq_f64")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOPC__V_CMP_EQ_F64
Inst_VOPC__V_CMP_EQ_F64::~Inst_VOPC__V_CMP_EQ_F64()
{
} // ~Inst_VOPC__V_CMP_EQ_F64
// --- description from .arch file ---
// D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_EQ_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_LE_F64 class methods ---
Inst_VOPC__V_CMP_LE_F64::Inst_VOPC__V_CMP_LE_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_le_f64")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOPC__V_CMP_LE_F64
Inst_VOPC__V_CMP_LE_F64::~Inst_VOPC__V_CMP_LE_F64()
{
} // ~Inst_VOPC__V_CMP_LE_F64
// --- description from .arch file ---
// D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_LE_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_GT_F64 class methods ---
Inst_VOPC__V_CMP_GT_F64::Inst_VOPC__V_CMP_GT_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_gt_f64")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOPC__V_CMP_GT_F64
Inst_VOPC__V_CMP_GT_F64::~Inst_VOPC__V_CMP_GT_F64()
{
} // ~Inst_VOPC__V_CMP_GT_F64
// --- description from .arch file ---
// D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_GT_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_LG_F64 class methods ---
Inst_VOPC__V_CMP_LG_F64::Inst_VOPC__V_CMP_LG_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_lg_f64")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOPC__V_CMP_LG_F64
Inst_VOPC__V_CMP_LG_F64::~Inst_VOPC__V_CMP_LG_F64()
{
} // ~Inst_VOPC__V_CMP_LG_F64
// --- description from .arch file ---
// D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_LG_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, (src0[lane] < src1[lane]
|| src0[lane] > src1[lane]) ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_GE_F64 class methods ---
Inst_VOPC__V_CMP_GE_F64::Inst_VOPC__V_CMP_GE_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_ge_f64")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOPC__V_CMP_GE_F64
Inst_VOPC__V_CMP_GE_F64::~Inst_VOPC__V_CMP_GE_F64()
{
} // ~Inst_VOPC__V_CMP_GE_F64
// --- description from .arch file ---
// D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_GE_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_O_F64 class methods ---
Inst_VOPC__V_CMP_O_F64::Inst_VOPC__V_CMP_O_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_o_f64")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOPC__V_CMP_O_F64
Inst_VOPC__V_CMP_O_F64::~Inst_VOPC__V_CMP_O_F64()
{
} // ~Inst_VOPC__V_CMP_O_F64
// --- description from .arch file ---
// D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_O_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, (!std::isnan(src0[lane])
&& !std::isnan(src1[lane])) ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_U_F64 class methods ---
Inst_VOPC__V_CMP_U_F64::Inst_VOPC__V_CMP_U_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_u_f64")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOPC__V_CMP_U_F64
Inst_VOPC__V_CMP_U_F64::~Inst_VOPC__V_CMP_U_F64()
{
} // ~Inst_VOPC__V_CMP_U_F64
// --- description from .arch file ---
// D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_U_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, (std::isnan(src0[lane])
|| std::isnan(src1[lane])) ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_NGE_F64 class methods ---
Inst_VOPC__V_CMP_NGE_F64::Inst_VOPC__V_CMP_NGE_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_nge_f64")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOPC__V_CMP_NGE_F64
Inst_VOPC__V_CMP_NGE_F64::~Inst_VOPC__V_CMP_NGE_F64()
{
} // ~Inst_VOPC__V_CMP_NGE_F64
// --- description from .arch file ---
// D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_NGE_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_NLG_F64 class methods ---
Inst_VOPC__V_CMP_NLG_F64::Inst_VOPC__V_CMP_NLG_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_nlg_f64")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOPC__V_CMP_NLG_F64
Inst_VOPC__V_CMP_NLG_F64::~Inst_VOPC__V_CMP_NLG_F64()
{
} // ~Inst_VOPC__V_CMP_NLG_F64
// --- description from .arch file ---
// D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_NLG_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, !(src0[lane] < src1[lane]
|| src0[lane] > src1[lane]) ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_NGT_F64 class methods ---
Inst_VOPC__V_CMP_NGT_F64::Inst_VOPC__V_CMP_NGT_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_ngt_f64")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOPC__V_CMP_NGT_F64
Inst_VOPC__V_CMP_NGT_F64::~Inst_VOPC__V_CMP_NGT_F64()
{
} // ~Inst_VOPC__V_CMP_NGT_F64
// --- description from .arch file ---
// D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_NGT_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_NLE_F64 class methods ---
Inst_VOPC__V_CMP_NLE_F64::Inst_VOPC__V_CMP_NLE_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_nle_f64")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOPC__V_CMP_NLE_F64
Inst_VOPC__V_CMP_NLE_F64::~Inst_VOPC__V_CMP_NLE_F64()
{
} // ~Inst_VOPC__V_CMP_NLE_F64
// --- description from .arch file ---
// D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_NLE_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_NEQ_F64 class methods ---
Inst_VOPC__V_CMP_NEQ_F64::Inst_VOPC__V_CMP_NEQ_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_neq_f64")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOPC__V_CMP_NEQ_F64
Inst_VOPC__V_CMP_NEQ_F64::~Inst_VOPC__V_CMP_NEQ_F64()
{
} // ~Inst_VOPC__V_CMP_NEQ_F64
// --- description from .arch file ---
// D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_NEQ_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_NLT_F64 class methods ---
Inst_VOPC__V_CMP_NLT_F64::Inst_VOPC__V_CMP_NLT_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_nlt_f64")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOPC__V_CMP_NLT_F64
Inst_VOPC__V_CMP_NLT_F64::~Inst_VOPC__V_CMP_NLT_F64()
{
} // ~Inst_VOPC__V_CMP_NLT_F64
// --- description from .arch file ---
// D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_NLT_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_TRU_F64 class methods ---
Inst_VOPC__V_CMP_TRU_F64::Inst_VOPC__V_CMP_TRU_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_tru_f64")
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOPC__V_CMP_TRU_F64
Inst_VOPC__V_CMP_TRU_F64::~Inst_VOPC__V_CMP_TRU_F64()
{
} // ~Inst_VOPC__V_CMP_TRU_F64
// --- description from .arch file ---
// D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_TRU_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 1);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_F_F64 class methods ---
Inst_VOPC__V_CMPX_F_F64::Inst_VOPC__V_CMPX_F_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_f_f64")
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_F_F64
Inst_VOPC__V_CMPX_F_F64::~Inst_VOPC__V_CMPX_F_F64()
{
} // ~Inst_VOPC__V_CMPX_F_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_F_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 0);
}
}
vcc.write();
wf->execMask() = vcc.rawData();
} // execute
// --- Inst_VOPC__V_CMPX_LT_F64 class methods ---
Inst_VOPC__V_CMPX_LT_F64::Inst_VOPC__V_CMPX_LT_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_lt_f64")
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_LT_F64
Inst_VOPC__V_CMPX_LT_F64::~Inst_VOPC__V_CMPX_LT_F64()
{
} // ~Inst_VOPC__V_CMPX_LT_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_LT_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
vcc.write();
wf->execMask() = vcc.rawData();
} // execute
// --- Inst_VOPC__V_CMPX_EQ_F64 class methods ---
Inst_VOPC__V_CMPX_EQ_F64::Inst_VOPC__V_CMPX_EQ_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_eq_f64")
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_EQ_F64
Inst_VOPC__V_CMPX_EQ_F64::~Inst_VOPC__V_CMPX_EQ_F64()
{
} // ~Inst_VOPC__V_CMPX_EQ_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_EQ_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
vcc.write();
wf->execMask() = vcc.rawData();
} // execute
// --- Inst_VOPC__V_CMPX_LE_F64 class methods ---
Inst_VOPC__V_CMPX_LE_F64::Inst_VOPC__V_CMPX_LE_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_le_f64")
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_LE_F64
Inst_VOPC__V_CMPX_LE_F64::~Inst_VOPC__V_CMPX_LE_F64()
{
} // ~Inst_VOPC__V_CMPX_LE_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_LE_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
vcc.write();
wf->execMask() = vcc.rawData();
} // execute
// --- Inst_VOPC__V_CMPX_GT_F64 class methods ---
Inst_VOPC__V_CMPX_GT_F64::Inst_VOPC__V_CMPX_GT_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_gt_f64")
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_GT_F64
Inst_VOPC__V_CMPX_GT_F64::~Inst_VOPC__V_CMPX_GT_F64()
{
} // ~Inst_VOPC__V_CMPX_GT_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_GT_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
vcc.write();
wf->execMask() = vcc.rawData();
} // execute
// --- Inst_VOPC__V_CMPX_LG_F64 class methods ---
Inst_VOPC__V_CMPX_LG_F64::Inst_VOPC__V_CMPX_LG_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_lg_f64")
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_LG_F64
Inst_VOPC__V_CMPX_LG_F64::~Inst_VOPC__V_CMPX_LG_F64()
{
} // ~Inst_VOPC__V_CMPX_LG_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_LG_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, (src0[lane] < src1[lane]
|| src0[lane] > src1[lane]) ? 1 : 0);
}
}
vcc.write();
wf->execMask() = vcc.rawData();
} // execute
// --- Inst_VOPC__V_CMPX_GE_F64 class methods ---
Inst_VOPC__V_CMPX_GE_F64::Inst_VOPC__V_CMPX_GE_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_ge_f64")
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_GE_F64
Inst_VOPC__V_CMPX_GE_F64::~Inst_VOPC__V_CMPX_GE_F64()
{
} // ~Inst_VOPC__V_CMPX_GE_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_GE_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
vcc.write();
wf->execMask() = vcc.rawData();
} // execute
// --- Inst_VOPC__V_CMPX_O_F64 class methods ---
Inst_VOPC__V_CMPX_O_F64::Inst_VOPC__V_CMPX_O_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_o_f64")
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_O_F64
Inst_VOPC__V_CMPX_O_F64::~Inst_VOPC__V_CMPX_O_F64()
{
} // ~Inst_VOPC__V_CMPX_O_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
// encoding.
void
Inst_VOPC__V_CMPX_O_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, (!std::isnan(src0[lane])
&& !std::isnan(src1[lane])) ? 1 : 0);
}
}
vcc.write();
wf->execMask() = vcc.rawData();
} // execute
// --- Inst_VOPC__V_CMPX_U_F64 class methods ---
Inst_VOPC__V_CMPX_U_F64::Inst_VOPC__V_CMPX_U_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_u_f64")
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_U_F64
Inst_VOPC__V_CMPX_U_F64::~Inst_VOPC__V_CMPX_U_F64()
{
} // ~Inst_VOPC__V_CMPX_U_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
// encoding.
void
Inst_VOPC__V_CMPX_U_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, (std::isnan(src0[lane])
|| std::isnan(src1[lane])) ? 1 : 0);
}
}
vcc.write();
wf->execMask() = vcc.rawData();
} // execute
// --- Inst_VOPC__V_CMPX_NGE_F64 class methods ---
Inst_VOPC__V_CMPX_NGE_F64::Inst_VOPC__V_CMPX_NGE_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_nge_f64")
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_NGE_F64
Inst_VOPC__V_CMPX_NGE_F64::~Inst_VOPC__V_CMPX_NGE_F64()
{
} // ~Inst_VOPC__V_CMPX_NGE_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_NGE_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
}
}
vcc.write();
wf->execMask() = vcc.rawData();
} // execute
// --- Inst_VOPC__V_CMPX_NLG_F64 class methods ---
Inst_VOPC__V_CMPX_NLG_F64::Inst_VOPC__V_CMPX_NLG_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_nlg_f64")
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_NLG_F64
Inst_VOPC__V_CMPX_NLG_F64::~Inst_VOPC__V_CMPX_NLG_F64()
{
} // ~Inst_VOPC__V_CMPX_NLG_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
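// "<>" is the .arch file's ordered not-equal (less-than or
// greater-than), so !(S0 <> S1) is true when the sources compare equal
// or when either one is a NaN.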
void
Inst_VOPC__V_CMPX_NLG_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, !(src0[lane] < src1[lane]
|| src0[lane] > src1[lane]) ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_NGT_F64 class methods ---
Inst_VOPC__V_CMPX_NGT_F64::Inst_VOPC__V_CMPX_NGT_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_ngt_f64")
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_NGT_F64
Inst_VOPC__V_CMPX_NGT_F64::~Inst_VOPC__V_CMPX_NGT_F64()
{
} // ~Inst_VOPC__V_CMPX_NGT_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_NGT_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_NLE_F64 class methods ---
Inst_VOPC__V_CMPX_NLE_F64::Inst_VOPC__V_CMPX_NLE_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_nle_f64")
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_NLE_F64
Inst_VOPC__V_CMPX_NLE_F64::~Inst_VOPC__V_CMPX_NLE_F64()
{
} // ~Inst_VOPC__V_CMPX_NLE_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_NLE_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_NEQ_F64 class methods ---
Inst_VOPC__V_CMPX_NEQ_F64::Inst_VOPC__V_CMPX_NEQ_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_neq_f64")
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_NEQ_F64
Inst_VOPC__V_CMPX_NEQ_F64::~Inst_VOPC__V_CMPX_NEQ_F64()
{
} // ~Inst_VOPC__V_CMPX_NEQ_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
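// C++ operator!= on doubles is the exact negation of ==, returning true
// for unordered (NaN) operands, so the direct != below matches
// !(S0 == S1) without an explicit negation.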
void
Inst_VOPC__V_CMPX_NEQ_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_NLT_F64 class methods ---
Inst_VOPC__V_CMPX_NLT_F64::Inst_VOPC__V_CMPX_NLT_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_nlt_f64")
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_NLT_F64
Inst_VOPC__V_CMPX_NLT_F64::~Inst_VOPC__V_CMPX_NLT_F64()
{
} // ~Inst_VOPC__V_CMPX_NLT_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_NLT_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_TRU_F64 class methods ---
Inst_VOPC__V_CMPX_TRU_F64::Inst_VOPC__V_CMPX_TRU_F64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_tru_f64")
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_TRU_F64
Inst_VOPC__V_CMPX_TRU_F64::~Inst_VOPC__V_CMPX_TRU_F64()
{
} // ~Inst_VOPC__V_CMPX_TRU_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_TRU_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 1);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_F_I16 class methods ---
Inst_VOPC__V_CMP_F_I16::Inst_VOPC__V_CMP_F_I16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_f_i16")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_F_I16
Inst_VOPC__V_CMP_F_I16::~Inst_VOPC__V_CMP_F_I16()
{
} // ~Inst_VOPC__V_CMP_F_I16
// --- description from .arch file ---
// D.u64[threadID] = 0; D = VCC in VOPC encoding.
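// The "F" (always false) compare writes 0 for every active lane; it
// presumably exists to keep the compare-opcode table complete.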
void
Inst_VOPC__V_CMP_F_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_LT_I16 class methods ---
Inst_VOPC__V_CMP_LT_I16::Inst_VOPC__V_CMP_LT_I16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_lt_i16")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_LT_I16
Inst_VOPC__V_CMP_LT_I16::~Inst_VOPC__V_CMP_LT_I16()
{
} // ~Inst_VOPC__V_CMP_LT_I16
// --- description from .arch file ---
// D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_LT_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_EQ_I16 class methods ---
Inst_VOPC__V_CMP_EQ_I16::Inst_VOPC__V_CMP_EQ_I16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_eq_i16")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_EQ_I16
Inst_VOPC__V_CMP_EQ_I16::~Inst_VOPC__V_CMP_EQ_I16()
{
} // ~Inst_VOPC__V_CMP_EQ_I16
// --- description from .arch file ---
// D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_EQ_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_LE_I16 class methods ---
Inst_VOPC__V_CMP_LE_I16::Inst_VOPC__V_CMP_LE_I16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_le_i16")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_LE_I16
Inst_VOPC__V_CMP_LE_I16::~Inst_VOPC__V_CMP_LE_I16()
{
} // ~Inst_VOPC__V_CMP_LE_I16
// --- description from .arch file ---
// D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_LE_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_GT_I16 class methods ---
Inst_VOPC__V_CMP_GT_I16::Inst_VOPC__V_CMP_GT_I16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_gt_i16")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_GT_I16
Inst_VOPC__V_CMP_GT_I16::~Inst_VOPC__V_CMP_GT_I16()
{
} // ~Inst_VOPC__V_CMP_GT_I16
// --- description from .arch file ---
// D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_GT_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_NE_I16 class methods ---
Inst_VOPC__V_CMP_NE_I16::Inst_VOPC__V_CMP_NE_I16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_ne_i16")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_NE_I16
Inst_VOPC__V_CMP_NE_I16::~Inst_VOPC__V_CMP_NE_I16()
{
} // ~Inst_VOPC__V_CMP_NE_I16
// --- description from .arch file ---
// D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
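// For integer sources "<>" is plain not-equal; there is no unordered
// case to consider.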
void
Inst_VOPC__V_CMP_NE_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_GE_I16 class methods ---
Inst_VOPC__V_CMP_GE_I16::Inst_VOPC__V_CMP_GE_I16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_ge_i16")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_GE_I16
Inst_VOPC__V_CMP_GE_I16::~Inst_VOPC__V_CMP_GE_I16()
{
} // ~Inst_VOPC__V_CMP_GE_I16
// --- description from .arch file ---
// D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_GE_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_T_I16 class methods ---
Inst_VOPC__V_CMP_T_I16::Inst_VOPC__V_CMP_T_I16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_t_i16")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_T_I16
Inst_VOPC__V_CMP_T_I16::~Inst_VOPC__V_CMP_T_I16()
{
} // ~Inst_VOPC__V_CMP_T_I16
// --- description from .arch file ---
// D.u64[threadID] = 1; D = VCC in VOPC encoding.
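// The "T" (always true) compare writes 1 for every active lane, so VCC
// effectively becomes a copy of the current EXEC mask (inactive-lane
// bits are never set).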
void
Inst_VOPC__V_CMP_T_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 1);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_F_U16 class methods ---
Inst_VOPC__V_CMP_F_U16::Inst_VOPC__V_CMP_F_U16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_f_u16")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_F_U16
Inst_VOPC__V_CMP_F_U16::~Inst_VOPC__V_CMP_F_U16()
{
} // ~Inst_VOPC__V_CMP_F_U16
// --- description from .arch file ---
// D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_F_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_LT_U16 class methods ---
Inst_VOPC__V_CMP_LT_U16::Inst_VOPC__V_CMP_LT_U16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_lt_u16")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_LT_U16
Inst_VOPC__V_CMP_LT_U16::~Inst_VOPC__V_CMP_LT_U16()
{
} // ~Inst_VOPC__V_CMP_LT_U16
// --- description from .arch file ---
// D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_LT_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_EQ_U16 class methods ---
Inst_VOPC__V_CMP_EQ_U16::Inst_VOPC__V_CMP_EQ_U16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_eq_u16")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_EQ_U16
Inst_VOPC__V_CMP_EQ_U16::~Inst_VOPC__V_CMP_EQ_U16()
{
} // ~Inst_VOPC__V_CMP_EQ_U16
// --- description from .arch file ---
// D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_EQ_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_LE_U16 class methods ---
Inst_VOPC__V_CMP_LE_U16::Inst_VOPC__V_CMP_LE_U16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_le_u16")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_LE_U16
Inst_VOPC__V_CMP_LE_U16::~Inst_VOPC__V_CMP_LE_U16()
{
} // ~Inst_VOPC__V_CMP_LE_U16
// --- description from .arch file ---
// D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_LE_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_GT_U16 class methods ---
Inst_VOPC__V_CMP_GT_U16::Inst_VOPC__V_CMP_GT_U16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_gt_u16")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_GT_U16
Inst_VOPC__V_CMP_GT_U16::~Inst_VOPC__V_CMP_GT_U16()
{
} // ~Inst_VOPC__V_CMP_GT_U16
// --- description from .arch file ---
// D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_GT_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_NE_U16 class methods ---
Inst_VOPC__V_CMP_NE_U16::Inst_VOPC__V_CMP_NE_U16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_ne_u16")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_NE_U16
Inst_VOPC__V_CMP_NE_U16::~Inst_VOPC__V_CMP_NE_U16()
{
} // ~Inst_VOPC__V_CMP_NE_U16
// --- description from .arch file ---
// D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_NE_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_GE_U16 class methods ---
Inst_VOPC__V_CMP_GE_U16::Inst_VOPC__V_CMP_GE_U16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_ge_u16")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_GE_U16
Inst_VOPC__V_CMP_GE_U16::~Inst_VOPC__V_CMP_GE_U16()
{
} // ~Inst_VOPC__V_CMP_GE_U16
// --- description from .arch file ---
// D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_GE_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_T_U16 class methods ---
Inst_VOPC__V_CMP_T_U16::Inst_VOPC__V_CMP_T_U16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_t_u16")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_T_U16
Inst_VOPC__V_CMP_T_U16::~Inst_VOPC__V_CMP_T_U16()
{
} // ~Inst_VOPC__V_CMP_T_U16
// --- description from .arch file ---
// D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_T_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 1);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_F_I16 class methods ---
Inst_VOPC__V_CMPX_F_I16::Inst_VOPC__V_CMPX_F_I16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_f_i16")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_F_I16
Inst_VOPC__V_CMPX_F_I16::~Inst_VOPC__V_CMPX_F_I16()
{
} // ~Inst_VOPC__V_CMPX_F_I16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
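// CMPX with the always-false compare leaves every VCC bit clear and
// then copies the result into EXEC, effectively disabling all lanes
// until EXEC is restored by a later instruction.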
void
Inst_VOPC__V_CMPX_F_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_LT_I16 class methods ---
Inst_VOPC__V_CMPX_LT_I16::Inst_VOPC__V_CMPX_LT_I16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_lt_i16")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_LT_I16
Inst_VOPC__V_CMPX_LT_I16::~Inst_VOPC__V_CMPX_LT_I16()
{
} // ~Inst_VOPC__V_CMPX_LT_I16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_LT_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_EQ_I16 class methods ---
Inst_VOPC__V_CMPX_EQ_I16::Inst_VOPC__V_CMPX_EQ_I16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_eq_i16")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_EQ_I16
Inst_VOPC__V_CMPX_EQ_I16::~Inst_VOPC__V_CMPX_EQ_I16()
{
} // ~Inst_VOPC__V_CMPX_EQ_I16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_EQ_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_LE_I16 class methods ---
Inst_VOPC__V_CMPX_LE_I16::Inst_VOPC__V_CMPX_LE_I16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_le_i16")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_LE_I16
Inst_VOPC__V_CMPX_LE_I16::~Inst_VOPC__V_CMPX_LE_I16()
{
} // ~Inst_VOPC__V_CMPX_LE_I16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_LE_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_GT_I16 class methods ---
Inst_VOPC__V_CMPX_GT_I16::Inst_VOPC__V_CMPX_GT_I16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_gt_i16")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_GT_I16
Inst_VOPC__V_CMPX_GT_I16::~Inst_VOPC__V_CMPX_GT_I16()
{
} // ~Inst_VOPC__V_CMPX_GT_I16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_GT_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_NE_I16 class methods ---
Inst_VOPC__V_CMPX_NE_I16::Inst_VOPC__V_CMPX_NE_I16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_ne_i16")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_NE_I16
Inst_VOPC__V_CMPX_NE_I16::~Inst_VOPC__V_CMPX_NE_I16()
{
} // ~Inst_VOPC__V_CMPX_NE_I16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_NE_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_GE_I16 class methods ---
Inst_VOPC__V_CMPX_GE_I16::Inst_VOPC__V_CMPX_GE_I16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_ge_i16")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_GE_I16
Inst_VOPC__V_CMPX_GE_I16::~Inst_VOPC__V_CMPX_GE_I16()
{
} // ~Inst_VOPC__V_CMPX_GE_I16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_GE_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_T_I16 class methods ---
Inst_VOPC__V_CMPX_T_I16::Inst_VOPC__V_CMPX_T_I16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_t_i16")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_T_I16
Inst_VOPC__V_CMPX_T_I16::~Inst_VOPC__V_CMPX_T_I16()
{
} // ~Inst_VOPC__V_CMPX_T_I16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
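// CMPX with the always-true compare sets a 1 for each active lane and
// writes that mask back to EXEC, leaving the set of active lanes
// unchanged while also mirroring it into VCC (assuming the mask buffer
// starts zeroed).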
void
Inst_VOPC__V_CMPX_T_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 1);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_F_U16 class methods ---
Inst_VOPC__V_CMPX_F_U16::Inst_VOPC__V_CMPX_F_U16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_f_u16")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_F_U16
Inst_VOPC__V_CMPX_F_U16::~Inst_VOPC__V_CMPX_F_U16()
{
} // ~Inst_VOPC__V_CMPX_F_U16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_F_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_LT_U16 class methods ---
Inst_VOPC__V_CMPX_LT_U16::Inst_VOPC__V_CMPX_LT_U16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_lt_u16")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_LT_U16
Inst_VOPC__V_CMPX_LT_U16::~Inst_VOPC__V_CMPX_LT_U16()
{
} // ~Inst_VOPC__V_CMPX_LT_U16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_LT_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_EQ_U16 class methods ---
Inst_VOPC__V_CMPX_EQ_U16::Inst_VOPC__V_CMPX_EQ_U16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_eq_u16")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_EQ_U16
Inst_VOPC__V_CMPX_EQ_U16::~Inst_VOPC__V_CMPX_EQ_U16()
{
} // ~Inst_VOPC__V_CMPX_EQ_U16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_EQ_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_LE_U16 class methods ---
Inst_VOPC__V_CMPX_LE_U16::Inst_VOPC__V_CMPX_LE_U16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_le_u16")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_LE_U16
Inst_VOPC__V_CMPX_LE_U16::~Inst_VOPC__V_CMPX_LE_U16()
{
} // ~Inst_VOPC__V_CMPX_LE_U16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_LE_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_GT_U16 class methods ---
Inst_VOPC__V_CMPX_GT_U16::Inst_VOPC__V_CMPX_GT_U16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_gt_u16")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_GT_U16
Inst_VOPC__V_CMPX_GT_U16::~Inst_VOPC__V_CMPX_GT_U16()
{
} // ~Inst_VOPC__V_CMPX_GT_U16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_GT_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_NE_U16 class methods ---
Inst_VOPC__V_CMPX_NE_U16::Inst_VOPC__V_CMPX_NE_U16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_ne_u16")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_NE_U16
Inst_VOPC__V_CMPX_NE_U16::~Inst_VOPC__V_CMPX_NE_U16()
{
} // ~Inst_VOPC__V_CMPX_NE_U16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_NE_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_GE_U16 class methods ---
Inst_VOPC__V_CMPX_GE_U16::Inst_VOPC__V_CMPX_GE_U16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_ge_u16")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_GE_U16
Inst_VOPC__V_CMPX_GE_U16::~Inst_VOPC__V_CMPX_GE_U16()
{
} // ~Inst_VOPC__V_CMPX_GE_U16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_GE_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_T_U16 class methods ---
Inst_VOPC__V_CMPX_T_U16::Inst_VOPC__V_CMPX_T_U16(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_t_u16")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_T_U16
Inst_VOPC__V_CMPX_T_U16::~Inst_VOPC__V_CMPX_T_U16()
{
} // ~Inst_VOPC__V_CMPX_T_U16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_T_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 1);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_F_I32 class methods ---
Inst_VOPC__V_CMP_F_I32::Inst_VOPC__V_CMP_F_I32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_f_i32")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_F_I32
Inst_VOPC__V_CMP_F_I32::~Inst_VOPC__V_CMP_F_I32()
{
} // ~Inst_VOPC__V_CMP_F_I32
// --- description from .arch file ---
// D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_F_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_LT_I32 class methods ---
Inst_VOPC__V_CMP_LT_I32::Inst_VOPC__V_CMP_LT_I32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_lt_i32")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_LT_I32
Inst_VOPC__V_CMP_LT_I32::~Inst_VOPC__V_CMP_LT_I32()
{
} // ~Inst_VOPC__V_CMP_LT_I32
// --- description from .arch file ---
// D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_EQ_I32 class methods ---
Inst_VOPC__V_CMP_EQ_I32::Inst_VOPC__V_CMP_EQ_I32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_eq_i32")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_EQ_I32
Inst_VOPC__V_CMP_EQ_I32::~Inst_VOPC__V_CMP_EQ_I32()
{
} // ~Inst_VOPC__V_CMP_EQ_I32
// --- description from .arch file ---
// D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_LE_I32 class methods ---
Inst_VOPC__V_CMP_LE_I32::Inst_VOPC__V_CMP_LE_I32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_le_i32")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_LE_I32
Inst_VOPC__V_CMP_LE_I32::~Inst_VOPC__V_CMP_LE_I32()
{
} // ~Inst_VOPC__V_CMP_LE_I32
// --- description from .arch file ---
// D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_GT_I32 class methods ---
Inst_VOPC__V_CMP_GT_I32::Inst_VOPC__V_CMP_GT_I32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_gt_i32")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_GT_I32
Inst_VOPC__V_CMP_GT_I32::~Inst_VOPC__V_CMP_GT_I32()
{
} // ~Inst_VOPC__V_CMP_GT_I32
// --- description from .arch file ---
// D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_NE_I32 class methods ---
Inst_VOPC__V_CMP_NE_I32::Inst_VOPC__V_CMP_NE_I32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_ne_i32")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_NE_I32
Inst_VOPC__V_CMP_NE_I32::~Inst_VOPC__V_CMP_NE_I32()
{
} // ~Inst_VOPC__V_CMP_NE_I32
// --- description from .arch file ---
// D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_NE_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_GE_I32 class methods ---
Inst_VOPC__V_CMP_GE_I32::Inst_VOPC__V_CMP_GE_I32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_ge_i32")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_GE_I32
Inst_VOPC__V_CMP_GE_I32::~Inst_VOPC__V_CMP_GE_I32()
{
} // ~Inst_VOPC__V_CMP_GE_I32
// --- description from .arch file ---
// D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_T_I32 class methods ---
Inst_VOPC__V_CMP_T_I32::Inst_VOPC__V_CMP_T_I32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_t_i32")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_T_I32
Inst_VOPC__V_CMP_T_I32::~Inst_VOPC__V_CMP_T_I32()
{
} // ~Inst_VOPC__V_CMP_T_I32
// --- description from .arch file ---
// D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_T_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 1);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_F_U32 class methods ---
Inst_VOPC__V_CMP_F_U32::Inst_VOPC__V_CMP_F_U32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_f_u32")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_F_U32
Inst_VOPC__V_CMP_F_U32::~Inst_VOPC__V_CMP_F_U32()
{
} // ~Inst_VOPC__V_CMP_F_U32
// --- description from .arch file ---
// D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_F_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_LT_U32 class methods ---
Inst_VOPC__V_CMP_LT_U32::Inst_VOPC__V_CMP_LT_U32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_lt_u32")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_LT_U32
Inst_VOPC__V_CMP_LT_U32::~Inst_VOPC__V_CMP_LT_U32()
{
} // ~Inst_VOPC__V_CMP_LT_U32
// --- description from .arch file ---
// D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_EQ_U32 class methods ---
Inst_VOPC__V_CMP_EQ_U32::Inst_VOPC__V_CMP_EQ_U32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_eq_u32")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_EQ_U32
Inst_VOPC__V_CMP_EQ_U32::~Inst_VOPC__V_CMP_EQ_U32()
{
} // ~Inst_VOPC__V_CMP_EQ_U32
// --- description from .arch file ---
// D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_LE_U32 class methods ---
Inst_VOPC__V_CMP_LE_U32::Inst_VOPC__V_CMP_LE_U32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_le_u32")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_LE_U32
Inst_VOPC__V_CMP_LE_U32::~Inst_VOPC__V_CMP_LE_U32()
{
} // ~Inst_VOPC__V_CMP_LE_U32
// --- description from .arch file ---
// D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_GT_U32 class methods ---
Inst_VOPC__V_CMP_GT_U32::Inst_VOPC__V_CMP_GT_U32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_gt_u32")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_GT_U32
Inst_VOPC__V_CMP_GT_U32::~Inst_VOPC__V_CMP_GT_U32()
{
} // ~Inst_VOPC__V_CMP_GT_U32
// --- description from .arch file ---
// D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_NE_U32 class methods ---
Inst_VOPC__V_CMP_NE_U32::Inst_VOPC__V_CMP_NE_U32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_ne_u32")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_NE_U32
Inst_VOPC__V_CMP_NE_U32::~Inst_VOPC__V_CMP_NE_U32()
{
} // ~Inst_VOPC__V_CMP_NE_U32
// --- description from .arch file ---
// D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_NE_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_GE_U32 class methods ---
Inst_VOPC__V_CMP_GE_U32::Inst_VOPC__V_CMP_GE_U32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_ge_u32")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_GE_U32
Inst_VOPC__V_CMP_GE_U32::~Inst_VOPC__V_CMP_GE_U32()
{
} // ~Inst_VOPC__V_CMP_GE_U32
// --- description from .arch file ---
// D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_T_U32 class methods ---
Inst_VOPC__V_CMP_T_U32::Inst_VOPC__V_CMP_T_U32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_t_u32")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_T_U32
Inst_VOPC__V_CMP_T_U32::~Inst_VOPC__V_CMP_T_U32()
{
} // ~Inst_VOPC__V_CMP_T_U32
// --- description from .arch file ---
// D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_T_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 1);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_F_I32 class methods ---
Inst_VOPC__V_CMPX_F_I32::Inst_VOPC__V_CMPX_F_I32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_f_i32")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_F_I32
Inst_VOPC__V_CMPX_F_I32::~Inst_VOPC__V_CMPX_F_I32()
{
} // ~Inst_VOPC__V_CMPX_F_I32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_F_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_LT_I32 class methods ---
Inst_VOPC__V_CMPX_LT_I32::Inst_VOPC__V_CMPX_LT_I32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_lt_i32")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_LT_I32
Inst_VOPC__V_CMPX_LT_I32::~Inst_VOPC__V_CMPX_LT_I32()
{
} // ~Inst_VOPC__V_CMPX_LT_I32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_LT_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_EQ_I32 class methods ---
Inst_VOPC__V_CMPX_EQ_I32::Inst_VOPC__V_CMPX_EQ_I32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_eq_i32")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_EQ_I32
Inst_VOPC__V_CMPX_EQ_I32::~Inst_VOPC__V_CMPX_EQ_I32()
{
} // ~Inst_VOPC__V_CMPX_EQ_I32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_LE_I32 class methods ---
Inst_VOPC__V_CMPX_LE_I32::Inst_VOPC__V_CMPX_LE_I32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_le_i32")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_LE_I32
Inst_VOPC__V_CMPX_LE_I32::~Inst_VOPC__V_CMPX_LE_I32()
{
} // ~Inst_VOPC__V_CMPX_LE_I32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_LE_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_GT_I32 class methods ---
Inst_VOPC__V_CMPX_GT_I32::Inst_VOPC__V_CMPX_GT_I32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_gt_i32")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_GT_I32
Inst_VOPC__V_CMPX_GT_I32::~Inst_VOPC__V_CMPX_GT_I32()
{
} // ~Inst_VOPC__V_CMPX_GT_I32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_GT_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_NE_I32 class methods ---
Inst_VOPC__V_CMPX_NE_I32::Inst_VOPC__V_CMPX_NE_I32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_ne_i32")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_NE_I32
Inst_VOPC__V_CMPX_NE_I32::~Inst_VOPC__V_CMPX_NE_I32()
{
} // ~Inst_VOPC__V_CMPX_NE_I32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_NE_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_GE_I32 class methods ---
Inst_VOPC__V_CMPX_GE_I32::Inst_VOPC__V_CMPX_GE_I32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_ge_i32")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_GE_I32
Inst_VOPC__V_CMPX_GE_I32::~Inst_VOPC__V_CMPX_GE_I32()
{
} // ~Inst_VOPC__V_CMPX_GE_I32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_GE_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_T_I32 class methods ---
Inst_VOPC__V_CMPX_T_I32::Inst_VOPC__V_CMPX_T_I32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_t_i32")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_T_I32
Inst_VOPC__V_CMPX_T_I32::~Inst_VOPC__V_CMPX_T_I32()
{
} // ~Inst_VOPC__V_CMPX_T_I32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_T_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 1);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_F_U32 class methods ---
Inst_VOPC__V_CMPX_F_U32::Inst_VOPC__V_CMPX_F_U32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_f_u32")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_F_U32
Inst_VOPC__V_CMPX_F_U32::~Inst_VOPC__V_CMPX_F_U32()
{
} // ~Inst_VOPC__V_CMPX_F_U32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_F_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_LT_U32 class methods ---
Inst_VOPC__V_CMPX_LT_U32::Inst_VOPC__V_CMPX_LT_U32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_lt_u32")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_LT_U32
Inst_VOPC__V_CMPX_LT_U32::~Inst_VOPC__V_CMPX_LT_U32()
{
} // ~Inst_VOPC__V_CMPX_LT_U32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_LT_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_EQ_U32 class methods ---
Inst_VOPC__V_CMPX_EQ_U32::Inst_VOPC__V_CMPX_EQ_U32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_eq_u32")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_EQ_U32
Inst_VOPC__V_CMPX_EQ_U32::~Inst_VOPC__V_CMPX_EQ_U32()
{
} // ~Inst_VOPC__V_CMPX_EQ_U32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_LE_U32 class methods ---
Inst_VOPC__V_CMPX_LE_U32::Inst_VOPC__V_CMPX_LE_U32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_le_u32")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_LE_U32
Inst_VOPC__V_CMPX_LE_U32::~Inst_VOPC__V_CMPX_LE_U32()
{
} // ~Inst_VOPC__V_CMPX_LE_U32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_LE_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_GT_U32 class methods ---
Inst_VOPC__V_CMPX_GT_U32::Inst_VOPC__V_CMPX_GT_U32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_gt_u32")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_GT_U32
Inst_VOPC__V_CMPX_GT_U32::~Inst_VOPC__V_CMPX_GT_U32()
{
} // ~Inst_VOPC__V_CMPX_GT_U32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_GT_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_NE_U32 class methods ---
Inst_VOPC__V_CMPX_NE_U32::Inst_VOPC__V_CMPX_NE_U32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_ne_u32")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_NE_U32
Inst_VOPC__V_CMPX_NE_U32::~Inst_VOPC__V_CMPX_NE_U32()
{
} // ~Inst_VOPC__V_CMPX_NE_U32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_NE_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_GE_U32 class methods ---
Inst_VOPC__V_CMPX_GE_U32::Inst_VOPC__V_CMPX_GE_U32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_ge_u32")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_GE_U32
Inst_VOPC__V_CMPX_GE_U32::~Inst_VOPC__V_CMPX_GE_U32()
{
} // ~Inst_VOPC__V_CMPX_GE_U32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_GE_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_T_U32 class methods ---
Inst_VOPC__V_CMPX_T_U32::Inst_VOPC__V_CMPX_T_U32(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_t_u32")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_T_U32
Inst_VOPC__V_CMPX_T_U32::~Inst_VOPC__V_CMPX_T_U32()
{
} // ~Inst_VOPC__V_CMPX_T_U32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_T_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 1);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_F_I64 class methods ---
Inst_VOPC__V_CMP_F_I64::Inst_VOPC__V_CMP_F_I64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_f_i64")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_F_I64
Inst_VOPC__V_CMP_F_I64::~Inst_VOPC__V_CMP_F_I64()
{
} // ~Inst_VOPC__V_CMP_F_I64
// --- description from .arch file ---
// D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_F_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 0);
}
}
vcc.write();
} // execute
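// Note: the F ("false") and T ("true") compares ignore their sources and
// write a constant 0 or 1 for each active lane, so they serve as mask
// initializers. A hedged sketch (operands illustrative):
//
//     v_cmp_f_i64 vcc, v[0:1], v[0:1]   ; clear the vcc bit of each active lane
//     v_cmp_t_i64 vcc, v[0:1], v[0:1]   ; vcc mirrors exec over active lanes
//
// As implemented here, bits belonging to inactive lanes keep their
// previous contents, since setBit() is only called under execMask(lane).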
// --- Inst_VOPC__V_CMP_LT_I64 class methods ---
Inst_VOPC__V_CMP_LT_I64::Inst_VOPC__V_CMP_LT_I64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_lt_i64")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_LT_I64
Inst_VOPC__V_CMP_LT_I64::~Inst_VOPC__V_CMP_LT_I64()
{
} // ~Inst_VOPC__V_CMP_LT_I64
// --- description from .arch file ---
// D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_LT_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_EQ_I64 class methods ---
Inst_VOPC__V_CMP_EQ_I64::Inst_VOPC__V_CMP_EQ_I64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_eq_i64")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_EQ_I64
Inst_VOPC__V_CMP_EQ_I64::~Inst_VOPC__V_CMP_EQ_I64()
{
} // ~Inst_VOPC__V_CMP_EQ_I64
// --- description from .arch file ---
// D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_EQ_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_LE_I64 class methods ---
Inst_VOPC__V_CMP_LE_I64::Inst_VOPC__V_CMP_LE_I64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_le_i64")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_LE_I64
Inst_VOPC__V_CMP_LE_I64::~Inst_VOPC__V_CMP_LE_I64()
{
} // ~Inst_VOPC__V_CMP_LE_I64
// --- description from .arch file ---
// D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_LE_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_GT_I64 class methods ---
Inst_VOPC__V_CMP_GT_I64::Inst_VOPC__V_CMP_GT_I64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_gt_i64")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_GT_I64
Inst_VOPC__V_CMP_GT_I64::~Inst_VOPC__V_CMP_GT_I64()
{
} // ~Inst_VOPC__V_CMP_GT_I64
// --- description from .arch file ---
// D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_GT_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_NE_I64 class methods ---
Inst_VOPC__V_CMP_NE_I64::Inst_VOPC__V_CMP_NE_I64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_ne_i64")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_NE_I64
Inst_VOPC__V_CMP_NE_I64::~Inst_VOPC__V_CMP_NE_I64()
{
} // ~Inst_VOPC__V_CMP_NE_I64
// --- description from .arch file ---
// D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_NE_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_GE_I64 class methods ---
Inst_VOPC__V_CMP_GE_I64::Inst_VOPC__V_CMP_GE_I64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_ge_i64")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_GE_I64
Inst_VOPC__V_CMP_GE_I64::~Inst_VOPC__V_CMP_GE_I64()
{
} // ~Inst_VOPC__V_CMP_GE_I64
// --- description from .arch file ---
// D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_GE_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_T_I64 class methods ---
Inst_VOPC__V_CMP_T_I64::Inst_VOPC__V_CMP_T_I64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_t_i64")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_T_I64
Inst_VOPC__V_CMP_T_I64::~Inst_VOPC__V_CMP_T_I64()
{
} // ~Inst_VOPC__V_CMP_T_I64
// --- description from .arch file ---
// D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_T_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 1);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_F_U64 class methods ---
Inst_VOPC__V_CMP_F_U64::Inst_VOPC__V_CMP_F_U64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_f_u64")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_F_U64
Inst_VOPC__V_CMP_F_U64::~Inst_VOPC__V_CMP_F_U64()
{
} // ~Inst_VOPC__V_CMP_F_U64
// --- description from .arch file ---
// D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_F_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_LT_U64 class methods ---
Inst_VOPC__V_CMP_LT_U64::Inst_VOPC__V_CMP_LT_U64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_lt_u64")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_LT_U64
Inst_VOPC__V_CMP_LT_U64::~Inst_VOPC__V_CMP_LT_U64()
{
} // ~Inst_VOPC__V_CMP_LT_U64
// --- description from .arch file ---
// D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_LT_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_EQ_U64 class methods ---
Inst_VOPC__V_CMP_EQ_U64::Inst_VOPC__V_CMP_EQ_U64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_eq_u64")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_EQ_U64
Inst_VOPC__V_CMP_EQ_U64::~Inst_VOPC__V_CMP_EQ_U64()
{
} // ~Inst_VOPC__V_CMP_EQ_U64
// --- description from .arch file ---
// D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_LE_U64 class methods ---
Inst_VOPC__V_CMP_LE_U64::Inst_VOPC__V_CMP_LE_U64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_le_u64")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_LE_U64
Inst_VOPC__V_CMP_LE_U64::~Inst_VOPC__V_CMP_LE_U64()
{
} // ~Inst_VOPC__V_CMP_LE_U64
// --- description from .arch file ---
// D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_LE_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_GT_U64 class methods ---
Inst_VOPC__V_CMP_GT_U64::Inst_VOPC__V_CMP_GT_U64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_gt_u64")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_GT_U64
Inst_VOPC__V_CMP_GT_U64::~Inst_VOPC__V_CMP_GT_U64()
{
} // ~Inst_VOPC__V_CMP_GT_U64
// --- description from .arch file ---
// D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_GT_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_NE_U64 class methods ---
Inst_VOPC__V_CMP_NE_U64::Inst_VOPC__V_CMP_NE_U64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_ne_u64")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_NE_U64
Inst_VOPC__V_CMP_NE_U64::~Inst_VOPC__V_CMP_NE_U64()
{
} // ~Inst_VOPC__V_CMP_NE_U64
// --- description from .arch file ---
// D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_NE_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_GE_U64 class methods ---
Inst_VOPC__V_CMP_GE_U64::Inst_VOPC__V_CMP_GE_U64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_ge_u64")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_GE_U64
Inst_VOPC__V_CMP_GE_U64::~Inst_VOPC__V_CMP_GE_U64()
{
} // ~Inst_VOPC__V_CMP_GE_U64
// --- description from .arch file ---
// D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_GE_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMP_T_U64 class methods ---
Inst_VOPC__V_CMP_T_U64::Inst_VOPC__V_CMP_T_U64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmp_t_u64")
{
setFlag(ALU);
} // Inst_VOPC__V_CMP_T_U64
Inst_VOPC__V_CMP_T_U64::~Inst_VOPC__V_CMP_T_U64()
{
} // ~Inst_VOPC__V_CMP_T_U64
// --- description from .arch file ---
// D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMP_T_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 1);
}
}
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_F_I64 class methods ---
Inst_VOPC__V_CMPX_F_I64::Inst_VOPC__V_CMPX_F_I64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_f_i64")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_F_I64
Inst_VOPC__V_CMPX_F_I64::~Inst_VOPC__V_CMPX_F_I64()
{
} // ~Inst_VOPC__V_CMPX_F_I64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_F_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_LT_I64 class methods ---
Inst_VOPC__V_CMPX_LT_I64::Inst_VOPC__V_CMPX_LT_I64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_lt_i64")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_LT_I64
Inst_VOPC__V_CMPX_LT_I64::~Inst_VOPC__V_CMPX_LT_I64()
{
} // ~Inst_VOPC__V_CMPX_LT_I64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_LT_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_EQ_I64 class methods ---
Inst_VOPC__V_CMPX_EQ_I64::Inst_VOPC__V_CMPX_EQ_I64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_eq_i64")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_EQ_I64
Inst_VOPC__V_CMPX_EQ_I64::~Inst_VOPC__V_CMPX_EQ_I64()
{
} // ~Inst_VOPC__V_CMPX_EQ_I64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_EQ_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_LE_I64 class methods ---
Inst_VOPC__V_CMPX_LE_I64::Inst_VOPC__V_CMPX_LE_I64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_le_i64")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_LE_I64
Inst_VOPC__V_CMPX_LE_I64::~Inst_VOPC__V_CMPX_LE_I64()
{
} // ~Inst_VOPC__V_CMPX_LE_I64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_LE_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_GT_I64 class methods ---
Inst_VOPC__V_CMPX_GT_I64::Inst_VOPC__V_CMPX_GT_I64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_gt_i64")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_GT_I64
Inst_VOPC__V_CMPX_GT_I64::~Inst_VOPC__V_CMPX_GT_I64()
{
} // ~Inst_VOPC__V_CMPX_GT_I64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_GT_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_NE_I64 class methods ---
Inst_VOPC__V_CMPX_NE_I64::Inst_VOPC__V_CMPX_NE_I64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_ne_i64")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_NE_I64
Inst_VOPC__V_CMPX_NE_I64::~Inst_VOPC__V_CMPX_NE_I64()
{
} // ~Inst_VOPC__V_CMPX_NE_I64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_NE_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_GE_I64 class methods ---
Inst_VOPC__V_CMPX_GE_I64::Inst_VOPC__V_CMPX_GE_I64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_ge_i64")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_GE_I64
Inst_VOPC__V_CMPX_GE_I64::~Inst_VOPC__V_CMPX_GE_I64()
{
} // ~Inst_VOPC__V_CMPX_GE_I64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_GE_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_T_I64 class methods ---
Inst_VOPC__V_CMPX_T_I64::Inst_VOPC__V_CMPX_T_I64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_t_i64")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_T_I64
Inst_VOPC__V_CMPX_T_I64::~Inst_VOPC__V_CMPX_T_I64()
{
} // ~Inst_VOPC__V_CMPX_T_I64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_T_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 1);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_F_U64 class methods ---
Inst_VOPC__V_CMPX_F_U64::Inst_VOPC__V_CMPX_F_U64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_f_u64")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_F_U64
Inst_VOPC__V_CMPX_F_U64::~Inst_VOPC__V_CMPX_F_U64()
{
} // ~Inst_VOPC__V_CMPX_F_U64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_F_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_LT_U64 class methods ---
Inst_VOPC__V_CMPX_LT_U64::Inst_VOPC__V_CMPX_LT_U64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_lt_u64")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_LT_U64
Inst_VOPC__V_CMPX_LT_U64::~Inst_VOPC__V_CMPX_LT_U64()
{
} // ~Inst_VOPC__V_CMPX_LT_U64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_LT_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_EQ_U64 class methods ---
Inst_VOPC__V_CMPX_EQ_U64::Inst_VOPC__V_CMPX_EQ_U64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_eq_u64")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_EQ_U64
Inst_VOPC__V_CMPX_EQ_U64::~Inst_VOPC__V_CMPX_EQ_U64()
{
} // ~Inst_VOPC__V_CMPX_EQ_U64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_LE_U64 class methods ---
Inst_VOPC__V_CMPX_LE_U64::Inst_VOPC__V_CMPX_LE_U64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_le_u64")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_LE_U64
Inst_VOPC__V_CMPX_LE_U64::~Inst_VOPC__V_CMPX_LE_U64()
{
} // ~Inst_VOPC__V_CMPX_LE_U64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_LE_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_GT_U64 class methods ---
Inst_VOPC__V_CMPX_GT_U64::Inst_VOPC__V_CMPX_GT_U64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_gt_u64")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_GT_U64
Inst_VOPC__V_CMPX_GT_U64::~Inst_VOPC__V_CMPX_GT_U64()
{
} // ~Inst_VOPC__V_CMPX_GT_U64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_GT_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_NE_U64 class methods ---
Inst_VOPC__V_CMPX_NE_U64::Inst_VOPC__V_CMPX_NE_U64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_ne_u64")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_NE_U64
Inst_VOPC__V_CMPX_NE_U64::~Inst_VOPC__V_CMPX_NE_U64()
{
} // ~Inst_VOPC__V_CMPX_NE_U64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_NE_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_GE_U64 class methods ---
Inst_VOPC__V_CMPX_GE_U64::Inst_VOPC__V_CMPX_GE_U64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_ge_u64")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_GE_U64
Inst_VOPC__V_CMPX_GE_U64::~Inst_VOPC__V_CMPX_GE_U64()
{
} // ~Inst_VOPC__V_CMPX_GE_U64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_GE_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_T_U64 class methods ---
Inst_VOPC__V_CMPX_T_U64::Inst_VOPC__V_CMPX_T_U64(InFmt_VOPC *iFmt)
: Inst_VOPC(iFmt, "v_cmpx_t_u64")
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_T_U64
Inst_VOPC__V_CMPX_T_U64::~Inst_VOPC__V_CMPX_T_U64()
{
} // ~Inst_VOPC__V_CMPX_T_U64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOPC__V_CMPX_T_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, 1);
}
}
wf->execMask() = vcc.rawData();
vcc.write();
} // execute
// --- Inst_VINTRP__V_INTERP_P1_F32 class methods ---
Inst_VINTRP__V_INTERP_P1_F32::Inst_VINTRP__V_INTERP_P1_F32(
InFmt_VINTRP *iFmt)
: Inst_VINTRP(iFmt, "v_interp_p1_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VINTRP__V_INTERP_P1_F32
Inst_VINTRP__V_INTERP_P1_F32::~Inst_VINTRP__V_INTERP_P1_F32()
{
} // ~Inst_VINTRP__V_INTERP_P1_F32
// --- description from .arch file ---
// D.f = P10 * S.f + P0; parameter interpolation (SQ translates to
// V_MAD_F32 for SP).
// CAUTION: when in HALF_LDS mode, D must not be the same GPR as S;
// if D == S then data corruption will occur.
// NOTE: In textual representations the I/J VGPR is the first source and
// the attribute is the second source; however in the VOP3 encoding the
// attribute is stored in the src0 field and the VGPR is stored in the
// src1 field.
void
Inst_VINTRP__V_INTERP_P1_F32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VINTRP__V_INTERP_P2_F32 class methods ---
Inst_VINTRP__V_INTERP_P2_F32::Inst_VINTRP__V_INTERP_P2_F32(
InFmt_VINTRP *iFmt)
: Inst_VINTRP(iFmt, "v_interp_p2_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VINTRP__V_INTERP_P2_F32
Inst_VINTRP__V_INTERP_P2_F32::~Inst_VINTRP__V_INTERP_P2_F32()
{
} // ~Inst_VINTRP__V_INTERP_P2_F32
// --- description from .arch file ---
// D.f = P20 * S.f + D.f; parameter interpolation (SQ translates to
// V_MAD_F32 for SP).
// NOTE: In textual representations the I/J VGPR is the first source and
// the attribute is the second source; however in the VOP3 encoding the
// attribute is stored in the src0 field and the VGPR is stored in the
// src1 field.
void
Inst_VINTRP__V_INTERP_P2_F32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
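// Note: P1 and P2 form the standard two-step barycentric interpolation
// sequence: P1 seeds the accumulator from the I coordinate and P2
// finishes it with the J coordinate. A hedged assembly sketch (register
// and attribute names illustrative):
//
//     v_interp_p1_f32 v2, v0, attr0.x   ; v2 = P10 * i + P0
//     v_interp_p2_f32 v2, v1, attr0.x   ; v2 = P20 * j + v2
//
// Both stages are unimplemented in this model and panic if reached.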
// --- Inst_VINTRP__V_INTERP_MOV_F32 class methods ---
Inst_VINTRP__V_INTERP_MOV_F32::Inst_VINTRP__V_INTERP_MOV_F32(
InFmt_VINTRP *iFmt)
: Inst_VINTRP(iFmt, "v_interp_mov_f32")
{
setFlag(ALU);
setFlag(F32);
} // Inst_VINTRP__V_INTERP_MOV_F32
Inst_VINTRP__V_INTERP_MOV_F32::~Inst_VINTRP__V_INTERP_MOV_F32()
{
} // ~Inst_VINTRP__V_INTERP_MOV_F32
// --- description from .arch file ---
// D.f = {P10,P20,P0}[S.u]; parameter load.
void
Inst_VINTRP__V_INTERP_MOV_F32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
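// A hedged note: S.u directly indexes the parameter set (0 selects P10,
// 1 selects P20, 2 selects P0), conventionally written with a p10/p20/p0
// operand in assembly, e.g. "v_interp_mov_f32 v0, p0, attr0.x" to load
// the constant term of an attribute. Also unimplemented in this model.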
// --- Inst_VOP3__V_CMP_CLASS_F32 class methods ---
Inst_VOP3__V_CMP_CLASS_F32::Inst_VOP3__V_CMP_CLASS_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_class_f32", true)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CMP_CLASS_F32
Inst_VOP3__V_CMP_CLASS_F32::~Inst_VOP3__V_CMP_CLASS_F32()
{
} // ~Inst_VOP3__V_CMP_CLASS_F32
// --- description from .arch file ---
// VCC = IEEE numeric class function specified in S1.u, performed on S0.f
// The function reports true if the floating point value is *any* of the
// numeric types selected in S1.u according to the following list:
// S1.u[0] -- value is a signaling NaN.
// S1.u[1] -- value is a quiet NaN.
// S1.u[2] -- value is negative infinity.
// S1.u[3] -- value is a negative normal value.
// S1.u[4] -- value is a negative denormal value.
// S1.u[5] -- value is negative zero.
// S1.u[6] -- value is positive zero.
// S1.u[7] -- value is a positive denormal value.
// S1.u[8] -- value is a positive normal value.
// S1.u[9] -- value is positive infinity.
void
Inst_VOP3__V_CMP_CLASS_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
// is NaN
if (std::isnan(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 2)) {
// is -infinity
if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 3)) {
// is -normal
if (std::isnormal(src0[lane])
&& std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 4)) {
// is -denormal
if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
&& std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 5)) {
// is -zero
if (std::fpclassify(src0[lane]) == FP_ZERO
&& std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 6)) {
// is +zero
if (std::fpclassify(src0[lane]) == FP_ZERO
&& !std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 7)) {
// is +denormal
if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
&& !std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 8)) {
// is +normal
if (std::isnormal(src0[lane])
&& !std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 9)) {
// is +infinity
if (std::isinf(src0[lane])
&& !std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
}
}
sdst.write();
} // execute
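// Worked example of the S1.u class mask: each bit selects one IEEE
// class, and the compare reports true when the value belongs to any
// selected class. Testing "is any kind of NaN" sets bits 0 and 1
// (mask 0x003); "is any infinity" sets bits 2 and 9
// (mask 0x204 = (1 << 2) | (1 << 9)). A hedged assembly sketch
// (operands illustrative):
//
//     v_mov_b32       v1, 0x003    ; select sNaN | qNaN
//     v_cmp_class_f32 vcc, v0, v1  ; vcc bit = 1 where v0 is a NaN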
// --- Inst_VOP3__V_CMPX_CLASS_F32 class methods ---
Inst_VOP3__V_CMPX_CLASS_F32::Inst_VOP3__V_CMPX_CLASS_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_class_f32", true)
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_CLASS_F32
Inst_VOP3__V_CMPX_CLASS_F32::~Inst_VOP3__V_CMPX_CLASS_F32()
{
} // ~Inst_VOP3__V_CMPX_CLASS_F32
// --- description from .arch file ---
// EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
// S0.f
// The function reports true if the floating point value is *any* of the
// numeric types selected in S1.u according to the following list:
// S1.u[0] -- value is a signaling NaN.
// S1.u[1] -- value is a quiet NaN.
// S1.u[2] -- value is negative infinity.
// S1.u[3] -- value is a negative normal value.
// S1.u[4] -- value is a negative denormal value.
// S1.u[5] -- value is negative zero.
// S1.u[6] -- value is positive zero.
// S1.u[7] -- value is a positive denormal value.
// S1.u[8] -- value is a positive normal value.
// S1.u[9] -- value is positive infinity.
void
Inst_VOP3__V_CMPX_CLASS_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
// is NaN
if (std::isnan(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 2)) {
// is -infinity
if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 3)) {
// is -normal
if (std::isnormal(src0[lane])
&& std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 4)) {
// is -denormal
if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
&& std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 5)) {
// is -zero
if (std::fpclassify(src0[lane]) == FP_ZERO
&& std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 6)) {
// is +zero
if (std::fpclassify(src0[lane]) == FP_ZERO
&& !std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 7)) {
// is +denormal
if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
&& !std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 8)) {
// is +normal
if (std::isnormal(src0[lane])
&& !std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 9)) {
// is +infinity
if (std::isinf(src0[lane])
&& !std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_CLASS_F64 class methods ---
Inst_VOP3__V_CMP_CLASS_F64::Inst_VOP3__V_CMP_CLASS_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_class_f64", true)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_CMP_CLASS_F64
Inst_VOP3__V_CMP_CLASS_F64::~Inst_VOP3__V_CMP_CLASS_F64()
{
} // ~Inst_VOP3__V_CMP_CLASS_F64
// --- description from .arch file ---
// VCC = IEEE numeric class function specified in S1.u, performed on S0.d
// The function reports true if the floating point value is *any* of the
// numeric types selected in S1.u according to the following list:
// S1.u[0] -- value is a signaling NaN.
// S1.u[1] -- value is a quiet NaN.
// S1.u[2] -- value is negative infinity.
// S1.u[3] -- value is a negative normal value.
// S1.u[4] -- value is a negative denormal value.
// S1.u[5] -- value is negative zero.
// S1.u[6] -- value is positive zero.
// S1.u[7] -- value is a positive denormal value.
// S1.u[8] -- value is a positive normal value.
// S1.u[9] -- value is positive infinity.
void
Inst_VOP3__V_CMP_CLASS_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
// is NaN
if (std::isnan(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 2)) {
// is -infinity
if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 3)) {
// is -normal
if (std::isnormal(src0[lane])
&& std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 4)) {
// is -denormal
if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
&& std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 5)) {
// is -zero
if (std::fpclassify(src0[lane]) == FP_ZERO
&& std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 6)) {
// is +zero
if (std::fpclassify(src0[lane]) == FP_ZERO
&& !std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 7)) {
// is +denormal
if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
&& !std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 8)) {
// is +normal
if (std::isnormal(src0[lane])
&& !std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 9)) {
// is +infinity
if (std::isinf(src0[lane])
&& !std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_CLASS_F64 class methods ---
Inst_VOP3__V_CMPX_CLASS_F64::Inst_VOP3__V_CMPX_CLASS_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_class_f64", true)
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_CLASS_F64
Inst_VOP3__V_CMPX_CLASS_F64::~Inst_VOP3__V_CMPX_CLASS_F64()
{
} // ~Inst_VOP3__V_CMPX_CLASS_F64
// --- description from .arch file ---
// EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
// S0.d
// The function reports true if the floating point value is *any* of the
// numeric types selected in S1.u according to the following list:
// S1.u[0] -- value is a signaling NaN.
// S1.u[1] -- value is a quiet NaN.
// S1.u[2] -- value is negative infinity.
// S1.u[3] -- value is a negative normal value.
// S1.u[4] -- value is a negative denormal value.
// S1.u[5] -- value is negative zero.
// S1.u[6] -- value is positive zero.
// S1.u[7] -- value is a positive denormal value.
// S1.u[8] -- value is a positive normal value.
// S1.u[9] -- value is positive infinity.
void
Inst_VOP3__V_CMPX_CLASS_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
// is NaN
if (std::isnan(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 2)) {
// is -infinity
if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 3)) {
// is -normal
if (std::isnormal(src0[lane])
&& std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 4)) {
// is -denormal
if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
&& std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 5)) {
// is -zero
if (std::fpclassify(src0[lane]) == FP_ZERO
&& std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 6)) {
// is +zero
if (std::fpclassify(src0[lane]) == FP_ZERO
&& !std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 7)) {
// is +denormal
if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
&& !std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 8)) {
// is +normal
if (std::isnormal(src0[lane])
&& !std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
if (bits(src1[lane], 9)) {
// is +infinity
if (std::isinf(src0[lane])
&& !std::signbit(src0[lane])) {
sdst.setBit(lane, 1);
continue;
}
}
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_CLASS_F16 class methods ---
Inst_VOP3__V_CMP_CLASS_F16::Inst_VOP3__V_CMP_CLASS_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_class_f16", true)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_CMP_CLASS_F16
Inst_VOP3__V_CMP_CLASS_F16::~Inst_VOP3__V_CMP_CLASS_F16()
{
} // ~Inst_VOP3__V_CMP_CLASS_F16
// --- description from .arch file ---
// VCC = IEEE numeric class function specified in S1.u, performed on S0.f16
// The function reports true if the floating point value is *any* of the
// numeric types selected in S1.u according to the following list:
// S1.u[0] -- value is a signaling NaN.
// S1.u[1] -- value is a quiet NaN.
// S1.u[2] -- value is negative infinity.
// S1.u[3] -- value is a negative normal value.
// S1.u[4] -- value is a negative denormal value.
// S1.u[5] -- value is negative zero.
// S1.u[6] -- value is positive zero.
// S1.u[7] -- value is a positive denormal value.
// S1.u[8] -- value is a positive normal value.
// S1.u[9] -- value is positive infinity.
void
Inst_VOP3__V_CMP_CLASS_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CMPX_CLASS_F16 class methods ---
Inst_VOP3__V_CMPX_CLASS_F16::Inst_VOP3__V_CMPX_CLASS_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_class_f16", true)
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_CLASS_F16
Inst_VOP3__V_CMPX_CLASS_F16::~Inst_VOP3__V_CMPX_CLASS_F16()
{
} // ~Inst_VOP3__V_CMPX_CLASS_F16
// --- description from .arch file ---
// EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
// S0.f16
// The function reports true if the floating point value is *any* of the
// numeric types selected in S1.u according to the following list:
// S1.u[0] -- value is a signaling NaN.
// S1.u[1] -- value is a quiet NaN.
// S1.u[2] -- value is negative infinity.
// S1.u[3] -- value is a negative normal value.
// S1.u[4] -- value is a negative denormal value.
// S1.u[5] -- value is negative zero.
// S1.u[6] -- value is positive zero.
// S1.u[7] -- value is a positive denormal value.
// S1.u[8] -- value is a positive normal value.
// S1.u[9] -- value is positive infinity.
void
Inst_VOP3__V_CMPX_CLASS_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CMP_F_F16 class methods ---
Inst_VOP3__V_CMP_F_F16::Inst_VOP3__V_CMP_F_F16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_f_f16", true)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_CMP_F_F16
Inst_VOP3__V_CMP_F_F16::~Inst_VOP3__V_CMP_F_F16()
{
} // ~Inst_VOP3__V_CMP_F_F16
// --- description from .arch file ---
// D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_F_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CMP_LT_F16 class methods ---
Inst_VOP3__V_CMP_LT_F16::Inst_VOP3__V_CMP_LT_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_lt_f16", true)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_CMP_LT_F16
Inst_VOP3__V_CMP_LT_F16::~Inst_VOP3__V_CMP_LT_F16()
{
} // ~Inst_VOP3__V_CMP_LT_F16
// --- description from .arch file ---
// D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_LT_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CMP_EQ_F16 class methods ---
Inst_VOP3__V_CMP_EQ_F16::Inst_VOP3__V_CMP_EQ_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_eq_f16", true)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_CMP_EQ_F16
Inst_VOP3__V_CMP_EQ_F16::~Inst_VOP3__V_CMP_EQ_F16()
{
} // ~Inst_VOP3__V_CMP_EQ_F16
// --- description from .arch file ---
// D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_EQ_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CMP_LE_F16 class methods ---
Inst_VOP3__V_CMP_LE_F16::Inst_VOP3__V_CMP_LE_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_le_f16", true)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_CMP_LE_F16
Inst_VOP3__V_CMP_LE_F16::~Inst_VOP3__V_CMP_LE_F16()
{
} // ~Inst_VOP3__V_CMP_LE_F16
// --- description from .arch file ---
// D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_LE_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CMP_GT_F16 class methods ---
Inst_VOP3__V_CMP_GT_F16::Inst_VOP3__V_CMP_GT_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_gt_f16", true)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_CMP_GT_F16
Inst_VOP3__V_CMP_GT_F16::~Inst_VOP3__V_CMP_GT_F16()
{
} // ~Inst_VOP3__V_CMP_GT_F16
// --- description from .arch file ---
// D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_GT_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CMP_LG_F16 class methods ---
Inst_VOP3__V_CMP_LG_F16::Inst_VOP3__V_CMP_LG_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_lg_f16", true)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_CMP_LG_F16
Inst_VOP3__V_CMP_LG_F16::~Inst_VOP3__V_CMP_LG_F16()
{
} // ~Inst_VOP3__V_CMP_LG_F16
// --- description from .arch file ---
// D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_LG_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CMP_GE_F16 class methods ---
Inst_VOP3__V_CMP_GE_F16::Inst_VOP3__V_CMP_GE_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_ge_f16", true)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_CMP_GE_F16
Inst_VOP3__V_CMP_GE_F16::~Inst_VOP3__V_CMP_GE_F16()
{
} // ~Inst_VOP3__V_CMP_GE_F16
// --- description from .arch file ---
// D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_GE_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CMP_O_F16 class methods ---
Inst_VOP3__V_CMP_O_F16::Inst_VOP3__V_CMP_O_F16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_o_f16", true)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_CMP_O_F16
Inst_VOP3__V_CMP_O_F16::~Inst_VOP3__V_CMP_O_F16()
{
} // ~Inst_VOP3__V_CMP_O_F16
// --- description from .arch file ---
// D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_O_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CMP_U_F16 class methods ---
Inst_VOP3__V_CMP_U_F16::Inst_VOP3__V_CMP_U_F16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_u_f16", true)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_CMP_U_F16
Inst_VOP3__V_CMP_U_F16::~Inst_VOP3__V_CMP_U_F16()
{
} // ~Inst_VOP3__V_CMP_U_F16
// --- description from .arch file ---
// D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_U_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CMP_NGE_F16 class methods ---
Inst_VOP3__V_CMP_NGE_F16::Inst_VOP3__V_CMP_NGE_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_nge_f16", true)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_CMP_NGE_F16
Inst_VOP3__V_CMP_NGE_F16::~Inst_VOP3__V_CMP_NGE_F16()
{
} // ~Inst_VOP3__V_CMP_NGE_F16
// --- description from .arch file ---
// D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_NGE_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CMP_NLG_F16 class methods ---
Inst_VOP3__V_CMP_NLG_F16::Inst_VOP3__V_CMP_NLG_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_nlg_f16", true)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_CMP_NLG_F16
Inst_VOP3__V_CMP_NLG_F16::~Inst_VOP3__V_CMP_NLG_F16()
{
} // ~Inst_VOP3__V_CMP_NLG_F16
// --- description from .arch file ---
// D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_NLG_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CMP_NGT_F16 class methods ---
Inst_VOP3__V_CMP_NGT_F16::Inst_VOP3__V_CMP_NGT_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_ngt_f16", true)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_CMP_NGT_F16
Inst_VOP3__V_CMP_NGT_F16::~Inst_VOP3__V_CMP_NGT_F16()
{
} // ~Inst_VOP3__V_CMP_NGT_F16
// --- description from .arch file ---
// D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_NGT_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CMP_NLE_F16 class methods ---
Inst_VOP3__V_CMP_NLE_F16::Inst_VOP3__V_CMP_NLE_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_nle_f16", true)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_CMP_NLE_F16
Inst_VOP3__V_CMP_NLE_F16::~Inst_VOP3__V_CMP_NLE_F16()
{
} // ~Inst_VOP3__V_CMP_NLE_F16
// --- description from .arch file ---
// D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_NLE_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CMP_NEQ_F16 class methods ---
Inst_VOP3__V_CMP_NEQ_F16::Inst_VOP3__V_CMP_NEQ_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_neq_f16", true)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_CMP_NEQ_F16
Inst_VOP3__V_CMP_NEQ_F16::~Inst_VOP3__V_CMP_NEQ_F16()
{
} // ~Inst_VOP3__V_CMP_NEQ_F16
// --- description from .arch file ---
// D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_NEQ_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CMP_NLT_F16 class methods ---
Inst_VOP3__V_CMP_NLT_F16::Inst_VOP3__V_CMP_NLT_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_nlt_f16", true)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_CMP_NLT_F16
Inst_VOP3__V_CMP_NLT_F16::~Inst_VOP3__V_CMP_NLT_F16()
{
} // ~Inst_VOP3__V_CMP_NLT_F16
// --- description from .arch file ---
// D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_NLT_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CMP_TRU_F16 class methods ---
Inst_VOP3__V_CMP_TRU_F16::Inst_VOP3__V_CMP_TRU_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_tru_f16", true)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_CMP_TRU_F16
Inst_VOP3__V_CMP_TRU_F16::~Inst_VOP3__V_CMP_TRU_F16()
{
} // ~Inst_VOP3__V_CMP_TRU_F16
// --- description from .arch file ---
// D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_TRU_F16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 1);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_F_F16 class methods ---
Inst_VOP3__V_CMPX_F_F16::Inst_VOP3__V_CMPX_F_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_f_f16", true)
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_F_F16
Inst_VOP3__V_CMPX_F_F16::~Inst_VOP3__V_CMPX_F_F16()
{
} // ~Inst_VOP3__V_CMPX_F_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_F_F16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_LT_F16 class methods ---
Inst_VOP3__V_CMPX_LT_F16::Inst_VOP3__V_CMPX_LT_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_lt_f16", true)
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_LT_F16
Inst_VOP3__V_CMPX_LT_F16::~Inst_VOP3__V_CMPX_LT_F16()
{
} // ~Inst_VOP3__V_CMPX_LT_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_LT_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CMPX_EQ_F16 class methods ---
Inst_VOP3__V_CMPX_EQ_F16::Inst_VOP3__V_CMPX_EQ_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_eq_f16", true)
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_EQ_F16
Inst_VOP3__V_CMPX_EQ_F16::~Inst_VOP3__V_CMPX_EQ_F16()
{
} // ~Inst_VOP3__V_CMPX_EQ_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_EQ_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CMPX_LE_F16 class methods ---
Inst_VOP3__V_CMPX_LE_F16::Inst_VOP3__V_CMPX_LE_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_le_f16", true)
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_LE_F16
Inst_VOP3__V_CMPX_LE_F16::~Inst_VOP3__V_CMPX_LE_F16()
{
} // ~Inst_VOP3__V_CMPX_LE_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_LE_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CMPX_GT_F16 class methods ---
Inst_VOP3__V_CMPX_GT_F16::Inst_VOP3__V_CMPX_GT_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_gt_f16", true)
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_GT_F16
Inst_VOP3__V_CMPX_GT_F16::~Inst_VOP3__V_CMPX_GT_F16()
{
} // ~Inst_VOP3__V_CMPX_GT_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_GT_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CMPX_LG_F16 class methods ---
Inst_VOP3__V_CMPX_LG_F16::Inst_VOP3__V_CMPX_LG_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_lg_f16", true)
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_LG_F16
Inst_VOP3__V_CMPX_LG_F16::~Inst_VOP3__V_CMPX_LG_F16()
{
} // ~Inst_VOP3__V_CMPX_LG_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_LG_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CMPX_GE_F16 class methods ---
Inst_VOP3__V_CMPX_GE_F16::Inst_VOP3__V_CMPX_GE_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_ge_f16", true)
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_GE_F16
Inst_VOP3__V_CMPX_GE_F16::~Inst_VOP3__V_CMPX_GE_F16()
{
} // ~Inst_VOP3__V_CMPX_GE_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_GE_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CMPX_O_F16 class methods ---
Inst_VOP3__V_CMPX_O_F16::Inst_VOP3__V_CMPX_O_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_o_f16", true)
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_O_F16
Inst_VOP3__V_CMPX_O_F16::~Inst_VOP3__V_CMPX_O_F16()
{
} // ~Inst_VOP3__V_CMPX_O_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
// encoding.
void
Inst_VOP3__V_CMPX_O_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CMPX_U_F16 class methods ---
Inst_VOP3__V_CMPX_U_F16::Inst_VOP3__V_CMPX_U_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_u_f16", true)
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_U_F16
Inst_VOP3__V_CMPX_U_F16::~Inst_VOP3__V_CMPX_U_F16()
{
} // ~Inst_VOP3__V_CMPX_U_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
// encoding.
void
Inst_VOP3__V_CMPX_U_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CMPX_NGE_F16 class methods ---
Inst_VOP3__V_CMPX_NGE_F16::Inst_VOP3__V_CMPX_NGE_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_nge_f16", true)
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_NGE_F16
Inst_VOP3__V_CMPX_NGE_F16::~Inst_VOP3__V_CMPX_NGE_F16()
{
} // ~Inst_VOP3__V_CMPX_NGE_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_NGE_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CMPX_NLG_F16 class methods ---
Inst_VOP3__V_CMPX_NLG_F16::Inst_VOP3__V_CMPX_NLG_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_nlg_f16", true)
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_NLG_F16
Inst_VOP3__V_CMPX_NLG_F16::~Inst_VOP3__V_CMPX_NLG_F16()
{
} // ~Inst_VOP3__V_CMPX_NLG_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_NLG_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CMPX_NGT_F16 class methods ---
Inst_VOP3__V_CMPX_NGT_F16::Inst_VOP3__V_CMPX_NGT_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_ngt_f16", true)
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_NGT_F16
Inst_VOP3__V_CMPX_NGT_F16::~Inst_VOP3__V_CMPX_NGT_F16()
{
} // ~Inst_VOP3__V_CMPX_NGT_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_NGT_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CMPX_NLE_F16 class methods ---
Inst_VOP3__V_CMPX_NLE_F16::Inst_VOP3__V_CMPX_NLE_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_nle_f16", true)
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_NLE_F16
Inst_VOP3__V_CMPX_NLE_F16::~Inst_VOP3__V_CMPX_NLE_F16()
{
} // ~Inst_VOP3__V_CMPX_NLE_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_NLE_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CMPX_NEQ_F16 class methods ---
Inst_VOP3__V_CMPX_NEQ_F16::Inst_VOP3__V_CMPX_NEQ_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_neq_f16", true)
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_NEQ_F16
Inst_VOP3__V_CMPX_NEQ_F16::~Inst_VOP3__V_CMPX_NEQ_F16()
{
} // ~Inst_VOP3__V_CMPX_NEQ_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_NEQ_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CMPX_NLT_F16 class methods ---
Inst_VOP3__V_CMPX_NLT_F16::Inst_VOP3__V_CMPX_NLT_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_nlt_f16", true)
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_NLT_F16
Inst_VOP3__V_CMPX_NLT_F16::~Inst_VOP3__V_CMPX_NLT_F16()
{
} // ~Inst_VOP3__V_CMPX_NLT_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_NLT_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CMPX_TRU_F16 class methods ---
Inst_VOP3__V_CMPX_TRU_F16::Inst_VOP3__V_CMPX_TRU_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_tru_f16", true)
{
setFlag(ALU);
setFlag(F16);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_TRU_F16
Inst_VOP3__V_CMPX_TRU_F16::~Inst_VOP3__V_CMPX_TRU_F16()
{
} // ~Inst_VOP3__V_CMPX_TRU_F16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_TRU_F16::execute(GPUDynInstPtr gpuDynInst)
{
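// TRU ignores its sources, so no half-precision arithmetic
// is needed and this compare can be implemented even though
// the other F16 compares above are not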
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 1);
}
}
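// CMPX compares also replace the EXEC mask with the per-lane
// result, turning off lanes whose comparison produced 0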
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_F_F32 class methods ---
Inst_VOP3__V_CMP_F_F32::Inst_VOP3__V_CMP_F_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_f_f32", true)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CMP_F_F32
Inst_VOP3__V_CMP_F_F32::~Inst_VOP3__V_CMP_F_F32()
{
} // ~Inst_VOP3__V_CMP_F_F32
// --- description from .arch file ---
// D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_F_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
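// F is the always-false compare: write 0 for each active
// lane; the encoded sources are never read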
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_LT_F32 class methods ---
Inst_VOP3__V_CMP_LT_F32::Inst_VOP3__V_CMP_LT_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_lt_f32", true)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CMP_LT_F32
Inst_VOP3__V_CMP_LT_F32::~Inst_VOP3__V_CMP_LT_F32()
{
} // ~Inst_VOP3__V_CMP_LT_F32
// --- description from .arch file ---
// D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_LT_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
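// each active lane stores its 1-bit result in the
// corresponding bit of the 64-bit scalar destination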
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_EQ_F32 class methods ---
Inst_VOP3__V_CMP_EQ_F32::Inst_VOP3__V_CMP_EQ_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_eq_f32", true)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CMP_EQ_F32
Inst_VOP3__V_CMP_EQ_F32::~Inst_VOP3__V_CMP_EQ_F32()
{
} // ~Inst_VOP3__V_CMP_EQ_F32
// --- description from .arch file ---
// D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_EQ_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_LE_F32 class methods ---
Inst_VOP3__V_CMP_LE_F32::Inst_VOP3__V_CMP_LE_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_le_f32", true)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CMP_LE_F32
Inst_VOP3__V_CMP_LE_F32::~Inst_VOP3__V_CMP_LE_F32()
{
} // ~Inst_VOP3__V_CMP_LE_F32
// --- description from .arch file ---
// D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_LE_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_GT_F32 class methods ---
Inst_VOP3__V_CMP_GT_F32::Inst_VOP3__V_CMP_GT_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_gt_f32", true)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CMP_GT_F32
Inst_VOP3__V_CMP_GT_F32::~Inst_VOP3__V_CMP_GT_F32()
{
} // ~Inst_VOP3__V_CMP_GT_F32
// --- description from .arch file ---
// D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_GT_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_LG_F32 class methods ---
Inst_VOP3__V_CMP_LG_F32::Inst_VOP3__V_CMP_LG_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_lg_f32", true)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CMP_LG_F32
Inst_VOP3__V_CMP_LG_F32::~Inst_VOP3__V_CMP_LG_F32()
{
} // ~Inst_VOP3__V_CMP_LG_F32
// --- description from .arch file ---
// D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_LG_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
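// LG ("<>") is an ordered compare and must be false when
// either source is a NaN, so use (S0 < S1) || (S0 > S1)
// rather than operator!=, which is true for NaN operands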
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, (src0[lane] < src1[lane]
    || src0[lane] > src1[lane]) ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_GE_F32 class methods ---
Inst_VOP3__V_CMP_GE_F32::Inst_VOP3__V_CMP_GE_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_ge_f32", true)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CMP_GE_F32
Inst_VOP3__V_CMP_GE_F32::~Inst_VOP3__V_CMP_GE_F32()
{
} // ~Inst_VOP3__V_CMP_GE_F32
// --- description from .arch file ---
// D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_GE_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_O_F32 class methods ---
Inst_VOP3__V_CMP_O_F32::Inst_VOP3__V_CMP_O_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_o_f32", true)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CMP_O_F32
Inst_VOP3__V_CMP_O_F32::~Inst_VOP3__V_CMP_O_F32()
{
} // ~Inst_VOP3__V_CMP_O_F32
// --- description from .arch file ---
// D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_O_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
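// O ("ordered") is true only when neither source is a NaN;
// U below computes the complementary unordered test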
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, (!std::isnan(src0[lane])
&& !std::isnan(src1[lane])) ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_U_F32 class methods ---
Inst_VOP3__V_CMP_U_F32::Inst_VOP3__V_CMP_U_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_u_f32", true)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CMP_U_F32
Inst_VOP3__V_CMP_U_F32::~Inst_VOP3__V_CMP_U_F32()
{
} // ~Inst_VOP3__V_CMP_U_F32
// --- description from .arch file ---
// D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_U_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, (std::isnan(src0[lane])
|| std::isnan(src1[lane])) ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_NGE_F32 class methods ---
Inst_VOP3__V_CMP_NGE_F32::Inst_VOP3__V_CMP_NGE_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_nge_f32", true)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CMP_NGE_F32
Inst_VOP3__V_CMP_NGE_F32::~Inst_VOP3__V_CMP_NGE_F32()
{
} // ~Inst_VOP3__V_CMP_NGE_F32
// --- description from .arch file ---
// D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_NGE_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_NLG_F32 class methods ---
Inst_VOP3__V_CMP_NLG_F32::Inst_VOP3__V_CMP_NLG_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_nlg_f32", true)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CMP_NLG_F32
Inst_VOP3__V_CMP_NLG_F32::~Inst_VOP3__V_CMP_NLG_F32()
{
} // ~Inst_VOP3__V_CMP_NLG_F32
// --- description from .arch file ---
// D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_NLG_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
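// NLG is the complement of the ordered <> test: true when the
// sources compare equal or when either one is a NaN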
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, !(src0[lane] < src1[lane]
|| src0[lane] > src1[lane]) ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_NGT_F32 class methods ---
Inst_VOP3__V_CMP_NGT_F32::Inst_VOP3__V_CMP_NGT_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_ngt_f32", true)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CMP_NGT_F32
Inst_VOP3__V_CMP_NGT_F32::~Inst_VOP3__V_CMP_NGT_F32()
{
} // ~Inst_VOP3__V_CMP_NGT_F32
// --- description from .arch file ---
// D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_NGT_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_NLE_F32 class methods ---
Inst_VOP3__V_CMP_NLE_F32::Inst_VOP3__V_CMP_NLE_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_nle_f32", true)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CMP_NLE_F32
Inst_VOP3__V_CMP_NLE_F32::~Inst_VOP3__V_CMP_NLE_F32()
{
} // ~Inst_VOP3__V_CMP_NLE_F32
// --- description from .arch file ---
// D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_NLE_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_NEQ_F32 class methods ---
Inst_VOP3__V_CMP_NEQ_F32::Inst_VOP3__V_CMP_NEQ_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_neq_f32", true)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CMP_NEQ_F32
Inst_VOP3__V_CMP_NEQ_F32::~Inst_VOP3__V_CMP_NEQ_F32()
{
} // ~Inst_VOP3__V_CMP_NEQ_F32
// --- description from .arch file ---
// D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_NEQ_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
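// NEQ is !(S0 == S1), an unordered not-equal that is true for
// NaN operands; C++ operator!= has exactly these semantics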
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_NLT_F32 class methods ---
Inst_VOP3__V_CMP_NLT_F32::Inst_VOP3__V_CMP_NLT_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_nlt_f32", true)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CMP_NLT_F32
Inst_VOP3__V_CMP_NLT_F32::~Inst_VOP3__V_CMP_NLT_F32()
{
} // ~Inst_VOP3__V_CMP_NLT_F32
// --- description from .arch file ---
// D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_NLT_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_TRU_F32 class methods ---
Inst_VOP3__V_CMP_TRU_F32::Inst_VOP3__V_CMP_TRU_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_tru_f32", true)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CMP_TRU_F32
Inst_VOP3__V_CMP_TRU_F32::~Inst_VOP3__V_CMP_TRU_F32()
{
} // ~Inst_VOP3__V_CMP_TRU_F32
// --- description from .arch file ---
// D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_TRU_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 1);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_F_F32 class methods ---
Inst_VOP3__V_CMPX_F_F32::Inst_VOP3__V_CMPX_F_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_f_f32", true)
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_F_F32
Inst_VOP3__V_CMPX_F_F32::~Inst_VOP3__V_CMPX_F_F32()
{
} // ~Inst_VOP3__V_CMPX_F_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_F_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_LT_F32 class methods ---
Inst_VOP3__V_CMPX_LT_F32::Inst_VOP3__V_CMPX_LT_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_lt_f32", true)
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_LT_F32
Inst_VOP3__V_CMPX_LT_F32::~Inst_VOP3__V_CMPX_LT_F32()
{
} // ~Inst_VOP3__V_CMPX_LT_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_LT_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_EQ_F32 class methods ---
Inst_VOP3__V_CMPX_EQ_F32::Inst_VOP3__V_CMPX_EQ_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_eq_f32", true)
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_EQ_F32
Inst_VOP3__V_CMPX_EQ_F32::~Inst_VOP3__V_CMPX_EQ_F32()
{
} // ~Inst_VOP3__V_CMPX_EQ_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_EQ_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_LE_F32 class methods ---
Inst_VOP3__V_CMPX_LE_F32::Inst_VOP3__V_CMPX_LE_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_le_f32", true)
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_LE_F32
Inst_VOP3__V_CMPX_LE_F32::~Inst_VOP3__V_CMPX_LE_F32()
{
} // ~Inst_VOP3__V_CMPX_LE_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_LE_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_GT_F32 class methods ---
Inst_VOP3__V_CMPX_GT_F32::Inst_VOP3__V_CMPX_GT_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_gt_f32", true)
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_GT_F32
Inst_VOP3__V_CMPX_GT_F32::~Inst_VOP3__V_CMPX_GT_F32()
{
} // ~Inst_VOP3__V_CMPX_GT_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_GT_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_LG_F32 class methods ---
Inst_VOP3__V_CMPX_LG_F32::Inst_VOP3__V_CMPX_LG_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_lg_f32", true)
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_LG_F32
Inst_VOP3__V_CMPX_LG_F32::~Inst_VOP3__V_CMPX_LG_F32()
{
} // ~Inst_VOP3__V_CMPX_LG_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_LG_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, (src0[lane] < src1[lane]
|| src0[lane] > src1[lane]) ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_GE_F32 class methods ---
Inst_VOP3__V_CMPX_GE_F32::Inst_VOP3__V_CMPX_GE_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_ge_f32", true)
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_GE_F32
Inst_VOP3__V_CMPX_GE_F32::~Inst_VOP3__V_CMPX_GE_F32()
{
} // ~Inst_VOP3__V_CMPX_GE_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_GE_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_O_F32 class methods ---
Inst_VOP3__V_CMPX_O_F32::Inst_VOP3__V_CMPX_O_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_o_f32", true)
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_O_F32
Inst_VOP3__V_CMPX_O_F32::~Inst_VOP3__V_CMPX_O_F32()
{
} // ~Inst_VOP3__V_CMPX_O_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
// encoding.
void
Inst_VOP3__V_CMPX_O_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, (!std::isnan(src0[lane])
&& !std::isnan(src1[lane])) ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_U_F32 class methods ---
Inst_VOP3__V_CMPX_U_F32::Inst_VOP3__V_CMPX_U_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_u_f32", true)
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_U_F32
Inst_VOP3__V_CMPX_U_F32::~Inst_VOP3__V_CMPX_U_F32()
{
} // ~Inst_VOP3__V_CMPX_U_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
// encoding.
void
Inst_VOP3__V_CMPX_U_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, (std::isnan(src0[lane])
|| std::isnan(src1[lane])) ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_NGE_F32 class methods ---
Inst_VOP3__V_CMPX_NGE_F32::Inst_VOP3__V_CMPX_NGE_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_nge_f32", true)
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_NGE_F32
Inst_VOP3__V_CMPX_NGE_F32::~Inst_VOP3__V_CMPX_NGE_F32()
{
} // ~Inst_VOP3__V_CMPX_NGE_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_NGE_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_NLG_F32 class methods ---
Inst_VOP3__V_CMPX_NLG_F32::Inst_VOP3__V_CMPX_NLG_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_nlg_f32", true)
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_NLG_F32
Inst_VOP3__V_CMPX_NLG_F32::~Inst_VOP3__V_CMPX_NLG_F32()
{
} // ~Inst_VOP3__V_CMPX_NLG_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_NLG_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, !(src0[lane] < src1[lane]
|| src0[lane] > src1[lane]) ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_NGT_F32 class methods ---
Inst_VOP3__V_CMPX_NGT_F32::Inst_VOP3__V_CMPX_NGT_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_ngt_f32", true)
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_NGT_F32
Inst_VOP3__V_CMPX_NGT_F32::~Inst_VOP3__V_CMPX_NGT_F32()
{
} // ~Inst_VOP3__V_CMPX_NGT_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_NGT_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_NLE_F32 class methods ---
Inst_VOP3__V_CMPX_NLE_F32::Inst_VOP3__V_CMPX_NLE_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_nle_f32", true)
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_NLE_F32
Inst_VOP3__V_CMPX_NLE_F32::~Inst_VOP3__V_CMPX_NLE_F32()
{
} // ~Inst_VOP3__V_CMPX_NLE_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_NLE_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_NEQ_F32 class methods ---
Inst_VOP3__V_CMPX_NEQ_F32::Inst_VOP3__V_CMPX_NEQ_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_neq_f32", true)
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_NEQ_F32
Inst_VOP3__V_CMPX_NEQ_F32::~Inst_VOP3__V_CMPX_NEQ_F32()
{
} // ~Inst_VOP3__V_CMPX_NEQ_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_NEQ_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_NLT_F32 class methods ---
Inst_VOP3__V_CMPX_NLT_F32::Inst_VOP3__V_CMPX_NLT_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_nlt_f32", true)
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_NLT_F32
Inst_VOP3__V_CMPX_NLT_F32::~Inst_VOP3__V_CMPX_NLT_F32()
{
} // ~Inst_VOP3__V_CMPX_NLT_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_NLT_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_TRU_F32 class methods ---
Inst_VOP3__V_CMPX_TRU_F32::Inst_VOP3__V_CMPX_TRU_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_tru_f32", true)
{
setFlag(ALU);
setFlag(F32);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_TRU_F32
Inst_VOP3__V_CMPX_TRU_F32::~Inst_VOP3__V_CMPX_TRU_F32()
{
} // ~Inst_VOP3__V_CMPX_TRU_F32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_TRU_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 1);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_F_F64 class methods ---
Inst_VOP3__V_CMP_F_F64::Inst_VOP3__V_CMP_F_F64(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_f_f64", true)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_CMP_F_F64
Inst_VOP3__V_CMP_F_F64::~Inst_VOP3__V_CMP_F_F64()
{
} // ~Inst_VOP3__V_CMP_F_F64
// --- description from .arch file ---
// D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_F_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_LT_F64 class methods ---
Inst_VOP3__V_CMP_LT_F64::Inst_VOP3__V_CMP_LT_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_lt_f64", true)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_CMP_LT_F64
Inst_VOP3__V_CMP_LT_F64::~Inst_VOP3__V_CMP_LT_F64()
{
} // ~Inst_VOP3__V_CMP_LT_F64
// --- description from .arch file ---
// D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_LT_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
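// VOP3A carries per-source input modifiers: bit i of ABS
// takes the absolute value of source i, and bit i of NEG
// negates it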
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_EQ_F64 class methods ---
Inst_VOP3__V_CMP_EQ_F64::Inst_VOP3__V_CMP_EQ_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_eq_f64", true)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_CMP_EQ_F64
Inst_VOP3__V_CMP_EQ_F64::~Inst_VOP3__V_CMP_EQ_F64()
{
} // ~Inst_VOP3__V_CMP_EQ_F64
// --- description from .arch file ---
// D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_EQ_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_LE_F64 class methods ---
Inst_VOP3__V_CMP_LE_F64::Inst_VOP3__V_CMP_LE_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_le_f64", true)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_CMP_LE_F64
Inst_VOP3__V_CMP_LE_F64::~Inst_VOP3__V_CMP_LE_F64()
{
} // ~Inst_VOP3__V_CMP_LE_F64
// --- description from .arch file ---
// D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_LE_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_GT_F64 class methods ---
Inst_VOP3__V_CMP_GT_F64::Inst_VOP3__V_CMP_GT_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_gt_f64", true)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_CMP_GT_F64
Inst_VOP3__V_CMP_GT_F64::~Inst_VOP3__V_CMP_GT_F64()
{
} // ~Inst_VOP3__V_CMP_GT_F64
// --- description from .arch file ---
// D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_GT_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_LG_F64 class methods ---
Inst_VOP3__V_CMP_LG_F64::Inst_VOP3__V_CMP_LG_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_lg_f64", true)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_CMP_LG_F64
Inst_VOP3__V_CMP_LG_F64::~Inst_VOP3__V_CMP_LG_F64()
{
} // ~Inst_VOP3__V_CMP_LG_F64
// --- description from .arch file ---
// D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_LG_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, (src0[lane] < src1[lane]
|| src0[lane] > src1[lane]) ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_GE_F64 class methods ---
Inst_VOP3__V_CMP_GE_F64::Inst_VOP3__V_CMP_GE_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_ge_f64", true)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_CMP_GE_F64
Inst_VOP3__V_CMP_GE_F64::~Inst_VOP3__V_CMP_GE_F64()
{
} // ~Inst_VOP3__V_CMP_GE_F64
// --- description from .arch file ---
// D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_GE_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_O_F64 class methods ---
Inst_VOP3__V_CMP_O_F64::Inst_VOP3__V_CMP_O_F64(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_o_f64", true)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_CMP_O_F64
Inst_VOP3__V_CMP_O_F64::~Inst_VOP3__V_CMP_O_F64()
{
} // ~Inst_VOP3__V_CMP_O_F64
// --- description from .arch file ---
// D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_O_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, (!std::isnan(src0[lane])
&& !std::isnan(src1[lane])) ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_U_F64 class methods ---
Inst_VOP3__V_CMP_U_F64::Inst_VOP3__V_CMP_U_F64(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_u_f64", true)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_CMP_U_F64
Inst_VOP3__V_CMP_U_F64::~Inst_VOP3__V_CMP_U_F64()
{
} // ~Inst_VOP3__V_CMP_U_F64
// --- description from .arch file ---
// D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_U_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, (std::isnan(src0[lane])
|| std::isnan(src1[lane])) ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_NGE_F64 class methods ---
Inst_VOP3__V_CMP_NGE_F64::Inst_VOP3__V_CMP_NGE_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_nge_f64", true)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_CMP_NGE_F64
Inst_VOP3__V_CMP_NGE_F64::~Inst_VOP3__V_CMP_NGE_F64()
{
} // ~Inst_VOP3__V_CMP_NGE_F64
// --- description from .arch file ---
// D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_NGE_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_NLG_F64 class methods ---
Inst_VOP3__V_CMP_NLG_F64::Inst_VOP3__V_CMP_NLG_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_nlg_f64", true)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_CMP_NLG_F64
Inst_VOP3__V_CMP_NLG_F64::~Inst_VOP3__V_CMP_NLG_F64()
{
} // ~Inst_VOP3__V_CMP_NLG_F64
// --- description from .arch file ---
// D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_NLG_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, !(src0[lane] < src1[lane]
|| src0[lane] > src1[lane]) ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_NGT_F64 class methods ---
Inst_VOP3__V_CMP_NGT_F64::Inst_VOP3__V_CMP_NGT_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_ngt_f64", true)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_CMP_NGT_F64
Inst_VOP3__V_CMP_NGT_F64::~Inst_VOP3__V_CMP_NGT_F64()
{
} // ~Inst_VOP3__V_CMP_NGT_F64
// --- description from .arch file ---
// D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_NGT_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_NLE_F64 class methods ---
Inst_VOP3__V_CMP_NLE_F64::Inst_VOP3__V_CMP_NLE_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_nle_f64", true)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_CMP_NLE_F64
Inst_VOP3__V_CMP_NLE_F64::~Inst_VOP3__V_CMP_NLE_F64()
{
} // ~Inst_VOP3__V_CMP_NLE_F64
// --- description from .arch file ---
// D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_NLE_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_NEQ_F64 class methods ---
Inst_VOP3__V_CMP_NEQ_F64::Inst_VOP3__V_CMP_NEQ_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_neq_f64", true)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_CMP_NEQ_F64
Inst_VOP3__V_CMP_NEQ_F64::~Inst_VOP3__V_CMP_NEQ_F64()
{
} // ~Inst_VOP3__V_CMP_NEQ_F64
// --- description from .arch file ---
// D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_NEQ_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_NLT_F64 class methods ---
Inst_VOP3__V_CMP_NLT_F64::Inst_VOP3__V_CMP_NLT_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_nlt_f64", true)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_CMP_NLT_F64
Inst_VOP3__V_CMP_NLT_F64::~Inst_VOP3__V_CMP_NLT_F64()
{
} // ~Inst_VOP3__V_CMP_NLT_F64
// --- description from .arch file ---
// D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_NLT_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_TRU_F64 class methods ---
Inst_VOP3__V_CMP_TRU_F64::Inst_VOP3__V_CMP_TRU_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_tru_f64", true)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_CMP_TRU_F64
Inst_VOP3__V_CMP_TRU_F64::~Inst_VOP3__V_CMP_TRU_F64()
{
} // ~Inst_VOP3__V_CMP_TRU_F64
// --- description from .arch file ---
// D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_TRU_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 1);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_F_F64 class methods ---
Inst_VOP3__V_CMPX_F_F64::Inst_VOP3__V_CMPX_F_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_f_f64", true)
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_F_F64
Inst_VOP3__V_CMPX_F_F64::~Inst_VOP3__V_CMPX_F_F64()
{
} // ~Inst_VOP3__V_CMPX_F_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_F_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_LT_F64 class methods ---
Inst_VOP3__V_CMPX_LT_F64::Inst_VOP3__V_CMPX_LT_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_lt_f64", true)
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_LT_F64
Inst_VOP3__V_CMPX_LT_F64::~Inst_VOP3__V_CMPX_LT_F64()
{
} // ~Inst_VOP3__V_CMPX_LT_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_LT_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_EQ_F64 class methods ---
Inst_VOP3__V_CMPX_EQ_F64::Inst_VOP3__V_CMPX_EQ_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_eq_f64", true)
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_EQ_F64
Inst_VOP3__V_CMPX_EQ_F64::~Inst_VOP3__V_CMPX_EQ_F64()
{
} // ~Inst_VOP3__V_CMPX_EQ_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_EQ_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_LE_F64 class methods ---
Inst_VOP3__V_CMPX_LE_F64::Inst_VOP3__V_CMPX_LE_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_le_f64", true)
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_LE_F64
Inst_VOP3__V_CMPX_LE_F64::~Inst_VOP3__V_CMPX_LE_F64()
{
} // ~Inst_VOP3__V_CMPX_LE_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_LE_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_GT_F64 class methods ---
Inst_VOP3__V_CMPX_GT_F64::Inst_VOP3__V_CMPX_GT_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_gt_f64", true)
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_GT_F64
Inst_VOP3__V_CMPX_GT_F64::~Inst_VOP3__V_CMPX_GT_F64()
{
} // ~Inst_VOP3__V_CMPX_GT_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_GT_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_LG_F64 class methods ---
Inst_VOP3__V_CMPX_LG_F64::Inst_VOP3__V_CMPX_LG_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_lg_f64", true)
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_LG_F64
Inst_VOP3__V_CMPX_LG_F64::~Inst_VOP3__V_CMPX_LG_F64()
{
} // ~Inst_VOP3__V_CMPX_LG_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_LG_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, (src0[lane] < src1[lane]
|| src0[lane] > src1[lane]) ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_GE_F64 class methods ---
Inst_VOP3__V_CMPX_GE_F64::Inst_VOP3__V_CMPX_GE_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_ge_f64", true)
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_GE_F64
Inst_VOP3__V_CMPX_GE_F64::~Inst_VOP3__V_CMPX_GE_F64()
{
} // ~Inst_VOP3__V_CMPX_GE_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_GE_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_O_F64 class methods ---
Inst_VOP3__V_CMPX_O_F64::Inst_VOP3__V_CMPX_O_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_o_f64", true)
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_O_F64
Inst_VOP3__V_CMPX_O_F64::~Inst_VOP3__V_CMPX_O_F64()
{
} // ~Inst_VOP3__V_CMPX_O_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
// encoding.
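// "O" is the ordered predicate: true only when neither operand is NaN.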
void
Inst_VOP3__V_CMPX_O_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, (!std::isnan(src0[lane])
&& !std::isnan(src1[lane])) ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_U_F64 class methods ---
Inst_VOP3__V_CMPX_U_F64::Inst_VOP3__V_CMPX_U_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_u_f64", true)
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_U_F64
Inst_VOP3__V_CMPX_U_F64::~Inst_VOP3__V_CMPX_U_F64()
{
} // ~Inst_VOP3__V_CMPX_U_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
// encoding.
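// "U" is the unordered predicate: true when either operand is NaN,
// the complement of V_CMPX_O_F64 above.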
void
Inst_VOP3__V_CMPX_U_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, (std::isnan(src0[lane])
|| std::isnan(src1[lane])) ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_NGE_F64 class methods ---
Inst_VOP3__V_CMPX_NGE_F64::Inst_VOP3__V_CMPX_NGE_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_nge_f64", true)
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_NGE_F64
Inst_VOP3__V_CMPX_NGE_F64::~Inst_VOP3__V_CMPX_NGE_F64()
{
} // ~Inst_VOP3__V_CMPX_NGE_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
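// The N* compares are implemented as the logical negation of the
// corresponding ordered compare, so they evaluate true when either
// operand is NaN; !(S0 >= S1) is therefore not equivalent to
// (S0 < S1) once NaNs are involved.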
void
Inst_VOP3__V_CMPX_NGE_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_NLG_F64 class methods ---
Inst_VOP3__V_CMPX_NLG_F64::Inst_VOP3__V_CMPX_NLG_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_nlg_f64", true)
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_NLG_F64
Inst_VOP3__V_CMPX_NLG_F64::~Inst_VOP3__V_CMPX_NLG_F64()
{
} // ~Inst_VOP3__V_CMPX_NLG_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_NLG_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, !(src0[lane] < src1[lane]
|| src0[lane] > src1[lane]) ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_NGT_F64 class methods ---
Inst_VOP3__V_CMPX_NGT_F64::Inst_VOP3__V_CMPX_NGT_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_ngt_f64", true)
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_NGT_F64
Inst_VOP3__V_CMPX_NGT_F64::~Inst_VOP3__V_CMPX_NGT_F64()
{
} // ~Inst_VOP3__V_CMPX_NGT_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_NGT_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_NLE_F64 class methods ---
Inst_VOP3__V_CMPX_NLE_F64::Inst_VOP3__V_CMPX_NLE_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_nle_f64", true)
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_NLE_F64
Inst_VOP3__V_CMPX_NLE_F64::~Inst_VOP3__V_CMPX_NLE_F64()
{
} // ~Inst_VOP3__V_CMPX_NLE_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_NLE_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_NEQ_F64 class methods ---
Inst_VOP3__V_CMPX_NEQ_F64::Inst_VOP3__V_CMPX_NEQ_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_neq_f64", true)
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_NEQ_F64
Inst_VOP3__V_CMPX_NEQ_F64::~Inst_VOP3__V_CMPX_NEQ_F64()
{
} // ~Inst_VOP3__V_CMPX_NEQ_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
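// NEQ uses operator!=, which is an unordered not-equal: it is true
// when either operand is NaN, in contrast to V_CMPX_LG_F64 above.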
void
Inst_VOP3__V_CMPX_NEQ_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_NLT_F64 class methods ---
Inst_VOP3__V_CMPX_NLT_F64::Inst_VOP3__V_CMPX_NLT_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_nlt_f64", true)
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_NLT_F64
Inst_VOP3__V_CMPX_NLT_F64::~Inst_VOP3__V_CMPX_NLT_F64()
{
} // ~Inst_VOP3__V_CMPX_NLT_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_NLT_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_TRU_F64 class methods ---
Inst_VOP3__V_CMPX_TRU_F64::Inst_VOP3__V_CMPX_TRU_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_tru_f64", true)
{
setFlag(ALU);
setFlag(F64);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_TRU_F64
Inst_VOP3__V_CMPX_TRU_F64::~Inst_VOP3__V_CMPX_TRU_F64()
{
} // ~Inst_VOP3__V_CMPX_TRU_F64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
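// Setting every active lane's bit to 1 (with inactive lanes left at
// the operand's initial zero) reproduces the current EXEC mask in
// SDST, so the write-back leaves EXEC unchanged; the instruction
// effectively saves EXEC into the destination registers.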
void
Inst_VOP3__V_CMPX_TRU_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 1);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_F_I16 class methods ---
Inst_VOP3__V_CMP_F_I16::Inst_VOP3__V_CMP_F_I16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_f_i16", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_F_I16
Inst_VOP3__V_CMP_F_I16::~Inst_VOP3__V_CMP_F_I16()
{
} // ~Inst_VOP3__V_CMP_F_I16
// --- description from .arch file ---
// D.u64[threadID] = 0; D = VCC in VOPC encoding.
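// The F ("false") and T ("true") compare variants ignore their
// sources and write a constant 0 or 1 per active lane, so no source
// operands are read here.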
void
Inst_VOP3__V_CMP_F_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_LT_I16 class methods ---
Inst_VOP3__V_CMP_LT_I16::Inst_VOP3__V_CMP_LT_I16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_lt_i16", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_LT_I16
Inst_VOP3__V_CMP_LT_I16::~Inst_VOP3__V_CMP_LT_I16()
{
} // ~Inst_VOP3__V_CMP_LT_I16
// --- description from .arch file ---
// D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
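// The 16-bit compares operate on the 16-bit view of each lane's VGPR
// (the low half of the 32-bit lane in this model); ConstVecOperandI16
// reads the operands as signed 16-bit values, making this a signed
// compare.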
void
Inst_VOP3__V_CMP_LT_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_EQ_I16 class methods ---
Inst_VOP3__V_CMP_EQ_I16::Inst_VOP3__V_CMP_EQ_I16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_eq_i16", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_EQ_I16
Inst_VOP3__V_CMP_EQ_I16::~Inst_VOP3__V_CMP_EQ_I16()
{
} // ~Inst_VOP3__V_CMP_EQ_I16
// --- description from .arch file ---
// D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_EQ_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_LE_I16 class methods ---
Inst_VOP3__V_CMP_LE_I16::Inst_VOP3__V_CMP_LE_I16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_le_i16", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_LE_I16
Inst_VOP3__V_CMP_LE_I16::~Inst_VOP3__V_CMP_LE_I16()
{
} // ~Inst_VOP3__V_CMP_LE_I16
// --- description from .arch file ---
// D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_LE_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_GT_I16 class methods ---
Inst_VOP3__V_CMP_GT_I16::Inst_VOP3__V_CMP_GT_I16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_gt_i16", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_GT_I16
Inst_VOP3__V_CMP_GT_I16::~Inst_VOP3__V_CMP_GT_I16()
{
} // ~Inst_VOP3__V_CMP_GT_I16
// --- description from .arch file ---
// D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_GT_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_NE_I16 class methods ---
Inst_VOP3__V_CMP_NE_I16::Inst_VOP3__V_CMP_NE_I16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_ne_i16", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_NE_I16
Inst_VOP3__V_CMP_NE_I16::~Inst_VOP3__V_CMP_NE_I16()
{
} // ~Inst_VOP3__V_CMP_NE_I16
// --- description from .arch file ---
// D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_NE_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_GE_I16 class methods ---
Inst_VOP3__V_CMP_GE_I16::Inst_VOP3__V_CMP_GE_I16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_ge_i16", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_GE_I16
Inst_VOP3__V_CMP_GE_I16::~Inst_VOP3__V_CMP_GE_I16()
{
} // ~Inst_VOP3__V_CMP_GE_I16
// --- description from .arch file ---
// D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_GE_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_T_I16 class methods ---
Inst_VOP3__V_CMP_T_I16::Inst_VOP3__V_CMP_T_I16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_t_i16", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_T_I16
Inst_VOP3__V_CMP_T_I16::~Inst_VOP3__V_CMP_T_I16()
{
} // ~Inst_VOP3__V_CMP_T_I16
// --- description from .arch file ---
// D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_T_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 1);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_F_U16 class methods ---
Inst_VOP3__V_CMP_F_U16::Inst_VOP3__V_CMP_F_U16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_f_u16", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_F_U16
Inst_VOP3__V_CMP_F_U16::~Inst_VOP3__V_CMP_F_U16()
{
} // ~Inst_VOP3__V_CMP_F_U16
// --- description from .arch file ---
// D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_F_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_LT_U16 class methods ---
Inst_VOP3__V_CMP_LT_U16::Inst_VOP3__V_CMP_LT_U16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_lt_u16", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_LT_U16
Inst_VOP3__V_CMP_LT_U16::~Inst_VOP3__V_CMP_LT_U16()
{
} // ~Inst_VOP3__V_CMP_LT_U16
// --- description from .arch file ---
// D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_LT_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_EQ_U16 class methods ---
Inst_VOP3__V_CMP_EQ_U16::Inst_VOP3__V_CMP_EQ_U16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_eq_u16", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_EQ_U16
Inst_VOP3__V_CMP_EQ_U16::~Inst_VOP3__V_CMP_EQ_U16()
{
} // ~Inst_VOP3__V_CMP_EQ_U16
// --- description from .arch file ---
// D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_EQ_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_LE_U16 class methods ---
Inst_VOP3__V_CMP_LE_U16::Inst_VOP3__V_CMP_LE_U16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_le_u16", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_LE_U16
Inst_VOP3__V_CMP_LE_U16::~Inst_VOP3__V_CMP_LE_U16()
{
} // ~Inst_VOP3__V_CMP_LE_U16
// --- description from .arch file ---
// D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_LE_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_GT_U16 class methods ---
Inst_VOP3__V_CMP_GT_U16::Inst_VOP3__V_CMP_GT_U16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_gt_u16", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_GT_U16
Inst_VOP3__V_CMP_GT_U16::~Inst_VOP3__V_CMP_GT_U16()
{
} // ~Inst_VOP3__V_CMP_GT_U16
// --- description from .arch file ---
// D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_GT_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_NE_U16 class methods ---
Inst_VOP3__V_CMP_NE_U16::Inst_VOP3__V_CMP_NE_U16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_ne_u16", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_NE_U16
Inst_VOP3__V_CMP_NE_U16::~Inst_VOP3__V_CMP_NE_U16()
{
} // ~Inst_VOP3__V_CMP_NE_U16
// --- description from .arch file ---
// D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_NE_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_GE_U16 class methods ---
Inst_VOP3__V_CMP_GE_U16::Inst_VOP3__V_CMP_GE_U16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_ge_u16", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_GE_U16
Inst_VOP3__V_CMP_GE_U16::~Inst_VOP3__V_CMP_GE_U16()
{
} // ~Inst_VOP3__V_CMP_GE_U16
// --- description from .arch file ---
// D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_GE_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_T_U16 class methods ---
Inst_VOP3__V_CMP_T_U16::Inst_VOP3__V_CMP_T_U16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_t_u16", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_T_U16
Inst_VOP3__V_CMP_T_U16::~Inst_VOP3__V_CMP_T_U16()
{
} // ~Inst_VOP3__V_CMP_T_U16
// --- description from .arch file ---
// D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_T_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 1);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_F_I16 class methods ---
Inst_VOP3__V_CMPX_F_I16::Inst_VOP3__V_CMPX_F_I16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_f_i16", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_F_I16
Inst_VOP3__V_CMPX_F_I16::~Inst_VOP3__V_CMPX_F_I16()
{
} // ~Inst_VOP3__V_CMPX_F_I16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_F_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_LT_I16 class methods ---
Inst_VOP3__V_CMPX_LT_I16::Inst_VOP3__V_CMPX_LT_I16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_lt_i16", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_LT_I16
Inst_VOP3__V_CMPX_LT_I16::~Inst_VOP3__V_CMPX_LT_I16()
{
} // ~Inst_VOP3__V_CMPX_LT_I16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_LT_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_EQ_I16 class methods ---
Inst_VOP3__V_CMPX_EQ_I16::Inst_VOP3__V_CMPX_EQ_I16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_eq_i16", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_EQ_I16
Inst_VOP3__V_CMPX_EQ_I16::~Inst_VOP3__V_CMPX_EQ_I16()
{
} // ~Inst_VOP3__V_CMPX_EQ_I16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_EQ_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_LE_I16 class methods ---
Inst_VOP3__V_CMPX_LE_I16::Inst_VOP3__V_CMPX_LE_I16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_le_i16", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_LE_I16
Inst_VOP3__V_CMPX_LE_I16::~Inst_VOP3__V_CMPX_LE_I16()
{
} // ~Inst_VOP3__V_CMPX_LE_I16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_LE_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_GT_I16 class methods ---
Inst_VOP3__V_CMPX_GT_I16::Inst_VOP3__V_CMPX_GT_I16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_gt_i16", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_GT_I16
Inst_VOP3__V_CMPX_GT_I16::~Inst_VOP3__V_CMPX_GT_I16()
{
} // ~Inst_VOP3__V_CMPX_GT_I16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_GT_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_NE_I16 class methods ---
Inst_VOP3__V_CMPX_NE_I16::Inst_VOP3__V_CMPX_NE_I16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_ne_i16", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_NE_I16
Inst_VOP3__V_CMPX_NE_I16::~Inst_VOP3__V_CMPX_NE_I16()
{
} // ~Inst_VOP3__V_CMPX_NE_I16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_NE_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_GE_I16 class methods ---
Inst_VOP3__V_CMPX_GE_I16::Inst_VOP3__V_CMPX_GE_I16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_ge_i16", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_GE_I16
Inst_VOP3__V_CMPX_GE_I16::~Inst_VOP3__V_CMPX_GE_I16()
{
} // ~Inst_VOP3__V_CMPX_GE_I16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_GE_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_T_I16 class methods ---
Inst_VOP3__V_CMPX_T_I16::Inst_VOP3__V_CMPX_T_I16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_t_i16", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_T_I16
Inst_VOP3__V_CMPX_T_I16::~Inst_VOP3__V_CMPX_T_I16()
{
} // ~Inst_VOP3__V_CMPX_T_I16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_T_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 1);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_F_U16 class methods ---
Inst_VOP3__V_CMPX_F_U16::Inst_VOP3__V_CMPX_F_U16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_f_u16", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_F_U16
Inst_VOP3__V_CMPX_F_U16::~Inst_VOP3__V_CMPX_F_U16()
{
} // ~Inst_VOP3__V_CMPX_F_U16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_F_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_LT_U16 class methods ---
Inst_VOP3__V_CMPX_LT_U16::Inst_VOP3__V_CMPX_LT_U16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_lt_u16", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_LT_U16
Inst_VOP3__V_CMPX_LT_U16::~Inst_VOP3__V_CMPX_LT_U16()
{
} // ~Inst_VOP3__V_CMPX_LT_U16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_LT_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_EQ_U16 class methods ---
Inst_VOP3__V_CMPX_EQ_U16::Inst_VOP3__V_CMPX_EQ_U16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_eq_u16", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_EQ_U16
Inst_VOP3__V_CMPX_EQ_U16::~Inst_VOP3__V_CMPX_EQ_U16()
{
} // ~Inst_VOP3__V_CMPX_EQ_U16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_EQ_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_LE_U16 class methods ---
Inst_VOP3__V_CMPX_LE_U16::Inst_VOP3__V_CMPX_LE_U16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_le_u16", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_LE_U16
Inst_VOP3__V_CMPX_LE_U16::~Inst_VOP3__V_CMPX_LE_U16()
{
} // ~Inst_VOP3__V_CMPX_LE_U16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_LE_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_GT_U16 class methods ---
Inst_VOP3__V_CMPX_GT_U16::Inst_VOP3__V_CMPX_GT_U16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_gt_u16", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_GT_U16
Inst_VOP3__V_CMPX_GT_U16::~Inst_VOP3__V_CMPX_GT_U16()
{
} // ~Inst_VOP3__V_CMPX_GT_U16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_GT_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_NE_U16 class methods ---
Inst_VOP3__V_CMPX_NE_U16::Inst_VOP3__V_CMPX_NE_U16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_ne_u16", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_NE_U16
Inst_VOP3__V_CMPX_NE_U16::~Inst_VOP3__V_CMPX_NE_U16()
{
} // ~Inst_VOP3__V_CMPX_NE_U16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_NE_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_GE_U16 class methods ---
Inst_VOP3__V_CMPX_GE_U16::Inst_VOP3__V_CMPX_GE_U16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_ge_u16", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_GE_U16
Inst_VOP3__V_CMPX_GE_U16::~Inst_VOP3__V_CMPX_GE_U16()
{
} // ~Inst_VOP3__V_CMPX_GE_U16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_GE_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_T_U16 class methods ---
Inst_VOP3__V_CMPX_T_U16::Inst_VOP3__V_CMPX_T_U16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_t_u16", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_T_U16
Inst_VOP3__V_CMPX_T_U16::~Inst_VOP3__V_CMPX_T_U16()
{
} // ~Inst_VOP3__V_CMPX_T_U16
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_T_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 1);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_F_I32 class methods ---
Inst_VOP3__V_CMP_F_I32::Inst_VOP3__V_CMP_F_I32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_f_i32", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_F_I32
Inst_VOP3__V_CMP_F_I32::~Inst_VOP3__V_CMP_F_I32()
{
} // ~Inst_VOP3__V_CMP_F_I32
// --- description from .arch file ---
// D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_F_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_LT_I32 class methods ---
Inst_VOP3__V_CMP_LT_I32::Inst_VOP3__V_CMP_LT_I32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_lt_i32", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_LT_I32
Inst_VOP3__V_CMP_LT_I32::~Inst_VOP3__V_CMP_LT_I32()
{
} // ~Inst_VOP3__V_CMP_LT_I32
// --- description from .arch file ---
// D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_EQ_I32 class methods ---
Inst_VOP3__V_CMP_EQ_I32::Inst_VOP3__V_CMP_EQ_I32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_eq_i32", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_EQ_I32
Inst_VOP3__V_CMP_EQ_I32::~Inst_VOP3__V_CMP_EQ_I32()
{
} // ~Inst_VOP3__V_CMP_EQ_I32
// --- description from .arch file ---
// D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_LE_I32 class methods ---
Inst_VOP3__V_CMP_LE_I32::Inst_VOP3__V_CMP_LE_I32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_le_i32", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_LE_I32
Inst_VOP3__V_CMP_LE_I32::~Inst_VOP3__V_CMP_LE_I32()
{
} // ~Inst_VOP3__V_CMP_LE_I32
// --- description from .arch file ---
// D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_GT_I32 class methods ---
Inst_VOP3__V_CMP_GT_I32::Inst_VOP3__V_CMP_GT_I32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_gt_i32", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_GT_I32
Inst_VOP3__V_CMP_GT_I32::~Inst_VOP3__V_CMP_GT_I32()
{
} // ~Inst_VOP3__V_CMP_GT_I32
// --- description from .arch file ---
// D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_NE_I32 class methods ---
Inst_VOP3__V_CMP_NE_I32::Inst_VOP3__V_CMP_NE_I32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_ne_i32", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_NE_I32
Inst_VOP3__V_CMP_NE_I32::~Inst_VOP3__V_CMP_NE_I32()
{
} // ~Inst_VOP3__V_CMP_NE_I32
// --- description from .arch file ---
// D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_NE_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_GE_I32 class methods ---
Inst_VOP3__V_CMP_GE_I32::Inst_VOP3__V_CMP_GE_I32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_ge_i32", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_GE_I32
Inst_VOP3__V_CMP_GE_I32::~Inst_VOP3__V_CMP_GE_I32()
{
} // ~Inst_VOP3__V_CMP_GE_I32
// --- description from .arch file ---
// D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_T_I32 class methods ---
Inst_VOP3__V_CMP_T_I32::Inst_VOP3__V_CMP_T_I32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_t_i32", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_T_I32
Inst_VOP3__V_CMP_T_I32::~Inst_VOP3__V_CMP_T_I32()
{
} // ~Inst_VOP3__V_CMP_T_I32
// --- description from .arch file ---
// D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_T_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 1);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_F_U32 class methods ---
Inst_VOP3__V_CMP_F_U32::Inst_VOP3__V_CMP_F_U32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_f_u32", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_F_U32
Inst_VOP3__V_CMP_F_U32::~Inst_VOP3__V_CMP_F_U32()
{
} // ~Inst_VOP3__V_CMP_F_U32
// --- description from .arch file ---
// D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_F_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_LT_U32 class methods ---
Inst_VOP3__V_CMP_LT_U32::Inst_VOP3__V_CMP_LT_U32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_lt_u32", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_LT_U32
Inst_VOP3__V_CMP_LT_U32::~Inst_VOP3__V_CMP_LT_U32()
{
} // ~Inst_VOP3__V_CMP_LT_U32
// --- description from .arch file ---
// D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_EQ_U32 class methods ---
Inst_VOP3__V_CMP_EQ_U32::Inst_VOP3__V_CMP_EQ_U32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_eq_u32", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_EQ_U32
Inst_VOP3__V_CMP_EQ_U32::~Inst_VOP3__V_CMP_EQ_U32()
{
} // ~Inst_VOP3__V_CMP_EQ_U32
// --- description from .arch file ---
// D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_LE_U32 class methods ---
Inst_VOP3__V_CMP_LE_U32::Inst_VOP3__V_CMP_LE_U32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_le_u32", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_LE_U32
Inst_VOP3__V_CMP_LE_U32::~Inst_VOP3__V_CMP_LE_U32()
{
} // ~Inst_VOP3__V_CMP_LE_U32
// --- description from .arch file ---
// D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_GT_U32 class methods ---
Inst_VOP3__V_CMP_GT_U32::Inst_VOP3__V_CMP_GT_U32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_gt_u32", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_GT_U32
Inst_VOP3__V_CMP_GT_U32::~Inst_VOP3__V_CMP_GT_U32()
{
} // ~Inst_VOP3__V_CMP_GT_U32
// --- description from .arch file ---
// D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_NE_U32 class methods ---
Inst_VOP3__V_CMP_NE_U32::Inst_VOP3__V_CMP_NE_U32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_ne_u32", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_NE_U32
Inst_VOP3__V_CMP_NE_U32::~Inst_VOP3__V_CMP_NE_U32()
{
} // ~Inst_VOP3__V_CMP_NE_U32
// --- description from .arch file ---
// D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_NE_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_GE_U32 class methods ---
Inst_VOP3__V_CMP_GE_U32::Inst_VOP3__V_CMP_GE_U32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_ge_u32", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_GE_U32
Inst_VOP3__V_CMP_GE_U32::~Inst_VOP3__V_CMP_GE_U32()
{
} // ~Inst_VOP3__V_CMP_GE_U32
// --- description from .arch file ---
// D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_T_U32 class methods ---
Inst_VOP3__V_CMP_T_U32::Inst_VOP3__V_CMP_T_U32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_t_u32", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_T_U32
Inst_VOP3__V_CMP_T_U32::~Inst_VOP3__V_CMP_T_U32()
{
} // ~Inst_VOP3__V_CMP_T_U32
// --- description from .arch file ---
// D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_T_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 1);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_F_I32 class methods ---
Inst_VOP3__V_CMPX_F_I32::Inst_VOP3__V_CMPX_F_I32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_f_i32", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_F_I32
Inst_VOP3__V_CMPX_F_I32::~Inst_VOP3__V_CMPX_F_I32()
{
} // ~Inst_VOP3__V_CMPX_F_I32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_F_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
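/*
 * The CMPX variants below differ from the plain compares in one
 * extra step: the freshly packed lane mask is copied into EXEC
 * before the scalar destination is written. Since only currently
 * active lanes can set a bit, a CMPX may clear EXEC bits but never
 * set a bit that was already clear, so the active set only narrows.
 * Conceptually (packLaneResults is a hypothetical helper standing in
 * for the loop above, not a function in this file):
 *
 *     uint64_t newMask = packLaneResults(lhs, rhs, execMask);
 *     execMask = newMask;   // EXEC := compare result
 *     sdst     = newMask;   // same mask also lands in the SGPR dest
 */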
// --- Inst_VOP3__V_CMPX_LT_I32 class methods ---
Inst_VOP3__V_CMPX_LT_I32::Inst_VOP3__V_CMPX_LT_I32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_lt_i32", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_LT_I32
Inst_VOP3__V_CMPX_LT_I32::~Inst_VOP3__V_CMPX_LT_I32()
{
} // ~Inst_VOP3__V_CMPX_LT_I32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_LT_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_EQ_I32 class methods ---
Inst_VOP3__V_CMPX_EQ_I32::Inst_VOP3__V_CMPX_EQ_I32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_eq_i32", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_EQ_I32
Inst_VOP3__V_CMPX_EQ_I32::~Inst_VOP3__V_CMPX_EQ_I32()
{
} // ~Inst_VOP3__V_CMPX_EQ_I32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_LE_I32 class methods ---
Inst_VOP3__V_CMPX_LE_I32::Inst_VOP3__V_CMPX_LE_I32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_le_i32", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_LE_I32
Inst_VOP3__V_CMPX_LE_I32::~Inst_VOP3__V_CMPX_LE_I32()
{
} // ~Inst_VOP3__V_CMPX_LE_I32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_LE_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_GT_I32 class methods ---
Inst_VOP3__V_CMPX_GT_I32::Inst_VOP3__V_CMPX_GT_I32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_gt_i32", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_GT_I32
Inst_VOP3__V_CMPX_GT_I32::~Inst_VOP3__V_CMPX_GT_I32()
{
} // ~Inst_VOP3__V_CMPX_GT_I32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_GT_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_NE_I32 class methods ---
Inst_VOP3__V_CMPX_NE_I32::Inst_VOP3__V_CMPX_NE_I32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_ne_i32", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_NE_I32
Inst_VOP3__V_CMPX_NE_I32::~Inst_VOP3__V_CMPX_NE_I32()
{
} // ~Inst_VOP3__V_CMPX_NE_I32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_NE_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_GE_I32 class methods ---
Inst_VOP3__V_CMPX_GE_I32::Inst_VOP3__V_CMPX_GE_I32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_ge_i32", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_GE_I32
Inst_VOP3__V_CMPX_GE_I32::~Inst_VOP3__V_CMPX_GE_I32()
{
} // ~Inst_VOP3__V_CMPX_GE_I32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_GE_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_T_I32 class methods ---
Inst_VOP3__V_CMPX_T_I32::Inst_VOP3__V_CMPX_T_I32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_t_i32", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_T_I32
Inst_VOP3__V_CMPX_T_I32::~Inst_VOP3__V_CMPX_T_I32()
{
} // ~Inst_VOP3__V_CMPX_T_I32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_T_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 1);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_F_U32 class methods ---
Inst_VOP3__V_CMPX_F_U32::Inst_VOP3__V_CMPX_F_U32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_f_u32", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_F_U32
Inst_VOP3__V_CMPX_F_U32::~Inst_VOP3__V_CMPX_F_U32()
{
} // ~Inst_VOP3__V_CMPX_F_U32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_F_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_LT_U32 class methods ---
Inst_VOP3__V_CMPX_LT_U32::Inst_VOP3__V_CMPX_LT_U32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_lt_u32", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_LT_U32
Inst_VOP3__V_CMPX_LT_U32::~Inst_VOP3__V_CMPX_LT_U32()
{
} // ~Inst_VOP3__V_CMPX_LT_U32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_LT_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_EQ_U32 class methods ---
Inst_VOP3__V_CMPX_EQ_U32::Inst_VOP3__V_CMPX_EQ_U32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_eq_u32", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_EQ_U32
Inst_VOP3__V_CMPX_EQ_U32::~Inst_VOP3__V_CMPX_EQ_U32()
{
} // ~Inst_VOP3__V_CMPX_EQ_U32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_LE_U32 class methods ---
Inst_VOP3__V_CMPX_LE_U32::Inst_VOP3__V_CMPX_LE_U32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_le_u32", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_LE_U32
Inst_VOP3__V_CMPX_LE_U32::~Inst_VOP3__V_CMPX_LE_U32()
{
} // ~Inst_VOP3__V_CMPX_LE_U32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_LE_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_GT_U32 class methods ---
Inst_VOP3__V_CMPX_GT_U32::Inst_VOP3__V_CMPX_GT_U32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_gt_u32", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_GT_U32
Inst_VOP3__V_CMPX_GT_U32::~Inst_VOP3__V_CMPX_GT_U32()
{
} // ~Inst_VOP3__V_CMPX_GT_U32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_GT_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_NE_U32 class methods ---
Inst_VOP3__V_CMPX_NE_U32::Inst_VOP3__V_CMPX_NE_U32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_ne_u32", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_NE_U32
Inst_VOP3__V_CMPX_NE_U32::~Inst_VOP3__V_CMPX_NE_U32()
{
} // ~Inst_VOP3__V_CMPX_NE_U32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_NE_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_GE_U32 class methods ---
Inst_VOP3__V_CMPX_GE_U32::Inst_VOP3__V_CMPX_GE_U32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_ge_u32", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_GE_U32
Inst_VOP3__V_CMPX_GE_U32::~Inst_VOP3__V_CMPX_GE_U32()
{
} // ~Inst_VOP3__V_CMPX_GE_U32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_GE_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_T_U32 class methods ---
Inst_VOP3__V_CMPX_T_U32::Inst_VOP3__V_CMPX_T_U32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_t_u32", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_T_U32
Inst_VOP3__V_CMPX_T_U32::~Inst_VOP3__V_CMPX_T_U32()
{
} // ~Inst_VOP3__V_CMPX_T_U32
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_T_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 1);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_F_I64 class methods ---
Inst_VOP3__V_CMP_F_I64::Inst_VOP3__V_CMP_F_I64(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_f_i64", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_F_I64
Inst_VOP3__V_CMP_F_I64::~Inst_VOP3__V_CMP_F_I64()
{
} // ~Inst_VOP3__V_CMP_F_I64
// --- description from .arch file ---
// D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_F_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_LT_I64 class methods ---
Inst_VOP3__V_CMP_LT_I64::Inst_VOP3__V_CMP_LT_I64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_lt_i64", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_LT_I64
Inst_VOP3__V_CMP_LT_I64::~Inst_VOP3__V_CMP_LT_I64()
{
} // ~Inst_VOP3__V_CMP_LT_I64
// --- description from .arch file ---
// D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_LT_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_EQ_I64 class methods ---
Inst_VOP3__V_CMP_EQ_I64::Inst_VOP3__V_CMP_EQ_I64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_eq_i64", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_EQ_I64
Inst_VOP3__V_CMP_EQ_I64::~Inst_VOP3__V_CMP_EQ_I64()
{
} // ~Inst_VOP3__V_CMP_EQ_I64
// --- description from .arch file ---
// D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_EQ_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_LE_I64 class methods ---
Inst_VOP3__V_CMP_LE_I64::Inst_VOP3__V_CMP_LE_I64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_le_i64", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_LE_I64
Inst_VOP3__V_CMP_LE_I64::~Inst_VOP3__V_CMP_LE_I64()
{
} // ~Inst_VOP3__V_CMP_LE_I64
// --- description from .arch file ---
// D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_LE_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_GT_I64 class methods ---
Inst_VOP3__V_CMP_GT_I64::Inst_VOP3__V_CMP_GT_I64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_gt_i64", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_GT_I64
Inst_VOP3__V_CMP_GT_I64::~Inst_VOP3__V_CMP_GT_I64()
{
} // ~Inst_VOP3__V_CMP_GT_I64
// --- description from .arch file ---
// D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_GT_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_NE_I64 class methods ---
Inst_VOP3__V_CMP_NE_I64::Inst_VOP3__V_CMP_NE_I64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_ne_i64", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_NE_I64
Inst_VOP3__V_CMP_NE_I64::~Inst_VOP3__V_CMP_NE_I64()
{
} // ~Inst_VOP3__V_CMP_NE_I64
// --- description from .arch file ---
// D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_NE_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_GE_I64 class methods ---
Inst_VOP3__V_CMP_GE_I64::Inst_VOP3__V_CMP_GE_I64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_ge_i64", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_GE_I64
Inst_VOP3__V_CMP_GE_I64::~Inst_VOP3__V_CMP_GE_I64()
{
} // ~Inst_VOP3__V_CMP_GE_I64
// --- description from .arch file ---
// D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_GE_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_T_I64 class methods ---
Inst_VOP3__V_CMP_T_I64::Inst_VOP3__V_CMP_T_I64(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_t_i64", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_T_I64
Inst_VOP3__V_CMP_T_I64::~Inst_VOP3__V_CMP_T_I64()
{
} // ~Inst_VOP3__V_CMP_T_I64
// --- description from .arch file ---
// D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_T_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 1);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_F_U64 class methods ---
Inst_VOP3__V_CMP_F_U64::Inst_VOP3__V_CMP_F_U64(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_f_u64", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_F_U64
Inst_VOP3__V_CMP_F_U64::~Inst_VOP3__V_CMP_F_U64()
{
} // ~Inst_VOP3__V_CMP_F_U64
// --- description from .arch file ---
// D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_F_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_LT_U64 class methods ---
Inst_VOP3__V_CMP_LT_U64::Inst_VOP3__V_CMP_LT_U64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_lt_u64", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_LT_U64
Inst_VOP3__V_CMP_LT_U64::~Inst_VOP3__V_CMP_LT_U64()
{
} // ~Inst_VOP3__V_CMP_LT_U64
// --- description from .arch file ---
// D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_LT_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_EQ_U64 class methods ---
Inst_VOP3__V_CMP_EQ_U64::Inst_VOP3__V_CMP_EQ_U64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_eq_u64", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_EQ_U64
Inst_VOP3__V_CMP_EQ_U64::~Inst_VOP3__V_CMP_EQ_U64()
{
} // ~Inst_VOP3__V_CMP_EQ_U64
// --- description from .arch file ---
// D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_LE_U64 class methods ---
Inst_VOP3__V_CMP_LE_U64::Inst_VOP3__V_CMP_LE_U64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_le_u64", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_LE_U64
Inst_VOP3__V_CMP_LE_U64::~Inst_VOP3__V_CMP_LE_U64()
{
} // ~Inst_VOP3__V_CMP_LE_U64
// --- description from .arch file ---
// D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_LE_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_GT_U64 class methods ---
Inst_VOP3__V_CMP_GT_U64::Inst_VOP3__V_CMP_GT_U64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_gt_u64", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_GT_U64
Inst_VOP3__V_CMP_GT_U64::~Inst_VOP3__V_CMP_GT_U64()
{
} // ~Inst_VOP3__V_CMP_GT_U64
// --- description from .arch file ---
// D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_GT_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_NE_U64 class methods ---
Inst_VOP3__V_CMP_NE_U64::Inst_VOP3__V_CMP_NE_U64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_ne_u64", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_NE_U64
Inst_VOP3__V_CMP_NE_U64::~Inst_VOP3__V_CMP_NE_U64()
{
} // ~Inst_VOP3__V_CMP_NE_U64
// --- description from .arch file ---
// D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_NE_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_GE_U64 class methods ---
Inst_VOP3__V_CMP_GE_U64::Inst_VOP3__V_CMP_GE_U64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_ge_u64", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_GE_U64
Inst_VOP3__V_CMP_GE_U64::~Inst_VOP3__V_CMP_GE_U64()
{
} // ~Inst_VOP3__V_CMP_GE_U64
// --- description from .arch file ---
// D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_GE_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMP_T_U64 class methods ---
Inst_VOP3__V_CMP_T_U64::Inst_VOP3__V_CMP_T_U64(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmp_t_u64", true)
{
setFlag(ALU);
} // Inst_VOP3__V_CMP_T_U64
Inst_VOP3__V_CMP_T_U64::~Inst_VOP3__V_CMP_T_U64()
{
} // ~Inst_VOP3__V_CMP_T_U64
// --- description from .arch file ---
// D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_T_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 1);
}
}
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_F_I64 class methods ---
Inst_VOP3__V_CMPX_F_I64::Inst_VOP3__V_CMPX_F_I64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_f_i64", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_F_I64
Inst_VOP3__V_CMPX_F_I64::~Inst_VOP3__V_CMPX_F_I64()
{
} // ~Inst_VOP3__V_CMPX_F_I64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_F_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_LT_I64 class methods ---
Inst_VOP3__V_CMPX_LT_I64::Inst_VOP3__V_CMPX_LT_I64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_lt_i64", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_LT_I64
Inst_VOP3__V_CMPX_LT_I64::~Inst_VOP3__V_CMPX_LT_I64()
{
} // ~Inst_VOP3__V_CMPX_LT_I64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_LT_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_EQ_I64 class methods ---
Inst_VOP3__V_CMPX_EQ_I64::Inst_VOP3__V_CMPX_EQ_I64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_eq_i64", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_EQ_I64
Inst_VOP3__V_CMPX_EQ_I64::~Inst_VOP3__V_CMPX_EQ_I64()
{
} // ~Inst_VOP3__V_CMPX_EQ_I64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_EQ_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_LE_I64 class methods ---
Inst_VOP3__V_CMPX_LE_I64::Inst_VOP3__V_CMPX_LE_I64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_le_i64", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_LE_I64
Inst_VOP3__V_CMPX_LE_I64::~Inst_VOP3__V_CMPX_LE_I64()
{
} // ~Inst_VOP3__V_CMPX_LE_I64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_LE_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_GT_I64 class methods ---
Inst_VOP3__V_CMPX_GT_I64::Inst_VOP3__V_CMPX_GT_I64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_gt_i64", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_GT_I64
Inst_VOP3__V_CMPX_GT_I64::~Inst_VOP3__V_CMPX_GT_I64()
{
} // ~Inst_VOP3__V_CMPX_GT_I64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_GT_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_NE_I64 class methods ---
Inst_VOP3__V_CMPX_NE_I64::Inst_VOP3__V_CMPX_NE_I64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_ne_i64", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_NE_I64
Inst_VOP3__V_CMPX_NE_I64::~Inst_VOP3__V_CMPX_NE_I64()
{
} // ~Inst_VOP3__V_CMPX_NE_I64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_NE_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_GE_I64 class methods ---
Inst_VOP3__V_CMPX_GE_I64::Inst_VOP3__V_CMPX_GE_I64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_ge_i64", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_GE_I64
Inst_VOP3__V_CMPX_GE_I64::~Inst_VOP3__V_CMPX_GE_I64()
{
} // ~Inst_VOP3__V_CMPX_GE_I64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_GE_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_T_I64 class methods ---
Inst_VOP3__V_CMPX_T_I64::Inst_VOP3__V_CMPX_T_I64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_t_i64", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_T_I64
Inst_VOP3__V_CMPX_T_I64::~Inst_VOP3__V_CMPX_T_I64()
{
} // ~Inst_VOP3__V_CMPX_T_I64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_T_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 1);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_F_U64 class methods ---
Inst_VOP3__V_CMPX_F_U64::Inst_VOP3__V_CMPX_F_U64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_f_u64", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_F_U64
Inst_VOP3__V_CMPX_F_U64::~Inst_VOP3__V_CMPX_F_U64()
{
} // ~Inst_VOP3__V_CMPX_F_U64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_F_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_LT_U64 class methods ---
Inst_VOP3__V_CMPX_LT_U64::Inst_VOP3__V_CMPX_LT_U64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_lt_u64", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_LT_U64
Inst_VOP3__V_CMPX_LT_U64::~Inst_VOP3__V_CMPX_LT_U64()
{
} // ~Inst_VOP3__V_CMPX_LT_U64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_LT_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_EQ_U64 class methods ---
Inst_VOP3__V_CMPX_EQ_U64::Inst_VOP3__V_CMPX_EQ_U64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_eq_u64", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_EQ_U64
Inst_VOP3__V_CMPX_EQ_U64::~Inst_VOP3__V_CMPX_EQ_U64()
{
} // ~Inst_VOP3__V_CMPX_EQ_U64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_LE_U64 class methods ---
Inst_VOP3__V_CMPX_LE_U64::Inst_VOP3__V_CMPX_LE_U64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_le_u64", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_LE_U64
Inst_VOP3__V_CMPX_LE_U64::~Inst_VOP3__V_CMPX_LE_U64()
{
} // ~Inst_VOP3__V_CMPX_LE_U64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_LE_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_GT_U64 class methods ---
Inst_VOP3__V_CMPX_GT_U64::Inst_VOP3__V_CMPX_GT_U64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_gt_u64", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_GT_U64
Inst_VOP3__V_CMPX_GT_U64::~Inst_VOP3__V_CMPX_GT_U64()
{
} // ~Inst_VOP3__V_CMPX_GT_U64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_GT_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_NE_U64 class methods ---
Inst_VOP3__V_CMPX_NE_U64::Inst_VOP3__V_CMPX_NE_U64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_ne_u64", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_NE_U64
Inst_VOP3__V_CMPX_NE_U64::~Inst_VOP3__V_CMPX_NE_U64()
{
} // ~Inst_VOP3__V_CMPX_NE_U64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_NE_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_GE_U64 class methods ---
Inst_VOP3__V_CMPX_GE_U64::Inst_VOP3__V_CMPX_GE_U64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_ge_u64", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_GE_U64
Inst_VOP3__V_CMPX_GE_U64::~Inst_VOP3__V_CMPX_GE_U64()
{
} // ~Inst_VOP3__V_CMPX_GE_U64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_GE_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CMPX_T_U64 class methods ---
Inst_VOP3__V_CMPX_T_U64::Inst_VOP3__V_CMPX_T_U64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cmpx_t_u64", true)
{
setFlag(ALU);
setFlag(WritesEXEC);
} // Inst_VOP3__V_CMPX_T_U64
Inst_VOP3__V_CMPX_T_U64::~Inst_VOP3__V_CMPX_T_U64()
{
} // ~Inst_VOP3__V_CMPX_T_U64
// --- description from .arch file ---
// EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_T_U64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
sdst.setBit(lane, 1);
}
}
wf->execMask() = sdst.rawData();
sdst.write();
} // execute
// --- Inst_VOP3__V_CNDMASK_B32 class methods ---
Inst_VOP3__V_CNDMASK_B32::Inst_VOP3__V_CNDMASK_B32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cndmask_b32", false)
{
setFlag(ALU);
setFlag(ReadsVCC);
} // Inst_VOP3__V_CNDMASK_B32
Inst_VOP3__V_CNDMASK_B32::~Inst_VOP3__V_CNDMASK_B32()
{
} // ~Inst_VOP3__V_CNDMASK_B32
// --- description from .arch file ---
// D.u = (VCC[i] ? S1.u : S0.u) (i = threadID in wave); VOP3: specify VCC
// as a scalar GPR in S2.
void
Inst_VOP3__V_CNDMASK_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
vcc.read();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = bits(vcc.rawData(), lane)
? src1[lane] : src0[lane];
}
}
vdst.write();
} // execute
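/*
 * V_CNDMASK_B32 is a per-lane select: bit <lane> of the scalar
 * condition (VCC, or an arbitrary SGPR pair in the VOP3 encoding,
 * chosen via SRC2) picks src1 when set and src0 when clear. The
 * bits() helper above extracts that single bit; an equivalent sketch
 * without it (cond, a and b are hypothetical names):
 *
 *     uint32_t out = ((cond >> lane) & 0x1) ? b : a;  // b=src1, a=src0
 *
 * This is the workhorse of if-conversion: a V_CMP fills VCC and
 * V_CNDMASK then merges the two sides of a branch without diverging.
 */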
// --- Inst_VOP3__V_ADD_F32 class methods ---
Inst_VOP3__V_ADD_F32::Inst_VOP3__V_ADD_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_add_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_ADD_F32
Inst_VOP3__V_ADD_F32::~Inst_VOP3__V_ADD_F32()
{
} // ~Inst_VOP3__V_ADD_F32
// --- description from .arch file ---
// D.f = S0.f + S1.f.
void
Inst_VOP3__V_ADD_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] + src1[lane];
}
}
vdst.write();
} // execute
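/*
 * The FP VOP3 ops honor the ABS and NEG input modifiers on a
 * per-source basis: ABS bit n takes |x| of source n, then NEG bit n
 * negates the result, so both bits together yield -|x|. A hedged
 * per-lane sketch of what the two modifier calls amount to, assuming
 * they rewrite the operand buffer in place (absBit, negBit, v and
 * srcVal are illustrative names):
 *
 *     float v = srcVal;
 *     if (absBit) { v = std::fabs(v); }
 *     if (negBit) { v = -v; }
 *
 * The integer compares earlier in this file assert that these bits
 * are clear instead, since input modifiers are defined for FP
 * sources only.
 */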
// --- Inst_VOP3__V_SUB_F32 class methods ---
Inst_VOP3__V_SUB_F32::Inst_VOP3__V_SUB_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_sub_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_SUB_F32
Inst_VOP3__V_SUB_F32::~Inst_VOP3__V_SUB_F32()
{
} // ~Inst_VOP3__V_SUB_F32
// --- description from .arch file ---
// D.f = S0.f - S1.f.
// SQ translates to V_ADD_F32.
void
Inst_VOP3__V_SUB_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] - src1[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_SUBREV_F32 class methods ---
Inst_VOP3__V_SUBREV_F32::Inst_VOP3__V_SUBREV_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_subrev_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_SUBREV_F32
Inst_VOP3__V_SUBREV_F32::~Inst_VOP3__V_SUBREV_F32()
{
} // ~Inst_VOP3__V_SUBREV_F32
// --- description from .arch file ---
// D.f = S1.f - S0.f.
// SQ translates to V_ADD_F32.
void
Inst_VOP3__V_SUBREV_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] - src0[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MUL_LEGACY_F32 class methods ---
Inst_VOP3__V_MUL_LEGACY_F32::Inst_VOP3__V_MUL_LEGACY_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_mul_legacy_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_MUL_LEGACY_F32
Inst_VOP3__V_MUL_LEGACY_F32::~Inst_VOP3__V_MUL_LEGACY_F32()
{
} // ~Inst_VOP3__V_MUL_LEGACY_F32
// --- description from .arch file ---
// D.f = S0.f * S1.f (DX9 rules, 0.0*x = 0.0).
void
Inst_VOP3__V_MUL_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
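// DX9 legacy multiply rules, handled case by case below: NaN inputs
// propagate; a zero or denormal src0 (denormals are treated as zero)
// gives a signed zero against a finite src1 and NaN against an
// infinite src1; an infinite src0 gives NaN against a zero/denormal
// src1 and a signed infinity otherwise. Everything else falls
// through to an ordinary multiply. Note that only src0 is
// classified, so e.g. a denormal src1 against a normal src0
// multiplies as-is.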
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (std::isnan(src0[lane]) ||
std::isnan(src1[lane])) {
vdst[lane] = NAN;
} else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
std::fpclassify(src0[lane]) == FP_ZERO) &&
!std::signbit(src0[lane])) {
if (std::isinf(src1[lane])) {
vdst[lane] = NAN;
} else if (!std::signbit(src1[lane])) {
vdst[lane] = +0.0;
} else {
vdst[lane] = -0.0;
}
} else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
std::fpclassify(src0[lane]) == FP_ZERO) &&
std::signbit(src0[lane])) {
if (std::isinf(src1[lane])) {
vdst[lane] = NAN;
} else if (std::signbit(src1[lane])) {
vdst[lane] = +0.0;
} else {
vdst[lane] = -0.0;
}
} else if (std::isinf(src0[lane]) &&
!std::signbit(src0[lane])) {
if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
std::fpclassify(src1[lane]) == FP_ZERO) {
vdst[lane] = NAN;
} else if (!std::signbit(src1[lane])) {
vdst[lane] = +INFINITY;
} else {
vdst[lane] = -INFINITY;
}
} else if (std::isinf(src0[lane]) &&
std::signbit(src0[lane])) {
if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
std::fpclassify(src1[lane]) == FP_ZERO) {
vdst[lane] = NAN;
} else if (std::signbit(src1[lane])) {
vdst[lane] = +INFINITY;
} else {
vdst[lane] = -INFINITY;
}
} else {
vdst[lane] = src0[lane] * src1[lane];
}
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MUL_F32 class methods ---
Inst_VOP3__V_MUL_F32::Inst_VOP3__V_MUL_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_mul_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_MUL_F32
Inst_VOP3__V_MUL_F32::~Inst_VOP3__V_MUL_F32()
{
} // ~Inst_VOP3__V_MUL_F32
// --- description from .arch file ---
// D.f = S0.f * S1.f.
void
Inst_VOP3__V_MUL_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
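// The explicit special-case ladder below mirrors v_mul_legacy_f32:
// it flushes a zero/denormal src0 to a signed zero and produces the
// NaN/infinity results directly rather than relying on the host's
// multiply, presumably to match the GPU's flush-to-zero behavior.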
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (std::isnan(src0[lane]) ||
std::isnan(src1[lane])) {
vdst[lane] = NAN;
} else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
std::fpclassify(src0[lane]) == FP_ZERO) &&
!std::signbit(src0[lane])) {
if (std::isinf(src1[lane])) {
vdst[lane] = NAN;
} else if (!std::signbit(src1[lane])) {
vdst[lane] = +0.0;
} else {
vdst[lane] = -0.0;
}
} else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
std::fpclassify(src0[lane]) == FP_ZERO) &&
std::signbit(src0[lane])) {
if (std::isinf(src1[lane])) {
vdst[lane] = NAN;
} else if (std::signbit(src1[lane])) {
vdst[lane] = +0.0;
} else {
vdst[lane] = -0.0;
}
} else if (std::isinf(src0[lane]) &&
!std::signbit(src0[lane])) {
if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
std::fpclassify(src1[lane]) == FP_ZERO) {
vdst[lane] = NAN;
} else if (!std::signbit(src1[lane])) {
vdst[lane] = +INFINITY;
} else {
vdst[lane] = -INFINITY;
}
} else if (std::isinf(src0[lane]) &&
std::signbit(src0[lane])) {
if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
std::fpclassify(src1[lane]) == FP_ZERO) {
vdst[lane] = NAN;
} else if (std::signbit(src1[lane])) {
vdst[lane] = +INFINITY;
} else {
vdst[lane] = -INFINITY;
}
} else {
vdst[lane] = src0[lane] * src1[lane];
}
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MUL_I32_I24 class methods ---
Inst_VOP3__V_MUL_I32_I24::Inst_VOP3__V_MUL_I32_I24(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_mul_i32_i24", false)
{
setFlag(ALU);
} // Inst_VOP3__V_MUL_I32_I24
Inst_VOP3__V_MUL_I32_I24::~Inst_VOP3__V_MUL_I32_I24()
{
} // ~Inst_VOP3__V_MUL_I32_I24
// --- description from .arch file ---
// D.i = S0.i[23:0] * S1.i[23:0].
void
Inst_VOP3__V_MUL_I32_I24::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
VecOperandI32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
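// Multiply the low 24 bits of each source as signed values:
// bits(x, 23, 0) extracts the low 24 bits and sext<24> sign-extends
// them, so the product below is the signed 24-bit multiply.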
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = sext<24>(bits(src0[lane], 23, 0))
* sext<24>(bits(src1[lane], 23, 0));
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MUL_HI_I32_I24 class methods ---
Inst_VOP3__V_MUL_HI_I32_I24::Inst_VOP3__V_MUL_HI_I32_I24(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_mul_hi_i32_i24", false)
{
setFlag(ALU);
} // Inst_VOP3__V_MUL_HI_I32_I24
Inst_VOP3__V_MUL_HI_I32_I24::~Inst_VOP3__V_MUL_HI_I32_I24()
{
} // ~Inst_VOP3__V_MUL_HI_I32_I24
// --- description from .arch file ---
// D.i = (S0.i[23:0] * S1.i[23:0])>>32.
void
Inst_VOP3__V_MUL_HI_I32_I24::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
VecOperandI32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
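// Same signed 24-bit extraction as v_mul_i32_i24, but the product
// is formed in 64 bits so its upper 32 bits can be returned.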
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
VecElemI64 tmp_src0
= (VecElemI64)sext<24>(bits(src0[lane], 23, 0));
VecElemI64 tmp_src1
= (VecElemI64)sext<24>(bits(src1[lane], 23, 0));
vdst[lane] = (VecElemI32)((tmp_src0 * tmp_src1) >> 32);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MUL_U32_U24 class methods ---
Inst_VOP3__V_MUL_U32_U24::Inst_VOP3__V_MUL_U32_U24(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_mul_u32_u24", false)
{
setFlag(ALU);
} // Inst_VOP3__V_MUL_U32_U24
Inst_VOP3__V_MUL_U32_U24::~Inst_VOP3__V_MUL_U32_U24()
{
} // ~Inst_VOP3__V_MUL_U32_U24
// --- description from .arch file ---
// D.u = S0.u[23:0] * S1.u[23:0].
void
Inst_VOP3__V_MUL_U32_U24::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = bits(src0[lane], 23, 0) * bits(src1[lane], 23, 0);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MUL_HI_U32_U24 class methods ---
Inst_VOP3__V_MUL_HI_U32_U24::Inst_VOP3__V_MUL_HI_U32_U24(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_mul_hi_u32_u24", false)
{
setFlag(ALU);
} // Inst_VOP3__V_MUL_HI_U32_U24
Inst_VOP3__V_MUL_HI_U32_U24::~Inst_VOP3__V_MUL_HI_U32_U24()
{
} // ~Inst_VOP3__V_MUL_HI_U32_U24
// --- description from .arch file ---
// D.i = (S0.u[23:0] * S1.u[23:0])>>32.
void
Inst_VOP3__V_MUL_HI_U32_U24::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
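// Unsigned variant: widen the low 24 bits of each source to 64
// bits, multiply, and keep bits [63:32] of the product.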
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0);
VecElemU64 tmp_src1 = (VecElemU64)bits(src1[lane], 23, 0);
vdst[lane] = (VecElemU32)((tmp_src0 * tmp_src1) >> 32);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MIN_F32 class methods ---
Inst_VOP3__V_MIN_F32::Inst_VOP3__V_MIN_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_min_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_MIN_F32
Inst_VOP3__V_MIN_F32::~Inst_VOP3__V_MIN_F32()
{
} // ~Inst_VOP3__V_MIN_F32
// --- description from .arch file ---
// D.f = (S0.f < S1.f ? S0.f : S1.f).
void
Inst_VOP3__V_MIN_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
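// std::fmin (and std::fmax in v_max_f32 below) return the non-NaN
// operand when exactly one input is NaN, i.e. IEEE-754 minNum/maxNum
// semantics rather than the plain comparison in the .arch text.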
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::fmin(src0[lane], src1[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MAX_F32 class methods ---
Inst_VOP3__V_MAX_F32::Inst_VOP3__V_MAX_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_max_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_MAX_F32
Inst_VOP3__V_MAX_F32::~Inst_VOP3__V_MAX_F32()
{
} // ~Inst_VOP3__V_MAX_F32
// --- description from .arch file ---
// D.f = (S0.f >= S1.f ? S0.f : S1.f).
void
Inst_VOP3__V_MAX_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::fmax(src0[lane], src1[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MIN_I32 class methods ---
Inst_VOP3__V_MIN_I32::Inst_VOP3__V_MIN_I32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_min_i32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_MIN_I32
Inst_VOP3__V_MIN_I32::~Inst_VOP3__V_MIN_I32()
{
} // ~Inst_VOP3__V_MIN_I32
// --- description from .arch file ---
// D.i = min(S0.i, S1.i).
void
Inst_VOP3__V_MIN_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
VecOperandI32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::min(src0[lane], src1[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MAX_I32 class methods ---
Inst_VOP3__V_MAX_I32::Inst_VOP3__V_MAX_I32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_max_i32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_MAX_I32
Inst_VOP3__V_MAX_I32::~Inst_VOP3__V_MAX_I32()
{
} // ~Inst_VOP3__V_MAX_I32
// --- description from .arch file ---
// D.i = max(S0.i, S1.i).
void
Inst_VOP3__V_MAX_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
VecOperandI32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::max(src0[lane], src1[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MIN_U32 class methods ---
Inst_VOP3__V_MIN_U32::Inst_VOP3__V_MIN_U32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_min_u32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_MIN_U32
Inst_VOP3__V_MIN_U32::~Inst_VOP3__V_MIN_U32()
{
} // ~Inst_VOP3__V_MIN_U32
// --- description from .arch file ---
// D.u = min(S0.u, S1.u).
void
Inst_VOP3__V_MIN_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::min(src0[lane], src1[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MAX_U32 class methods ---
Inst_VOP3__V_MAX_U32::Inst_VOP3__V_MAX_U32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_max_u32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_MAX_U32
Inst_VOP3__V_MAX_U32::~Inst_VOP3__V_MAX_U32()
{
} // ~Inst_VOP3__V_MAX_U32
// --- description from .arch file ---
// D.u = max(S0.u, S1.u).
void
Inst_VOP3__V_MAX_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::max(src0[lane], src1[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_LSHRREV_B32 class methods ---
Inst_VOP3__V_LSHRREV_B32::Inst_VOP3__V_LSHRREV_B32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_lshrrev_b32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_LSHRREV_B32
Inst_VOP3__V_LSHRREV_B32::~Inst_VOP3__V_LSHRREV_B32()
{
} // ~Inst_VOP3__V_LSHRREV_B32
// --- description from .arch file ---
// D.u = S1.u >> S0.u[4:0].
// The vacated bits are set to zero.
// SQ translates this to an internal SP opcode.
void
Inst_VOP3__V_LSHRREV_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
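// "rev" form: the shift amount comes from src0 and the value being
// shifted from src1. Only the low 5 bits of src0 are used, so shift
// counts are taken modulo 32 (the 16-bit shifts later in this file
// use only 4 bits).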
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_ASHRREV_I32 class methods ---
Inst_VOP3__V_ASHRREV_I32::Inst_VOP3__V_ASHRREV_I32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_ashrrev_i32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_ASHRREV_I32
Inst_VOP3__V_ASHRREV_I32::~Inst_VOP3__V_ASHRREV_I32()
{
} // ~Inst_VOP3__V_ASHRREV_I32
// --- description from .arch file ---
// D.i = signext(S1.i) >> S0.i[4:0].
// The vacated bits are set to the sign bit of the input value.
// SQ translates this to an internal SP opcode.
void
Inst_VOP3__V_ASHRREV_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
VecOperandI32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_LSHLREV_B32 class methods ---
Inst_VOP3__V_LSHLREV_B32::Inst_VOP3__V_LSHLREV_B32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_lshlrev_b32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_LSHLREV_B32
Inst_VOP3__V_LSHLREV_B32::~Inst_VOP3__V_LSHLREV_B32()
{
} // ~Inst_VOP3__V_LSHLREV_B32
// --- description from .arch file ---
// D.u = S1.u << S0.u[4:0].
// SQ translates this to an internal SP opcode.
void
Inst_VOP3__V_LSHLREV_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] << bits(src0[lane], 4, 0);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_AND_B32 class methods ---
Inst_VOP3__V_AND_B32::Inst_VOP3__V_AND_B32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_and_b32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_AND_B32
Inst_VOP3__V_AND_B32::~Inst_VOP3__V_AND_B32()
{
} // ~Inst_VOP3__V_AND_B32
// --- description from .arch file ---
// D.u = S0.u & S1.u.
// Input and output modifiers not supported.
void
Inst_VOP3__V_AND_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] & src1[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_OR_B32 class methods ---
Inst_VOP3__V_OR_B32::Inst_VOP3__V_OR_B32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_or_b32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_OR_B32
Inst_VOP3__V_OR_B32::~Inst_VOP3__V_OR_B32()
{
} // ~Inst_VOP3__V_OR_B32
// --- description from .arch file ---
// D.u = S0.u | S1.u.
// Input and output modifiers not supported.
void
Inst_VOP3__V_OR_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] | src1[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_OR3_B32 class methods ---
Inst_VOP3__V_OR3_B32::Inst_VOP3__V_OR3_B32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_or3_b32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_OR3_B32
Inst_VOP3__V_OR3_B32::~Inst_VOP3__V_OR3_B32()
{
} // ~Inst_VOP3__V_OR3_B32
// --- description from .arch file ---
// D.u = S0.u | S1.u | S2.u.
// Input and output modifiers not supported.
void
Inst_VOP3__V_OR3_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] | src1[lane] | src2[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_XOR_B32 class methods ---
Inst_VOP3__V_XOR_B32::Inst_VOP3__V_XOR_B32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_xor_b32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_XOR_B32
Inst_VOP3__V_XOR_B32::~Inst_VOP3__V_XOR_B32()
{
} // ~Inst_VOP3__V_XOR_B32
// --- description from .arch file ---
// D.u = S0.u ^ S1.u.
// Input and output modifiers not supported.
void
Inst_VOP3__V_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] ^ src1[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MAC_F32 class methods ---
Inst_VOP3__V_MAC_F32::Inst_VOP3__V_MAC_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_mac_f32", false)
{
setFlag(ALU);
setFlag(F32);
setFlag(MAC);
} // Inst_VOP3__V_MAC_F32
Inst_VOP3__V_MAC_F32::~Inst_VOP3__V_MAC_F32()
{
} // ~Inst_VOP3__V_MAC_F32
// --- description from .arch file ---
// D.f = S0.f * S1.f + D.f.
// SQ translates to V_MAD_F32.
void
Inst_VOP3__V_MAC_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
vdst.read();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
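// std::fma() computes src0 * src1 + vdst with a single rounding,
// i.e. a fused multiply-add; vdst was read above so it can serve as
// the accumulator input.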
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_ADD_CO_U32 class methods ---
Inst_VOP3__V_ADD_CO_U32::Inst_VOP3__V_ADD_CO_U32(InFmt_VOP3B *iFmt)
: Inst_VOP3B(iFmt, "v_add_co_u32")
{
setFlag(ALU);
setFlag(WritesVCC);
} // Inst_VOP3__V_ADD_CO_U32
Inst_VOP3__V_ADD_CO_U32::~Inst_VOP3__V_ADD_CO_U32()
{
} // ~Inst_VOP3__V_ADD_CO_U32
// --- description from .arch file ---
// D.u = S0.u + S1.u;
// VCC[threadId] = (S0.u + S1.u >= 0x800000000ULL ? 1 : 0) is an UNSIGNED
// --- overflow or carry-out for V_ADDC_U32.
// In VOP3 the VCC destination may be an arbitrary SGPR-pair.
void
Inst_VOP3__V_ADD_CO_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
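// Compute the per-lane carry-out in 64-bit arithmetic: the 32-bit
// sum wraps, so the lane's VCC bit is set when the widened sum
// reaches 2^32 (0x100000000).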
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] + src1[lane];
vcc.setBit(lane, ((VecElemU64)src0[lane]
+ (VecElemU64)src1[lane]) >= 0x100000000ULL ? 1 : 0);
}
}
vdst.write();
vcc.write();
} // execute
// --- Inst_VOP3__V_SUB_CO_U32 class methods ---
Inst_VOP3__V_SUB_CO_U32::Inst_VOP3__V_SUB_CO_U32(InFmt_VOP3B *iFmt)
: Inst_VOP3B(iFmt, "v_sub_co_u32")
{
setFlag(ALU);
setFlag(WritesVCC);
} // Inst_VOP3__V_SUB_CO_U32
Inst_VOP3__V_SUB_CO_U32::~Inst_VOP3__V_SUB_CO_U32()
{
} // ~Inst_VOP3__V_SUB_CO_U32
// --- description from .arch file ---
// D.u = S0.u - S1.u;
// VCC[threadId] = (S1.u > S0.u ? 1 : 0) is an UNSIGNED overflow or
// carry-out for V_SUBB_U32.
// In VOP3 the VCC destination may be an arbitrary SGPR-pair.
void
Inst_VOP3__V_SUB_CO_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] - src1[lane];
vcc.setBit(lane, src1[lane] > src0[lane] ? 1 : 0);
}
}
vdst.write();
vcc.write();
} // execute
// --- Inst_VOP3__V_SUBREV_CO_U32 class methods ---
Inst_VOP3__V_SUBREV_CO_U32::Inst_VOP3__V_SUBREV_CO_U32(
InFmt_VOP3B *iFmt)
: Inst_VOP3B(iFmt, "v_subrev_co_u32")
{
setFlag(ALU);
setFlag(WritesVCC);
} // Inst_VOP3__V_SUBREV_CO_U32
Inst_VOP3__V_SUBREV_CO_U32::~Inst_VOP3__V_SUBREV_CO_U32()
{
} // ~Inst_VOP3__V_SUBREV_CO_U32
// --- description from .arch file ---
// D.u = S1.u - S0.u;
// VCC[threadId] = (S0.u > S1.u ? 1 : 0) is an UNSIGNED overflow or
// carry-out for V_SUBB_U32.
// In VOP3 the VCC destination may be an arbitrary SGPR-pair.
// SQ translates this to V_SUB_U32 with reversed operands.
void
Inst_VOP3__V_SUBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] - src0[lane];
vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
}
}
vdst.write();
vcc.write();
} // execute
// --- Inst_VOP3__V_ADDC_CO_U32 class methods ---
Inst_VOP3__V_ADDC_CO_U32::Inst_VOP3__V_ADDC_CO_U32(InFmt_VOP3B *iFmt)
: Inst_VOP3B(iFmt, "v_addc_co_u32")
{
setFlag(ALU);
setFlag(WritesVCC);
setFlag(ReadsVCC);
} // Inst_VOP3__V_ADDC_CO_U32
Inst_VOP3__V_ADDC_CO_U32::~Inst_VOP3__V_ADDC_CO_U32()
{
} // ~Inst_VOP3__V_ADDC_CO_U32
// --- description from .arch file ---
// D.u = S0.u + S1.u + VCC[threadId];
// VCC[threadId] = (S0.u + S1.u + VCC[threadId] >= 0x800000000ULL ? 1 : 0)
// is an UNSIGNED overflow.
// In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
// source comes from the SGPR-pair at S2.u.
void
Inst_VOP3__V_ADDC_CO_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
src0.readSrc();
src1.readSrc();
vcc.read();
/**
* input modifiers are supported by FP operations only
*/
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
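// Add with carry-in: bits(vcc.rawData(), lane) extracts this lane's
// carry-in bit from the VCC source, and the widened 64-bit sum sets
// the lane's carry-out bit in sdst, enabling multi-word addition.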
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] + src1[lane]
+ bits(vcc.rawData(), lane);
sdst.setBit(lane, ((VecElemU64)src0[lane]
+ (VecElemU64)src1[lane]
+ (VecElemU64)bits(vcc.rawData(), lane))
>= 0x100000000ULL ? 1 : 0);
}
}
vdst.write();
sdst.write();
} // execute
// --- Inst_VOP3__V_SUBB_CO_U32 class methods ---
Inst_VOP3__V_SUBB_CO_U32::Inst_VOP3__V_SUBB_CO_U32(InFmt_VOP3B *iFmt)
: Inst_VOP3B(iFmt, "v_subb_co_u32")
{
setFlag(ALU);
setFlag(WritesVCC);
setFlag(ReadsVCC);
} // Inst_VOP3__V_SUBB_CO_U32
Inst_VOP3__V_SUBB_CO_U32::~Inst_VOP3__V_SUBB_CO_U32()
{
} // ~Inst_VOP3__V_SUBB_CO_U32
// --- description from .arch file ---
// D.u = S0.u - S1.u - VCC[threadId];
// VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED
// --- overflow.
// In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
// --- source comes from the SGPR-pair at S2.u.
void
Inst_VOP3__V_SUBB_CO_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
vcc.read();
/**
* input modifiers are supported by FP operations only
*/
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
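// Subtract with borrow-in: this lane's VCC bit is subtracted along
// with src1, and the borrow-out bit is set when src1 plus the
// borrow-in exceeds src0.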
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] - src1[lane]
- bits(vcc.rawData(), lane);
sdst.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane))
> src0[lane] ? 1 : 0);
}
}
vdst.write();
sdst.write();
} // execute
// --- Inst_VOP3__V_SUBBREV_CO_U32 class methods ---
Inst_VOP3__V_SUBBREV_CO_U32::Inst_VOP3__V_SUBBREV_CO_U32(
InFmt_VOP3B *iFmt)
: Inst_VOP3B(iFmt, "v_subbrev_co_u32")
{
setFlag(ALU);
setFlag(WritesVCC);
setFlag(ReadsVCC);
} // Inst_VOP3__V_SUBBREV_CO_U32
Inst_VOP3__V_SUBBREV_CO_U32::~Inst_VOP3__V_SUBBREV_CO_U32()
{
} // ~Inst_VOP3__V_SUBBREV_CO_U32
// --- description from .arch file ---
// D.u = S1.u - S0.u - VCC[threadId];
// VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED
// overflow.
// In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
// source comes from the SGPR-pair at S2.u. SQ translates to V_SUBB_U32.
void
Inst_VOP3__V_SUBBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
vcc.read();
/**
* input modifiers are supported by FP operations only
*/
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] - src0[lane]
- bits(vcc.rawData(), lane);
sdst.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane))
> src0[lane] ? 1 : 0);
}
}
vdst.write();
sdst.write();
} // execute
// --- Inst_VOP3__V_ADD_F16 class methods ---
Inst_VOP3__V_ADD_F16::Inst_VOP3__V_ADD_F16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_add_f16", false)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_ADD_F16
Inst_VOP3__V_ADD_F16::~Inst_VOP3__V_ADD_F16()
{
} // ~Inst_VOP3__V_ADD_F16
// --- description from .arch file ---
// D.f16 = S0.f16 + S1.f16.
// Supports denormals, round mode, exception flags, saturation.
void
Inst_VOP3__V_ADD_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_SUB_F16 class methods ---
Inst_VOP3__V_SUB_F16::Inst_VOP3__V_SUB_F16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_sub_f16", false)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_SUB_F16
Inst_VOP3__V_SUB_F16::~Inst_VOP3__V_SUB_F16()
{
} // ~Inst_VOP3__V_SUB_F16
// --- description from .arch file ---
// D.f16 = S0.f16 - S1.f16.
// Supports denormals, round mode, exception flags, saturation.
// SQ translates to V_ADD_F16.
void
Inst_VOP3__V_SUB_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_SUBREV_F16 class methods ---
Inst_VOP3__V_SUBREV_F16::Inst_VOP3__V_SUBREV_F16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_subrev_f16", false)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_SUBREV_F16
Inst_VOP3__V_SUBREV_F16::~Inst_VOP3__V_SUBREV_F16()
{
} // ~Inst_VOP3__V_SUBREV_F16
// --- description from .arch file ---
// D.f16 = S1.f16 - S0.f16.
// Supports denormals, round mode, exception flags, saturation.
// SQ translates to V_ADD_F16.
void
Inst_VOP3__V_SUBREV_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_MUL_F16 class methods ---
Inst_VOP3__V_MUL_F16::Inst_VOP3__V_MUL_F16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_mul_f16", false)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_MUL_F16
Inst_VOP3__V_MUL_F16::~Inst_VOP3__V_MUL_F16()
{
} // ~Inst_VOP3__V_MUL_F16
// --- description from .arch file ---
// D.f16 = S0.f16 * S1.f16.
// Supports denormals, round mode, exception flags, saturation.
void
Inst_VOP3__V_MUL_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_MAC_F16 class methods ---
Inst_VOP3__V_MAC_F16::Inst_VOP3__V_MAC_F16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_mac_f16", false)
{
setFlag(ALU);
setFlag(F16);
setFlag(MAC);
} // Inst_VOP3__V_MAC_F16
Inst_VOP3__V_MAC_F16::~Inst_VOP3__V_MAC_F16()
{
} // ~Inst_VOP3__V_MAC_F16
// --- description from .arch file ---
// D.f16 = S0.f16 * S1.f16 + D.f16.
// Supports round mode, exception flags, saturation.
// SQ translates this to V_MAD_F16.
void
Inst_VOP3__V_MAC_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_ADD_U16 class methods ---
Inst_VOP3__V_ADD_U16::Inst_VOP3__V_ADD_U16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_add_u16", false)
{
setFlag(ALU);
} // Inst_VOP3__V_ADD_U16
Inst_VOP3__V_ADD_U16::~Inst_VOP3__V_ADD_U16()
{
} // ~Inst_VOP3__V_ADD_U16
// --- description from .arch file ---
// D.u16 = S0.u16 + S1.u16.
// Supports saturation (unsigned 16-bit integer domain).
void
Inst_VOP3__V_ADD_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
VecOperandU16 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] + src1[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_SUB_U16 class methods ---
Inst_VOP3__V_SUB_U16::Inst_VOP3__V_SUB_U16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_sub_u16", false)
{
setFlag(ALU);
} // Inst_VOP3__V_SUB_U16
Inst_VOP3__V_SUB_U16::~Inst_VOP3__V_SUB_U16()
{
} // ~Inst_VOP3__V_SUB_U16
// --- description from .arch file ---
// D.u16 = S0.u16 - S1.u16.
// Supports saturation (unsigned 16-bit integer domain).
void
Inst_VOP3__V_SUB_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
VecOperandU16 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] - src1[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_SUBREV_U16 class methods ---
Inst_VOP3__V_SUBREV_U16::Inst_VOP3__V_SUBREV_U16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_subrev_u16", false)
{
setFlag(ALU);
} // Inst_VOP3__V_SUBREV_U16
Inst_VOP3__V_SUBREV_U16::~Inst_VOP3__V_SUBREV_U16()
{
} // ~Inst_VOP3__V_SUBREV_U16
// --- description from .arch file ---
// D.u16 = S1.u16 - S0.u16.
// Supports saturation (unsigned 16-bit integer domain).
// SQ translates this to V_SUB_U16 with reversed operands.
void
Inst_VOP3__V_SUBREV_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
VecOperandU16 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] - src0[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MUL_LO_U16 class methods ---
Inst_VOP3__V_MUL_LO_U16::Inst_VOP3__V_MUL_LO_U16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_mul_lo_u16", false)
{
setFlag(ALU);
} // Inst_VOP3__V_MUL_LO_U16
Inst_VOP3__V_MUL_LO_U16::~Inst_VOP3__V_MUL_LO_U16()
{
} // ~Inst_VOP3__V_MUL_LO_U16
// --- description from .arch file ---
// D.u16 = S0.u16 * S1.u16.
// Supports saturation (unsigned 16-bit integer domain).
void
Inst_VOP3__V_MUL_LO_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
VecOperandU16 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] * src1[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_LSHLREV_B16 class methods ---
Inst_VOP3__V_LSHLREV_B16::Inst_VOP3__V_LSHLREV_B16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_lshlrev_b16", false)
{
setFlag(ALU);
} // Inst_VOP3__V_LSHLREV_B16
Inst_VOP3__V_LSHLREV_B16::~Inst_VOP3__V_LSHLREV_B16()
{
} // ~Inst_VOP3__V_LSHLREV_B16
// --- description from .arch file ---
// D.u[15:0] = S1.u[15:0] << S0.u[3:0].
// SQ translates this to an internal SP opcode.
void
Inst_VOP3__V_LSHLREV_B16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
VecOperandU16 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] << bits(src0[lane], 3, 0);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_LSHRREV_B16 class methods ---
Inst_VOP3__V_LSHRREV_B16::Inst_VOP3__V_LSHRREV_B16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_lshrrev_b16", false)
{
setFlag(ALU);
} // Inst_VOP3__V_LSHRREV_B16
Inst_VOP3__V_LSHRREV_B16::~Inst_VOP3__V_LSHRREV_B16()
{
} // ~Inst_VOP3__V_LSHRREV_B16
// --- description from .arch file ---
// D.u[15:0] = S1.u[15:0] >> S0.u[3:0].
// The vacated bits are set to zero.
// SQ translates this to an internal SP opcode.
void
Inst_VOP3__V_LSHRREV_B16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
VecOperandU16 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_ASHRREV_I16 class methods ---
Inst_VOP3__V_ASHRREV_I16::Inst_VOP3__V_ASHRREV_I16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_ashrrev_i16", false)
{
setFlag(ALU);
} // Inst_VOP3__V_ASHRREV_I16
Inst_VOP3__V_ASHRREV_I16::~Inst_VOP3__V_ASHRREV_I16()
{
} // ~Inst_VOP3__V_ASHRREV_I16
// --- description from .arch file ---
// D.i[15:0] = signext(S1.i[15:0]) >> S0.i[3:0].
// The vacated bits are set to the sign bit of the input value.
// SQ translates this to an internal SP opcode.
void
Inst_VOP3__V_ASHRREV_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
VecOperandI16 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MAX_F16 class methods ---
Inst_VOP3__V_MAX_F16::Inst_VOP3__V_MAX_F16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_max_f16", false)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_MAX_F16
Inst_VOP3__V_MAX_F16::~Inst_VOP3__V_MAX_F16()
{
} // ~Inst_VOP3__V_MAX_F16
// --- description from .arch file ---
// D.f16 = max(S0.f16, S1.f16).
// IEEE compliant. Supports denormals, round mode, exception flags,
// saturation.
void
Inst_VOP3__V_MAX_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_MIN_F16 class methods ---
Inst_VOP3__V_MIN_F16::Inst_VOP3__V_MIN_F16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_min_f16", false)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_MIN_F16
Inst_VOP3__V_MIN_F16::~Inst_VOP3__V_MIN_F16()
{
} // ~Inst_VOP3__V_MIN_F16
// --- description from .arch file ---
// D.f16 = min(S0.f16, S1.f16).
// IEEE compliant. Supports denormals, round mode, exception flags,
// saturation.
void
Inst_VOP3__V_MIN_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_MAX_U16 class methods ---
Inst_VOP3__V_MAX_U16::Inst_VOP3__V_MAX_U16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_max_u16", false)
{
setFlag(ALU);
} // Inst_VOP3__V_MAX_U16
Inst_VOP3__V_MAX_U16::~Inst_VOP3__V_MAX_U16()
{
} // ~Inst_VOP3__V_MAX_U16
// --- description from .arch file ---
// D.u[15:0] = max(S0.u[15:0], S1.u[15:0]).
void
Inst_VOP3__V_MAX_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
VecOperandU16 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::max(src0[lane], src1[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MAX_I16 class methods ---
Inst_VOP3__V_MAX_I16::Inst_VOP3__V_MAX_I16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_max_i16", false)
{
setFlag(ALU);
} // Inst_VOP3__V_MAX_I16
Inst_VOP3__V_MAX_I16::~Inst_VOP3__V_MAX_I16()
{
} // ~Inst_VOP3__V_MAX_I16
// --- description from .arch file ---
// D.i[15:0] = max(S0.i[15:0], S1.i[15:0]).
void
Inst_VOP3__V_MAX_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
VecOperandI16 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::max(src0[lane], src1[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MIN_U16 class methods ---
Inst_VOP3__V_MIN_U16::Inst_VOP3__V_MIN_U16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_min_u16", false)
{
setFlag(ALU);
} // Inst_VOP3__V_MIN_U16
Inst_VOP3__V_MIN_U16::~Inst_VOP3__V_MIN_U16()
{
} // ~Inst_VOP3__V_MIN_U16
// --- description from .arch file ---
// D.u[15:0] = min(S0.u[15:0], S1.u[15:0]).
void
Inst_VOP3__V_MIN_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
VecOperandU16 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::min(src0[lane], src1[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MIN_I16 class methods ---
Inst_VOP3__V_MIN_I16::Inst_VOP3__V_MIN_I16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_min_i16", false)
{
setFlag(ALU);
} // Inst_VOP3__V_MIN_I16
Inst_VOP3__V_MIN_I16::~Inst_VOP3__V_MIN_I16()
{
} // ~Inst_VOP3__V_MIN_I16
// --- description from .arch file ---
// D.i[15:0] = min(S0.i[15:0], S1.i[15:0]).
void
Inst_VOP3__V_MIN_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
VecOperandI16 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::min(src0[lane], src1[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_LDEXP_F16 class methods ---
Inst_VOP3__V_LDEXP_F16::Inst_VOP3__V_LDEXP_F16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_ldexp_f16", false)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_LDEXP_F16
Inst_VOP3__V_LDEXP_F16::~Inst_VOP3__V_LDEXP_F16()
{
} // ~Inst_VOP3__V_LDEXP_F16
// --- description from .arch file ---
// D.f16 = S0.f16 * (2 ** S1.i16).
void
Inst_VOP3__V_LDEXP_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_NOP class methods ---
Inst_VOP3__V_NOP::Inst_VOP3__V_NOP(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_nop", false)
{
setFlag(Nop);
setFlag(ALU);
} // Inst_VOP3__V_NOP
Inst_VOP3__V_NOP::~Inst_VOP3__V_NOP()
{
} // ~Inst_VOP3__V_NOP
// --- description from .arch file ---
// Do nothing.
void
Inst_VOP3__V_NOP::execute(GPUDynInstPtr gpuDynInst)
{
} // execute
// --- Inst_VOP3__V_MOV_B32 class methods ---
Inst_VOP3__V_MOV_B32::Inst_VOP3__V_MOV_B32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_mov_b32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_MOV_B32
Inst_VOP3__V_MOV_B32::~Inst_VOP3__V_MOV_B32()
{
} // ~Inst_VOP3__V_MOV_B32
// --- description from .arch file ---
// D.u = S0.u.
// Input and output modifiers not supported; this is an untyped operation.
void
Inst_VOP3__V_MOV_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_CVT_I32_F64 class methods ---
Inst_VOP3__V_CVT_I32_F64::Inst_VOP3__V_CVT_I32_F64(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cvt_i32_f64", false)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_CVT_I32_F64
Inst_VOP3__V_CVT_I32_F64::~Inst_VOP3__V_CVT_I32_F64()
{
} // ~Inst_VOP3__V_CVT_I32_F64
// --- description from .arch file ---
// D.i = (int)S0.d.
// Out-of-range floating point values (including infinity) saturate. NaN is
// --- converted to 0.
void
Inst_VOP3__V_CVT_I32_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
VecOperandI32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
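// frexp() splits src into m * 2^exp with 0.5 <= |m| < 1; exp > 30
// is the out-of-range test used here, so NaN becomes 0 and values
// flagged as too large for an int32 saturate to INT_MIN/INT_MAX
// before the narrowing cast.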
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
int exp;
std::frexp(src[lane], &exp);
if (std::isnan(src[lane])) {
vdst[lane] = 0;
} else if (std::isinf(src[lane]) || exp > 30) {
if (std::signbit(src[lane])) {
vdst[lane] = INT_MIN;
} else {
vdst[lane] = INT_MAX;
}
} else {
vdst[lane] = (VecElemI32)src[lane];
}
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_CVT_F64_I32 class methods ---
Inst_VOP3__V_CVT_F64_I32::Inst_VOP3__V_CVT_F64_I32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cvt_f64_i32", false)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_CVT_F64_I32
Inst_VOP3__V_CVT_F64_I32::~Inst_VOP3__V_CVT_F64_I32()
{
} // ~Inst_VOP3__V_CVT_F64_I32
// --- description from .arch file ---
// D.d = (double)S0.i.
void
Inst_VOP3__V_CVT_F64_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src(gpuDynInst, extData.SRC0);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemF64)src[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_CVT_F32_I32 class methods ---
Inst_VOP3__V_CVT_F32_I32::Inst_VOP3__V_CVT_F32_I32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cvt_f32_i32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CVT_F32_I32
Inst_VOP3__V_CVT_F32_I32::~Inst_VOP3__V_CVT_F32_I32()
{
} // ~Inst_VOP3__V_CVT_F32_I32
// --- description from .arch file ---
// D.f = (float)S0.i.
void
Inst_VOP3__V_CVT_F32_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src(gpuDynInst, extData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemF32)src[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_CVT_F32_U32 class methods ---
Inst_VOP3__V_CVT_F32_U32::Inst_VOP3__V_CVT_F32_U32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cvt_f32_u32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CVT_F32_U32
Inst_VOP3__V_CVT_F32_U32::~Inst_VOP3__V_CVT_F32_U32()
{
} // ~Inst_VOP3__V_CVT_F32_U32
// --- description from .arch file ---
// D.f = (float)S0.u.
void
Inst_VOP3__V_CVT_F32_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemF32)src[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_CVT_U32_F32 class methods ---
Inst_VOP3__V_CVT_U32_F32::Inst_VOP3__V_CVT_U32_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cvt_u32_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CVT_U32_F32
Inst_VOP3__V_CVT_U32_F32::~Inst_VOP3__V_CVT_U32_F32()
{
} // ~Inst_VOP3__V_CVT_U32_F32
// --- description from .arch file ---
// D.u = (unsigned)S0.f.
// Out-of-range floating point values (including infinity) saturate. NaN is
// --- converted to 0.
void
Inst_VOP3__V_CVT_U32_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
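// Saturating conversion: NaN converts to 0, -infinity clamps to 0,
// +infinity clamps to UINT_MAX, and a finite value whose frexp()
// exponent exceeds 31 also clamps to UINT_MAX before the cast.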
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
int exp;
std::frexp(src[lane], &exp);
if (std::isnan(src[lane])) {
vdst[lane] = 0;
} else if (std::isinf(src[lane])) {
if (std::signbit(src[lane])) {
vdst[lane] = 0;
} else {
vdst[lane] = UINT_MAX;
}
} else if (exp > 31) {
vdst[lane] = UINT_MAX;
} else {
vdst[lane] = (VecElemU32)src[lane];
}
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_CVT_I32_F32 class methods ---
Inst_VOP3__V_CVT_I32_F32::Inst_VOP3__V_CVT_I32_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cvt_i32_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CVT_I32_F32
Inst_VOP3__V_CVT_I32_F32::~Inst_VOP3__V_CVT_I32_F32()
{
} // ~Inst_VOP3__V_CVT_I32_F32
// --- description from .arch file ---
// D.i = (int)S0.f.
// Out-of-range floating point values (including infinity) saturate. NaN is
// --- converted to 0.
void
Inst_VOP3__V_CVT_I32_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
VecOperandI32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
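// Same saturation scheme as v_cvt_i32_f64 above: NaN converts to 0,
// and an infinity or a frexp() exponent above 30 clamps to INT_MIN
// or INT_MAX according to sign.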
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
int exp;
std::frexp(src[lane], &exp);
if (std::isnan(src[lane])) {
vdst[lane] = 0;
} else if (std::isinf(src[lane]) || exp > 30) {
if (std::signbit(src[lane])) {
vdst[lane] = INT_MIN;
} else {
vdst[lane] = INT_MAX;
}
} else {
vdst[lane] = (VecElemI32)src[lane];
}
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MOV_FED_B32 class methods ---
Inst_VOP3__V_MOV_FED_B32::Inst_VOP3__V_MOV_FED_B32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_mov_fed_b32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_MOV_FED_B32
Inst_VOP3__V_MOV_FED_B32::~Inst_VOP3__V_MOV_FED_B32()
{
} // ~Inst_VOP3__V_MOV_FED_B32
// --- description from .arch file ---
// D.u = S0.u;
// Introduce EDC double error upon write to dest vgpr without causing an
// --- exception.
// Input and output modifiers not supported; this is an untyped operation.
void
Inst_VOP3__V_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CVT_F16_F32 class methods ---
Inst_VOP3__V_CVT_F16_F32::Inst_VOP3__V_CVT_F16_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cvt_f16_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CVT_F16_F32
Inst_VOP3__V_CVT_F16_F32::~Inst_VOP3__V_CVT_F16_F32()
{
} // ~Inst_VOP3__V_CVT_F16_F32
// --- description from .arch file ---
// D.f16 = flt32_to_flt16(S0.f).
// Supports input modifiers and creates FP16 denormals when appropriate.
void
Inst_VOP3__V_CVT_F16_F32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CVT_F32_F16 class methods ---
Inst_VOP3__V_CVT_F32_F16::Inst_VOP3__V_CVT_F32_F16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cvt_f32_f16", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CVT_F32_F16
Inst_VOP3__V_CVT_F32_F16::~Inst_VOP3__V_CVT_F32_F16()
{
} // ~Inst_VOP3__V_CVT_F32_F16
// --- description from .arch file ---
// D.f = flt16_to_flt32(S0.f16).
// FP16 denormal inputs are always accepted.
void
Inst_VOP3__V_CVT_F32_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CVT_RPI_I32_F32 class methods ---
Inst_VOP3__V_CVT_RPI_I32_F32::Inst_VOP3__V_CVT_RPI_I32_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cvt_rpi_i32_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CVT_RPI_I32_F32
Inst_VOP3__V_CVT_RPI_I32_F32::~Inst_VOP3__V_CVT_RPI_I32_F32()
{
} // ~Inst_VOP3__V_CVT_RPI_I32_F32
// --- description from .arch file ---
// D.i = (int)floor(S0.f + 0.5).
void
Inst_VOP3__V_CVT_RPI_I32_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
VecOperandI32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5);
}
}
vdst.write();
} // execute
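// "RPI" rounds ties toward +infinity: e.g. floor(-1.5f + 0.5f) == -1.0f
// (so D.i = -1), whereas round-half-away-from-zero (std::round) would
// give -2.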
// --- Inst_VOP3__V_CVT_FLR_I32_F32 class methods ---
Inst_VOP3__V_CVT_FLR_I32_F32::Inst_VOP3__V_CVT_FLR_I32_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cvt_flr_i32_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CVT_FLR_I32_F32
Inst_VOP3__V_CVT_FLR_I32_F32::~Inst_VOP3__V_CVT_FLR_I32_F32()
{
} // ~Inst_VOP3__V_CVT_FLR_I32_F32
// --- description from .arch file ---
// D.i = (int)floor(S0.f).
void
Inst_VOP3__V_CVT_FLR_I32_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
VecOperandI32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemI32)std::floor(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_CVT_OFF_F32_I4 class methods ---
Inst_VOP3__V_CVT_OFF_F32_I4::Inst_VOP3__V_CVT_OFF_F32_I4(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cvt_off_f32_i4", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CVT_OFF_F32_I4
Inst_VOP3__V_CVT_OFF_F32_I4::~Inst_VOP3__V_CVT_OFF_F32_I4()
{
} // ~Inst_VOP3__V_CVT_OFF_F32_I4
// --- description from .arch file ---
// 4-bit signed int to 32-bit float. Used for interpolation in shader.
void
Inst_VOP3__V_CVT_OFF_F32_I4::execute(GPUDynInstPtr gpuDynInst)
{
// Could not parse sq_uc.arch desc field
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CVT_F32_F64 class methods ---
Inst_VOP3__V_CVT_F32_F64::Inst_VOP3__V_CVT_F32_F64(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cvt_f32_f64", false)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_CVT_F32_F64
Inst_VOP3__V_CVT_F32_F64::~Inst_VOP3__V_CVT_F32_F64()
{
} // ~Inst_VOP3__V_CVT_F32_F64
// --- description from .arch file ---
// D.f = (float)S0.d.
void
Inst_VOP3__V_CVT_F32_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemF32)src[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_CVT_F64_F32 class methods ---
Inst_VOP3__V_CVT_F64_F32::Inst_VOP3__V_CVT_F64_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cvt_f64_f32", false)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_CVT_F64_F32
Inst_VOP3__V_CVT_F64_F32::~Inst_VOP3__V_CVT_F64_F32()
{
} // ~Inst_VOP3__V_CVT_F64_F32
// --- description from .arch file ---
// D.d = (double)S0.f.
void
Inst_VOP3__V_CVT_F64_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemF64)src[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_CVT_F32_UBYTE0 class methods ---
Inst_VOP3__V_CVT_F32_UBYTE0::Inst_VOP3__V_CVT_F32_UBYTE0(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cvt_f32_ubyte0", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CVT_F32_UBYTE0
Inst_VOP3__V_CVT_F32_UBYTE0::~Inst_VOP3__V_CVT_F32_UBYTE0()
{
} // ~Inst_VOP3__V_CVT_F32_UBYTE0
// --- description from .arch file ---
// D.f = (float)(S0.u[7:0]).
void
Inst_VOP3__V_CVT_F32_UBYTE0::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemF32)bits(src[lane], 7, 0);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_CVT_F32_UBYTE1 class methods ---
Inst_VOP3__V_CVT_F32_UBYTE1::Inst_VOP3__V_CVT_F32_UBYTE1(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cvt_f32_ubyte1", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CVT_F32_UBYTE1
Inst_VOP3__V_CVT_F32_UBYTE1::~Inst_VOP3__V_CVT_F32_UBYTE1()
{
} // ~Inst_VOP3__V_CVT_F32_UBYTE1
// --- description from .arch file ---
// D.f = (float)(S0.u[15:8]).
void
Inst_VOP3__V_CVT_F32_UBYTE1::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemF32)bits(src[lane], 15, 8);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_CVT_F32_UBYTE2 class methods ---
Inst_VOP3__V_CVT_F32_UBYTE2::Inst_VOP3__V_CVT_F32_UBYTE2(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cvt_f32_ubyte2", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CVT_F32_UBYTE2
Inst_VOP3__V_CVT_F32_UBYTE2::~Inst_VOP3__V_CVT_F32_UBYTE2()
{
} // ~Inst_VOP3__V_CVT_F32_UBYTE2
// --- description from .arch file ---
// D.f = (float)(S0.u[23:16]).
void
Inst_VOP3__V_CVT_F32_UBYTE2::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemF32)bits(src[lane], 23, 16);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_CVT_F32_UBYTE3 class methods ---
Inst_VOP3__V_CVT_F32_UBYTE3::Inst_VOP3__V_CVT_F32_UBYTE3(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cvt_f32_ubyte3", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CVT_F32_UBYTE3
Inst_VOP3__V_CVT_F32_UBYTE3::~Inst_VOP3__V_CVT_F32_UBYTE3()
{
} // ~Inst_VOP3__V_CVT_F32_UBYTE3
// --- description from .arch file ---
// D.f = (float)(S0.u[31:24]).
void
Inst_VOP3__V_CVT_F32_UBYTE3::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemF32)bits(src[lane], 31, 24);
}
}
vdst.write();
} // execute
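// The four UBYTE variants differ only in which byte they select; in
// general form, UBYTEn computes (VecElemF32)((S0.u >> (8 * n)) & 0xff).
// E.g. for S0.u = 0x40414243, UBYTE0 yields 67.0f (0x43) and UBYTE3
// yields 64.0f (0x40).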
// --- Inst_VOP3__V_CVT_U32_F64 class methods ---
Inst_VOP3__V_CVT_U32_F64::Inst_VOP3__V_CVT_U32_F64(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cvt_u32_f64", false)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_CVT_U32_F64
Inst_VOP3__V_CVT_U32_F64::~Inst_VOP3__V_CVT_U32_F64()
{
} // ~Inst_VOP3__V_CVT_U32_F64
// --- description from .arch file ---
// D.u = (unsigned)S0.d.
// Out-of-range floating point values (including infinity) saturate. NaN is
// --- converted to 0.
void
Inst_VOP3__V_CVT_U32_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
int exp;
std::frexp(src[lane], &exp);
if (std::isnan(src[lane])) {
vdst[lane] = 0;
} else if (std::signbit(src[lane])) {
// unsigned conversion: negative values (including -inf) saturate to 0
vdst[lane] = 0;
} else if (std::isinf(src[lane]) || exp > 32) {
// values of 2^32 and above (exp > 32) saturate to UINT_MAX
vdst[lane] = UINT_MAX;
} else {
vdst[lane] = (VecElemU32)src[lane];
}
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_CVT_F64_U32 class methods ---
Inst_VOP3__V_CVT_F64_U32::Inst_VOP3__V_CVT_F64_U32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cvt_f64_u32", false)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_CVT_F64_U32
Inst_VOP3__V_CVT_F64_U32::~Inst_VOP3__V_CVT_F64_U32()
{
} // ~Inst_VOP3__V_CVT_F64_U32
// --- description from .arch file ---
// D.d = (double)S0.u.
void
Inst_VOP3__V_CVT_F64_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemF64)src[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_TRUNC_F64 class methods ---
Inst_VOP3__V_TRUNC_F64::Inst_VOP3__V_TRUNC_F64(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_trunc_f64", false)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_TRUNC_F64
Inst_VOP3__V_TRUNC_F64::~Inst_VOP3__V_TRUNC_F64()
{
} // ~Inst_VOP3__V_TRUNC_F64
// --- description from .arch file ---
// D.d = trunc(S0.d), return integer part of S0.d.
void
Inst_VOP3__V_TRUNC_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::trunc(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_CEIL_F64 class methods ---
Inst_VOP3__V_CEIL_F64::Inst_VOP3__V_CEIL_F64(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_ceil_f64", false)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_CEIL_F64
Inst_VOP3__V_CEIL_F64::~Inst_VOP3__V_CEIL_F64()
{
} // ~Inst_VOP3__V_CEIL_F64
// --- description from .arch file ---
// D.d = trunc(S0.d);
// if(S0.d > 0.0 && S0.d != D.d) then D.d += 1.0.
void
Inst_VOP3__V_CEIL_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::ceil(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_RNDNE_F64 class methods ---
Inst_VOP3__V_RNDNE_F64::Inst_VOP3__V_RNDNE_F64(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_rndne_f64", false)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_RNDNE_F64
Inst_VOP3__V_RNDNE_F64::~Inst_VOP3__V_RNDNE_F64()
{
} // ~Inst_VOP3__V_RNDNE_F64
// --- description from .arch file ---
// D.d = round_nearest_even(S0.d).
void
Inst_VOP3__V_RNDNE_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = roundNearestEven(src[lane]);
}
}
vdst.write();
} // execute
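// roundNearestEven() (inst_util.hh) implements IEEE round-to-nearest,
// ties-to-even. Under the default FE_TONEAREST rounding mode the same
// result comes from std::nearbyint; a sketch (illustrative, not the
// gem5 helper):
//
//     double
//     rndne(double x)
//     {
//         // ties go to the even neighbour: 0.5 -> 0.0, 1.5 -> 2.0,
//         // 2.5 -> 2.0
//         return std::nearbyint(x);
//     }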
// --- Inst_VOP3__V_FLOOR_F64 class methods ---
Inst_VOP3__V_FLOOR_F64::Inst_VOP3__V_FLOOR_F64(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_floor_f64", false)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_FLOOR_F64
Inst_VOP3__V_FLOOR_F64::~Inst_VOP3__V_FLOOR_F64()
{
} // ~Inst_VOP3__V_FLOOR_F64
// --- description from .arch file ---
// D.d = trunc(S0.d);
// if(S0.d < 0.0 && S0.d != D.d) then D.d += -1.0.
void
Inst_VOP3__V_FLOOR_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::floor(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_FRACT_F32 class methods ---
Inst_VOP3__V_FRACT_F32::Inst_VOP3__V_FRACT_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_fract_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_FRACT_F32
Inst_VOP3__V_FRACT_F32::~Inst_VOP3__V_FRACT_F32()
{
} // ~Inst_VOP3__V_FRACT_F32
// --- description from .arch file ---
// D.f = S0.f - floor(S0.f).
void
Inst_VOP3__V_FRACT_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
// D.f = S0.f - floor(S0.f), per the description; this stays in
// [0.0, 1.0) for negative inputs as well
vdst[lane] = src[lane] - std::floor(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_TRUNC_F32 class methods ---
Inst_VOP3__V_TRUNC_F32::Inst_VOP3__V_TRUNC_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_trunc_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_TRUNC_F32
Inst_VOP3__V_TRUNC_F32::~Inst_VOP3__V_TRUNC_F32()
{
} // ~Inst_VOP3__V_TRUNC_F32
// --- description from .arch file ---
// D.f = trunc(S0.f), return integer part of S0.f.
void
Inst_VOP3__V_TRUNC_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::trunc(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_CEIL_F32 class methods ---
Inst_VOP3__V_CEIL_F32::Inst_VOP3__V_CEIL_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_ceil_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CEIL_F32
Inst_VOP3__V_CEIL_F32::~Inst_VOP3__V_CEIL_F32()
{
} // ~Inst_VOP3__V_CEIL_F32
// --- description from .arch file ---
// D.f = trunc(S0.f);
// if(S0.f > 0.0 && S0.f != D.f) then D.f += 1.0.
void
Inst_VOP3__V_CEIL_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::ceil(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_RNDNE_F32 class methods ---
Inst_VOP3__V_RNDNE_F32::Inst_VOP3__V_RNDNE_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_rndne_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_RNDNE_F32
Inst_VOP3__V_RNDNE_F32::~Inst_VOP3__V_RNDNE_F32()
{
} // ~Inst_VOP3__V_RNDNE_F32
// --- description from .arch file ---
// D.f = round_nearest_even(S0.f).
void
Inst_VOP3__V_RNDNE_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = roundNearestEven(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_FLOOR_F32 class methods ---
Inst_VOP3__V_FLOOR_F32::Inst_VOP3__V_FLOOR_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_floor_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_FLOOR_F32
Inst_VOP3__V_FLOOR_F32::~Inst_VOP3__V_FLOOR_F32()
{
} // ~Inst_VOP3__V_FLOOR_F32
// --- description from .arch file ---
// D.f = trunc(S0.f);
// if(S0.f < 0.0 && S0.f != D.f) then D.f += -1.0.
void
Inst_VOP3__V_FLOOR_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::floor(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_EXP_F32 class methods ---
Inst_VOP3__V_EXP_F32::Inst_VOP3__V_EXP_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_exp_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_EXP_F32
Inst_VOP3__V_EXP_F32::~Inst_VOP3__V_EXP_F32()
{
} // ~Inst_VOP3__V_EXP_F32
// --- description from .arch file ---
// D.f = pow(2.0, S0.f).
void
Inst_VOP3__V_EXP_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::pow(2.0, src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_LOG_F32 class methods ---
Inst_VOP3__V_LOG_F32::Inst_VOP3__V_LOG_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_log_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_LOG_F32
Inst_VOP3__V_LOG_F32::~Inst_VOP3__V_LOG_F32()
{
} // ~Inst_VOP3__V_LOG_F32
// --- description from .arch file ---
// D.f = log2(S0.f). Base 2 logarithm.
void
Inst_VOP3__V_LOG_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::log2(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_RCP_F32 class methods ---
Inst_VOP3__V_RCP_F32::Inst_VOP3__V_RCP_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_rcp_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_RCP_F32
Inst_VOP3__V_RCP_F32::~Inst_VOP3__V_RCP_F32()
{
} // ~Inst_VOP3__V_RCP_F32
// --- description from .arch file ---
// D.f = 1.0 / S0.f. Reciprocal with IEEE rules and < 1ulp error.
void
Inst_VOP3__V_RCP_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = 1.0 / src[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_RCP_IFLAG_F32 class methods ---
Inst_VOP3__V_RCP_IFLAG_F32::Inst_VOP3__V_RCP_IFLAG_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_rcp_iflag_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_RCP_IFLAG_F32
Inst_VOP3__V_RCP_IFLAG_F32::~Inst_VOP3__V_RCP_IFLAG_F32()
{
} // ~Inst_VOP3__V_RCP_IFLAG_F32
// --- description from .arch file ---
// D.f = 1.0 / S0.f. Reciprocal intended for integer division, can raise
// --- integer DIV_BY_ZERO exception but cannot raise floating-point
// --- exceptions.
void
Inst_VOP3__V_RCP_IFLAG_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = 1.0 / src[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_RSQ_F32 class methods ---
Inst_VOP3__V_RSQ_F32::Inst_VOP3__V_RSQ_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_rsq_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_RSQ_F32
Inst_VOP3__V_RSQ_F32::~Inst_VOP3__V_RSQ_F32()
{
} // ~Inst_VOP3__V_RSQ_F32
// --- description from .arch file ---
// D.f = 1.0 / sqrt(S0.f). Reciprocal square root with IEEE rules.
void
Inst_VOP3__V_RSQ_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = 1.0 / std::sqrt(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_RCP_F64 class methods ---
Inst_VOP3__V_RCP_F64::Inst_VOP3__V_RCP_F64(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_rcp_f64", false)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_RCP_F64
Inst_VOP3__V_RCP_F64::~Inst_VOP3__V_RCP_F64()
{
} // ~Inst_VOP3__V_RCP_F64
// --- description from .arch file ---
// D.d = 1.0 / S0.d.
void
Inst_VOP3__V_RCP_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (std::fpclassify(src[lane]) == FP_ZERO) {
vdst[lane] = +INFINITY;
} else if (std::isnan(src[lane])) {
vdst[lane] = NAN;
} else if (std::isinf(src[lane])) {
if (std::signbit(src[lane])) {
vdst[lane] = -0.0;
} else {
vdst[lane] = 0.0;
}
} else {
vdst[lane] = 1.0 / src[lane];
}
}
}
vdst.write();
} // execute
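// The ladder above pins down the special cases: the NaN and infinity
// branches match plain IEEE division (1/NaN = NaN, 1/+inf = +0,
// 1/-inf = -0), while the FP_ZERO branch forces +inf for both +0.0 and
// -0.0, where plain IEEE division would give -inf for -0.0.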
// --- Inst_VOP3__V_RSQ_F64 class methods ---
Inst_VOP3__V_RSQ_F64::Inst_VOP3__V_RSQ_F64(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_rsq_f64", false)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_RSQ_F64
Inst_VOP3__V_RSQ_F64::~Inst_VOP3__V_RSQ_F64()
{
} // ~Inst_VOP3__V_RSQ_F64
// --- description from .arch file ---
// D.d = 1.0 / sqrt(S0.d). See V_RSQ_F32.
void
Inst_VOP3__V_RSQ_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (std::fpclassify(src[lane]) == FP_ZERO) {
vdst[lane] = +INFINITY;
} else if (std::isnan(src[lane])) {
vdst[lane] = NAN;
} else if (std::isinf(src[lane]) && !std::signbit(src[lane])) {
vdst[lane] = 0.0;
} else if (std::signbit(src[lane])) {
vdst[lane] = NAN;
} else {
vdst[lane] = 1.0 / std::sqrt(src[lane]);
}
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_SQRT_F32 class methods ---
Inst_VOP3__V_SQRT_F32::Inst_VOP3__V_SQRT_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_sqrt_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_SQRT_F32
Inst_VOP3__V_SQRT_F32::~Inst_VOP3__V_SQRT_F32()
{
} // ~Inst_VOP3__V_SQRT_F32
// --- description from .arch file ---
// D.f = sqrt(S0.f).
void
Inst_VOP3__V_SQRT_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::sqrt(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_SQRT_F64 class methods ---
Inst_VOP3__V_SQRT_F64::Inst_VOP3__V_SQRT_F64(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_sqrt_f64", false)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_SQRT_F64
Inst_VOP3__V_SQRT_F64::~Inst_VOP3__V_SQRT_F64()
{
} // ~Inst_VOP3__V_SQRT_F64
// --- description from .arch file ---
// D.d = sqrt(S0.d).
void
Inst_VOP3__V_SQRT_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::sqrt(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_SIN_F32 class methods ---
Inst_VOP3__V_SIN_F32::Inst_VOP3__V_SIN_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_sin_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_SIN_F32
Inst_VOP3__V_SIN_F32::~Inst_VOP3__V_SIN_F32()
{
} // ~Inst_VOP3__V_SIN_F32
// --- description from .arch file ---
// D.f = sin(S0.f * 2 * PI).
// Valid range of S0.f is [-256.0, +256.0]. Out of range input results in
// float 0.0.
void
Inst_VOP3__V_SIN_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
pi.read();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (src[lane] < -256.0 || src[lane] > 256.0) {
// out-of-range input produces 0.0 per the description
vdst[lane] = 0.0;
} else {
vdst[lane] = std::sin(src[lane] * 2 * pi.rawData());
}
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_COS_F32 class methods ---
Inst_VOP3__V_COS_F32::Inst_VOP3__V_COS_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cos_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_COS_F32
Inst_VOP3__V_COS_F32::~Inst_VOP3__V_COS_F32()
{
} // ~Inst_VOP3__V_COS_F32
// --- description from .arch file ---
// D.f = cos(S0.f * 2 * PI).
// Valid range of S0.f is [-256.0, +256.0]. Out of range input results in
// float 1.0.
void
Inst_VOP3__V_COS_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
pi.read();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (src[lane] < -256.0 || src[lane] > 256.0) {
// out-of-range input produces 1.0 per the description
vdst[lane] = 1.0;
} else {
vdst[lane] = std::cos(src[lane] * 2 * pi.rawData());
}
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_NOT_B32 class methods ---
Inst_VOP3__V_NOT_B32::Inst_VOP3__V_NOT_B32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_not_b32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_NOT_B32
Inst_VOP3__V_NOT_B32::~Inst_VOP3__V_NOT_B32()
{
} // ~Inst_VOP3__V_NOT_B32
// --- description from .arch file ---
// D.u = ~S0.u.
// Input and output modifiers not supported.
void
Inst_VOP3__V_NOT_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
// input modifiers are not supported; this is an untyped operation
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = ~src[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_BFREV_B32 class methods ---
Inst_VOP3__V_BFREV_B32::Inst_VOP3__V_BFREV_B32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_bfrev_b32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_BFREV_B32
Inst_VOP3__V_BFREV_B32::~Inst_VOP3__V_BFREV_B32()
{
} // ~Inst_VOP3__V_BFREV_B32
// --- description from .arch file ---
// D.u[31:0] = S0.u[0:31], bitfield reverse.
// Input and output modifiers not supported.
void
Inst_VOP3__V_BFREV_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = reverseBits(src[lane]);
}
}
vdst.write();
} // execute
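// reverseBits() (inst_util.hh) can be realized with the classic swap
// network; a sketch (illustrative, not necessarily gem5's
// implementation):
//
//     uint32_t
//     revBits(uint32_t v)
//     {
//         v = ((v >> 1) & 0x55555555U) | ((v & 0x55555555U) << 1);
//         v = ((v >> 2) & 0x33333333U) | ((v & 0x33333333U) << 2);
//         v = ((v >> 4) & 0x0f0f0f0fU) | ((v & 0x0f0f0f0fU) << 4);
//         v = ((v >> 8) & 0x00ff00ffU) | ((v & 0x00ff00ffU) << 8);
//         return (v >> 16) | (v << 16);  // swap the two halfwords
//     }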
// --- Inst_VOP3__V_FFBH_U32 class methods ---
Inst_VOP3__V_FFBH_U32::Inst_VOP3__V_FFBH_U32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_ffbh_u32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_FFBH_U32
Inst_VOP3__V_FFBH_U32::~Inst_VOP3__V_FFBH_U32()
{
} // ~Inst_VOP3__V_FFBH_U32
// --- description from .arch file ---
// D.u = position of first 1 in S0.u from MSB;
// D.u = 0xffffffff if S0.u == 0.
void
Inst_VOP3__V_FFBH_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = findFirstOneMsb(src[lane]);
}
}
vdst.write();
} // execute
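// findFirstOneMsb() (inst_util.hh) counts bit positions from the MSB,
// which matches a count-leading-zeros with a zero guard; a sketch using
// the GCC/Clang builtin on a 32-bit int (illustrative, not necessarily
// gem5's implementation):
//
//     uint32_t
//     ffbhU32(uint32_t v)
//     {
//         return v ? __builtin_clz(v) : 0xffffffffU;
//     }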
// --- Inst_VOP3__V_FFBL_B32 class methods ---
Inst_VOP3__V_FFBL_B32::Inst_VOP3__V_FFBL_B32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_ffbl_b32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_FFBL_B32
Inst_VOP3__V_FFBL_B32::~Inst_VOP3__V_FFBL_B32()
{
} // ~Inst_VOP3__V_FFBL_B32
// --- description from .arch file ---
// D.u = position of first 1 in S0.u from LSB;
// D.u = 0xffffffff if S0.u == 0.
void
Inst_VOP3__V_FFBL_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = findFirstOne(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_FFBH_I32 class methods ---
Inst_VOP3__V_FFBH_I32::Inst_VOP3__V_FFBH_I32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_ffbh_i32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_FFBH_I32
Inst_VOP3__V_FFBH_I32::~Inst_VOP3__V_FFBH_I32()
{
} // ~Inst_VOP3__V_FFBH_I32
// --- description from .arch file ---
// D.u = position of first bit different from sign bit in S0.i from MSB;
// D.u = 0xffffffff if S0.i == 0 or S0.i == 0xffffffff.
void
Inst_VOP3__V_FFBH_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src(gpuDynInst, extData.SRC0);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = firstOppositeSignBit(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_FREXP_EXP_I32_F64 class methods ---
Inst_VOP3__V_FREXP_EXP_I32_F64::Inst_VOP3__V_FREXP_EXP_I32_F64(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_frexp_exp_i32_f64", false)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_FREXP_EXP_I32_F64
Inst_VOP3__V_FREXP_EXP_I32_F64::~Inst_VOP3__V_FREXP_EXP_I32_F64()
{
} // ~Inst_VOP3__V_FREXP_EXP_I32_F64
// --- description from .arch file ---
// See V_FREXP_EXP_I32_F32.
void
Inst_VOP3__V_FREXP_EXP_I32_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
VecOperandI32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (std::isinf(src[lane]) || std::isnan(src[lane])) {
vdst[lane] = 0;
} else {
VecElemI32 exp(0);
std::frexp(src[lane], &exp);
vdst[lane] = exp;
}
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_FREXP_MANT_F64 class methods ---
Inst_VOP3__V_FREXP_MANT_F64::Inst_VOP3__V_FREXP_MANT_F64(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_frexp_mant_f64", false)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_FREXP_MANT_F64
Inst_VOP3__V_FREXP_MANT_F64::~Inst_VOP3__V_FREXP_MANT_F64()
{
} // ~Inst_VOP3__V_FREXP_MANT_F64
// --- description from .arch file ---
// See V_FREXP_MANT_F32.
void
Inst_VOP3__V_FREXP_MANT_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
VecElemI32 exp(0);
vdst[lane] = std::frexp(src[lane], &exp);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_FRACT_F64 class methods ---
Inst_VOP3__V_FRACT_F64::Inst_VOP3__V_FRACT_F64(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_fract_f64", false)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_FRACT_F64
Inst_VOP3__V_FRACT_F64::~Inst_VOP3__V_FRACT_F64()
{
} // ~Inst_VOP3__V_FRACT_F64
// --- description from .arch file ---
// See V_FRACT_F32.
void
Inst_VOP3__V_FRACT_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
// D.d = S0.d - floor(S0.d), computed in double throughout
vdst[lane] = src[lane] - std::floor(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_FREXP_EXP_I32_F32 class methods ---
Inst_VOP3__V_FREXP_EXP_I32_F32::Inst_VOP3__V_FREXP_EXP_I32_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_frexp_exp_i32_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_FREXP_EXP_I32_F32
Inst_VOP3__V_FREXP_EXP_I32_F32::~Inst_VOP3__V_FREXP_EXP_I32_F32()
{
} // ~Inst_VOP3__V_FREXP_EXP_I32_F32
// --- description from .arch file ---
// if(S0.f == INF || S0.f == NAN) then D.i = 0;
// else D.i = TwosComplement(Exponent(S0.f) - 127 + 1).
// Returns exponent of single precision float input, such that S0.f =
// significand * (2 ** exponent). See also FREXP_MANT_F32, which returns
// the significand.
void
Inst_VOP3__V_FREXP_EXP_I32_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
VecOperandI32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (std::isinf(src[lane]) || std::isnan(src[lane])) {
vdst[lane] = 0;
} else {
VecElemI32 exp(0);
std::frexp(src[lane], &exp);
vdst[lane] = exp;
}
}
}
vdst.write();
} // execute
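// Worked example: S0.f = 8.0f has biased exponent 130, so the hardware
// formula yields 130 - 127 + 1 = 4; std::frexp agrees, returning
// exp = 4 with significand 0.5 (8.0 = 0.5 * 2^4).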
// --- Inst_VOP3__V_FREXP_MANT_F32 class methods ---
Inst_VOP3__V_FREXP_MANT_F32::Inst_VOP3__V_FREXP_MANT_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_frexp_mant_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_FREXP_MANT_F32
Inst_VOP3__V_FREXP_MANT_F32::~Inst_VOP3__V_FREXP_MANT_F32()
{
} // ~Inst_VOP3__V_FREXP_MANT_F32
// --- description from .arch file ---
// if(S0.f == INF || S0.f == NAN) then D.f = S0.f;
// else D.f = Mantissa(S0.f).
// Result range is in (-1.0,-0.5][0.5,1.0) in normal cases. Returns binary
// --- significand of single precision float input, such that S0.f =
// --- significand * (2 ** exponent). See also FREXP_EXP_I32_F32, which
// --- returns integer exponent.
void
Inst_VOP3__V_FREXP_MANT_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (std::isinf(src[lane]) || std::isnan(src[lane])) {
vdst[lane] = src[lane];
} else {
VecElemI32 exp(0);
vdst[lane] = std::frexp(src[lane], &exp);
}
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_CLREXCP class methods ---
Inst_VOP3__V_CLREXCP::Inst_VOP3__V_CLREXCP(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_clrexcp", false)
{
} // Inst_VOP3__V_CLREXCP
Inst_VOP3__V_CLREXCP::~Inst_VOP3__V_CLREXCP()
{
} // ~Inst_VOP3__V_CLREXCP
// --- description from .arch file ---
// Clear wave's exception state in SIMD (SP).
void
Inst_VOP3__V_CLREXCP::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CVT_F16_U16 class methods ---
Inst_VOP3__V_CVT_F16_U16::Inst_VOP3__V_CVT_F16_U16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cvt_f16_u16", false)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_CVT_F16_U16
Inst_VOP3__V_CVT_F16_U16::~Inst_VOP3__V_CVT_F16_U16()
{
} // ~Inst_VOP3__V_CVT_F16_U16
// --- description from .arch file ---
// D.f16 = uint16_to_flt16(S.u16).
// Supports denormals, rounding, exception flags and saturation.
void
Inst_VOP3__V_CVT_F16_U16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CVT_F16_I16 class methods ---
Inst_VOP3__V_CVT_F16_I16::Inst_VOP3__V_CVT_F16_I16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cvt_f16_i16", false)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_CVT_F16_I16
Inst_VOP3__V_CVT_F16_I16::~Inst_VOP3__V_CVT_F16_I16()
{
} // ~Inst_VOP3__V_CVT_F16_I16
// --- description from .arch file ---
// D.f16 = int16_to_flt16(S.i16).
// Supports denormals, rounding, exception flags and saturation.
void
Inst_VOP3__V_CVT_F16_I16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CVT_U16_F16 class methods ---
Inst_VOP3__V_CVT_U16_F16::Inst_VOP3__V_CVT_U16_F16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cvt_u16_f16", false)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_CVT_U16_F16
Inst_VOP3__V_CVT_U16_F16::~Inst_VOP3__V_CVT_U16_F16()
{
} // ~Inst_VOP3__V_CVT_U16_F16
// --- description from .arch file ---
// D.u16 = flt16_to_uint16(S.f16).
// Supports rounding, exception flags and saturation.
void
Inst_VOP3__V_CVT_U16_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CVT_I16_F16 class methods ---
Inst_VOP3__V_CVT_I16_F16::Inst_VOP3__V_CVT_I16_F16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cvt_i16_f16", false)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_CVT_I16_F16
Inst_VOP3__V_CVT_I16_F16::~Inst_VOP3__V_CVT_I16_F16()
{
} // ~Inst_VOP3__V_CVT_I16_F16
// --- description from .arch file ---
// D.i16 = flt16_to_int16(S.f16).
// Supports rounding, exception flags and saturation.
void
Inst_VOP3__V_CVT_I16_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_RCP_F16 class methods ---
Inst_VOP3__V_RCP_F16::Inst_VOP3__V_RCP_F16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_rcp_f16", false)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_RCP_F16
Inst_VOP3__V_RCP_F16::~Inst_VOP3__V_RCP_F16()
{
} // ~Inst_VOP3__V_RCP_F16
// --- description from .arch file ---
// if(S0.f16 == 1.0f)
// D.f16 = 1.0f;
// else
// D.f16 = ApproximateRecip(S0.f16).
void
Inst_VOP3__V_RCP_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_SQRT_F16 class methods ---
Inst_VOP3__V_SQRT_F16::Inst_VOP3__V_SQRT_F16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_sqrt_f16", false)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_SQRT_F16
Inst_VOP3__V_SQRT_F16::~Inst_VOP3__V_SQRT_F16()
{
} // ~Inst_VOP3__V_SQRT_F16
// --- description from .arch file ---
// if(S0.f16 == 1.0f)
// D.f16 = 1.0f;
// else
// D.f16 = ApproximateSqrt(S0.f16).
void
Inst_VOP3__V_SQRT_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_RSQ_F16 class methods ---
Inst_VOP3__V_RSQ_F16::Inst_VOP3__V_RSQ_F16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_rsq_f16", false)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_RSQ_F16
Inst_VOP3__V_RSQ_F16::~Inst_VOP3__V_RSQ_F16()
{
} // ~Inst_VOP3__V_RSQ_F16
// --- description from .arch file ---
// if(S0.f16 == 1.0f)
// D.f16 = 1.0f;
// else
// D.f16 = ApproximateRecipSqrt(S0.f16).
void
Inst_VOP3__V_RSQ_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_LOG_F16 class methods ---
Inst_VOP3__V_LOG_F16::Inst_VOP3__V_LOG_F16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_log_f16", false)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_LOG_F16
Inst_VOP3__V_LOG_F16::~Inst_VOP3__V_LOG_F16()
{
} // ~Inst_VOP3__V_LOG_F16
// --- description from .arch file ---
// if(S0.f16 == 1.0f)
// D.f16 = 0.0f;
// else
// D.f16 = ApproximateLog2(S0.f16).
void
Inst_VOP3__V_LOG_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_EXP_F16 class methods ---
Inst_VOP3__V_EXP_F16::Inst_VOP3__V_EXP_F16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_exp_f16", false)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_EXP_F16
Inst_VOP3__V_EXP_F16::~Inst_VOP3__V_EXP_F16()
{
} // ~Inst_VOP3__V_EXP_F16
// --- description from .arch file ---
// if(S0.f16 == 0.0f)
// D.f16 = 1.0f;
// else
// D.f16 = Approximate2ToX(S0.f16).
void
Inst_VOP3__V_EXP_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_FREXP_MANT_F16 class methods ---
Inst_VOP3__V_FREXP_MANT_F16::Inst_VOP3__V_FREXP_MANT_F16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_frexp_mant_f16", false)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_FREXP_MANT_F16
Inst_VOP3__V_FREXP_MANT_F16::~Inst_VOP3__V_FREXP_MANT_F16()
{
} // ~Inst_VOP3__V_FREXP_MANT_F16
// --- description from .arch file ---
// if(S0.f16 == +-INF || S0.f16 == NAN)
// D.f16 = S0.f16;
// else
// D.f16 = mantissa(S0.f16).
// Result range is (-1.0,-0.5][0.5,1.0).
// C math library frexp function.
// Returns binary significand of half precision float input, such that the
// original single float = significand * (2 ** exponent).
void
Inst_VOP3__V_FREXP_MANT_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_FREXP_EXP_I16_F16 class methods ---
Inst_VOP3__V_FREXP_EXP_I16_F16::Inst_VOP3__V_FREXP_EXP_I16_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_frexp_exp_i16_f16", false)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_FREXP_EXP_I16_F16
Inst_VOP3__V_FREXP_EXP_I16_F16::~Inst_VOP3__V_FREXP_EXP_I16_F16()
{
} // ~Inst_VOP3__V_FREXP_EXP_I16_F16
// --- description from .arch file ---
// if(S0.f16 == +-INF || S0.f16 == NAN)
// D.i16 = 0;
// else
// D.i16 = 2s_complement(exponent(S0.f16) - 15 + 1).
// C math library frexp function.
// Returns exponent of half precision float input, such that the
// original single float = significand * (2 ** exponent).
void
Inst_VOP3__V_FREXP_EXP_I16_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_FLOOR_F16 class methods ---
Inst_VOP3__V_FLOOR_F16::Inst_VOP3__V_FLOOR_F16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_floor_f16", false)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_FLOOR_F16
Inst_VOP3__V_FLOOR_F16::~Inst_VOP3__V_FLOOR_F16()
{
} // ~Inst_VOP3__V_FLOOR_F16
// --- description from .arch file ---
// D.f16 = trunc(S0.f16);
// if(S0.f16 < 0.0f && S0.f16 != D.f16) then D.f16 -= 1.0f.
void
Inst_VOP3__V_FLOOR_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CEIL_F16 class methods ---
Inst_VOP3__V_CEIL_F16::Inst_VOP3__V_CEIL_F16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_ceil_f16", false)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_CEIL_F16
Inst_VOP3__V_CEIL_F16::~Inst_VOP3__V_CEIL_F16()
{
} // ~Inst_VOP3__V_CEIL_F16
// --- description from .arch file ---
// D.f16 = trunc(S0.f16);
// if(S0.f16 > 0.0f && S0.f16 != D.f16) then D.f16 += 1.0f.
void
Inst_VOP3__V_CEIL_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_TRUNC_F16 class methods ---
Inst_VOP3__V_TRUNC_F16::Inst_VOP3__V_TRUNC_F16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_trunc_f16", false)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_TRUNC_F16
Inst_VOP3__V_TRUNC_F16::~Inst_VOP3__V_TRUNC_F16()
{
} // ~Inst_VOP3__V_TRUNC_F16
// --- description from .arch file ---
// D.f16 = trunc(S0.f16).
// Round-to-zero semantics.
void
Inst_VOP3__V_TRUNC_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_RNDNE_F16 class methods ---
Inst_VOP3__V_RNDNE_F16::Inst_VOP3__V_RNDNE_F16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_rndne_f16", false)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_RNDNE_F16
Inst_VOP3__V_RNDNE_F16::~Inst_VOP3__V_RNDNE_F16()
{
} // ~Inst_VOP3__V_RNDNE_F16
// --- description from .arch file ---
// D.f16 = FLOOR(S0.f16 + 0.5f);
// if(floor(S0.f16) is even && fract(S0.f16) == 0.5f) then D.f16 -= 1.0f.
// Round-to-nearest-even semantics.
void
Inst_VOP3__V_RNDNE_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_FRACT_F16 class methods ---
Inst_VOP3__V_FRACT_F16::Inst_VOP3__V_FRACT_F16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_fract_f16", false)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_FRACT_F16
Inst_VOP3__V_FRACT_F16::~Inst_VOP3__V_FRACT_F16()
{
} // ~Inst_VOP3__V_FRACT_F16
// --- description from .arch file ---
// D.f16 = S0.f16 + -floor(S0.f16).
void
Inst_VOP3__V_FRACT_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_SIN_F16 class methods ---
Inst_VOP3__V_SIN_F16::Inst_VOP3__V_SIN_F16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_sin_f16", false)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_SIN_F16
Inst_VOP3__V_SIN_F16::~Inst_VOP3__V_SIN_F16()
{
} // ~Inst_VOP3__V_SIN_F16
// --- description from .arch file ---
// D.f16 = sin(S0.f16 * 2 * PI).
void
Inst_VOP3__V_SIN_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_COS_F16 class methods ---
Inst_VOP3__V_COS_F16::Inst_VOP3__V_COS_F16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cos_f16", false)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_COS_F16
Inst_VOP3__V_COS_F16::~Inst_VOP3__V_COS_F16()
{
} // ~Inst_VOP3__V_COS_F16
// --- description from .arch file ---
// D.f16 = cos(S0.f16 * 2 * PI).
void
Inst_VOP3__V_COS_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_EXP_LEGACY_F32 class methods ---
Inst_VOP3__V_EXP_LEGACY_F32::Inst_VOP3__V_EXP_LEGACY_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_exp_legacy_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_EXP_LEGACY_F32
Inst_VOP3__V_EXP_LEGACY_F32::~Inst_VOP3__V_EXP_LEGACY_F32()
{
} // ~Inst_VOP3__V_EXP_LEGACY_F32
// --- description from .arch file ---
// D.f = pow(2.0, S0.f) with legacy semantics.
void
Inst_VOP3__V_EXP_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
if (instData.ABS & 0x1) {
src.absModifier();
}
if (extData.NEG & 0x1) {
src.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::pow(2.0, src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_LOG_LEGACY_F32 class methods ---
Inst_VOP3__V_LOG_LEGACY_F32::Inst_VOP3__V_LOG_LEGACY_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_log_legacy_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_LOG_LEGACY_F32
Inst_VOP3__V_LOG_LEGACY_F32::~Inst_VOP3__V_LOG_LEGACY_F32()
{
} // ~Inst_VOP3__V_LOG_LEGACY_F32
// --- description from .arch file ---
// D.f = log2(S0.f). Base 2 logarithm with legacy semantics.
void
Inst_VOP3__V_LOG_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::log2(src[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MAD_LEGACY_F32 class methods ---
Inst_VOP3__V_MAD_LEGACY_F32::Inst_VOP3__V_MAD_LEGACY_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_mad_legacy_f32", false)
{
setFlag(ALU);
setFlag(F32);
setFlag(MAD);
} // Inst_VOP3__V_MAD_LEGACY_F32
Inst_VOP3__V_MAD_LEGACY_F32::~Inst_VOP3__V_MAD_LEGACY_F32()
{
} // ~Inst_VOP3__V_MAD_LEGACY_F32
// --- description from .arch file ---
// D.f = S0.f * S1.f + S2.f (DX9 rules, 0.0 * x = 0.0).
void
Inst_VOP3__V_MAD_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (instData.ABS & 0x4) {
src2.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
if (extData.NEG & 0x4) {
src2.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
}
}
vdst.write();
} // execute
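// The fma-based loop above follows IEEE semantics; under the quoted DX9
// rule, 0.0 * x must be 0.0 even for x = inf or NaN, which IEEE
// multiplication would turn into NaN. A sketch of the legacy rule
// (madLegacy is illustrative, not a gem5 helper):
//
//     float
//     madLegacy(float a, float b, float c)
//     {
//         // DX9 legacy multiply: a zero factor forces a zero product,
//         // even when the other factor is inf or NaN
//         float prod = (a == 0.0f || b == 0.0f) ? 0.0f : a * b;
//         return prod + c;
//     }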
// --- Inst_VOP3__V_MAD_F32 class methods ---
Inst_VOP3__V_MAD_F32::Inst_VOP3__V_MAD_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_mad_f32", false)
{
setFlag(ALU);
setFlag(F32);
setFlag(MAD);
} // Inst_VOP3__V_MAD_F32
Inst_VOP3__V_MAD_F32::~Inst_VOP3__V_MAD_F32()
{
} // ~Inst_VOP3__V_MAD_F32
// --- description from .arch file ---
// D.f = S0.f * S1.f + S2.f.
void
Inst_VOP3__V_MAD_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (instData.ABS & 0x4) {
src2.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
if (extData.NEG & 0x4) {
src2.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MAD_I32_I24 class methods ---
Inst_VOP3__V_MAD_I32_I24::Inst_VOP3__V_MAD_I32_I24(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_mad_i32_i24", false)
{
setFlag(ALU);
setFlag(MAD);
} // Inst_VOP3__V_MAD_I32_I24
Inst_VOP3__V_MAD_I32_I24::~Inst_VOP3__V_MAD_I32_I24()
{
} // ~Inst_VOP3__V_MAD_I32_I24
// --- description from .arch file ---
// D.i = S0.i[23:0] * S1.i[23:0] + S2.i.
void
Inst_VOP3__V_MAD_I32_I24::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandI32 src2(gpuDynInst, extData.SRC2);
VecOperandI32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = sext<24>(bits(src0[lane], 23, 0))
* sext<24>(bits(src1[lane], 23, 0)) + src2[lane];
}
}
vdst.write();
} // execute
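// sext<24>(bits(v, 23, 0)) treats the low 24 bits as a signed quantity;
// an equivalent sketch (sext24 is illustrative, not the gem5 template;
// assumes the usual two's-complement arithmetic right shift):
//
//     int32_t
//     sext24(uint32_t v)
//     {
//         // shift bit 23 up to bit 31, then arithmetic-shift back,
//         // replicating the sign bit into bits [31:24]
//         return (int32_t)(v << 8) >> 8;
//     }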
// --- Inst_VOP3__V_MAD_U32_U24 class methods ---
Inst_VOP3__V_MAD_U32_U24::Inst_VOP3__V_MAD_U32_U24(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_mad_u32_u24", false)
{
setFlag(ALU);
setFlag(MAD);
} // Inst_VOP3__V_MAD_U32_U24
Inst_VOP3__V_MAD_U32_U24::~Inst_VOP3__V_MAD_U32_U24()
{
} // ~Inst_VOP3__V_MAD_U32_U24
// --- description from .arch file ---
// D.u = S0.u[23:0] * S1.u[23:0] + S2.u.
void
Inst_VOP3__V_MAD_U32_U24::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = bits(src0[lane], 23, 0) * bits(src1[lane], 23, 0)
+ src2[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_CUBEID_F32 class methods ---
Inst_VOP3__V_CUBEID_F32::Inst_VOP3__V_CUBEID_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cubeid_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CUBEID_F32
Inst_VOP3__V_CUBEID_F32::~Inst_VOP3__V_CUBEID_F32()
{
} // ~Inst_VOP3__V_CUBEID_F32
// --- description from .arch file ---
// D.f = cubemap face ID ({0.0, 1.0, ..., 5.0}). XYZ coordinate is given in
// --- (S0.f, S1.f, S2.f).
void
Inst_VOP3__V_CUBEID_F32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CUBESC_F32 class methods ---
Inst_VOP3__V_CUBESC_F32::Inst_VOP3__V_CUBESC_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cubesc_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CUBESC_F32
Inst_VOP3__V_CUBESC_F32::~Inst_VOP3__V_CUBESC_F32()
{
} // ~Inst_VOP3__V_CUBESC_F32
// --- description from .arch file ---
// D.f = cubemap S coordinate. XYZ coordinate is given in (S0.f, S1.f,
// S2.f).
void
Inst_VOP3__V_CUBESC_F32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CUBETC_F32 class methods ---
Inst_VOP3__V_CUBETC_F32::Inst_VOP3__V_CUBETC_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cubetc_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CUBETC_F32
Inst_VOP3__V_CUBETC_F32::~Inst_VOP3__V_CUBETC_F32()
{
} // ~Inst_VOP3__V_CUBETC_F32
// --- description from .arch file ---
// D.f = cubemap T coordinate. XYZ coordinate is given in (S0.f, S1.f,
// S2.f).
void
Inst_VOP3__V_CUBETC_F32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CUBEMA_F32 class methods ---
Inst_VOP3__V_CUBEMA_F32::Inst_VOP3__V_CUBEMA_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cubema_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CUBEMA_F32
Inst_VOP3__V_CUBEMA_F32::~Inst_VOP3__V_CUBEMA_F32()
{
} // ~Inst_VOP3__V_CUBEMA_F32
// --- description from .arch file ---
// D.f = 2.0 * cubemap major axis. XYZ coordinate is given in (S0.f, S1.f,
// --- S2.f).
void
Inst_VOP3__V_CUBEMA_F32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_BFE_U32 class methods ---
Inst_VOP3__V_BFE_U32::Inst_VOP3__V_BFE_U32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_bfe_u32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_BFE_U32
Inst_VOP3__V_BFE_U32::~Inst_VOP3__V_BFE_U32()
{
} // ~Inst_VOP3__V_BFE_U32
// --- description from .arch file ---
// D.u = (S0.u>>S1.u[4:0]) & ((1<<S2.u[4:0])-1).
// Bitfield extract with S0 = data, S1 = field_offset, S2 = field_width.
void
Inst_VOP3__V_BFE_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (src0[lane] >> bits(src1[lane], 4, 0))
& ((1 << bits(src2[lane], 4, 0)) - 1);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_BFE_I32 class methods ---
Inst_VOP3__V_BFE_I32::Inst_VOP3__V_BFE_I32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_bfe_i32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_BFE_I32
Inst_VOP3__V_BFE_I32::~Inst_VOP3__V_BFE_I32()
{
} // ~Inst_VOP3__V_BFE_I32
// --- description from .arch file ---
// D.i = (S0.i>>S1.u[4:0]) & ((1<<S2.u[4:0])-1).
// Bitfield extract with S0 = data, S1 = field_offset, S2 = field_width.
void
Inst_VOP3__V_BFE_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
VecOperandI32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (src0[lane] >> bits(src1[lane], 4, 0))
& ((1 << bits(src2[lane], 4, 0)) - 1);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_BFI_B32 class methods ---
Inst_VOP3__V_BFI_B32::Inst_VOP3__V_BFI_B32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_bfi_b32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_BFI_B32
Inst_VOP3__V_BFI_B32::~Inst_VOP3__V_BFI_B32()
{
} // ~Inst_VOP3__V_BFI_B32
// --- description from .arch file ---
// D.u = (S0.u & S1.u) | (~S0.u & S2.u); bitfield insert.
void
Inst_VOP3__V_BFI_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (src0[lane] & src1[lane]) | (~src0[lane]
& src2[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_FMA_F32 class methods ---
Inst_VOP3__V_FMA_F32::Inst_VOP3__V_FMA_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_fma_f32", false)
{
setFlag(ALU);
setFlag(F32);
setFlag(FMA);
} // Inst_VOP3__V_FMA_F32
Inst_VOP3__V_FMA_F32::~Inst_VOP3__V_FMA_F32()
{
} // ~Inst_VOP3__V_FMA_F32
// --- description from .arch file ---
// D.f = S0.f * S1.f + S2.f.
void
Inst_VOP3__V_FMA_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (instData.ABS & 0x4) {
src2.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
if (extData.NEG & 0x4) {
src2.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_FMA_F64 class methods ---
Inst_VOP3__V_FMA_F64::Inst_VOP3__V_FMA_F64(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_fma_f64", false)
{
setFlag(ALU);
setFlag(F64);
setFlag(FMA);
} // Inst_VOP3__V_FMA_F64
Inst_VOP3__V_FMA_F64::~Inst_VOP3__V_FMA_F64()
{
} // ~Inst_VOP3__V_FMA_F64
// --- description from .arch file ---
// D.d = S0.d * S1.d + S2.d.
void
Inst_VOP3__V_FMA_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ConstVecOperandF64 src2(gpuDynInst, extData.SRC2);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (instData.ABS & 0x4) {
src2.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
if (extData.NEG & 0x4) {
src2.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_LERP_U8 class methods ---
Inst_VOP3__V_LERP_U8::Inst_VOP3__V_LERP_U8(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_lerp_u8", false)
{
setFlag(ALU);
} // Inst_VOP3__V_LERP_U8
Inst_VOP3__V_LERP_U8::~Inst_VOP3__V_LERP_U8()
{
} // ~Inst_VOP3__V_LERP_U8
// --- description from .arch file ---
// D.u = ((S0.u[31:24] + S1.u[31:24] + S2.u[24]) >> 1) << 24
// D.u += ((S0.u[23:16] + S1.u[23:16] + S2.u[16]) >> 1) << 16;
// D.u += ((S0.u[15:8] + S1.u[15:8] + S2.u[8]) >> 1) << 8;
// D.u += ((S0.u[7:0] + S1.u[7:0] + S2.u[0]) >> 1).
// Unsigned 8-bit pixel average on packed unsigned bytes (linear
// --- interpolation). S2 acts as a round mode; if set, 0.5 rounds up,
// --- otherwise 0.5 truncates.
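// Example: for byte pair 0x40 and 0x41 with the matching S2 round
// bit set, (0x40 + 0x41 + 1) >> 1 = 0x41; with the bit clear the
// result truncates to 0x40.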
void
Inst_VOP3__V_LERP_U8::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = ((bits(src0[lane], 31, 24)
+ bits(src1[lane], 31, 24) + bits(src2[lane], 24)) >> 1)
<< 24;
vdst[lane] += ((bits(src0[lane], 23, 16)
+ bits(src1[lane], 23, 16) + bits(src2[lane], 16)) >> 1)
<< 16;
vdst[lane] += ((bits(src0[lane], 15, 8)
+ bits(src1[lane], 15, 8) + bits(src2[lane], 8)) >> 1)
<< 8;
vdst[lane] += ((bits(src0[lane], 7, 0) + bits(src1[lane], 7, 0)
+ bits(src2[lane], 0)) >> 1);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_ALIGNBIT_B32 class methods ---
Inst_VOP3__V_ALIGNBIT_B32::Inst_VOP3__V_ALIGNBIT_B32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_alignbit_b32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_ALIGNBIT_B32
Inst_VOP3__V_ALIGNBIT_B32::~Inst_VOP3__V_ALIGNBIT_B32()
{
} // ~Inst_VOP3__V_ALIGNBIT_B32
// --- description from .arch file ---
// D.u = ({S0,S1} >> S2.u[4:0]) & 0xffffffff.
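// Example: S0=0xAAAAAAAA, S1=0xBBBBBBBB, S2=4 yields the low 32 bits
// of (0xAAAAAAAABBBBBBBB >> 4), i.e. 0xABBBBBBB.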
void
Inst_VOP3__V_ALIGNBIT_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
VecElemU64 src_0_1 = (((VecElemU64)src0[lane] << 32)
| (VecElemU64)src1[lane]);
vdst[lane] = (VecElemU32)((src_0_1
>> (VecElemU64)bits(src2[lane], 4, 0)) & 0xffffffff);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_ALIGNBYTE_B32 class methods ---
Inst_VOP3__V_ALIGNBYTE_B32::Inst_VOP3__V_ALIGNBYTE_B32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_alignbyte_b32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_ALIGNBYTE_B32
Inst_VOP3__V_ALIGNBYTE_B32::~Inst_VOP3__V_ALIGNBYTE_B32()
{
} // ~Inst_VOP3__V_ALIGNBYTE_B32
// --- description from .arch file ---
// D.u = ({S0,S1} >> (8*S2.u[4:0])) & 0xffffffff.
void
Inst_VOP3__V_ALIGNBYTE_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
VecElemU64 src_0_1 = (((VecElemU64)src0[lane] << 32)
| (VecElemU64)src1[lane]);
vdst[lane] = (VecElemU32)((src_0_1
>> (8ULL * (VecElemU64)bits(src2[lane], 4, 0)))
& 0xffffffff);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MIN3_F32 class methods ---
Inst_VOP3__V_MIN3_F32::Inst_VOP3__V_MIN3_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_min3_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_MIN3_F32
Inst_VOP3__V_MIN3_F32::~Inst_VOP3__V_MIN3_F32()
{
} // ~Inst_VOP3__V_MIN3_F32
// --- description from .arch file ---
// D.f = min(S0.f, S1.f, S2.f).
void
Inst_VOP3__V_MIN3_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (instData.ABS & 0x4) {
src2.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
if (extData.NEG & 0x4) {
src2.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
VecElemF32 min_0_1 = std::fmin(src0[lane], src1[lane]);
vdst[lane] = std::fmin(min_0_1, src2[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MIN3_I32 class methods ---
Inst_VOP3__V_MIN3_I32::Inst_VOP3__V_MIN3_I32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_min3_i32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_MIN3_I32
Inst_VOP3__V_MIN3_I32::~Inst_VOP3__V_MIN3_I32()
{
} // ~Inst_VOP3__V_MIN3_I32
// --- description from .arch file ---
// D.i = min(S0.i, S1.i, S2.i).
void
Inst_VOP3__V_MIN3_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandI32 src2(gpuDynInst, extData.SRC2);
VecOperandI32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
VecElemI32 min_0_1 = std::min(src0[lane], src1[lane]);
vdst[lane] = std::min(min_0_1, src2[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MIN3_U32 class methods ---
Inst_VOP3__V_MIN3_U32::Inst_VOP3__V_MIN3_U32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_min3_u32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_MIN3_U32
Inst_VOP3__V_MIN3_U32::~Inst_VOP3__V_MIN3_U32()
{
} // ~Inst_VOP3__V_MIN3_U32
// --- description from .arch file ---
// D.u = min(S0.u, S1.u, S2.u).
void
Inst_VOP3__V_MIN3_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
VecElemU32 min_0_1 = std::min(src0[lane], src1[lane]);
vdst[lane] = std::min(min_0_1, src2[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MAX3_F32 class methods ---
Inst_VOP3__V_MAX3_F32::Inst_VOP3__V_MAX3_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_max3_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_MAX3_F32
Inst_VOP3__V_MAX3_F32::~Inst_VOP3__V_MAX3_F32()
{
} // ~Inst_VOP3__V_MAX3_F32
// --- description from .arch file ---
// D.f = max(S0.f, S1.f, S2.f).
void
Inst_VOP3__V_MAX3_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (instData.ABS & 0x4) {
src2.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
if (extData.NEG & 0x4) {
src2.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
VecElemF32 max_0_1 = std::fmax(src0[lane], src1[lane]);
vdst[lane] = std::fmax(max_0_1, src2[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MAX3_I32 class methods ---
Inst_VOP3__V_MAX3_I32::Inst_VOP3__V_MAX3_I32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_max3_i32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_MAX3_I32
Inst_VOP3__V_MAX3_I32::~Inst_VOP3__V_MAX3_I32()
{
} // ~Inst_VOP3__V_MAX3_I32
// --- description from .arch file ---
// D.i = max(S0.i, S1.i, S2.i).
void
Inst_VOP3__V_MAX3_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandI32 src2(gpuDynInst, extData.SRC2);
VecOperandI32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
VecElemI32 max_0_1 = std::max(src0[lane], src1[lane]);
vdst[lane] = std::max(max_0_1, src2[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MAX3_U32 class methods ---
Inst_VOP3__V_MAX3_U32::Inst_VOP3__V_MAX3_U32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_max3_u32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_MAX3_U32
Inst_VOP3__V_MAX3_U32::~Inst_VOP3__V_MAX3_U32()
{
} // ~Inst_VOP3__V_MAX3_U32
// --- description from .arch file ---
// D.u = max(S0.u, S1.u, S2.u).
void
Inst_VOP3__V_MAX3_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
VecElemU32 max_0_1 = std::max(src0[lane], src1[lane]);
vdst[lane] = std::max(max_0_1, src2[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MED3_F32 class methods ---
Inst_VOP3__V_MED3_F32::Inst_VOP3__V_MED3_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_med3_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_MED3_F32
Inst_VOP3__V_MED3_F32::~Inst_VOP3__V_MED3_F32()
{
} // ~Inst_VOP3__V_MED3_F32
// --- description from .arch file ---
// D.f = median(S0.f, S1.f, S2.f).
void
Inst_VOP3__V_MED3_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (instData.ABS & 0x4) {
src2.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
if (extData.NEG & 0x4) {
src2.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = median(src0[lane], src1[lane], src2[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MED3_I32 class methods ---
Inst_VOP3__V_MED3_I32::Inst_VOP3__V_MED3_I32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_med3_i32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_MED3_I32
Inst_VOP3__V_MED3_I32::~Inst_VOP3__V_MED3_I32()
{
} // ~Inst_VOP3__V_MED3_I32
// --- description from .arch file ---
// D.i = median(S0.i, S1.i, S2.i).
void
Inst_VOP3__V_MED3_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandI32 src2(gpuDynInst, extData.SRC2);
VecOperandI32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = median(src0[lane], src1[lane], src2[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MED3_U32 class methods ---
Inst_VOP3__V_MED3_U32::Inst_VOP3__V_MED3_U32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_med3_u32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_MED3_U32
Inst_VOP3__V_MED3_U32::~Inst_VOP3__V_MED3_U32()
{
} // ~Inst_VOP3__V_MED3_U32
// --- description from .arch file ---
// D.u = median(S0.u, S1.u, S2.u).
void
Inst_VOP3__V_MED3_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = median(src0[lane], src1[lane], src2[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_SAD_U8 class methods ---
Inst_VOP3__V_SAD_U8::Inst_VOP3__V_SAD_U8(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_sad_u8", false)
{
setFlag(ALU);
} // Inst_VOP3__V_SAD_U8
Inst_VOP3__V_SAD_U8::~Inst_VOP3__V_SAD_U8()
{
} // ~Inst_VOP3__V_SAD_U8
// --- description from .arch file ---
// D.u = abs(S0.i[31:24] - S1.i[31:24]) + abs(S0.i[23:16] - S1.i[23:16]) +
// abs(S0.i[15:8] - S1.i[15:8]) + abs(S0.i[7:0] - S1.i[7:0]) + S2.u.
// Sum of absolute differences with accumulation, overflow into upper bits
// is allowed.
void
Inst_VOP3__V_SAD_U8::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::abs(bits(src0[lane], 31, 24)
- bits(src1[lane], 31, 24))
+ std::abs(bits(src0[lane], 23, 16)
- bits(src1[lane], 23, 16))
+ std::abs(bits(src0[lane], 15, 8)
- bits(src1[lane], 15, 8))
+ std::abs(bits(src0[lane], 7, 0)
- bits(src1[lane], 7, 0)) + src2[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_SAD_HI_U8 class methods ---
Inst_VOP3__V_SAD_HI_U8::Inst_VOP3__V_SAD_HI_U8(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_sad_hi_u8", false)
{
setFlag(ALU);
} // Inst_VOP3__V_SAD_HI_U8
Inst_VOP3__V_SAD_HI_U8::~Inst_VOP3__V_SAD_HI_U8()
{
} // ~Inst_VOP3__V_SAD_HI_U8
// --- description from .arch file ---
// D.u = (SAD_U8(S0, S1, 0) << 16) + S2.u.
// Sum of absolute differences with accumulation, overflow is lost.
void
Inst_VOP3__V_SAD_HI_U8::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
// per the description this is SAD_U8(S0, S1, 0) << 16, so each
// byte difference must be an absolute value
vdst[lane] = ((std::abs((int)bits(src0[lane], 31, 24)
- (int)bits(src1[lane], 31, 24))
+ std::abs((int)bits(src0[lane], 23, 16)
- (int)bits(src1[lane], 23, 16))
+ std::abs((int)bits(src0[lane], 15, 8)
- (int)bits(src1[lane], 15, 8))
+ std::abs((int)bits(src0[lane], 7, 0)
- (int)bits(src1[lane], 7, 0))) << 16) + src2[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_SAD_U16 class methods ---
Inst_VOP3__V_SAD_U16::Inst_VOP3__V_SAD_U16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_sad_u16", false)
{
setFlag(ALU);
} // Inst_VOP3__V_SAD_U16
Inst_VOP3__V_SAD_U16::~Inst_VOP3__V_SAD_U16()
{
} // ~Inst_VOP3__V_SAD_U16
// --- description from .arch file ---
// D.u = abs(S0.i[31:16] - S1.i[31:16]) + abs(S0.i[15:0] - S1.i[15:0])
// + S2.u.
// Word SAD with accumulation.
void
Inst_VOP3__V_SAD_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::abs(bits(src0[lane], 31, 16)
- bits(src1[lane], 31, 16))
+ std::abs(bits(src0[lane], 15, 0)
- bits(src1[lane], 15, 0)) + src2[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_SAD_U32 class methods ---
Inst_VOP3__V_SAD_U32::Inst_VOP3__V_SAD_U32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_sad_u32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_SAD_U32
Inst_VOP3__V_SAD_U32::~Inst_VOP3__V_SAD_U32()
{
} // ~Inst_VOP3__V_SAD_U32
// --- description from .arch file ---
// D.u = abs(S0.i - S1.i) + S2.u.
// Dword SAD with accumulation.
void
Inst_VOP3__V_SAD_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::abs(src0[lane] - src1[lane]) + src2[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_CVT_PK_U8_F32 class methods ---
Inst_VOP3__V_CVT_PK_U8_F32::Inst_VOP3__V_CVT_PK_U8_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cvt_pk_u8_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CVT_PK_U8_F32
Inst_VOP3__V_CVT_PK_U8_F32::~Inst_VOP3__V_CVT_PK_U8_F32()
{
} // ~Inst_VOP3__V_CVT_PK_U8_F32
// --- description from .arch file ---
// D.u = ((flt32_to_uint8(S0.f) & 0xff) << (8 * S1.u[1:0]))
// | (S2.u & ~(0xff << (8 * S1.u[1:0]))).
// Convert floating point value S0 to 8-bit unsigned integer and pack the
// result into byte S1 of dword S2.
void
Inst_VOP3__V_CVT_PK_U8_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (((VecElemU8)src0[lane] & 0xff)
<< (8 * bits(src1[lane], 1, 0)))
| (src2[lane] & ~(0xff << (8 * bits(src1[lane], 1, 0))));
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_DIV_FIXUP_F32 class methods ---
Inst_VOP3__V_DIV_FIXUP_F32::Inst_VOP3__V_DIV_FIXUP_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_div_fixup_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_DIV_FIXUP_F32
Inst_VOP3__V_DIV_FIXUP_F32::~Inst_VOP3__V_DIV_FIXUP_F32()
{
} // ~Inst_VOP3__V_DIV_FIXUP_F32
// --- description from .arch file ---
// D.f = Divide fixup and flags -- s0.f = Quotient, s1.f = Denominator,
// s2.f = Numerator. This opcode generates exceptions resulting from the
// division operation.
void
Inst_VOP3__V_DIV_FIXUP_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (instData.ABS & 0x4) {
src2.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
if (extData.NEG & 0x4) {
src2.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (std::fpclassify(src1[lane]) == FP_ZERO) {
if (std::signbit(src1[lane])) {
vdst[lane] = -INFINITY;
} else {
vdst[lane] = +INFINITY;
}
} else if (std::isnan(src2[lane]) || std::isnan(src1[lane])) {
vdst[lane] = NAN;
} else if (std::isinf(src1[lane])) {
if (std::signbit(src1[lane])) {
vdst[lane] = -INFINITY;
} else {
vdst[lane] = +INFINITY;
}
} else {
vdst[lane] = src2[lane] / src1[lane];
}
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_DIV_FIXUP_F64 class methods ---
Inst_VOP3__V_DIV_FIXUP_F64::Inst_VOP3__V_DIV_FIXUP_F64(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_div_fixup_f64", false)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_DIV_FIXUP_F64
Inst_VOP3__V_DIV_FIXUP_F64::~Inst_VOP3__V_DIV_FIXUP_F64()
{
} // ~Inst_VOP3__V_DIV_FIXUP_F64
// --- description from .arch file ---
// D.d = Divide fixup and flags -- s0.d = Quotient, s1.d = Denominator,
// s2.d = Numerator. This opcode generates exceptions resulting from the
// division operation.
void
Inst_VOP3__V_DIV_FIXUP_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ConstVecOperandF64 src2(gpuDynInst, extData.SRC2);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (instData.ABS & 0x4) {
src2.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
if (extData.NEG & 0x4) {
src2.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
int sign_out = std::signbit(src1[lane])
^ std::signbit(src2[lane]);
int exp1(0);
int exp2(0);
std::frexp(src1[lane], &exp1);
std::frexp(src2[lane], &exp2);
if (std::isnan(src1[lane]) || std::isnan(src2[lane])) {
vdst[lane] = std::numeric_limits<VecElemF64>::quiet_NaN();
} else if (std::fpclassify(src1[lane]) == FP_ZERO
&& std::fpclassify(src2[lane]) == FP_ZERO) {
vdst[lane]
= std::numeric_limits<VecElemF64>::signaling_NaN();
} else if (std::isinf(src1[lane]) && std::isinf(src2[lane])) {
vdst[lane]
= std::numeric_limits<VecElemF64>::signaling_NaN();
} else if (std::fpclassify(src1[lane]) == FP_ZERO
|| std::isinf(src2[lane])) {
vdst[lane] = sign_out ? -INFINITY : +INFINITY;
} else if (std::isinf(src1[lane])
|| std::fpclassify(src2[lane]) == FP_ZERO) {
vdst[lane] = sign_out ? -0.0 : +0.0;
} else if (exp2 - exp1 < -1075) {
vdst[lane] = src0[lane];
} else if (exp1 == 2047) {
vdst[lane] = src0[lane];
} else {
vdst[lane] = sign_out ? -std::fabs(src0[lane])
: std::fabs(src0[lane]);
}
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_DIV_SCALE_F32 class methods ---
Inst_VOP3__V_DIV_SCALE_F32::Inst_VOP3__V_DIV_SCALE_F32(
InFmt_VOP3B *iFmt)
: Inst_VOP3B(iFmt, "v_div_scale_f32")
{
setFlag(ALU);
setFlag(WritesVCC);
setFlag(F32);
} // Inst_VOP3__V_DIV_SCALE_F32
Inst_VOP3__V_DIV_SCALE_F32::~Inst_VOP3__V_DIV_SCALE_F32()
{
} // ~Inst_VOP3__V_DIV_SCALE_F32
// --- description from .arch file ---
// {vcc,D.f} = Divide preop and flags -- s0.f = Quotient, s1.f =
// Denominator, s2.f = Numerator -- s0 must equal s1 or s2. Given a
// numerator and denominator, this opcode will appropriately scale inputs
// for division to avoid subnormal terms during Newton-Raphson correction
// algorithm. This opcode produces a VCC flag for post-scale of quotient.
void
Inst_VOP3__V_DIV_SCALE_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
if (extData.NEG & 0x4) {
src2.negModifier();
}
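// Note: unlike the F64 variant below, this implementation does not
// model the subnormal pre-scaling; it passes the quotient through
// unchanged and always clears the per-lane VCC bit.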
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane];
vcc.setBit(lane, 0);
}
}
vcc.write();
vdst.write();
} // execute
// --- Inst_VOP3__V_DIV_SCALE_F64 class methods ---
Inst_VOP3__V_DIV_SCALE_F64::Inst_VOP3__V_DIV_SCALE_F64(
InFmt_VOP3B *iFmt)
: Inst_VOP3B(iFmt, "v_div_scale_f64")
{
setFlag(ALU);
setFlag(WritesVCC);
setFlag(F64);
} // Inst_VOP3__V_DIV_SCALE_F64
Inst_VOP3__V_DIV_SCALE_F64::~Inst_VOP3__V_DIV_SCALE_F64()
{
} // ~Inst_VOP3__V_DIV_SCALE_F64
// --- description from .arch file ---
// {vcc,D.d} = Divide preop and flags -- s0.d = Quotient, s1.d =
// Denominator, s2.d = Numerator -- s0 must equal s1 or s2. Given a
// numerator and denominator, this opcode will appropriately scale inputs
// for division to avoid subnormal terms during Newton-Raphson correction
// algorithm. This opcode produces a VCC flag for post-scale of quotient.
void
Inst_VOP3__V_DIV_SCALE_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ConstVecOperandF64 src2(gpuDynInst, extData.SRC2);
ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
if (extData.NEG & 0x4) {
src2.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
int exp1(0);
int exp2(0);
std::frexp(src1[lane], &exp1);
std::frexp(src2[lane], &exp2);
vcc.setBit(lane, 0);
if (std::fpclassify(src1[lane]) == FP_ZERO
|| std::fpclassify(src2[lane]) == FP_ZERO) {
vdst[lane] = NAN;
} else if (exp2 - exp1 >= 768) {
vcc.setBit(lane, 1);
if (src0[lane] == src1[lane]) {
vdst[lane] = std::ldexp(src0[lane], 128);
}
} else if (std::fpclassify(src1[lane]) == FP_SUBNORMAL) {
vdst[lane] = std::ldexp(src0[lane], 128);
} else if (std::fpclassify(1.0 / src1[lane]) == FP_SUBNORMAL
&& std::fpclassify(src2[lane] / src1[lane])
== FP_SUBNORMAL) {
vcc.setBit(lane, 1);
if (src0[lane] == src1[lane]) {
vdst[lane] = std::ldexp(src0[lane], 128);
}
} else if (std::fpclassify(1.0 / src1[lane]) == FP_SUBNORMAL) {
vdst[lane] = std::ldexp(src0[lane], -128);
} else if (std::fpclassify(src2[lane] / src1[lane])
== FP_SUBNORMAL) {
vcc.setBit(lane, 1);
if (src0[lane] == src2[lane]) {
vdst[lane] = std::ldexp(src0[lane], 128);
}
} else if (exp2 <= 53) {
vdst[lane] = std::ldexp(src0[lane], 128);
}
}
}
vcc.write();
vdst.write();
} // execute
// --- Inst_VOP3__V_DIV_FMAS_F32 class methods ---
Inst_VOP3__V_DIV_FMAS_F32::Inst_VOP3__V_DIV_FMAS_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_div_fmas_f32", false)
{
setFlag(ALU);
setFlag(ReadsVCC);
setFlag(F32);
setFlag(FMA);
} // Inst_VOP3__V_DIV_FMAS_F32
Inst_VOP3__V_DIV_FMAS_F32::~Inst_VOP3__V_DIV_FMAS_F32()
{
} // ~Inst_VOP3__V_DIV_FMAS_F32
// --- description from .arch file ---
// D.f = Special case divide FMA with scale and flags(s0.f = Quotient,
// s1.f = Denominator, s2.f = Numerator)
void
Inst_VOP3__V_DIV_FMAS_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.readSrc();
src2.readSrc();
vcc.read();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (instData.ABS & 0x4) {
src2.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
if (extData.NEG & 0x4) {
src2.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
// Mirror the F64 variant below: when the per-lane VCC bit
// set by V_DIV_SCALE_F32 is on, apply the single-precision
// post-scale, assumed here to be 2^32 by analogy with the
// 2^64 double-precision scale.
if (bits(vcc.rawData(), lane)) {
vdst[lane] = std::pow(2, 32)
* std::fma(src0[lane], src1[lane], src2[lane]);
} else {
vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
}
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_DIV_FMAS_F64 class methods ---
Inst_VOP3__V_DIV_FMAS_F64::Inst_VOP3__V_DIV_FMAS_F64(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_div_fmas_f64", false)
{
setFlag(ALU);
setFlag(ReadsVCC);
setFlag(F64);
setFlag(FMA);
} // Inst_VOP3__V_DIV_FMAS_F64
Inst_VOP3__V_DIV_FMAS_F64::~Inst_VOP3__V_DIV_FMAS_F64()
{
} // ~Inst_VOP3__V_DIV_FMAS_F64
// --- description from .arch file ---
// D.d = Special case divide FMA with scale and flags(s0.d = Quotient,
// s1.d = Denominator, s2.d = Numerator)
void
Inst_VOP3__V_DIV_FMAS_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
ConstVecOperandF64 src2(gpuDynInst, extData.SRC2);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
src0.readSrc();
src1.readSrc();
src2.readSrc();
vcc.read();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (instData.ABS & 0x4) {
src2.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
if (extData.NEG & 0x4) {
src2.negModifier();
}
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (bits(vcc.rawData(), lane)) {
vdst[lane] = std::pow(2, 64)
* std::fma(src0[lane], src1[lane], src2[lane]);
} else {
vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
}
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MSAD_U8 class methods ---
Inst_VOP3__V_MSAD_U8::Inst_VOP3__V_MSAD_U8(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_msad_u8", false)
{
setFlag(ALU);
} // Inst_VOP3__V_MSAD_U8
Inst_VOP3__V_MSAD_U8::~Inst_VOP3__V_MSAD_U8()
{
} // ~Inst_VOP3__V_MSAD_U8
// --- description from .arch file ---
// D.u = Masked Byte SAD with accum_lo(S0.u, S1.u, S2.u).
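// A commented per-lane sketch (unimplemented below), assuming the
// usual masking rule that byte pairs whose S1 byte is zero do not
// contribute (not verified against hardware):
//   VecElemU32 sum = src2[lane];
//   for (int i = 0; i < 4; ++i) {
//       int b0 = bits(src0[lane], 8 * i + 7, 8 * i);
//       int b1 = bits(src1[lane], 8 * i + 7, 8 * i);
//       if (b1 != 0)
//           sum += std::abs(b0 - b1);
//   }
//   vdst[lane] = sum;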
void
Inst_VOP3__V_MSAD_U8::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_QSAD_PK_U16_U8 class methods ---
Inst_VOP3__V_QSAD_PK_U16_U8::Inst_VOP3__V_QSAD_PK_U16_U8(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_qsad_pk_u16_u8", false)
{
setFlag(ALU);
} // Inst_VOP3__V_QSAD_PK_U16_U8
Inst_VOP3__V_QSAD_PK_U16_U8::~Inst_VOP3__V_QSAD_PK_U16_U8()
{
} // ~Inst_VOP3__V_QSAD_PK_U16_U8
// --- description from .arch file ---
// D.u = Quad-Byte SAD with 16-bit packed accum_lo/hi(S0.u[63:0],
// S1.u[31:0], S2.u[63:0])
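// A commented per-lane sketch (unimplemented below), assuming
// hypothetical 64-bit operand views for SRC0/SRC2 and the usual
// sliding-window reading: four byte-SADs of S1.u32 against the
// overlapping 4-byte windows of S0.u64, each accumulated into the
// matching 16-bit lane of S2.u64:
//   auto sad = [](VecElemU32 a, VecElemU32 b) {
//       VecElemU32 s = 0;
//       for (int i = 0; i < 4; ++i)
//           s += std::abs((int)bits(a, 8 * i + 7, 8 * i)
//               - (int)bits(b, 8 * i + 7, 8 * i));
//       return s;
//   };
//   VecElemU64 result = 0;
//   for (int j = 0; j < 4; ++j) {
//       VecElemU32 win = (VecElemU32)(src0[lane] >> (8 * j));
//       VecElemU16 acc = (VecElemU16)(src2[lane] >> (16 * j));
//       result |= (VecElemU64)(VecElemU16)(acc + sad(win, src1[lane]))
//           << (16 * j);
//   }
//   vdst[lane] = result;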
void
Inst_VOP3__V_QSAD_PK_U16_U8::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_MQSAD_PK_U16_U8 class methods ---
Inst_VOP3__V_MQSAD_PK_U16_U8::Inst_VOP3__V_MQSAD_PK_U16_U8(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_mqsad_pk_u16_u8", false)
{
setFlag(ALU);
} // Inst_VOP3__V_MQSAD_PK_U16_U8
Inst_VOP3__V_MQSAD_PK_U16_U8::~Inst_VOP3__V_MQSAD_PK_U16_U8()
{
} // ~Inst_VOP3__V_MQSAD_PK_U16_U8
// --- description from .arch file ---
// D.u = Masked Quad-Byte SAD with 16-bit packed accum_lo/hi(S0.u[63:0],
// --- S1.u[31:0], S2.u[63:0])
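// A sketch would mirror the V_QSAD_PK_U16_U8 note above, with the
// additional masking rule that zero bytes of S1 are skipped (see the
// V_MSAD_U8 note; assumed, not verified against hardware).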
void
Inst_VOP3__V_MQSAD_PK_U16_U8::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_MQSAD_U32_U8 class methods ---
Inst_VOP3__V_MQSAD_U32_U8::Inst_VOP3__V_MQSAD_U32_U8(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_mqsad_u32_u8", false)
{
setFlag(ALU);
} // Inst_VOP3__V_MQSAD_U32_U8
Inst_VOP3__V_MQSAD_U32_U8::~Inst_VOP3__V_MQSAD_U32_U8()
{
} // ~Inst_VOP3__V_MQSAD_U32_U8
// --- description from .arch file ---
// D.u128 = Masked Quad-Byte SAD with 32-bit accum_lo/hi(S0.u[63:0],
// --- S1.u[31:0], S2.u[127:0])
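// Same masked quad SAD as above, but with four 32-bit accumulators
// from S2.u128 and a 128-bit destination; the 16-bit packed sketch
// above widens accordingly.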
void
Inst_VOP3__V_MQSAD_U32_U8::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_MAD_U64_U32 class methods ---
Inst_VOP3__V_MAD_U64_U32::Inst_VOP3__V_MAD_U64_U32(
InFmt_VOP3B *iFmt)
: Inst_VOP3B(iFmt, "v_mad_u64_u32")
{
setFlag(ALU);
setFlag(WritesVCC);
setFlag(MAD);
} // Inst_VOP3__V_MAD_U64_U32
Inst_VOP3__V_MAD_U64_U32::~Inst_VOP3__V_MAD_U64_U32()
{
} // ~Inst_VOP3__V_MAD_U64_U32
// --- description from .arch file ---
// {vcc_out,D.u64} = S0.u32 * S1.u32 + S2.u64.
void
Inst_VOP3__V_MAD_U64_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandU64 src2(gpuDynInst, extData.SRC2);
ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
VecOperandU64 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
vdst.read();
/**
* input modifiers are supported by FP operations only
*/
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
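// muladd() stores S0 * S1 + S2 into vdst[lane] and returns the
// carry-out of the 64-bit accumulation, which becomes the per-lane
// VCC bit.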
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, muladd(vdst[lane], src0[lane], src1[lane],
src2[lane]));
}
}
vcc.write();
vdst.write();
} // execute
// --- Inst_VOP3__V_MAD_I64_I32 class methods ---
Inst_VOP3__V_MAD_I64_I32::Inst_VOP3__V_MAD_I64_I32(
InFmt_VOP3B *iFmt)
: Inst_VOP3B(iFmt, "v_mad_i64_i32")
{
setFlag(ALU);
setFlag(WritesVCC);
setFlag(MAD);
} // Inst_VOP3__V_MAD_I64_I32
Inst_VOP3__V_MAD_I64_I32::~Inst_VOP3__V_MAD_I64_I32()
{
} // ~Inst_VOP3__V_MAD_I64_I32
// --- description from .arch file ---
// {vcc_out,D.i64} = S0.i32 * S1.i32 + S2.i64.
void
Inst_VOP3__V_MAD_I64_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandI64 src2(gpuDynInst, extData.SRC2);
ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
VecOperandI64 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, muladd(vdst[lane], src0[lane], src1[lane],
src2[lane]));
}
}
vcc.write();
vdst.write();
} // execute
// --- Inst_VOP3__V_LSHL_ADD_U32 class methods ---
Inst_VOP3__V_LSHL_ADD_U32::Inst_VOP3__V_LSHL_ADD_U32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_lshl_add_u32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_LSHL_ADD_U32
Inst_VOP3__V_LSHL_ADD_U32::~Inst_VOP3__V_LSHL_ADD_U32()
{
} // ~Inst_VOP3__V_LSHL_ADD_U32
// --- description from .arch file ---
// D.u = (S0.u << S1.u[4:0]) + S2.u.
void
Inst_VOP3__V_LSHL_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (src0[lane] << bits(src1[lane], 4, 0))
+ src2[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_ADD_LSHL_U32 class methods ---
Inst_VOP3__V_ADD_LSHL_U32::Inst_VOP3__V_ADD_LSHL_U32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_add_lshl_u32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_ADD_LSHL_U32
Inst_VOP3__V_ADD_LSHL_U32::~Inst_VOP3__V_ADD_LSHL_U32()
{
} // ~Inst_VOP3__V_ADD_LSHL_U32
// --- description from .arch file ---
// D.u = (S0.u + S1.u) << S2.u[4:0].
void
Inst_VOP3__V_ADD_LSHL_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] =
(src0[lane] + src1[lane]) << bits(src2[lane], 4, 0);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_ADD3_U32 class methods ---
Inst_VOP3__V_ADD3_U32::Inst_VOP3__V_ADD3_U32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_add3_u32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_ADD3_U32
Inst_VOP3__V_ADD3_U32::~Inst_VOP3__V_ADD3_U32()
{
} // ~Inst_VOP3__V_ADD3_U32
// --- description from .arch file ---
// D.u = S0.u + S1.u + S2.u.
void
Inst_VOP3__V_ADD3_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] + src1[lane] + src2[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_LSHL_OR_B32 class methods ---
Inst_VOP3__V_LSHL_OR_B32::Inst_VOP3__V_LSHL_OR_B32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_lshl_or_b32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_LSHL_OR_B32
Inst_VOP3__V_LSHL_OR_B32::~Inst_VOP3__V_LSHL_OR_B32()
{
} // ~Inst_VOP3__V_LSHL_OR_B32
// --- description from .arch file ---
// D.u = (S0.u << S1.u[4:0]) | S2.u.
void
Inst_VOP3__V_LSHL_OR_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (src0[lane] << bits(src1[lane], 4, 0))
| src2[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MAD_F16 class methods ---
Inst_VOP3__V_MAD_F16::Inst_VOP3__V_MAD_F16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_mad_f16", false)
{
setFlag(ALU);
setFlag(F16);
setFlag(MAD);
} // Inst_VOP3__V_MAD_F16
Inst_VOP3__V_MAD_F16::~Inst_VOP3__V_MAD_F16()
{
} // ~Inst_VOP3__V_MAD_F16
// --- description from .arch file ---
// D.f16 = S0.f16 * S1.f16 + S2.f16.
// Supports round mode, exception flags, saturation.
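// A commented sketch, assuming hypothetical half<->float conversion
// helpers f16tof32()/f32tof16() (F16 register views are not
// exercised by this model):
//   VecElemF32 r = f16tof32(src0[lane]) * f16tof32(src1[lane])
//       + f16tof32(src2[lane]);
//   vdst[lane] = f32tof16(r);  // plus round mode/clamp handling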
void
Inst_VOP3__V_MAD_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_MAD_U16 class methods ---
Inst_VOP3__V_MAD_U16::Inst_VOP3__V_MAD_U16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_mad_u16", false)
{
setFlag(ALU);
setFlag(MAD);
} // Inst_VOP3__V_MAD_U16
Inst_VOP3__V_MAD_U16::~Inst_VOP3__V_MAD_U16()
{
} // ~Inst_VOP3__V_MAD_U16
// --- description from .arch file ---
// D.u16 = S0.u16 * S1.u16 + S2.u16.
// Supports saturation (unsigned 16-bit integer domain).
void
Inst_VOP3__V_MAD_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
ConstVecOperandU16 src2(gpuDynInst, extData.SRC2);
VecOperandU16 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] * src1[lane] + src2[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MAD_I16 class methods ---
Inst_VOP3__V_MAD_I16::Inst_VOP3__V_MAD_I16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_mad_i16", false)
{
setFlag(ALU);
setFlag(MAD);
} // Inst_VOP3__V_MAD_I16
Inst_VOP3__V_MAD_I16::~Inst_VOP3__V_MAD_I16()
{
} // ~Inst_VOP3__V_MAD_I16
// --- description from .arch file ---
// D.i16 = S0.i16 * S1.i16 + S2.i16.
// Supports saturation (signed 16-bit integer domain).
void
Inst_VOP3__V_MAD_I16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
ConstVecOperandI16 src2(gpuDynInst, extData.SRC2);
VecOperandI16 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] * src1[lane] + src2[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_PERM_B32 class methods ---
Inst_VOP3__V_PERM_B32::Inst_VOP3__V_PERM_B32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_perm_b32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_PERM_B32
Inst_VOP3__V_PERM_B32::~Inst_VOP3__V_PERM_B32()
{
} // ~Inst_VOP3__V_PERM_B32
// --- description from .arch file ---
// D.u[31:24] = permute({S0.u, S1.u}, S2.u[31:24]);
// D.u[23:16] = permute({S0.u, S1.u}, S2.u[23:16]);
// D.u[15:8] = permute({S0.u, S1.u}, S2.u[15:8]);
// D.u[7:0] = permute({S0.u, S1.u}, S2.u[7:0]);
// byte permute(byte in[8], byte sel) {
// if(sel>=13) then return 0xff;
// elsif(sel==12) then return 0x00;
// elsif(sel==11) then return in[7][7] * 0xff;
// elsif(sel==10) then return in[5][7] * 0xff;
// elsif(sel==9) then return in[3][7] * 0xff;
// elsif(sel==8) then return in[1][7] * 0xff;
// else return in[sel];
// }
// Byte permute.
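// Worked example, assuming permute() indexes bytes of {S0,S1} from
// the least significant end: S0=0xAABBCCDD, S1=0x11223344,
// S2=0x07050301 selects bytes 1, 3, 5, 7 and yields D=0xAACC1133.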
void
Inst_VOP3__V_PERM_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
src2.readSrc();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
VecElemU64 selector = (VecElemU64)src0[lane];
selector = (selector << 32) | (VecElemU64)src1[lane];
vdst[lane] = 0;
DPRINTF(VEGA, "Executing v_perm_b32 src_0 0x%08x, src_1 "
"0x%08x, src_2 0x%08x, vdst 0x%08x\n", src0[lane],
src1[lane], src2[lane], vdst[lane]);
DPRINTF(VEGA, "Selector: 0x%08x \n", selector);
for (int i = 0; i < 4 ; ++i) {
VecElemU32 permuted_val = permute(selector, 0xFF
& ((VecElemU32)src2[lane] >> (8 * i)));
vdst[lane] |= (permuted_val << (8 * i));
}
DPRINTF(VEGA, "v_perm result: 0x%08x\n", vdst[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_FMA_F16 class methods ---
Inst_VOP3__V_FMA_F16::Inst_VOP3__V_FMA_F16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_fma_f16", false)
{
setFlag(ALU);
setFlag(F16);
setFlag(FMA);
} // Inst_VOP3__V_FMA_F16
Inst_VOP3__V_FMA_F16::~Inst_VOP3__V_FMA_F16()
{
} // ~Inst_VOP3__V_FMA_F16
// --- description from .arch file ---
// D.f16 = S0.f16 * S1.f16 + S2.f16.
// Fused half precision multiply add.
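// A sketch would mirror the V_MAD_F16 note above; note that a single
// std::fma on widened f32 values followed by one conversion back to
// f16 only approximates the fused half-precision rounding.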
void
Inst_VOP3__V_FMA_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_DIV_FIXUP_F16 class methods ---
Inst_VOP3__V_DIV_FIXUP_F16::Inst_VOP3__V_DIV_FIXUP_F16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_div_fixup_f16", false)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_DIV_FIXUP_F16
Inst_VOP3__V_DIV_FIXUP_F16::~Inst_VOP3__V_DIV_FIXUP_F16()
{
} // ~Inst_VOP3__V_DIV_FIXUP_F16
// --- description from .arch file ---
// sign_out = sign(S1.f16)^sign(S2.f16);
// if (S2.f16 == NAN)
// D.f16 = Quiet(S2.f16);
// else if (S1.f16 == NAN)
// D.f16 = Quiet(S1.f16);
// else if (S1.f16 == S2.f16 == 0)
// # 0/0
// D.f16 = pele_nan(0xfe00);
// else if (abs(S1.f16) == abs(S2.f16) == +-INF)
// # inf/inf
// D.f16 = pele_nan(0xfe00);
// else if (S1.f16 ==0 || abs(S2.f16) == +-INF)
// # x/0, or inf/y
// D.f16 = sign_out ? -INF : INF;
// else if (abs(S1.f16) == +-INF || S2.f16 == 0)
// # x/inf, 0/y
// D.f16 = sign_out ? -0 : 0;
// else if ((exp(S2.f16) - exp(S1.f16)) < -150)
// D.f16 = sign_out ? -underflow : underflow;
// else if (exp(S1.f16) == 255)
// D.f16 = sign_out ? -overflow : overflow;
// else
// D.f16 = sign_out ? -abs(S0.f16) : abs(S0.f16).
// Half precision division fixup.
// S0 = Quotient, S1 = Denominator, S2 = Numerator.
// Given a numerator, denominator, and quotient from a divide, this opcode
// will detect and apply special case numerics, touching up the quotient if
// necessary. This opcode also generates invalid, denorm and divide by
// zero exceptions caused by the division.
void
Inst_VOP3__V_DIV_FIXUP_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CVT_PKACCUM_U8_F32 class methods ---
Inst_VOP3__V_CVT_PKACCUM_U8_F32::Inst_VOP3__V_CVT_PKACCUM_U8_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cvt_pkaccum_u8_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CVT_PKACCUM_U8_F32
Inst_VOP3__V_CVT_PKACCUM_U8_F32::~Inst_VOP3__V_CVT_PKACCUM_U8_F32()
{
} // ~Inst_VOP3__V_CVT_PKACCUM_U8_F32
// --- description from .arch file ---
// byte = S1.u[1:0]; bit = byte * 8;
// D.u[bit+7:bit] = flt32_to_uint8(S0.f);
// Pack converted value of S0.f into byte S1 of the destination.
// SQ translates to V_CVT_PK_U8_F32.
// Note: this opcode uses src_c to pass destination in as a source.
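// A commented sketch modeled on the implemented V_CVT_PK_U8_F32
// above, with the previous destination value read back through the
// third source operand (hypothetical name old_dst):
//   int byte = bits(src1[lane], 1, 0);
//   VecElemU32 mask = 0xff << (8 * byte);
//   vdst[lane] = (old_dst & ~mask)
//       | ((VecElemU32)((VecElemU8)src0[lane]) << (8 * byte));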
void
Inst_VOP3__V_CVT_PKACCUM_U8_F32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_INTERP_P1_F32 class methods ---
Inst_VOP3__V_INTERP_P1_F32::Inst_VOP3__V_INTERP_P1_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_interp_p1_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_INTERP_P1_F32
Inst_VOP3__V_INTERP_P1_F32::~Inst_VOP3__V_INTERP_P1_F32()
{
} // ~Inst_VOP3__V_INTERP_P1_F32
// --- description from .arch file ---
// D.f = P10 * S.f + P0; parameter interpolation (SQ translates to
// V_MAD_F32 for SP).
// CAUTION: when in HALF_LDS mode, D must not be the same GPR as S; if
// D == S then data corruption will occur.
// NOTE: In textual representations the I/J VGPR is the first source and
// the attribute is the second source; however in the VOP3 encoding the
// attribute is stored in the src0 field and the VGPR is stored in the
// src1 field.
void
Inst_VOP3__V_INTERP_P1_F32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_INTERP_P2_F32 class methods ---
Inst_VOP3__V_INTERP_P2_F32::Inst_VOP3__V_INTERP_P2_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_interp_p2_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_INTERP_P2_F32
Inst_VOP3__V_INTERP_P2_F32::~Inst_VOP3__V_INTERP_P2_F32()
{
} // ~Inst_VOP3__V_INTERP_P2_F32
// --- description from .arch file ---
// D.f = P20 * S.f + D.f; parameter interpolation (SQ translates to
// V_MAD_F32 for SP).
// NOTE: In textual representations the I/J VGPR is the first source and
// the attribute is the second source; however in the VOP3 encoding the
// attribute is stored in the src0 field and the VGPR is stored in the
// src1 field.
void
Inst_VOP3__V_INTERP_P2_F32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_INTERP_MOV_F32 class methods ---
Inst_VOP3__V_INTERP_MOV_F32::Inst_VOP3__V_INTERP_MOV_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_interp_mov_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_INTERP_MOV_F32
Inst_VOP3__V_INTERP_MOV_F32::~Inst_VOP3__V_INTERP_MOV_F32()
{
} // ~Inst_VOP3__V_INTERP_MOV_F32
// --- description from .arch file ---
// D.f = {P10,P20,P0}[S.u]; parameter load.
void
Inst_VOP3__V_INTERP_MOV_F32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_INTERP_P1LL_F16 class methods ---
Inst_VOP3__V_INTERP_P1LL_F16::Inst_VOP3__V_INTERP_P1LL_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_interp_p1ll_f16", false)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_INTERP_P1LL_F16
Inst_VOP3__V_INTERP_P1LL_F16::~Inst_VOP3__V_INTERP_P1LL_F16()
{
} // ~Inst_VOP3__V_INTERP_P1LL_F16
// --- description from .arch file ---
// D.f32 = P10.f16 * S0.f32 + P0.f16.
// 'LL' stands for 'two LDS arguments'.
// attr_word selects the high or low half 16 bits of each LDS dword
// accessed.
// This opcode is available for 32-bank LDS only.
// NOTE: In textual representations the I/J VGPR is the first source and
// the attribute is the second source; however in the VOP3 encoding the
// attribute is stored in the src0 field and the VGPR is stored in the
// src1 field.
void
Inst_VOP3__V_INTERP_P1LL_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_INTERP_P1LV_F16 class methods ---
Inst_VOP3__V_INTERP_P1LV_F16::Inst_VOP3__V_INTERP_P1LV_F16(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_interp_p1lv_f16", false)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_INTERP_P1LV_F16
Inst_VOP3__V_INTERP_P1LV_F16::~Inst_VOP3__V_INTERP_P1LV_F16()
{
} // ~Inst_VOP3__V_INTERP_P1LV_F16
// --- description from .arch file ---
// D.f32 = P10.f16 * S0.f32 + (S2.u32 >> (attr_word * 16)).f16.
// 'LV' stands for 'One LDS and one VGPR argument'.
// S2 holds two parameters, attr_word selects the high or low word of the
// VGPR for this calculation, as well as the high or low half of the LDS
// data.
// Meant for use with 16-bank LDS.
// NOTE: In textual representations the I/J VGPR is the first source and
// the attribute is the second source; however in the VOP3 encoding the
// attribute is stored in the src0 field and the VGPR is stored in the
// src1 field.
void
Inst_VOP3__V_INTERP_P1LV_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_INTERP_P2_F16 class methods ---
Inst_VOP3__V_INTERP_P2_F16::Inst_VOP3__V_INTERP_P2_F16(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_interp_p2_f16", false)
{
setFlag(ALU);
setFlag(F16);
} // Inst_VOP3__V_INTERP_P2_F16
Inst_VOP3__V_INTERP_P2_F16::~Inst_VOP3__V_INTERP_P2_F16()
{
} // ~Inst_VOP3__V_INTERP_P2_F16
// --- description from .arch file ---
// D.f16 = P20.f16 * S0.f32 + S2.f32.
// Final computation. attr_word selects LDS high or low 16bits. Used for
// both 16- and 32-bank LDS.
// Result is always written to the 16 LSBs of the destination VGPR.
// NOTE: In textual representations the I/J VGPR is the first source and
// the attribute is the second source; however in the VOP3 encoding the
// attribute is stored in the src0 field and the VGPR is stored in the
// src1 field.
void
Inst_VOP3__V_INTERP_P2_F16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_ADD_F64 class methods ---
Inst_VOP3__V_ADD_F64::Inst_VOP3__V_ADD_F64(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_add_f64", false)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_ADD_F64
Inst_VOP3__V_ADD_F64::~Inst_VOP3__V_ADD_F64()
{
} // ~Inst_VOP3__V_ADD_F64
// --- description from .arch file ---
// D.d = S0.d + S1.d.
void
Inst_VOP3__V_ADD_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
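// The special-case cascade below makes the IEEE behavior explicit:
// any NaN input produces NaN, inf + (-inf) produces NaN, a single
// infinite operand propagates, and zero/denormal inputs are treated
// as (signed) zero before the add.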
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (std::isnan(src0[lane]) ||
std::isnan(src1[lane]) ) {
vdst[lane] = NAN;
} else if (std::isinf(src0[lane]) &&
std::isinf(src1[lane])) {
if (std::signbit(src0[lane]) !=
std::signbit(src1[lane])) {
vdst[lane] = NAN;
} else {
vdst[lane] = src0[lane];
}
} else if (std::isinf(src0[lane])) {
vdst[lane] = src0[lane];
} else if (std::isinf(src1[lane])) {
vdst[lane] = src1[lane];
} else if (std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
std::fpclassify(src0[lane]) == FP_ZERO) {
if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
std::fpclassify(src1[lane]) == FP_ZERO) {
if (std::signbit(src0[lane]) &&
std::signbit(src1[lane])) {
vdst[lane] = -0.0;
} else {
vdst[lane] = 0.0;
}
} else {
vdst[lane] = src1[lane];
}
} else if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
std::fpclassify(src1[lane]) == FP_ZERO) {
if (std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
std::fpclassify(src0[lane]) == FP_ZERO) {
if (std::signbit(src0[lane]) &&
std::signbit(src1[lane])) {
vdst[lane] = -0.0;
} else {
vdst[lane] = 0.0;
}
} else {
vdst[lane] = src0[lane];
}
} else {
vdst[lane] = src0[lane] + src1[lane];
}
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MUL_F64 class methods ---
Inst_VOP3__V_MUL_F64::Inst_VOP3__V_MUL_F64(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_mul_f64", false)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_MUL_F64
Inst_VOP3__V_MUL_F64::~Inst_VOP3__V_MUL_F64()
{
} // ~Inst_VOP3__V_MUL_F64
// --- description from .arch file ---
// D.d = S0.d * S1.d.
void
Inst_VOP3__V_MUL_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
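// Zero/denormal and infinite operands are handled explicitly below:
// 0 * inf produces NaN, the sign of a zero or infinite result is the
// XOR of the operand signs, and denormal inputs are treated as
// (signed) zero.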
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (std::isnan(src0[lane]) ||
std::isnan(src1[lane])) {
vdst[lane] = NAN;
} else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
std::fpclassify(src0[lane]) == FP_ZERO) &&
!std::signbit(src0[lane])) {
if (std::isinf(src1[lane])) {
vdst[lane] = NAN;
} else if (!std::signbit(src1[lane])) {
vdst[lane] = +0.0;
} else {
vdst[lane] = -0.0;
}
} else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
std::fpclassify(src0[lane]) == FP_ZERO) &&
std::signbit(src0[lane])) {
if (std::isinf(src1[lane])) {
vdst[lane] = NAN;
} else if (std::signbit(src1[lane])) {
vdst[lane] = +0.0;
} else {
vdst[lane] = -0.0;
}
} else if (std::isinf(src0[lane]) &&
!std::signbit(src0[lane])) {
if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
std::fpclassify(src1[lane]) == FP_ZERO) {
vdst[lane] = NAN;
} else if (!std::signbit(src1[lane])) {
vdst[lane] = +INFINITY;
} else {
vdst[lane] = -INFINITY;
}
} else if (std::isinf(src0[lane]) &&
std::signbit(src0[lane])) {
if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
std::fpclassify(src1[lane]) == FP_ZERO) {
vdst[lane] = NAN;
} else if (std::signbit(src1[lane])) {
vdst[lane] = +INFINITY;
} else {
vdst[lane] = -INFINITY;
}
} else {
vdst[lane] = src0[lane] * src1[lane];
}
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MIN_F64 class methods ---
Inst_VOP3__V_MIN_F64::Inst_VOP3__V_MIN_F64(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_min_f64", false)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_MIN_F64
Inst_VOP3__V_MIN_F64::~Inst_VOP3__V_MIN_F64()
{
} // ~Inst_VOP3__V_MIN_F64
// --- description from .arch file ---
// D.d = min(S0.d, S1.d).
void
Inst_VOP3__V_MIN_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
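// std::fmin returns the non-NaN operand when exactly one input is
// NaN; hardware denormal flushing is not modelled here.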
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::fmin(src0[lane], src1[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MAX_F64 class methods ---
Inst_VOP3__V_MAX_F64::Inst_VOP3__V_MAX_F64(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_max_f64", false)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_MAX_F64
Inst_VOP3__V_MAX_F64::~Inst_VOP3__V_MAX_F64()
{
} // ~Inst_VOP3__V_MAX_F64
// --- description from .arch file ---
// D.d = max(S0.d, S1.d).
void
Inst_VOP3__V_MAX_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (instData.ABS & 0x2) {
src1.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
if (extData.NEG & 0x2) {
src1.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x4));
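// As with v_min_f64, std::fmax returns the non-NaN operand when
// exactly one input is NaN; denormal flushing is not modelled.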
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::fmax(src0[lane], src1[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_LDEXP_F64 class methods ---
Inst_VOP3__V_LDEXP_F64::Inst_VOP3__V_LDEXP_F64(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_ldexp_f64", false)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_LDEXP_F64
Inst_VOP3__V_LDEXP_F64::~Inst_VOP3__V_LDEXP_F64()
{
} // ~Inst_VOP3__V_LDEXP_F64
// --- description from .arch file ---
// D.d = pow(S0.d, S1.i[31:0]).
// Note: the 'pow' notation above is misleading; this opcode is ldexp,
// i.e. D.d = S0.d * 2^S1.i, as implemented below.
void
Inst_VOP3__V_LDEXP_F64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
VecOperandF64 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
if (instData.ABS & 0x1) {
src0.absModifier();
}
if (extData.NEG & 0x1) {
src0.negModifier();
}
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
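// NaN and infinity pass through unchanged, and a zero/denormal
// significand collapses to a signed zero regardless of the exponent;
// otherwise the result is S0 scaled by 2^S1, e.g. ldexp(1.5, 4) is
// 24.0.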
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (std::isnan(src0[lane]) || std::isinf(src0[lane])) {
vdst[lane] = src0[lane];
} else if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
|| std::fpclassify(src0[lane]) == FP_ZERO) {
if (std::signbit(src0[lane])) {
vdst[lane] = -0.0;
} else {
vdst[lane] = +0.0;
}
} else {
vdst[lane] = std::ldexp(src0[lane], src1[lane]);
}
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MUL_LO_U32 class methods ---
Inst_VOP3__V_MUL_LO_U32::Inst_VOP3__V_MUL_LO_U32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_mul_lo_u32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_MUL_LO_U32
Inst_VOP3__V_MUL_LO_U32::~Inst_VOP3__V_MUL_LO_U32()
{
} // ~Inst_VOP3__V_MUL_LO_U32
// --- description from .arch file ---
// D.u = S0.u * S1.u.
void
Inst_VOP3__V_MUL_LO_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
VecElemI64 s0 = (VecElemI64)src0[lane];
VecElemI64 s1 = (VecElemI64)src1[lane];
vdst[lane] = (VecElemU32)((s0 * s1) & 0xffffffffLL);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MUL_HI_U32 class methods ---
Inst_VOP3__V_MUL_HI_U32::Inst_VOP3__V_MUL_HI_U32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_mul_hi_u32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_MUL_HI_U32
Inst_VOP3__V_MUL_HI_U32::~Inst_VOP3__V_MUL_HI_U32()
{
} // ~Inst_VOP3__V_MUL_HI_U32
// --- description from .arch file ---
// D.u = (S0.u * S1.u) >> 32.
void
Inst_VOP3__V_MUL_HI_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
VecElemI64 s0 = (VecElemI64)src0[lane];
VecElemI64 s1 = (VecElemI64)src1[lane];
vdst[lane]
= (VecElemU32)(((s0 * s1) >> 32) & 0xffffffffLL);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MUL_HI_I32 class methods ---
Inst_VOP3__V_MUL_HI_I32::Inst_VOP3__V_MUL_HI_I32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_mul_hi_i32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_MUL_HI_I32
Inst_VOP3__V_MUL_HI_I32::~Inst_VOP3__V_MUL_HI_I32()
{
} // ~Inst_VOP3__V_MUL_HI_I32
// --- description from .arch file ---
// D.i = (S0.i * S1.i) >> 32.
void
Inst_VOP3__V_MUL_HI_I32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
VecOperandI32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
VecElemI64 s0 = (VecElemI64)src0[lane];
VecElemI64 s1 = (VecElemI64)src1[lane];
vdst[lane]
= (VecElemI32)(((s0 * s1) >> 32LL) & 0xffffffffLL);
}
}
vdst.write();
} // execute
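// A minimal compile-time sketch (illustration only; not used by the
// simulator) of why the unsigned and signed high-multiply opcodes
// above differ: the same 32-bit pattern yields different upper words
// depending on signedness. The helper names are hypothetical.
static constexpr VecElemU32
mulHiU32Sketch(VecElemU32 a, VecElemU32 b)
{
return (VecElemU32)(((VecElemU64)a * (VecElemU64)b) >> 32);
}
static constexpr VecElemI32
mulHiI32Sketch(VecElemI32 a, VecElemI32 b)
{
return (VecElemI32)(((VecElemI64)a * (VecElemI64)b) >> 32);
}
static_assert(mulHiU32Sketch(0xffffffffu, 2u) == 1u,
"unsigned: 0xffffffff * 2 = 0x1fffffffe, upper word 1");
static_assert(mulHiI32Sketch(-1, 2) == -1,
"signed: -1 * 2 = -2, upper word is all ones");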
// --- Inst_VOP3__V_LDEXP_F32 class methods ---
Inst_VOP3__V_LDEXP_F32::Inst_VOP3__V_LDEXP_F32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_ldexp_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_LDEXP_F32
Inst_VOP3__V_LDEXP_F32::~Inst_VOP3__V_LDEXP_F32()
{
} // ~Inst_VOP3__V_LDEXP_F32
// --- description from .arch file ---
// D.f = pow(S0.f, S1.i)
// Note: as with v_ldexp_f64, the 'pow' notation is misleading; this
// opcode is ldexp, i.e. D.f = S0.f * 2^S1.i, as implemented below.
void
Inst_VOP3__V_LDEXP_F32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::ldexp(src0[lane], src1[lane]);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_READLANE_B32 class methods ---
Inst_VOP3__V_READLANE_B32::Inst_VOP3__V_READLANE_B32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_readlane_b32", true)
{
setFlag(ALU);
setFlag(IgnoreExec);
} // Inst_VOP3__V_READLANE_B32
Inst_VOP3__V_READLANE_B32::~Inst_VOP3__V_READLANE_B32()
{
} // ~Inst_VOP3__V_READLANE_B32
// --- description from .arch file ---
// Copy one VGPR value to one SGPR. D = SGPR-dest, S0 = Source Data (VGPR#
// or M0(lds-direct)), S1 = Lane Select (SGPR or M0). Ignores exec mask.
// Input and output modifiers not supported; this is an untyped operation.
void
Inst_VOP3__V_READLANE_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstScalarOperandU32 src1(gpuDynInst, extData.SRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
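// The lane select is masked to the 64-lane wavefront size.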
sdst = src0[src1.rawData() & 0x3f];
sdst.write();
} // execute
// --- Inst_VOP3__V_WRITELANE_B32 class methods ---
Inst_VOP3__V_WRITELANE_B32::Inst_VOP3__V_WRITELANE_B32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_writelane_b32", false)
{
setFlag(ALU);
setFlag(IgnoreExec);
} // Inst_VOP3__V_WRITELANE_B32
Inst_VOP3__V_WRITELANE_B32::~Inst_VOP3__V_WRITELANE_B32()
{
} // ~Inst_VOP3__V_WRITELANE_B32
// --- description from .arch file ---
// Write value into one VGPR in one lane. D = VGPR-dest, S0 = Source Data
// (sgpr, m0, exec or constants), S1 = Lane Select (SGPR or M0). Ignores
// exec mask.
// Input and output modifiers not supported; this is an untyped operation.
// SQ translates to V_MOV_B32.
void
Inst_VOP3__V_WRITELANE_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, extData.SRC0);
ConstScalarOperandU32 src1(gpuDynInst, extData.SRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.read();
src1.read();
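// Read the destination first so that every lane except the selected
// one retains its current value when the register is written back.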
vdst.read();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
vdst[src1.rawData() & 0x3f] = src0.rawData();
vdst.write();
} // execute
// --- Inst_VOP3__V_BCNT_U32_B32 class methods ---
Inst_VOP3__V_BCNT_U32_B32::Inst_VOP3__V_BCNT_U32_B32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_bcnt_u32_b32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_BCNT_U32_B32
Inst_VOP3__V_BCNT_U32_B32::~Inst_VOP3__V_BCNT_U32_B32()
{
} // ~Inst_VOP3__V_BCNT_U32_B32
// --- description from .arch file ---
// D.u = CountOneBits(S0.u) + S1.u. Bit count.
void
Inst_VOP3__V_BCNT_U32_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = popCount(src0[lane]) + src1[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MBCNT_LO_U32_B32 class methods ---
Inst_VOP3__V_MBCNT_LO_U32_B32::Inst_VOP3__V_MBCNT_LO_U32_B32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_mbcnt_lo_u32_b32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_MBCNT_LO_U32_B32
Inst_VOP3__V_MBCNT_LO_U32_B32::~Inst_VOP3__V_MBCNT_LO_U32_B32()
{
} // ~Inst_VOP3__V_MBCNT_LO_U32_B32
// --- description from .arch file ---
// ThreadMask = (1 << ThreadPosition) - 1;
// D.u = CountOneBits(S0.u & ThreadMask[31:0]) + S1.u.
// Masked bit count, ThreadPosition is the position of this thread in the
// --- wavefront (in 0..63).
void
Inst_VOP3__V_MBCNT_LO_U32_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
uint64_t threadMask = 0;
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
// use an unsigned literal so the shift is well-defined for lane 63
threadMask = ((1ULL << lane) - 1ULL);
vdst[lane] = popCount(src0[lane] & bits(threadMask, 31, 0)) +
src1[lane];
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_MBCNT_HI_U32_B32 class methods ---
Inst_VOP3__V_MBCNT_HI_U32_B32::Inst_VOP3__V_MBCNT_HI_U32_B32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_mbcnt_hi_u32_b32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_MBCNT_HI_U32_B32
Inst_VOP3__V_MBCNT_HI_U32_B32::~Inst_VOP3__V_MBCNT_HI_U32_B32()
{
} // ~Inst_VOP3__V_MBCNT_HI_U32_B32
// --- description from .arch file ---
// ThreadMask = (1 << ThreadPosition) - 1;
// D.u = CountOneBits(S0.u & ThreadMask[63:32]) + S1.u.
// Masked bit count, ThreadPosition is the position of this thread in the
// --- wavefront (in 0..63).
void
Inst_VOP3__V_MBCNT_HI_U32_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
uint64_t threadMask = 0;
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
// use an unsigned literal so the shift is well-defined for lane 63
threadMask = ((1ULL << lane) - 1ULL);
vdst[lane] = popCount(src0[lane] & bits(threadMask, 63, 32)) +
src1[lane];
}
}
vdst.write();
} // execute
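// Note: compiled code typically chains these two opcodes, e.g.
// v_mbcnt_lo_u32_b32 v0, -1, 0 followed by
// v_mbcnt_hi_u32_b32 v0, -1, v0, to compute a lane's position within
// the 64-lane wavefront.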
// --- Inst_VOP3__V_LSHLREV_B64 class methods ---
Inst_VOP3__V_LSHLREV_B64::Inst_VOP3__V_LSHLREV_B64(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_lshlrev_b64", false)
{
setFlag(ALU);
} // Inst_VOP3__V_LSHLREV_B64
Inst_VOP3__V_LSHLREV_B64::~Inst_VOP3__V_LSHLREV_B64()
{
} // ~Inst_VOP3__V_LSHLREV_B64
// --- description from .arch file ---
// D.u64 = S1.u64 << S0.u[5:0].
// SQ translates this to an internal SP opcode.
void
Inst_VOP3__V_LSHLREV_B64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
VecOperandU64 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] << bits(src0[lane], 5, 0);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_LSHRREV_B64 class methods ---
Inst_VOP3__V_LSHRREV_B64::Inst_VOP3__V_LSHRREV_B64(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_lshrrev_b64", false)
{
setFlag(ALU);
} // Inst_VOP3__V_LSHRREV_B64
Inst_VOP3__V_LSHRREV_B64::~Inst_VOP3__V_LSHRREV_B64()
{
} // ~Inst_VOP3__V_LSHRREV_B64
// --- description from .arch file ---
// D.u64 = S1.u64 >> S0.u[5:0].
// The vacated bits are set to zero.
// SQ translates this to an internal SP opcode.
void
Inst_VOP3__V_LSHRREV_B64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
VecOperandU64 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] >> bits(src0[lane], 5, 0);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_ASHRREV_I64 class methods ---
Inst_VOP3__V_ASHRREV_I64::Inst_VOP3__V_ASHRREV_I64(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_ashrrev_i64", false)
{
setFlag(ALU);
} // Inst_VOP3__V_ASHRREV_I64
Inst_VOP3__V_ASHRREV_I64::~Inst_VOP3__V_ASHRREV_I64()
{
} // ~Inst_VOP3__V_ASHRREV_I64
// --- description from .arch file ---
// D.u64 = signext(S1.u64) >> S0.u[5:0].
// The vacated bits are set to the sign bit of the input value.
// SQ translates this to an internal SP opcode.
void
Inst_VOP3__V_ASHRREV_I64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
VecOperandU64 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane]
= src1[lane] >> bits(src0[lane], 5, 0);
}
}
vdst.write();
} // execute
// --- Inst_VOP3__V_TRIG_PREOP_F64 class methods ---
Inst_VOP3__V_TRIG_PREOP_F64::Inst_VOP3__V_TRIG_PREOP_F64(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_trig_preop_f64", false)
{
setFlag(ALU);
setFlag(F64);
} // Inst_VOP3__V_TRIG_PREOP_F64
Inst_VOP3__V_TRIG_PREOP_F64::~Inst_VOP3__V_TRIG_PREOP_F64()
{
} // ~Inst_VOP3__V_TRIG_PREOP_F64
// --- description from .arch file ---
// D.d = Look Up 2/PI (S0.d) with segment select S1.u[4:0]. This operation
// returns an aligned, double precision segment of 2/PI needed to do range
// reduction on S0.d (double-precision value). Multiple segments can be
// specified through S1.u[4:0]. Rounding is always round-to-zero. Large
// inputs (exp > 1968) are scaled to avoid loss of precision through
// denormalization.
void
Inst_VOP3__V_TRIG_PREOP_F64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_BFM_B32 class methods ---
Inst_VOP3__V_BFM_B32::Inst_VOP3__V_BFM_B32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_bfm_b32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_BFM_B32
Inst_VOP3__V_BFM_B32::~Inst_VOP3__V_BFM_B32()
{
} // ~Inst_VOP3__V_BFM_B32
// --- description from .arch file ---
// D.u = ((1<<S0.u[4:0])-1) << S1.u[4:0]; S0 is the bitfield width and S1
// is the bitfield offset.
void
Inst_VOP3__V_BFM_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.readSrc();
/**
* input modifiers are supported by FP operations only
*/
assert(!(instData.ABS & 0x1));
assert(!(instData.ABS & 0x2));
assert(!(instData.ABS & 0x4));
assert(!(extData.NEG & 0x1));
assert(!(extData.NEG & 0x2));
assert(!(extData.NEG & 0x4));
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = ((1 << bits(src0[lane], 4, 0)) - 1)
<< bits(src1[lane], 4, 0);
}
}
vdst.write();
} // execute
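// A compile-time sketch (illustration only; not used by the
// simulator) of the mask v_bfm_b32 builds; the helper name is
// hypothetical. A 5-bit field at offset 8, for example, gives
// 0x00001f00.
static constexpr VecElemU32
bfmSketch(VecElemU32 width, VecElemU32 offset)
{
return ((1u << (width & 0x1f)) - 1u) << (offset & 0x1f);
}
static_assert(bfmSketch(5, 8) == 0x00001f00u, "5-bit field at offset 8");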
// --- Inst_VOP3__V_CVT_PKNORM_I16_F32 class methods ---
Inst_VOP3__V_CVT_PKNORM_I16_F32::Inst_VOP3__V_CVT_PKNORM_I16_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cvt_pknorm_i16_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CVT_PKNORM_I16_F32
Inst_VOP3__V_CVT_PKNORM_I16_F32::~Inst_VOP3__V_CVT_PKNORM_I16_F32()
{
} // ~Inst_VOP3__V_CVT_PKNORM_I16_F32
// --- description from .arch file ---
// D = {(snorm)S1.f, (snorm)S0.f}.
void
Inst_VOP3__V_CVT_PKNORM_I16_F32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CVT_PKNORM_U16_F32 class methods ---
Inst_VOP3__V_CVT_PKNORM_U16_F32::Inst_VOP3__V_CVT_PKNORM_U16_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cvt_pknorm_u16_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CVT_PKNORM_U16_F32
Inst_VOP3__V_CVT_PKNORM_U16_F32::~Inst_VOP3__V_CVT_PKNORM_U16_F32()
{
} // ~Inst_VOP3__V_CVT_PKNORM_U16_F32
// --- description from .arch file ---
// D = {(unorm)S1.f, (unorm)S0.f}.
void
Inst_VOP3__V_CVT_PKNORM_U16_F32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CVT_PKRTZ_F16_F32 class methods ---
Inst_VOP3__V_CVT_PKRTZ_F16_F32::Inst_VOP3__V_CVT_PKRTZ_F16_F32(
InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cvt_pkrtz_f16_f32", false)
{
setFlag(ALU);
setFlag(F32);
} // Inst_VOP3__V_CVT_PKRTZ_F16_F32
Inst_VOP3__V_CVT_PKRTZ_F16_F32::~Inst_VOP3__V_CVT_PKRTZ_F16_F32()
{
} // ~Inst_VOP3__V_CVT_PKRTZ_F16_F32
// --- description from .arch file ---
// D = {flt32_to_flt16(S1.f),flt32_to_flt16(S0.f)}, with round-toward-zero
// --- regardless of current round mode setting in hardware.
// This opcode is intended for use with 16-bit compressed exports.
// See V_CVT_F16_F32 for a version that respects the current rounding mode.
void
Inst_VOP3__V_CVT_PKRTZ_F16_F32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CVT_PK_U16_U32 class methods ---
Inst_VOP3__V_CVT_PK_U16_U32::Inst_VOP3__V_CVT_PK_U16_U32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cvt_pk_u16_u32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_CVT_PK_U16_U32
Inst_VOP3__V_CVT_PK_U16_U32::~Inst_VOP3__V_CVT_PK_U16_U32()
{
} // ~Inst_VOP3__V_CVT_PK_U16_U32
// --- description from .arch file ---
// D = {uint32_to_uint16(S1.u), uint32_to_uint16(S0.u)}.
void
Inst_VOP3__V_CVT_PK_U16_U32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_VOP3__V_CVT_PK_I16_I32 class methods ---
Inst_VOP3__V_CVT_PK_I16_I32::Inst_VOP3__V_CVT_PK_I16_I32(InFmt_VOP3A *iFmt)
: Inst_VOP3A(iFmt, "v_cvt_pk_i16_i32", false)
{
setFlag(ALU);
} // Inst_VOP3__V_CVT_PK_I16_I32
Inst_VOP3__V_CVT_PK_I16_I32::~Inst_VOP3__V_CVT_PK_I16_I32()
{
} // ~Inst_VOP3__V_CVT_PK_I16_I32
// --- description from .arch file ---
// D = {int32_to_int16(S1.i), int32_to_int16(S0.i)}.
void
Inst_VOP3__V_CVT_PK_I16_I32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_ADD_U32 class methods ---
Inst_DS__DS_ADD_U32::Inst_DS__DS_ADD_U32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_add_u32")
{
} // Inst_DS__DS_ADD_U32
Inst_DS__DS_ADD_U32::~Inst_DS__DS_ADD_U32()
{
} // ~Inst_DS__DS_ADD_U32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] += DATA;
// RETURN_DATA = tmp.
void
Inst_DS__DS_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_SUB_U32 class methods ---
Inst_DS__DS_SUB_U32::Inst_DS__DS_SUB_U32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_sub_u32")
{
} // Inst_DS__DS_SUB_U32
Inst_DS__DS_SUB_U32::~Inst_DS__DS_SUB_U32()
{
} // ~Inst_DS__DS_SUB_U32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] -= DATA;
// RETURN_DATA = tmp.
void
Inst_DS__DS_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_RSUB_U32 class methods ---
Inst_DS__DS_RSUB_U32::Inst_DS__DS_RSUB_U32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_rsub_u32")
{
} // Inst_DS__DS_RSUB_U32
Inst_DS__DS_RSUB_U32::~Inst_DS__DS_RSUB_U32()
{
} // ~Inst_DS__DS_RSUB_U32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = DATA - MEM[ADDR];
// RETURN_DATA = tmp.
// Subtraction with reversed operands.
void
Inst_DS__DS_RSUB_U32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_INC_U32 class methods ---
Inst_DS__DS_INC_U32::Inst_DS__DS_INC_U32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_inc_u32")
{
} // Inst_DS__DS_INC_U32
Inst_DS__DS_INC_U32::~Inst_DS__DS_INC_U32()
{
} // ~Inst_DS__DS_INC_U32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
// RETURN_DATA = tmp.
void
Inst_DS__DS_INC_U32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_DEC_U32 class methods ---
Inst_DS__DS_DEC_U32::Inst_DS__DS_DEC_U32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_dec_u32")
{
} // Inst_DS__DS_DEC_U32
Inst_DS__DS_DEC_U32::~Inst_DS__DS_DEC_U32()
{
} // ~Inst_DS__DS_DEC_U32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
// (unsigned compare); RETURN_DATA = tmp.
void
Inst_DS__DS_DEC_U32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MIN_I32 class methods ---
Inst_DS__DS_MIN_I32::Inst_DS__DS_MIN_I32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_min_i32")
{
} // Inst_DS__DS_MIN_I32
Inst_DS__DS_MIN_I32::~Inst_DS__DS_MIN_I32()
{
} // ~Inst_DS__DS_MIN_I32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
// RETURN_DATA = tmp.
void
Inst_DS__DS_MIN_I32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MAX_I32 class methods ---
Inst_DS__DS_MAX_I32::Inst_DS__DS_MAX_I32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_max_i32")
{
} // Inst_DS__DS_MAX_I32
Inst_DS__DS_MAX_I32::~Inst_DS__DS_MAX_I32()
{
} // ~Inst_DS__DS_MAX_I32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
// RETURN_DATA = tmp.
void
Inst_DS__DS_MAX_I32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MIN_U32 class methods ---
Inst_DS__DS_MIN_U32::Inst_DS__DS_MIN_U32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_min_u32")
{
} // Inst_DS__DS_MIN_U32
Inst_DS__DS_MIN_U32::~Inst_DS__DS_MIN_U32()
{
} // ~Inst_DS__DS_MIN_U32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
// RETURN_DATA = tmp.
void
Inst_DS__DS_MIN_U32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MAX_U32 class methods ---
Inst_DS__DS_MAX_U32::Inst_DS__DS_MAX_U32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_max_u32")
{
} // Inst_DS__DS_MAX_U32
Inst_DS__DS_MAX_U32::~Inst_DS__DS_MAX_U32()
{
} // ~Inst_DS__DS_MAX_U32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
// RETURN_DATA = tmp.
void
Inst_DS__DS_MAX_U32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_AND_B32 class methods ---
Inst_DS__DS_AND_B32::Inst_DS__DS_AND_B32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_and_b32")
{
} // Inst_DS__DS_AND_B32
Inst_DS__DS_AND_B32::~Inst_DS__DS_AND_B32()
{
} // ~Inst_DS__DS_AND_B32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] &= DATA;
// RETURN_DATA = tmp.
void
Inst_DS__DS_AND_B32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_OR_B32 class methods ---
Inst_DS__DS_OR_B32::Inst_DS__DS_OR_B32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_or_b32")
{
} // Inst_DS__DS_OR_B32
Inst_DS__DS_OR_B32::~Inst_DS__DS_OR_B32()
{
} // ~Inst_DS__DS_OR_B32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] |= DATA;
// RETURN_DATA = tmp.
void
Inst_DS__DS_OR_B32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_XOR_B32 class methods ---
Inst_DS__DS_XOR_B32::Inst_DS__DS_XOR_B32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_xor_b32")
{
} // Inst_DS__DS_XOR_B32
Inst_DS__DS_XOR_B32::~Inst_DS__DS_XOR_B32()
{
} // ~Inst_DS__DS_XOR_B32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] ^= DATA;
// RETURN_DATA = tmp.
void
Inst_DS__DS_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MSKOR_B32 class methods ---
Inst_DS__DS_MSKOR_B32::Inst_DS__DS_MSKOR_B32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_mskor_b32")
{
} // Inst_DS__DS_MSKOR_B32
Inst_DS__DS_MSKOR_B32::~Inst_DS__DS_MSKOR_B32()
{
} // ~Inst_DS__DS_MSKOR_B32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (MEM_ADDR[ADDR] & ~DATA) | DATA2;
// RETURN_DATA = tmp.
// Masked dword OR, D0 contains the mask and D1 contains the new value.
void
Inst_DS__DS_MSKOR_B32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_WRITE_B32 class methods ---
Inst_DS__DS_WRITE_B32::Inst_DS__DS_WRITE_B32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_write_b32")
{
setFlag(MemoryRef);
setFlag(Store);
} // Inst_DS__DS_WRITE_B32
Inst_DS__DS_WRITE_B32::~Inst_DS__DS_WRITE_B32()
{
} // ~Inst_DS__DS_WRITE_B32
// --- description from .arch file ---
// 32b:
// MEM[ADDR] = DATA.
// Write dword.
void
Inst_DS__DS_WRITE_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
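// If every lane is masked off there is no LDS access to perform;
// drop the request and undo the LGKM instruction count bumped at
// issue.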
if (gpuDynInst->exec_mask.none()) {
wf->decLGKMInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(
gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
addr.read();
data.read();
calcAddr(gpuDynInst, addr);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
(reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane]
= data[lane];
}
}
gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
} // execute
void
Inst_DS__DS_WRITE_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
{
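// The DS immediate byte offset is 16 bits, carried in two 8-bit
// instruction fields.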
Addr offset0 = instData.OFFSET0;
Addr offset1 = instData.OFFSET1;
Addr offset = (offset1 << 8) | offset0;
initMemWrite<VecElemU32>(gpuDynInst, offset);
} // initiateAcc
void
Inst_DS__DS_WRITE_B32::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_DS__DS_WRITE2_B32 class methods ---
Inst_DS__DS_WRITE2_B32::Inst_DS__DS_WRITE2_B32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_write2_b32")
{
setFlag(MemoryRef);
setFlag(Store);
} // Inst_DS__DS_WRITE2_B32
Inst_DS__DS_WRITE2_B32::~Inst_DS__DS_WRITE2_B32()
{
} // ~Inst_DS__DS_WRITE2_B32
// --- description from .arch file ---
// 32b:
// MEM[ADDR_BASE + OFFSET0 * 4] = DATA;
// MEM[ADDR_BASE + OFFSET1 * 4] = DATA2.
// Write 2 dwords.
void
Inst_DS__DS_WRITE2_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
wf->decLGKMInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(
gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
ConstVecOperandU32 data0(gpuDynInst, extData.DATA0);
ConstVecOperandU32 data1(gpuDynInst, extData.DATA1);
addr.read();
data0.read();
data1.read();
calcAddr(gpuDynInst, addr);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
(reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 2]
= data0[lane];
(reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
}
}
gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
} // execute
void
Inst_DS__DS_WRITE2_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
{
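// For write2 the two 8-bit offsets are independent dword indices,
// hence the scaling by 4 bytes.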
Addr offset0 = instData.OFFSET0 * 4;
Addr offset1 = instData.OFFSET1 * 4;
initDualMemWrite<VecElemU32>(gpuDynInst, offset0, offset1);
} // initiateAcc
void
Inst_DS__DS_WRITE2_B32::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_DS__DS_WRITE2ST64_B32 class methods ---
Inst_DS__DS_WRITE2ST64_B32::Inst_DS__DS_WRITE2ST64_B32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_write2st64_b32")
{
setFlag(MemoryRef);
setFlag(Store);
} // Inst_DS__DS_WRITE2ST64_B32
Inst_DS__DS_WRITE2ST64_B32::~Inst_DS__DS_WRITE2ST64_B32()
{
} // ~Inst_DS__DS_WRITE2ST64_B32
// --- description from .arch file ---
// 32b:
// MEM[ADDR_BASE + OFFSET0 * 4 * 64] = DATA;
// MEM[ADDR_BASE + OFFSET1 * 4 * 64] = DATA2;
// Write 2 dwords.
void
Inst_DS__DS_WRITE2ST64_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
wf->decLGKMInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(
gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
ConstVecOperandU32 data0(gpuDynInst, extData.DATA0);
ConstVecOperandU32 data1(gpuDynInst, extData.DATA1);
addr.read();
data0.read();
data1.read();
calcAddr(gpuDynInst, addr);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
(reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 2]
= data0[lane];
(reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
}
}
gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
} // execute
void
Inst_DS__DS_WRITE2ST64_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
{
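// The st64 variant strides in units of 64 dwords, i.e. 256 bytes per
// offset step.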
Addr offset0 = instData.OFFSET0 * 4 * 64;
Addr offset1 = instData.OFFSET1 * 4 * 64;
initDualMemWrite<VecElemU32>(gpuDynInst, offset0, offset1);
} // initiateAcc
void
Inst_DS__DS_WRITE2ST64_B32::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_DS__DS_CMPST_B32 class methods ---
Inst_DS__DS_CMPST_B32::Inst_DS__DS_CMPST_B32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_cmpst_b32")
{
} // Inst_DS__DS_CMPST_B32
Inst_DS__DS_CMPST_B32::~Inst_DS__DS_CMPST_B32()
{
} // ~Inst_DS__DS_CMPST_B32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// src = DATA2;
// cmp = DATA;
// MEM[ADDR] = (tmp == cmp) ? src : tmp;
// RETURN_DATA[0] = tmp.
// Compare and store.
// Caution, the order of src and cmp is the *opposite* of the
// --- BUFFER_ATOMIC_CMPSWAP opcode.
void
Inst_DS__DS_CMPST_B32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_CMPST_F32 class methods ---
Inst_DS__DS_CMPST_F32::Inst_DS__DS_CMPST_F32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_cmpst_f32")
{
setFlag(F32);
} // Inst_DS__DS_CMPST_F32
Inst_DS__DS_CMPST_F32::~Inst_DS__DS_CMPST_F32()
{
} // ~Inst_DS__DS_CMPST_F32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// src = DATA2;
// cmp = DATA;
// MEM[ADDR] = (tmp == cmp) ? src : tmp;
// RETURN_DATA[0] = tmp.
// Floating point compare and store that handles NaN/INF/denormal values.
// Caution, the order of src and cmp is the *opposite* of the
// --- BUFFER_ATOMIC_FCMPSWAP opcode.
void
Inst_DS__DS_CMPST_F32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MIN_F32 class methods ---
Inst_DS__DS_MIN_F32::Inst_DS__DS_MIN_F32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_min_f32")
{
setFlag(F32);
} // Inst_DS__DS_MIN_F32
Inst_DS__DS_MIN_F32::~Inst_DS__DS_MIN_F32()
{
} // ~Inst_DS__DS_MIN_F32
// --- description from .arch file ---
// 32b.
// tmp = MEM[ADDR];
// src = DATA;
// cmp = DATA2;
// MEM[ADDR] = (cmp < tmp) ? src : tmp.
// Floating point minimum that handles NaN/INF/denormal values.
// Note that this opcode is slightly more general-purpose than
// --- BUFFER_ATOMIC_FMIN.
void
Inst_DS__DS_MIN_F32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MAX_F32 class methods ---
Inst_DS__DS_MAX_F32::Inst_DS__DS_MAX_F32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_max_f32")
{
setFlag(F32);
} // Inst_DS__DS_MAX_F32
Inst_DS__DS_MAX_F32::~Inst_DS__DS_MAX_F32()
{
} // ~Inst_DS__DS_MAX_F32
// --- description from .arch file ---
// 32b.
// tmp = MEM[ADDR];
// src = DATA;
// cmp = DATA2;
// MEM[ADDR] = (tmp > cmp) ? src : tmp.
// Floating point maximum that handles NaN/INF/denormal values.
// Note that this opcode is slightly more general-purpose than
// --- BUFFER_ATOMIC_FMAX.
void
Inst_DS__DS_MAX_F32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_NOP class methods ---
Inst_DS__DS_NOP::Inst_DS__DS_NOP(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_nop")
{
setFlag(Nop);
} // Inst_DS__DS_NOP
Inst_DS__DS_NOP::~Inst_DS__DS_NOP()
{
} // ~Inst_DS__DS_NOP
// --- description from .arch file ---
// Do nothing.
void
Inst_DS__DS_NOP::execute(GPUDynInstPtr gpuDynInst)
{
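// ds_nop performs no LDS access, so undo the LGKM instruction count
// bumped when this instruction was issued.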
gpuDynInst->wavefront()->decLGKMInstsIssued();
} // execute
// --- Inst_DS__DS_ADD_F32 class methods ---
Inst_DS__DS_ADD_F32::Inst_DS__DS_ADD_F32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_add_f32")
{
setFlag(F32);
} // Inst_DS__DS_ADD_F32
Inst_DS__DS_ADD_F32::~Inst_DS__DS_ADD_F32()
{
} // ~Inst_DS__DS_ADD_F32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] += DATA;
// RETURN_DATA = tmp.
// Floating point add that handles NaN/INF/denormal values.
void
Inst_DS__DS_ADD_F32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_WRITE_B8 class methods ---
Inst_DS__DS_WRITE_B8::Inst_DS__DS_WRITE_B8(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_write_b8")
{
setFlag(MemoryRef);
setFlag(Store);
} // Inst_DS__DS_WRITE_B8
Inst_DS__DS_WRITE_B8::~Inst_DS__DS_WRITE_B8()
{
} // ~Inst_DS__DS_WRITE_B8
// --- description from .arch file ---
// MEM[ADDR] = DATA[7:0].
// Byte write.
void
Inst_DS__DS_WRITE_B8::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
wf->decLGKMInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(
gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
ConstVecOperandU8 data(gpuDynInst, extData.DATA0);
addr.read();
data.read();
calcAddr(gpuDynInst, addr);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
(reinterpret_cast<VecElemU8*>(gpuDynInst->d_data))[lane]
= data[lane];
}
}
gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
} // execute
void
Inst_DS__DS_WRITE_B8::initiateAcc(GPUDynInstPtr gpuDynInst)
{
Addr offset0 = instData.OFFSET0;
Addr offset1 = instData.OFFSET1;
Addr offset = (offset1 << 8) | offset0;
initMemWrite<VecElemU8>(gpuDynInst, offset);
} // initiateAcc
void
Inst_DS__DS_WRITE_B8::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_DS__DS_WRITE_B16 class methods ---
Inst_DS__DS_WRITE_B16::Inst_DS__DS_WRITE_B16(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_write_b16")
{
setFlag(MemoryRef);
setFlag(Store);
} // Inst_DS__DS_WRITE_B16
Inst_DS__DS_WRITE_B16::~Inst_DS__DS_WRITE_B16()
{
} // ~Inst_DS__DS_WRITE_B16
// --- description from .arch file ---
// MEM[ADDR] = DATA[15:0]
// Short write.
void
Inst_DS__DS_WRITE_B16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
wf->decLGKMInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(
gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
ConstVecOperandU16 data(gpuDynInst, extData.DATA0);
addr.read();
data.read();
calcAddr(gpuDynInst, addr);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
(reinterpret_cast<VecElemU16*>(gpuDynInst->d_data))[lane]
= data[lane];
}
}
gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
} // execute
void
Inst_DS__DS_WRITE_B16::initiateAcc(GPUDynInstPtr gpuDynInst)
{
Addr offset0 = instData.OFFSET0;
Addr offset1 = instData.OFFSET1;
Addr offset = (offset1 << 8) | offset0;
initMemWrite<VecElemU16>(gpuDynInst, offset);
} // initiateAcc
void
Inst_DS__DS_WRITE_B16::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_DS__DS_ADD_RTN_U32 class methods ---
Inst_DS__DS_ADD_RTN_U32::Inst_DS__DS_ADD_RTN_U32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_add_rtn_u32")
{
} // Inst_DS__DS_ADD_RTN_U32
Inst_DS__DS_ADD_RTN_U32::~Inst_DS__DS_ADD_RTN_U32()
{
} // ~Inst_DS__DS_ADD_RTN_U32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] += DATA;
// RETURN_DATA = tmp.
void
Inst_DS__DS_ADD_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_SUB_RTN_U32 class methods ---
Inst_DS__DS_SUB_RTN_U32::Inst_DS__DS_SUB_RTN_U32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_sub_rtn_u32")
{
} // Inst_DS__DS_SUB_RTN_U32
Inst_DS__DS_SUB_RTN_U32::~Inst_DS__DS_SUB_RTN_U32()
{
} // ~Inst_DS__DS_SUB_RTN_U32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] -= DATA;
// RETURN_DATA = tmp.
void
Inst_DS__DS_SUB_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_RSUB_RTN_U32 class methods ---
Inst_DS__DS_RSUB_RTN_U32::Inst_DS__DS_RSUB_RTN_U32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_rsub_rtn_u32")
{
} // Inst_DS__DS_RSUB_RTN_U32
Inst_DS__DS_RSUB_RTN_U32::~Inst_DS__DS_RSUB_RTN_U32()
{
} // ~Inst_DS__DS_RSUB_RTN_U32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = DATA - MEM[ADDR];
// RETURN_DATA = tmp.
// Subtraction with reversed operands.
void
Inst_DS__DS_RSUB_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_INC_RTN_U32 class methods ---
Inst_DS__DS_INC_RTN_U32::Inst_DS__DS_INC_RTN_U32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_inc_rtn_u32")
{
} // Inst_DS__DS_INC_RTN_U32
Inst_DS__DS_INC_RTN_U32::~Inst_DS__DS_INC_RTN_U32()
{
} // ~Inst_DS__DS_INC_RTN_U32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
// RETURN_DATA = tmp.
void
Inst_DS__DS_INC_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_DEC_RTN_U32 class methods ---
Inst_DS__DS_DEC_RTN_U32::Inst_DS__DS_DEC_RTN_U32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_dec_rtn_u32")
{
} // Inst_DS__DS_DEC_RTN_U32
Inst_DS__DS_DEC_RTN_U32::~Inst_DS__DS_DEC_RTN_U32()
{
} // ~Inst_DS__DS_DEC_RTN_U32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
// (unsigned compare); RETURN_DATA = tmp.
void
Inst_DS__DS_DEC_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MIN_RTN_I32 class methods ---
Inst_DS__DS_MIN_RTN_I32::Inst_DS__DS_MIN_RTN_I32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_min_rtn_i32")
{
} // Inst_DS__DS_MIN_RTN_I32
Inst_DS__DS_MIN_RTN_I32::~Inst_DS__DS_MIN_RTN_I32()
{
} // ~Inst_DS__DS_MIN_RTN_I32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
// RETURN_DATA = tmp.
void
Inst_DS__DS_MIN_RTN_I32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MAX_RTN_I32 class methods ---
Inst_DS__DS_MAX_RTN_I32::Inst_DS__DS_MAX_RTN_I32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_max_rtn_i32")
{
} // Inst_DS__DS_MAX_RTN_I32
Inst_DS__DS_MAX_RTN_I32::~Inst_DS__DS_MAX_RTN_I32()
{
} // ~Inst_DS__DS_MAX_RTN_I32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
// RETURN_DATA = tmp.
void
Inst_DS__DS_MAX_RTN_I32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MIN_RTN_U32 class methods ---
Inst_DS__DS_MIN_RTN_U32::Inst_DS__DS_MIN_RTN_U32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_min_rtn_u32")
{
} // Inst_DS__DS_MIN_RTN_U32
Inst_DS__DS_MIN_RTN_U32::~Inst_DS__DS_MIN_RTN_U32()
{
} // ~Inst_DS__DS_MIN_RTN_U32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
// RETURN_DATA = tmp.
void
Inst_DS__DS_MIN_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MAX_RTN_U32 class methods ---
Inst_DS__DS_MAX_RTN_U32::Inst_DS__DS_MAX_RTN_U32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_max_rtn_u32")
{
} // Inst_DS__DS_MAX_RTN_U32
Inst_DS__DS_MAX_RTN_U32::~Inst_DS__DS_MAX_RTN_U32()
{
} // ~Inst_DS__DS_MAX_RTN_U32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
// RETURN_DATA = tmp.
void
Inst_DS__DS_MAX_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_AND_RTN_B32 class methods ---
Inst_DS__DS_AND_RTN_B32::Inst_DS__DS_AND_RTN_B32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_and_rtn_b32")
{
} // Inst_DS__DS_AND_RTN_B32
Inst_DS__DS_AND_RTN_B32::~Inst_DS__DS_AND_RTN_B32()
{
} // ~Inst_DS__DS_AND_RTN_B32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] &= DATA;
// RETURN_DATA = tmp.
void
Inst_DS__DS_AND_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_OR_RTN_B32 class methods ---
Inst_DS__DS_OR_RTN_B32::Inst_DS__DS_OR_RTN_B32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_or_rtn_b32")
{
} // Inst_DS__DS_OR_RTN_B32
Inst_DS__DS_OR_RTN_B32::~Inst_DS__DS_OR_RTN_B32()
{
} // ~Inst_DS__DS_OR_RTN_B32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] |= DATA;
// RETURN_DATA = tmp.
void
Inst_DS__DS_OR_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_XOR_RTN_B32 class methods ---
Inst_DS__DS_XOR_RTN_B32::Inst_DS__DS_XOR_RTN_B32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_xor_rtn_b32")
{
} // Inst_DS__DS_XOR_RTN_B32
Inst_DS__DS_XOR_RTN_B32::~Inst_DS__DS_XOR_RTN_B32()
{
} // ~Inst_DS__DS_XOR_RTN_B32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] ^= DATA;
// RETURN_DATA = tmp.
void
Inst_DS__DS_XOR_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MSKOR_RTN_B32 class methods ---
Inst_DS__DS_MSKOR_RTN_B32::Inst_DS__DS_MSKOR_RTN_B32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_mskor_rtn_b32")
{
} // Inst_DS__DS_MSKOR_RTN_B32
Inst_DS__DS_MSKOR_RTN_B32::~Inst_DS__DS_MSKOR_RTN_B32()
{
} // ~Inst_DS__DS_MSKOR_RTN_B32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (MEM_ADDR[ADDR] & ~DATA) | DATA2;
// RETURN_DATA = tmp.
// Masked dword OR, D0 contains the mask and D1 contains the new value.
void
Inst_DS__DS_MSKOR_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_WRXCHG_RTN_B32 class methods ---
Inst_DS__DS_WRXCHG_RTN_B32::Inst_DS__DS_WRXCHG_RTN_B32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_wrxchg_rtn_b32")
{
} // Inst_DS__DS_WRXCHG_RTN_B32
Inst_DS__DS_WRXCHG_RTN_B32::~Inst_DS__DS_WRXCHG_RTN_B32()
{
} // ~Inst_DS__DS_WRXCHG_RTN_B32
// --- description from .arch file ---
// tmp = MEM[ADDR];
// MEM[ADDR] = DATA;
// RETURN_DATA = tmp.
// Write-exchange operation.
void
Inst_DS__DS_WRXCHG_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_WRXCHG2_RTN_B32 class methods ---
Inst_DS__DS_WRXCHG2_RTN_B32::Inst_DS__DS_WRXCHG2_RTN_B32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_wrxchg2_rtn_b32")
{
} // Inst_DS__DS_WRXCHG2_RTN_B32
Inst_DS__DS_WRXCHG2_RTN_B32::~Inst_DS__DS_WRXCHG2_RTN_B32()
{
} // ~Inst_DS__DS_WRXCHG2_RTN_B32
// --- description from .arch file ---
// Write-exchange 2 separate dwords.
void
Inst_DS__DS_WRXCHG2_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_WRXCHG2ST64_RTN_B32 class methods ---
Inst_DS__DS_WRXCHG2ST64_RTN_B32::Inst_DS__DS_WRXCHG2ST64_RTN_B32(
InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_wrxchg2st64_rtn_b32")
{
} // Inst_DS__DS_WRXCHG2ST64_RTN_B32
Inst_DS__DS_WRXCHG2ST64_RTN_B32::~Inst_DS__DS_WRXCHG2ST64_RTN_B32()
{
} // ~Inst_DS__DS_WRXCHG2ST64_RTN_B32
// --- description from .arch file ---
// Write-exchange 2 separate dwords with a stride of 64 dwords.
void
Inst_DS__DS_WRXCHG2ST64_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_CMPST_RTN_B32 class methods ---
Inst_DS__DS_CMPST_RTN_B32::Inst_DS__DS_CMPST_RTN_B32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_cmpst_rtn_b32")
{
} // Inst_DS__DS_CMPST_RTN_B32
Inst_DS__DS_CMPST_RTN_B32::~Inst_DS__DS_CMPST_RTN_B32()
{
} // ~Inst_DS__DS_CMPST_RTN_B32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// src = DATA2;
// cmp = DATA;
// MEM[ADDR] = (tmp == cmp) ? src : tmp;
// RETURN_DATA[0] = tmp.
// Compare and store.
// Caution, the order of src and cmp is the *opposite* of the
// --- BUFFER_ATOMIC_CMPSWAP opcode.
void
Inst_DS__DS_CMPST_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_CMPST_RTN_F32 class methods ---
Inst_DS__DS_CMPST_RTN_F32::Inst_DS__DS_CMPST_RTN_F32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_cmpst_rtn_f32")
{
setFlag(F32);
} // Inst_DS__DS_CMPST_RTN_F32
Inst_DS__DS_CMPST_RTN_F32::~Inst_DS__DS_CMPST_RTN_F32()
{
} // ~Inst_DS__DS_CMPST_RTN_F32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// src = DATA2;
// cmp = DATA;
// MEM[ADDR] = (tmp == cmp) ? src : tmp;
// RETURN_DATA[0] = tmp.
// Floating point compare and store that handles NaN/INF/denormal values.
// Caution, the order of src and cmp is the *opposite* of the
// --- BUFFER_ATOMIC_FCMPSWAP opcode.
void
Inst_DS__DS_CMPST_RTN_F32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MIN_RTN_F32 class methods ---
Inst_DS__DS_MIN_RTN_F32::Inst_DS__DS_MIN_RTN_F32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_min_rtn_f32")
{
setFlag(F32);
} // Inst_DS__DS_MIN_RTN_F32
Inst_DS__DS_MIN_RTN_F32::~Inst_DS__DS_MIN_RTN_F32()
{
} // ~Inst_DS__DS_MIN_RTN_F32
// --- description from .arch file ---
// 32b.
// tmp = MEM[ADDR];
// src = DATA;
// cmp = DATA2;
// MEM[ADDR] = (cmp < tmp) ? src : tmp.
// Floating point minimum that handles NaN/INF/denormal values.
// Note that this opcode is slightly more general-purpose than
// --- BUFFER_ATOMIC_FMIN.
void
Inst_DS__DS_MIN_RTN_F32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MAX_RTN_F32 class methods ---
Inst_DS__DS_MAX_RTN_F32::Inst_DS__DS_MAX_RTN_F32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_max_rtn_f32")
{
setFlag(F32);
} // Inst_DS__DS_MAX_RTN_F32
Inst_DS__DS_MAX_RTN_F32::~Inst_DS__DS_MAX_RTN_F32()
{
} // ~Inst_DS__DS_MAX_RTN_F32
// --- description from .arch file ---
// 32b.
// tmp = MEM[ADDR];
// src = DATA;
// cmp = DATA2;
// MEM[ADDR] = (tmp > cmp) ? src : tmp.
// Floating point maximum that handles NaN/INF/denormal values.
// Note that this opcode is slightly more general-purpose than
// --- BUFFER_ATOMIC_FMAX.
void
Inst_DS__DS_MAX_RTN_F32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_WRAP_RTN_B32 class methods ---
Inst_DS__DS_WRAP_RTN_B32::Inst_DS__DS_WRAP_RTN_B32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_wrap_rtn_b32")
{
} // Inst_DS__DS_WRAP_RTN_B32
Inst_DS__DS_WRAP_RTN_B32::~Inst_DS__DS_WRAP_RTN_B32()
{
} // ~Inst_DS__DS_WRAP_RTN_B32
// --- description from .arch file ---
// tmp = MEM[ADDR];
// MEM[ADDR] = (tmp >= DATA) ? tmp - DATA : tmp + DATA2;
// RETURN_DATA = tmp.
void
Inst_DS__DS_WRAP_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_ADD_RTN_F32 class methods ---
Inst_DS__DS_ADD_RTN_F32::Inst_DS__DS_ADD_RTN_F32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_add_rtn_f32")
{
setFlag(F32);
} // Inst_DS__DS_ADD_RTN_F32
Inst_DS__DS_ADD_RTN_F32::~Inst_DS__DS_ADD_RTN_F32()
{
} // ~Inst_DS__DS_ADD_RTN_F32
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] += DATA;
// RETURN_DATA = tmp.
// Floating point add that handles NaN/INF/denormal values.
void
Inst_DS__DS_ADD_RTN_F32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_READ_B32 class methods ---
Inst_DS__DS_READ_B32::Inst_DS__DS_READ_B32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_read_b32")
{
setFlag(MemoryRef);
setFlag(Load);
} // Inst_DS__DS_READ_B32
Inst_DS__DS_READ_B32::~Inst_DS__DS_READ_B32()
{
} // ~Inst_DS__DS_READ_B32
// --- description from .arch file ---
// RETURN_DATA = MEM[ADDR].
// Dword read.
void
Inst_DS__DS_READ_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
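// If every lane is inactive there is nothing to read; undo the
// LGKM instruction count taken at issue and retire immediately.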
if (gpuDynInst->exec_mask.none()) {
wf->decLGKMInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(
gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
addr.read();
calcAddr(gpuDynInst, addr);
gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
} // execute
void
Inst_DS__DS_READ_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
{
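// The two 8-bit offset fields concatenate into a single 16-bit
// unsigned byte offset applied to each lane's address.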
Addr offset0 = instData.OFFSET0;
Addr offset1 = instData.OFFSET1;
Addr offset = (offset1 << 8) | offset0;
initMemRead<VecElemU32>(gpuDynInst, offset);
} // initiateAcc
void
Inst_DS__DS_READ_B32::completeAcc(GPUDynInstPtr gpuDynInst)
{
VecOperandU32 vdst(gpuDynInst, extData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
vdst[lane] = (reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane];
}
}
vdst.write();
} // completeAcc
// --- Inst_DS__DS_READ2_B32 class methods ---
Inst_DS__DS_READ2_B32::Inst_DS__DS_READ2_B32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_read2_b32")
{
setFlag(MemoryRef);
setFlag(Load);
} // Inst_DS__DS_READ2_B32
Inst_DS__DS_READ2_B32::~Inst_DS__DS_READ2_B32()
{
} // ~Inst_DS__DS_READ2_B32
// --- description from .arch file ---
// RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 4];
// RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 4].
// Read 2 dwords.
void
Inst_DS__DS_READ2_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
wf->decLGKMInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(
gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
addr.read();
calcAddr(gpuDynInst, addr);
gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
} // execute
void
Inst_DS__DS_READ2_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
{
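// OFFSET0 and OFFSET1 are in units of dwords for ds_read2_b32,
// so scale them to byte offsets before issuing the two reads.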
Addr offset0 = instData.OFFSET0 * 4;
Addr offset1 = instData.OFFSET1 * 4;
initDualMemRead<VecElemU32>(gpuDynInst, offset0, offset1);
} // initiateAcc
void
Inst_DS__DS_READ2_B32::completeAcc(GPUDynInstPtr gpuDynInst)
{
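// The two dwords returned for each lane sit back-to-back in
// d_data; unpack them into consecutive destination VGPRs.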
VecOperandU32 vdst0(gpuDynInst, extData.VDST);
VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
vdst0[lane] = (reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 2];
vdst1[lane] = (reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 2 + 1];
}
}
vdst0.write();
vdst1.write();
} // completeAcc
// --- Inst_DS__DS_READ2ST64_B32 class methods ---
Inst_DS__DS_READ2ST64_B32::Inst_DS__DS_READ2ST64_B32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_read2st64_b32")
{
setFlag(MemoryRef);
setFlag(Load);
} // Inst_DS__DS_READ2ST64_B32
Inst_DS__DS_READ2ST64_B32::~Inst_DS__DS_READ2ST64_B32()
{
} // ~Inst_DS__DS_READ2ST64_B32
// --- description from .arch file ---
// RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 4 * 64];
// RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 4 * 64].
// Read 2 dwords.
void
Inst_DS__DS_READ2ST64_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
wf->decLGKMInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(
gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
addr.read();
calcAddr(gpuDynInst, addr);
gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
} // execute
void
Inst_DS__DS_READ2ST64_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
{
Addr offset0 = (instData.OFFSET0 * 4 * 64);
Addr offset1 = (instData.OFFSET1 * 4 * 64);
initDualMemRead<VecElemU32>(gpuDynInst, offset0, offset1);
} // initiateAcc
void
Inst_DS__DS_READ2ST64_B32::completeAcc(GPUDynInstPtr gpuDynInst)
{
VecOperandU32 vdst0(gpuDynInst, extData.VDST);
VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
vdst0[lane] = (reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 2];
vdst1[lane] = (reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 2 + 1];
}
}
vdst0.write();
vdst1.write();
} // completeAcc
// --- Inst_DS__DS_READ_I8 class methods ---
Inst_DS__DS_READ_I8::Inst_DS__DS_READ_I8(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_read_i8")
{
setFlag(MemoryRef);
setFlag(Load);
} // Inst_DS__DS_READ_I8
Inst_DS__DS_READ_I8::~Inst_DS__DS_READ_I8()
{
} // ~Inst_DS__DS_READ_I8
// --- description from .arch file ---
// RETURN_DATA = signext(MEM[ADDR][7:0]).
// Signed byte read.
void
Inst_DS__DS_READ_I8::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_READ_U8 class methods ---
Inst_DS__DS_READ_U8::Inst_DS__DS_READ_U8(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_read_u8")
{
setFlag(MemoryRef);
setFlag(Load);
} // Inst_DS__DS_READ_U8
Inst_DS__DS_READ_U8::~Inst_DS__DS_READ_U8()
{
} // ~Inst_DS__DS_READ_U8
// --- description from .arch file ---
// RETURN_DATA = {24'h0,MEM[ADDR][7:0]}.
// Unsigned byte read.
void
Inst_DS__DS_READ_U8::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
wf->decLGKMInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(
gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
addr.read();
calcAddr(gpuDynInst, addr);
gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
} // execute
void
Inst_DS__DS_READ_U8::initiateAcc(GPUDynInstPtr gpuDynInst)
{
Addr offset0 = instData.OFFSET0;
Addr offset1 = instData.OFFSET1;
Addr offset = (offset1 << 8) | offset0;
initMemRead<VecElemU8>(gpuDynInst, offset);
} // initiateAcc
void
Inst_DS__DS_READ_U8::completeAcc(GPUDynInstPtr gpuDynInst)
{
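// Zero-extend each returned byte into the 32-bit destination VGPR.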
VecOperandU32 vdst(gpuDynInst, extData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
vdst[lane] = (VecElemU32)(reinterpret_cast<VecElemU8*>(
gpuDynInst->d_data))[lane];
}
}
vdst.write();
} // completeAcc
// --- Inst_DS__DS_READ_I16 class methods ---
Inst_DS__DS_READ_I16::Inst_DS__DS_READ_I16(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_read_i16")
{
setFlag(MemoryRef);
setFlag(Load);
} // Inst_DS__DS_READ_I16
Inst_DS__DS_READ_I16::~Inst_DS__DS_READ_I16()
{
} // ~Inst_DS__DS_READ_I16
// --- description from .arch file ---
// RETURN_DATA = signext(MEM[ADDR][15:0]).
// Signed short read.
void
Inst_DS__DS_READ_I16::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_READ_U16 class methods ---
Inst_DS__DS_READ_U16::Inst_DS__DS_READ_U16(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_read_u16")
{
setFlag(MemoryRef);
setFlag(Load);
} // Inst_DS__DS_READ_U16
Inst_DS__DS_READ_U16::~Inst_DS__DS_READ_U16()
{
} // ~Inst_DS__DS_READ_U16
// --- description from .arch file ---
// RETURN_DATA = {16'h0,MEM[ADDR][15:0]}.
// Unsigned short read.
void
Inst_DS__DS_READ_U16::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
wf->decLGKMInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(
gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
addr.read();
calcAddr(gpuDynInst, addr);
gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
} // execute
void
Inst_DS__DS_READ_U16::initiateAcc(GPUDynInstPtr gpuDynInst)
{
Addr offset0 = instData.OFFSET0;
Addr offset1 = instData.OFFSET1;
Addr offset = (offset1 << 8) | offset0;
initMemRead<VecElemU16>(gpuDynInst, offset);
} // initiateAcc
void
Inst_DS__DS_READ_U16::completeAcc(GPUDynInstPtr gpuDynInst)
{
VecOperandU32 vdst(gpuDynInst, extData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
vdst[lane] = (VecElemU32)(reinterpret_cast<VecElemU16*>(
gpuDynInst->d_data))[lane];
}
}
vdst.write();
} // completeAcc
// --- Inst_DS__DS_SWIZZLE_B32 class methods ---
Inst_DS__DS_SWIZZLE_B32::Inst_DS__DS_SWIZZLE_B32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_swizzle_b32")
{
/**
* While this operation doesn't actually use DS storage we classify
* it as a load here because it does a writeback to a VGPR, which
* fits in better with the LDS pipeline logic.
*/
setFlag(Load);
} // Inst_DS__DS_SWIZZLE_B32
Inst_DS__DS_SWIZZLE_B32::~Inst_DS__DS_SWIZZLE_B32()
{
} // ~Inst_DS__DS_SWIZZLE_B32
// --- description from .arch file ---
// RETURN_DATA = swizzle(vgpr_data, offset1:offset0).
// Dword swizzle, no data is written to LDS memory; See ds_opcodes.docx for
// --- details.
void
Inst_DS__DS_SWIZZLE_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
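// ds_swizzle never issues an LDS request; it completes entirely in
// execute(), so undo the LGKM count taken at issue up front.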
wf->decLGKMInstsIssued();
if (gpuDynInst->exec_mask.none()) {
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()
->cyclesToTicks(Cycles(24)));
ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
VecOperandU32 vdst(gpuDynInst, extData.VDST);
/**
* The "DS pattern" comprises both offset fields and encodes the
* swizzle pattern between lanes. Bit 15 of the DS pattern selects
* between the two swizzle modes: if bit 15 is set use QDMode,
* otherwise use bit-masks mode. The remaining bits dictate how to
* swizzle the lanes.
*
* QDMode: Chunks the lanes into groups of four and swizzles
* within each group. Bits 7:6 dictate where lane 3 (of the
* current chunk) gets its data, bits 5:4 lane 2, and so on.
*
* Bit-mask: This mode breaks bits 14:0 into 3 equal-sized chunks.
* 14:10 is the xor_mask, 9:5 is the or_mask, and 4:0
* is the and_mask. Each lane is swizzled by performing
* the appropriate operation using these masks.
*/
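/**
 * For example, a ds_pattern of 0x801B selects QDMode with
 * bits 7:0 = 0b00011011, which reverses each group of four
 * lanes (lane 0 takes lane 3's data, lane 1 takes lane 2's,
 * and so on). A ds_pattern of 0x041F selects bit-masks mode
 * with and_mask = 0x1F, or_mask = 0, and xor_mask = 1, which
 * swaps each even/odd pair of lanes within a 32-lane half.
 */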
VecElemU16 ds_pattern = ((instData.OFFSET1 << 8) | instData.OFFSET0);
data.read();
if (bits(ds_pattern, 15)) {
// QDMode
for (int lane = 0; lane < NumVecElemPerVecReg; lane += 4) {
/**
* This operation allows data sharing within each group
* of four consecutive threads. Note the increment by
* 4 in the for loop.
*/
if (gpuDynInst->exec_mask[lane]) {
int index0 = lane + bits(ds_pattern, 1, 0);
panic_if(index0 >= NumVecElemPerVecReg, "%s: index0 (%d) "
"is out of bounds.\n", gpuDynInst->disassemble(),
index0);
vdst[lane]
= gpuDynInst->exec_mask[index0] ? data[index0]: 0;
}
if (gpuDynInst->exec_mask[lane + 1]) {
int index1 = lane + bits(ds_pattern, 3, 2);
panic_if(index1 >= NumVecElemPerVecReg, "%s: index1 (%d) "
"is out of bounds.\n", gpuDynInst->disassemble(),
index1);
vdst[lane + 1]
= gpuDynInst->exec_mask[index1] ? data[index1]: 0;
}
if (gpuDynInst->exec_mask[lane + 2]) {
int index2 = lane + bits(ds_pattern, 5, 4);
panic_if(index2 >= NumVecElemPerVecReg, "%s: index2 (%d) "
"is out of bounds.\n", gpuDynInst->disassemble(),
index2);
vdst[lane + 2]
= gpuDynInst->exec_mask[index2] ? data[index2]: 0;
}
if (gpuDynInst->exec_mask[lane + 3]) {
int index3 = lane + bits(ds_pattern, 7, 6);
panic_if(index3 >= NumVecElemPerVecReg, "%s: index3 (%d) "
"is out of bounds.\n", gpuDynInst->disassemble(),
index3);
vdst[lane + 3]
= gpuDynInst->exec_mask[index3] ? data[index3]: 0;
}
}
} else {
// Bit-masks mode
int and_mask = bits(ds_pattern, 4, 0);
int or_mask = bits(ds_pattern, 9, 5);
int xor_mask = bits(ds_pattern, 14, 10);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
int index = (((lane & and_mask) | or_mask) ^ xor_mask);
// The masks cover only 5 bits (one 32-lane half), so
// shift the index into the upper half for lanes 32-63.
if (lane > 31) {
index += 32;
}
panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is "
"out of bounds.\n", gpuDynInst->disassemble(),
index);
vdst[lane]
= gpuDynInst->exec_mask[index] ? data[index] : 0;
}
}
}
vdst.write();
} // execute
// --- Inst_DS__DS_PERMUTE_B32 class methods ---
Inst_DS__DS_PERMUTE_B32::Inst_DS__DS_PERMUTE_B32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_permute_b32")
{
setFlag(MemoryRef);
/**
* While this operation doesn't actually use DS storage we classify
* it as a load here because it does a writeback to a VGPR, which
* fits in better with the LDS pipeline logic.
*/
setFlag(Load);
} // Inst_DS__DS_PERMUTE_B32
Inst_DS__DS_PERMUTE_B32::~Inst_DS__DS_PERMUTE_B32()
{
} // ~Inst_DS__DS_PERMUTE_B32
// --- description from .arch file ---
// Forward permute.
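// In the forward permute each active lane computes a destination
// index from its address operand and scatters its own data there:
// vdst[index] = data[lane].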
void
Inst_DS__DS_PERMUTE_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
wf->decLGKMInstsIssued();
if (gpuDynInst->exec_mask.none()) {
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()
->cyclesToTicks(Cycles(24)));
ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
VecOperandU32 vdst(gpuDynInst, extData.VDST);
addr.read();
data.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
/**
* One of the offset fields can be used for the index.
* It is assumed OFFSET0 would be used, as OFFSET1 is
* typically only used for DS ops that operate on two
* disparate pieces of data.
*/
assert(!instData.OFFSET1);
/**
* The address provided is a byte address, but VGPRs are
* 4 bytes, so we must divide by 4 to get the actual VGPR
* index. Additionally, the index is calculated modulo the
* WF size, 64 in this case, so we simply extract bits 7-2.
*/
int index = bits(addr[lane] + instData.OFFSET0, 7, 2);
panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is out "
"of bounds.\n", gpuDynInst->disassemble(), index);
/**
* If the shuffled index corresponds to a lane that is
* inactive then this instruction writes a 0 to the active
* lane in VDST.
*/
if (wf->execMask(index)) {
vdst[index] = data[lane];
} else {
vdst[index] = 0;
}
}
}
vdst.write();
} // execute
// --- Inst_DS__DS_BPERMUTE_B32 class methods ---
Inst_DS__DS_BPERMUTE_B32::Inst_DS__DS_BPERMUTE_B32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_bpermute_b32")
{
setFlag(MemoryRef);
/**
* While this operation doesn't actually use DS storage we classify
* it as a load here because it does a writeback to a VGPR, which
* fits in better with the LDS pipeline logic.
*/
setFlag(Load);
} // Inst_DS__DS_BPERMUTE_B32
Inst_DS__DS_BPERMUTE_B32::~Inst_DS__DS_BPERMUTE_B32()
{
} // ~Inst_DS__DS_BPERMUTE_B32
// --- description from .arch file ---
// Backward permute.
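// The backward permute is the gathering counterpart of
// ds_permute_b32: each active lane computes a source index and
// reads that lane's data: vdst[lane] = data[index].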
void
Inst_DS__DS_BPERMUTE_B32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
wf->decLGKMInstsIssued();
if (gpuDynInst->exec_mask.none()) {
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()
->cyclesToTicks(Cycles(24)));
ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
VecOperandU32 vdst(gpuDynInst, extData.VDST);
addr.read();
data.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
/**
* One of the offset fields can be used for the index.
* It is assumed OFFSET0 would be used, as OFFSET1 is
* typically only used for DS ops that operate on two
* disparate pieces of data.
*/
assert(!instData.OFFSET1);
/**
* The address provided is a byte address, but VGPRs are
* 4 bytes, so we must divide by 4 to get the actual VGPR
* index. Additionally, the index is calculated modulo the
* WF size, 64 in this case, so we simply extract bits 7-2.
*/
int index = bits(addr[lane] + instData.OFFSET0, 7, 2);
panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is out "
"of bounds.\n", gpuDynInst->disassemble(), index);
/**
* If the shuffled index corresponds to a lane that is
* inactive then this instruction writes a 0 to the active
* lane in VDST.
*/
if (wf->execMask(index)) {
vdst[lane] = data[index];
} else {
vdst[lane] = 0;
}
}
}
vdst.write();
} // execute
// --- Inst_DS__DS_ADD_U64 class methods ---
Inst_DS__DS_ADD_U64::Inst_DS__DS_ADD_U64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_add_u64")
{
} // Inst_DS__DS_ADD_U64
Inst_DS__DS_ADD_U64::~Inst_DS__DS_ADD_U64()
{
} // ~Inst_DS__DS_ADD_U64
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] += DATA[0:1];
// RETURN_DATA[0:1] = tmp.
void
Inst_DS__DS_ADD_U64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_SUB_U64 class methods ---
Inst_DS__DS_SUB_U64::Inst_DS__DS_SUB_U64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_sub_u64")
{
} // Inst_DS__DS_SUB_U64
Inst_DS__DS_SUB_U64::~Inst_DS__DS_SUB_U64()
{
} // ~Inst_DS__DS_SUB_U64
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] -= DATA[0:1];
// RETURN_DATA[0:1] = tmp.
void
Inst_DS__DS_SUB_U64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_RSUB_U64 class methods ---
Inst_DS__DS_RSUB_U64::Inst_DS__DS_RSUB_U64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_rsub_u64")
{
} // Inst_DS__DS_RSUB_U64
Inst_DS__DS_RSUB_U64::~Inst_DS__DS_RSUB_U64()
{
} // ~Inst_DS__DS_RSUB_U64
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] = DATA - MEM[ADDR];
// RETURN_DATA = tmp.
// Subtraction with reversed operands.
void
Inst_DS__DS_RSUB_U64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_INC_U64 class methods ---
Inst_DS__DS_INC_U64::Inst_DS__DS_INC_U64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_inc_u64")
{
} // Inst_DS__DS_INC_U64
Inst_DS__DS_INC_U64::~Inst_DS__DS_INC_U64()
{
} // ~Inst_DS__DS_INC_U64
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
// RETURN_DATA[0:1] = tmp.
void
Inst_DS__DS_INC_U64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_DEC_U64 class methods ---
Inst_DS__DS_DEC_U64::Inst_DS__DS_DEC_U64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_dec_u64")
{
} // Inst_DS__DS_DEC_U64
Inst_DS__DS_DEC_U64::~Inst_DS__DS_DEC_U64()
{
} // ~Inst_DS__DS_DEC_U64
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
// (unsigned compare);
// RETURN_DATA[0:1] = tmp.
void
Inst_DS__DS_DEC_U64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MIN_I64 class methods ---
Inst_DS__DS_MIN_I64::Inst_DS__DS_MIN_I64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_min_i64")
{
} // Inst_DS__DS_MIN_I64
Inst_DS__DS_MIN_I64::~Inst_DS__DS_MIN_I64()
{
} // ~Inst_DS__DS_MIN_I64
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
// RETURN_DATA[0:1] = tmp.
void
Inst_DS__DS_MIN_I64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MAX_I64 class methods ---
Inst_DS__DS_MAX_I64::Inst_DS__DS_MAX_I64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_max_i64")
{
} // Inst_DS__DS_MAX_I64
Inst_DS__DS_MAX_I64::~Inst_DS__DS_MAX_I64()
{
} // ~Inst_DS__DS_MAX_I64
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
// RETURN_DATA[0:1] = tmp.
void
Inst_DS__DS_MAX_I64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MIN_U64 class methods ---
Inst_DS__DS_MIN_U64::Inst_DS__DS_MIN_U64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_min_u64")
{
} // Inst_DS__DS_MIN_U64
Inst_DS__DS_MIN_U64::~Inst_DS__DS_MIN_U64()
{
} // ~Inst_DS__DS_MIN_U64
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
// RETURN_DATA[0:1] = tmp.
void
Inst_DS__DS_MIN_U64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MAX_U64 class methods ---
Inst_DS__DS_MAX_U64::Inst_DS__DS_MAX_U64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_max_u64")
{
} // Inst_DS__DS_MAX_U64
Inst_DS__DS_MAX_U64::~Inst_DS__DS_MAX_U64()
{
} // ~Inst_DS__DS_MAX_U64
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
// RETURN_DATA[0:1] = tmp.
void
Inst_DS__DS_MAX_U64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_AND_B64 class methods ---
Inst_DS__DS_AND_B64::Inst_DS__DS_AND_B64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_and_b64")
{
} // Inst_DS__DS_AND_B64
Inst_DS__DS_AND_B64::~Inst_DS__DS_AND_B64()
{
} // ~Inst_DS__DS_AND_B64
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] &= DATA[0:1];
// RETURN_DATA[0:1] = tmp.
void
Inst_DS__DS_AND_B64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_OR_B64 class methods ---
Inst_DS__DS_OR_B64::Inst_DS__DS_OR_B64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_or_b64")
{
} // Inst_DS__DS_OR_B64
Inst_DS__DS_OR_B64::~Inst_DS__DS_OR_B64()
{
} // ~Inst_DS__DS_OR_B64
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] |= DATA[0:1];
// RETURN_DATA[0:1] = tmp.
void
Inst_DS__DS_OR_B64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_XOR_B64 class methods ---
Inst_DS__DS_XOR_B64::Inst_DS__DS_XOR_B64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_xor_b64")
{
} // Inst_DS__DS_XOR_B64
Inst_DS__DS_XOR_B64::~Inst_DS__DS_XOR_B64()
{
} // ~Inst_DS__DS_XOR_B64
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] ^= DATA[0:1];
// RETURN_DATA[0:1] = tmp.
void
Inst_DS__DS_XOR_B64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MSKOR_B64 class methods ---
Inst_DS__DS_MSKOR_B64::Inst_DS__DS_MSKOR_B64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_mskor_b64")
{
} // Inst_DS__DS_MSKOR_B64
Inst_DS__DS_MSKOR_B64::~Inst_DS__DS_MSKOR_B64()
{
} // ~Inst_DS__DS_MSKOR_B64
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (MEM[ADDR] & ~DATA) | DATA2;
// RETURN_DATA = tmp.
// Masked qword OR, D0 contains the mask and D1 contains the new value.
void
Inst_DS__DS_MSKOR_B64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_WRITE_B64 class methods ---
Inst_DS__DS_WRITE_B64::Inst_DS__DS_WRITE_B64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_write_b64")
{
setFlag(MemoryRef);
setFlag(Store);
} // Inst_DS__DS_WRITE_B64
Inst_DS__DS_WRITE_B64::~Inst_DS__DS_WRITE_B64()
{
} // ~Inst_DS__DS_WRITE_B64
// --- description from .arch file ---
// 64b:
// MEM[ADDR] = DATA.
// Write qword.
void
Inst_DS__DS_WRITE_B64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
wf->decLGKMInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(
gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
ConstVecOperandU64 data(gpuDynInst, extData.DATA0);
addr.read();
data.read();
calcAddr(gpuDynInst, addr);
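// Stage each active lane's qword into d_data; the LDS pipeline
// consumes it when the write request is processed.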
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
(reinterpret_cast<VecElemU64*>(gpuDynInst->d_data))[lane]
= data[lane];
}
}
gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
} // execute
void
Inst_DS__DS_WRITE_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
{
Addr offset0 = instData.OFFSET0;
Addr offset1 = instData.OFFSET1;
Addr offset = (offset1 << 8) | offset0;
initMemWrite<VecElemU64>(gpuDynInst, offset);
} // initiateAcc
void
Inst_DS__DS_WRITE_B64::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_DS__DS_WRITE2_B64 class methods ---
Inst_DS__DS_WRITE2_B64::Inst_DS__DS_WRITE2_B64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_write2_b64")
{
setFlag(MemoryRef);
setFlag(Store);
} // Inst_DS__DS_WRITE2_B64
Inst_DS__DS_WRITE2_B64::~Inst_DS__DS_WRITE2_B64()
{
} // ~Inst_DS__DS_WRITE2_B64
// --- description from .arch file ---
// 64b:
// MEM[ADDR_BASE + OFFSET0 * 8] = DATA;
// MEM[ADDR_BASE + OFFSET1 * 8] = DATA2.
// Write 2 qwords.
void
Inst_DS__DS_WRITE2_B64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
wf->decLGKMInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(
gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
ConstVecOperandU64 data0(gpuDynInst, extData.DATA0);
ConstVecOperandU64 data1(gpuDynInst, extData.DATA1);
addr.read();
data0.read();
data1.read();
calcAddr(gpuDynInst, addr);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
(reinterpret_cast<VecElemU64*>(
gpuDynInst->d_data))[lane * 2] = data0[lane];
(reinterpret_cast<VecElemU64*>(
gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
}
}
gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
} // execute
void
Inst_DS__DS_WRITE2_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
{
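// OFFSET0 and OFFSET1 are in units of qwords for ds_write2_b64,
// so scale them to byte offsets before issuing the two writes.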
Addr offset0 = instData.OFFSET0 * 8;
Addr offset1 = instData.OFFSET1 * 8;
initDualMemWrite<VecElemU64>(gpuDynInst, offset0, offset1);
} // initiateAcc
void
Inst_DS__DS_WRITE2_B64::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_DS__DS_WRITE2ST64_B64 class methods ---
Inst_DS__DS_WRITE2ST64_B64::Inst_DS__DS_WRITE2ST64_B64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_write2st64_b64")
{
setFlag(MemoryRef);
setFlag(Store);
} // Inst_DS__DS_WRITE2ST64_B64
Inst_DS__DS_WRITE2ST64_B64::~Inst_DS__DS_WRITE2ST64_B64()
{
} // ~Inst_DS__DS_WRITE2ST64_B64
// --- description from .arch file ---
// 64b:
// MEM[ADDR_BASE + OFFSET0 * 8 * 64] = DATA;
// MEM[ADDR_BASE + OFFSET1 * 8 * 64] = DATA2;
// Write 2 qwords.
void
Inst_DS__DS_WRITE2ST64_B64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_CMPST_B64 class methods ---
Inst_DS__DS_CMPST_B64::Inst_DS__DS_CMPST_B64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_cmpst_b64")
{
} // Inst_DS__DS_CMPST_B64
Inst_DS__DS_CMPST_B64::~Inst_DS__DS_CMPST_B64()
{
} // ~Inst_DS__DS_CMPST_B64
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// src = DATA2;
// cmp = DATA;
// MEM[ADDR] = (tmp == cmp) ? src : tmp;
// RETURN_DATA[0] = tmp.
// Compare and store.
// Caution, the order of src and cmp is the *opposite* of the
// --- BUFFER_ATOMIC_CMPSWAP_X2 opcode.
void
Inst_DS__DS_CMPST_B64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_CMPST_F64 class methods ---
Inst_DS__DS_CMPST_F64::Inst_DS__DS_CMPST_F64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_cmpst_f64")
{
setFlag(F64);
} // Inst_DS__DS_CMPST_F64
Inst_DS__DS_CMPST_F64::~Inst_DS__DS_CMPST_F64()
{
} // ~Inst_DS__DS_CMPST_F64
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// src = DATA2;
// cmp = DATA;
// MEM[ADDR] = (tmp == cmp) ? src : tmp;
// RETURN_DATA[0] = tmp.
// Floating point compare and store that handles NaN/INF/denormal values.
// Caution, the order of src and cmp is the *opposite* of the
// --- BUFFER_ATOMIC_FCMPSWAP_X2 opcode.
void
Inst_DS__DS_CMPST_F64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MIN_F64 class methods ---
Inst_DS__DS_MIN_F64::Inst_DS__DS_MIN_F64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_min_f64")
{
setFlag(F64);
} // Inst_DS__DS_MIN_F64
Inst_DS__DS_MIN_F64::~Inst_DS__DS_MIN_F64()
{
} // ~Inst_DS__DS_MIN_F64
// --- description from .arch file ---
// 64b.
// tmp = MEM[ADDR];
// src = DATA;
// cmp = DATA2;
// MEM[ADDR] = (cmp < tmp) ? src : tmp.
// Floating point minimum that handles NaN/INF/denormal values.
// Note that this opcode is slightly more general-purpose than
// --- BUFFER_ATOMIC_FMIN_X2.
void
Inst_DS__DS_MIN_F64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MAX_F64 class methods ---
Inst_DS__DS_MAX_F64::Inst_DS__DS_MAX_F64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_max_f64")
{
setFlag(F64);
} // Inst_DS__DS_MAX_F64
Inst_DS__DS_MAX_F64::~Inst_DS__DS_MAX_F64()
{
} // ~Inst_DS__DS_MAX_F64
// --- description from .arch file ---
// 64b.
// tmp = MEM[ADDR];
// src = DATA;
// cmp = DATA2;
// MEM[ADDR] = (tmp > cmp) ? src : tmp.
// Floating point maximum that handles NaN/INF/denormal values.
// Note that this opcode is slightly more general-purpose than
// --- BUFFER_ATOMIC_FMAX_X2.
void
Inst_DS__DS_MAX_F64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_ADD_RTN_U64 class methods ---
Inst_DS__DS_ADD_RTN_U64::Inst_DS__DS_ADD_RTN_U64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_add_rtn_u64")
{
} // Inst_DS__DS_ADD_RTN_U64
Inst_DS__DS_ADD_RTN_U64::~Inst_DS__DS_ADD_RTN_U64()
{
} // ~Inst_DS__DS_ADD_RTN_U64
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] += DATA[0:1];
// RETURN_DATA[0:1] = tmp.
void
Inst_DS__DS_ADD_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_SUB_RTN_U64 class methods ---
Inst_DS__DS_SUB_RTN_U64::Inst_DS__DS_SUB_RTN_U64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_sub_rtn_u64")
{
} // Inst_DS__DS_SUB_RTN_U64
Inst_DS__DS_SUB_RTN_U64::~Inst_DS__DS_SUB_RTN_U64()
{
} // ~Inst_DS__DS_SUB_RTN_U64
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] -= DATA[0:1];
// RETURN_DATA[0:1] = tmp.
void
Inst_DS__DS_SUB_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_RSUB_RTN_U64 class methods ---
Inst_DS__DS_RSUB_RTN_U64::Inst_DS__DS_RSUB_RTN_U64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_rsub_rtn_u64")
{
} // Inst_DS__DS_RSUB_RTN_U64
Inst_DS__DS_RSUB_RTN_U64::~Inst_DS__DS_RSUB_RTN_U64()
{
} // ~Inst_DS__DS_RSUB_RTN_U64
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] = DATA - MEM[ADDR];
// RETURN_DATA = tmp.
// Subtraction with reversed operands.
void
Inst_DS__DS_RSUB_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_INC_RTN_U64 class methods ---
Inst_DS__DS_INC_RTN_U64::Inst_DS__DS_INC_RTN_U64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_inc_rtn_u64")
{
} // Inst_DS__DS_INC_RTN_U64
Inst_DS__DS_INC_RTN_U64::~Inst_DS__DS_INC_RTN_U64()
{
} // ~Inst_DS__DS_INC_RTN_U64
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
// RETURN_DATA[0:1] = tmp.
void
Inst_DS__DS_INC_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_DEC_RTN_U64 class methods ---
Inst_DS__DS_DEC_RTN_U64::Inst_DS__DS_DEC_RTN_U64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_dec_rtn_u64")
{
} // Inst_DS__DS_DEC_RTN_U64
Inst_DS__DS_DEC_RTN_U64::~Inst_DS__DS_DEC_RTN_U64()
{
} // ~Inst_DS__DS_DEC_RTN_U64
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
// (unsigned compare);
// RETURN_DATA[0:1] = tmp.
void
Inst_DS__DS_DEC_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MIN_RTN_I64 class methods ---
Inst_DS__DS_MIN_RTN_I64::Inst_DS__DS_MIN_RTN_I64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_min_rtn_i64")
{
} // Inst_DS__DS_MIN_RTN_I64
Inst_DS__DS_MIN_RTN_I64::~Inst_DS__DS_MIN_RTN_I64()
{
} // ~Inst_DS__DS_MIN_RTN_I64
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
// RETURN_DATA[0:1] = tmp.
void
Inst_DS__DS_MIN_RTN_I64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MAX_RTN_I64 class methods ---
Inst_DS__DS_MAX_RTN_I64::Inst_DS__DS_MAX_RTN_I64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_max_rtn_i64")
{
} // Inst_DS__DS_MAX_RTN_I64
Inst_DS__DS_MAX_RTN_I64::~Inst_DS__DS_MAX_RTN_I64()
{
} // ~Inst_DS__DS_MAX_RTN_I64
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
// RETURN_DATA[0:1] = tmp.
void
Inst_DS__DS_MAX_RTN_I64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MIN_RTN_U64 class methods ---
Inst_DS__DS_MIN_RTN_U64::Inst_DS__DS_MIN_RTN_U64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_min_rtn_u64")
{
} // Inst_DS__DS_MIN_RTN_U64
Inst_DS__DS_MIN_RTN_U64::~Inst_DS__DS_MIN_RTN_U64()
{
} // ~Inst_DS__DS_MIN_RTN_U64
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
// RETURN_DATA[0:1] = tmp.
void
Inst_DS__DS_MIN_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MAX_RTN_U64 class methods ---
Inst_DS__DS_MAX_RTN_U64::Inst_DS__DS_MAX_RTN_U64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_max_rtn_u64")
{
} // Inst_DS__DS_MAX_RTN_U64
Inst_DS__DS_MAX_RTN_U64::~Inst_DS__DS_MAX_RTN_U64()
{
} // ~Inst_DS__DS_MAX_RTN_U64
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
// RETURN_DATA[0:1] = tmp.
void
Inst_DS__DS_MAX_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_AND_RTN_B64 class methods ---
Inst_DS__DS_AND_RTN_B64::Inst_DS__DS_AND_RTN_B64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_and_rtn_b64")
{
} // Inst_DS__DS_AND_RTN_B64
Inst_DS__DS_AND_RTN_B64::~Inst_DS__DS_AND_RTN_B64()
{
} // ~Inst_DS__DS_AND_RTN_B64
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] &= DATA[0:1];
// RETURN_DATA[0:1] = tmp.
void
Inst_DS__DS_AND_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_OR_RTN_B64 class methods ---
Inst_DS__DS_OR_RTN_B64::Inst_DS__DS_OR_RTN_B64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_or_rtn_b64")
{
} // Inst_DS__DS_OR_RTN_B64
Inst_DS__DS_OR_RTN_B64::~Inst_DS__DS_OR_RTN_B64()
{
} // ~Inst_DS__DS_OR_RTN_B64
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] |= DATA[0:1];
// RETURN_DATA[0:1] = tmp.
void
Inst_DS__DS_OR_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_XOR_RTN_B64 class methods ---
Inst_DS__DS_XOR_RTN_B64::Inst_DS__DS_XOR_RTN_B64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_xor_rtn_b64")
{
} // Inst_DS__DS_XOR_RTN_B64
Inst_DS__DS_XOR_RTN_B64::~Inst_DS__DS_XOR_RTN_B64()
{
} // ~Inst_DS__DS_XOR_RTN_B64
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] ^= DATA[0:1];
// RETURN_DATA[0:1] = tmp.
void
Inst_DS__DS_XOR_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MSKOR_RTN_B64 class methods ---
Inst_DS__DS_MSKOR_RTN_B64::Inst_DS__DS_MSKOR_RTN_B64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_mskor_rtn_b64")
{
} // Inst_DS__DS_MSKOR_RTN_B64
Inst_DS__DS_MSKOR_RTN_B64::~Inst_DS__DS_MSKOR_RTN_B64()
{
} // ~Inst_DS__DS_MSKOR_RTN_B64
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (MEM[ADDR] & ~DATA) | DATA2;
// RETURN_DATA = tmp.
// Masked qword OR, D0 contains the mask and D1 contains the new value.
void
Inst_DS__DS_MSKOR_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_WRXCHG_RTN_B64 class methods ---
Inst_DS__DS_WRXCHG_RTN_B64::Inst_DS__DS_WRXCHG_RTN_B64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_wrxchg_rtn_b64")
{
} // Inst_DS__DS_WRXCHG_RTN_B64
Inst_DS__DS_WRXCHG_RTN_B64::~Inst_DS__DS_WRXCHG_RTN_B64()
{
} // ~Inst_DS__DS_WRXCHG_RTN_B64
// --- description from .arch file ---
// tmp = MEM[ADDR];
// MEM[ADDR] = DATA;
// RETURN_DATA = tmp.
// Write-exchange operation.
void
Inst_DS__DS_WRXCHG_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_WRXCHG2_RTN_B64 class methods ---
Inst_DS__DS_WRXCHG2_RTN_B64::Inst_DS__DS_WRXCHG2_RTN_B64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_wrxchg2_rtn_b64")
{
} // Inst_DS__DS_WRXCHG2_RTN_B64
Inst_DS__DS_WRXCHG2_RTN_B64::~Inst_DS__DS_WRXCHG2_RTN_B64()
{
} // ~Inst_DS__DS_WRXCHG2_RTN_B64
// --- description from .arch file ---
// Write-exchange 2 separate qwords.
void
Inst_DS__DS_WRXCHG2_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_WRXCHG2ST64_RTN_B64 class methods ---
Inst_DS__DS_WRXCHG2ST64_RTN_B64::Inst_DS__DS_WRXCHG2ST64_RTN_B64(
InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_wrxchg2st64_rtn_b64")
{
} // Inst_DS__DS_WRXCHG2ST64_RTN_B64
Inst_DS__DS_WRXCHG2ST64_RTN_B64::~Inst_DS__DS_WRXCHG2ST64_RTN_B64()
{
} // ~Inst_DS__DS_WRXCHG2ST64_RTN_B64
// --- description from .arch file ---
// Write-exchange 2 qwords with a stride of 64 qwords.
void
Inst_DS__DS_WRXCHG2ST64_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_CMPST_RTN_B64 class methods ---
Inst_DS__DS_CMPST_RTN_B64::Inst_DS__DS_CMPST_RTN_B64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_cmpst_rtn_b64")
{
} // Inst_DS__DS_CMPST_RTN_B64
Inst_DS__DS_CMPST_RTN_B64::~Inst_DS__DS_CMPST_RTN_B64()
{
} // ~Inst_DS__DS_CMPST_RTN_B64
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// src = DATA2;
// cmp = DATA;
// MEM[ADDR] = (tmp == cmp) ? src : tmp;
// RETURN_DATA[0] = tmp.
// Compare and store.
// Caution, the order of src and cmp is the *opposite* of the
// --- BUFFER_ATOMIC_CMPSWAP_X2 opcode.
void
Inst_DS__DS_CMPST_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_CMPST_RTN_F64 class methods ---
Inst_DS__DS_CMPST_RTN_F64::Inst_DS__DS_CMPST_RTN_F64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_cmpst_rtn_f64")
{
setFlag(F64);
} // Inst_DS__DS_CMPST_RTN_F64
Inst_DS__DS_CMPST_RTN_F64::~Inst_DS__DS_CMPST_RTN_F64()
{
} // ~Inst_DS__DS_CMPST_RTN_F64
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// src = DATA2;
// cmp = DATA;
// MEM[ADDR] = (tmp == cmp) ? src : tmp;
// RETURN_DATA[0] = tmp.
// Floating point compare and store that handles NaN/INF/denormal values.
// Caution, the order of src and cmp is the *opposite* of the
// --- BUFFER_ATOMIC_FCMPSWAP_X2 opcode.
void
Inst_DS__DS_CMPST_RTN_F64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MIN_RTN_F64 class methods ---
Inst_DS__DS_MIN_RTN_F64::Inst_DS__DS_MIN_RTN_F64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_min_rtn_f64")
{
setFlag(F64);
} // Inst_DS__DS_MIN_RTN_F64
Inst_DS__DS_MIN_RTN_F64::~Inst_DS__DS_MIN_RTN_F64()
{
} // ~Inst_DS__DS_MIN_RTN_F64
// --- description from .arch file ---
// 64b.
// tmp = MEM[ADDR];
// src = DATA;
// cmp = DATA2;
// MEM[ADDR] = (cmp < tmp) ? src : tmp.
// Floating point minimum that handles NaN/INF/denormal values.
// Note that this opcode is slightly more general-purpose than
// --- BUFFER_ATOMIC_FMIN_X2.
void
Inst_DS__DS_MIN_RTN_F64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MAX_RTN_F64 class methods ---
Inst_DS__DS_MAX_RTN_F64::Inst_DS__DS_MAX_RTN_F64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_max_rtn_f64")
{
setFlag(F64);
} // Inst_DS__DS_MAX_RTN_F64
Inst_DS__DS_MAX_RTN_F64::~Inst_DS__DS_MAX_RTN_F64()
{
} // ~Inst_DS__DS_MAX_RTN_F64
// --- description from .arch file ---
// 64b.
// tmp = MEM[ADDR];
// src = DATA;
// cmp = DATA2;
// MEM[ADDR] = (tmp > cmp) ? src : tmp.
// Floating point maximum that handles NaN/INF/denormal values.
// Note that this opcode is slightly more general-purpose than
// --- BUFFER_ATOMIC_FMAX_X2.
void
Inst_DS__DS_MAX_RTN_F64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_READ_B64 class methods ---
Inst_DS__DS_READ_B64::Inst_DS__DS_READ_B64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_read_b64")
{
setFlag(MemoryRef);
setFlag(Load);
} // Inst_DS__DS_READ_B64
Inst_DS__DS_READ_B64::~Inst_DS__DS_READ_B64()
{
} // ~Inst_DS__DS_READ_B64
// --- description from .arch file ---
// RETURN_DATA = MEM[ADDR].
// Read 1 qword.
void
Inst_DS__DS_READ_B64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
wf->decLGKMInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(
gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
addr.read();
calcAddr(gpuDynInst, addr);
gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
} // execute
void
Inst_DS__DS_READ_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
{
Addr offset0 = instData.OFFSET0;
Addr offset1 = instData.OFFSET1;
Addr offset = (offset1 << 8) | offset0;
initMemRead<VecElemU64>(gpuDynInst, offset);
} // initiateAcc
void
Inst_DS__DS_READ_B64::completeAcc(GPUDynInstPtr gpuDynInst)
{
VecOperandU64 vdst(gpuDynInst, extData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
vdst[lane] = (reinterpret_cast<VecElemU64*>(
gpuDynInst->d_data))[lane];
}
}
vdst.write();
} // completeAcc
// --- Inst_DS__DS_READ2_B64 class methods ---
Inst_DS__DS_READ2_B64::Inst_DS__DS_READ2_B64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_read2_b64")
{
setFlag(MemoryRef);
setFlag(Load);
} // Inst_DS__DS_READ2_B64
Inst_DS__DS_READ2_B64::~Inst_DS__DS_READ2_B64()
{
} // ~Inst_DS__DS_READ2_B64
// --- description from .arch file ---
// RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 8];
// RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 8].
// Read 2 qwords.
void
Inst_DS__DS_READ2_B64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
wf->decLGKMInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(
gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
addr.read();
calcAddr(gpuDynInst, addr);
gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
} // execute
void
Inst_DS__DS_READ2_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
{
Addr offset0 = instData.OFFSET0 * 8;
Addr offset1 = instData.OFFSET1 * 8;
initDualMemRead<VecElemU64>(gpuDynInst, offset0, offset1);
} // initiateAcc
void
Inst_DS__DS_READ2_B64::completeAcc(GPUDynInstPtr gpuDynInst)
{
VecOperandU64 vdst0(gpuDynInst, extData.VDST);
VecOperandU64 vdst1(gpuDynInst, extData.VDST + 2);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
vdst0[lane] = (reinterpret_cast<VecElemU64*>(
gpuDynInst->d_data))[lane * 2];
vdst1[lane] = (reinterpret_cast<VecElemU64*>(
gpuDynInst->d_data))[lane * 2 + 1];
}
}
vdst0.write();
vdst1.write();
} // completeAcc
// --- Inst_DS__DS_READ2ST64_B64 class methods ---
Inst_DS__DS_READ2ST64_B64::Inst_DS__DS_READ2ST64_B64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_read2st64_b64")
{
setFlag(MemoryRef);
setFlag(Load);
} // Inst_DS__DS_READ2ST64_B64
Inst_DS__DS_READ2ST64_B64::~Inst_DS__DS_READ2ST64_B64()
{
} // ~Inst_DS__DS_READ2ST64_B64
// --- description from .arch file ---
// RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 8 * 64];
// RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 8 * 64].
// Read 2 qwords.
void
Inst_DS__DS_READ2ST64_B64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
wf->decLGKMInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(
gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
addr.read();
calcAddr(gpuDynInst, addr);
gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
} // execute
void
Inst_DS__DS_READ2ST64_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
{
Addr offset0 = (instData.OFFSET0 * 8 * 64);
Addr offset1 = (instData.OFFSET1 * 8 * 64);
initDualMemRead<VecElemU64>(gpuDynInst, offset0, offset1);
} // initiateAcc
void
Inst_DS__DS_READ2ST64_B64::completeAcc(GPUDynInstPtr gpuDynInst)
{
VecOperandU64 vdst0(gpuDynInst, extData.VDST);
VecOperandU64 vdst1(gpuDynInst, extData.VDST + 2);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
vdst0[lane] = (reinterpret_cast<VecElemU64*>(
gpuDynInst->d_data))[lane * 2];
vdst1[lane] = (reinterpret_cast<VecElemU64*>(
gpuDynInst->d_data))[lane * 2 + 1];
}
}
vdst0.write();
vdst1.write();
} // completeAcc
// --- Inst_DS__DS_CONDXCHG32_RTN_B64 class methods ---
Inst_DS__DS_CONDXCHG32_RTN_B64::Inst_DS__DS_CONDXCHG32_RTN_B64(
InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_condxchg32_rtn_b64")
{
} // Inst_DS__DS_CONDXCHG32_RTN_B64
Inst_DS__DS_CONDXCHG32_RTN_B64::~Inst_DS__DS_CONDXCHG32_RTN_B64()
{
} // ~Inst_DS__DS_CONDXCHG32_RTN_B64
// --- description from .arch file ---
// Conditional write exchange.
void
Inst_DS__DS_CONDXCHG32_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_ADD_SRC2_U32 class methods ---
Inst_DS__DS_ADD_SRC2_U32::Inst_DS__DS_ADD_SRC2_U32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_add_src2_u32")
{
} // Inst_DS__DS_ADD_SRC2_U32
Inst_DS__DS_ADD_SRC2_U32::~Inst_DS__DS_ADD_SRC2_U32()
{
} // ~Inst_DS__DS_ADD_SRC2_U32
// --- description from .arch file ---
// 32b:
// A = ADDR_BASE;
// B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
// --- {offset1[6],offset1[6:0],offset0});
// MEM[A] = MEM[A] + MEM[B].
void
Inst_DS__DS_ADD_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_SUB_SRC2_U32 class methods ---
Inst_DS__DS_SUB_SRC2_U32::Inst_DS__DS_SUB_SRC2_U32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_sub_src2_u32")
{
} // Inst_DS__DS_SUB_SRC2_U32
Inst_DS__DS_SUB_SRC2_U32::~Inst_DS__DS_SUB_SRC2_U32()
{
} // ~Inst_DS__DS_SUB_SRC2_U32
// --- description from .arch file ---
// 32b:
// A = ADDR_BASE;
// B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
// --- {offset1[6],offset1[6:0],offset0});
// MEM[A] = MEM[A] - MEM[B].
void
Inst_DS__DS_SUB_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_RSUB_SRC2_U32 class methods ---
Inst_DS__DS_RSUB_SRC2_U32::Inst_DS__DS_RSUB_SRC2_U32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_rsub_src2_u32")
{
} // Inst_DS__DS_RSUB_SRC2_U32
Inst_DS__DS_RSUB_SRC2_U32::~Inst_DS__DS_RSUB_SRC2_U32()
{
} // ~Inst_DS__DS_RSUB_SRC2_U32
// --- description from .arch file ---
// 32b:
// A = ADDR_BASE;
// B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
// --- {offset1[6],offset1[6:0],offset0});
// MEM[A] = MEM[B] - MEM[A].
void
Inst_DS__DS_RSUB_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_INC_SRC2_U32 class methods ---
Inst_DS__DS_INC_SRC2_U32::Inst_DS__DS_INC_SRC2_U32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_inc_src2_u32")
{
} // Inst_DS__DS_INC_SRC2_U32
Inst_DS__DS_INC_SRC2_U32::~Inst_DS__DS_INC_SRC2_U32()
{
} // ~Inst_DS__DS_INC_SRC2_U32
// --- description from .arch file ---
// 32b:
// A = ADDR_BASE;
// B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
// --- {offset1[6],offset1[6:0],offset0});
// MEM[A] = (MEM[A] >= MEM[B] ? 0 : MEM[A] + 1).
void
Inst_DS__DS_INC_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_DEC_SRC2_U32 class methods ---
Inst_DS__DS_DEC_SRC2_U32::Inst_DS__DS_DEC_SRC2_U32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_dec_src2_u32")
{
} // Inst_DS__DS_DEC_SRC2_U32
Inst_DS__DS_DEC_SRC2_U32::~Inst_DS__DS_DEC_SRC2_U32()
{
} // ~Inst_DS__DS_DEC_SRC2_U32
// --- description from .arch file ---
// 32b:
// A = ADDR_BASE;
// B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
// --- {offset1[6],offset1[6:0],offset0});
// MEM[A] = (MEM[A] == 0 || MEM[A] > MEM[B] ? MEM[B] : MEM[A] - 1).
// Uint decrement.
void
Inst_DS__DS_DEC_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MIN_SRC2_I32 class methods ---
Inst_DS__DS_MIN_SRC2_I32::Inst_DS__DS_MIN_SRC2_I32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_min_src2_i32")
{
} // Inst_DS__DS_MIN_SRC2_I32
Inst_DS__DS_MIN_SRC2_I32::~Inst_DS__DS_MIN_SRC2_I32()
{
} // ~Inst_DS__DS_MIN_SRC2_I32
// --- description from .arch file ---
// 32b:
// A = ADDR_BASE;
// B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
// --- {offset1[6],offset1[6:0],offset0});
// MEM[A] = min(MEM[A], MEM[B]).
void
Inst_DS__DS_MIN_SRC2_I32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MAX_SRC2_I32 class methods ---
Inst_DS__DS_MAX_SRC2_I32::Inst_DS__DS_MAX_SRC2_I32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_max_src2_i32")
{
} // Inst_DS__DS_MAX_SRC2_I32
Inst_DS__DS_MAX_SRC2_I32::~Inst_DS__DS_MAX_SRC2_I32()
{
} // ~Inst_DS__DS_MAX_SRC2_I32
// --- description from .arch file ---
// 32b:
// A = ADDR_BASE;
// B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
// --- {offset1[6],offset1[6:0],offset0});
// MEM[A] = max(MEM[A], MEM[B]).
void
Inst_DS__DS_MAX_SRC2_I32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MIN_SRC2_U32 class methods ---
Inst_DS__DS_MIN_SRC2_U32::Inst_DS__DS_MIN_SRC2_U32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_min_src2_u32")
{
} // Inst_DS__DS_MIN_SRC2_U32
Inst_DS__DS_MIN_SRC2_U32::~Inst_DS__DS_MIN_SRC2_U32()
{
} // ~Inst_DS__DS_MIN_SRC2_U32
// --- description from .arch file ---
// 32b:
// A = ADDR_BASE;
// B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
// --- {offset1[6],offset1[6:0],offset0});
// MEM[A] = min(MEM[A], MEM[B]).
void
Inst_DS__DS_MIN_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MAX_SRC2_U32 class methods ---
Inst_DS__DS_MAX_SRC2_U32::Inst_DS__DS_MAX_SRC2_U32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_max_src2_u32")
{
} // Inst_DS__DS_MAX_SRC2_U32
Inst_DS__DS_MAX_SRC2_U32::~Inst_DS__DS_MAX_SRC2_U32()
{
} // ~Inst_DS__DS_MAX_SRC2_U32
// --- description from .arch file ---
// 32b:
// A = ADDR_BASE;
// B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
// --- {offset1[6],offset1[6:0],offset0});
// MEM[A] = max(MEM[A], MEM[B]).
void
Inst_DS__DS_MAX_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_AND_SRC2_B32 class methods ---
Inst_DS__DS_AND_SRC2_B32::Inst_DS__DS_AND_SRC2_B32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_and_src2_b32")
{
} // Inst_DS__DS_AND_SRC2_B32
Inst_DS__DS_AND_SRC2_B32::~Inst_DS__DS_AND_SRC2_B32()
{
} // ~Inst_DS__DS_AND_SRC2_B32
// --- description from .arch file ---
// 32b:
// A = ADDR_BASE;
// B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
// --- {offset1[6],offset1[6:0],offset0});
// MEM[A] = MEM[A] & MEM[B].
void
Inst_DS__DS_AND_SRC2_B32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_OR_SRC2_B32 class methods ---
Inst_DS__DS_OR_SRC2_B32::Inst_DS__DS_OR_SRC2_B32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_or_src2_b32")
{
} // Inst_DS__DS_OR_SRC2_B32
Inst_DS__DS_OR_SRC2_B32::~Inst_DS__DS_OR_SRC2_B32()
{
} // ~Inst_DS__DS_OR_SRC2_B32
// --- description from .arch file ---
// 32b:
// A = ADDR_BASE;
// B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
// --- {offset1[6],offset1[6:0],offset0});
// MEM[A] = MEM[A] | MEM[B].
void
Inst_DS__DS_OR_SRC2_B32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_XOR_SRC2_B32 class methods ---
Inst_DS__DS_XOR_SRC2_B32::Inst_DS__DS_XOR_SRC2_B32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_xor_src2_b32")
{
} // Inst_DS__DS_XOR_SRC2_B32
Inst_DS__DS_XOR_SRC2_B32::~Inst_DS__DS_XOR_SRC2_B32()
{
} // ~Inst_DS__DS_XOR_SRC2_B32
// --- description from .arch file ---
// 32b:
// A = ADDR_BASE;
// B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
// --- {offset1[6],offset1[6:0],offset0});
// MEM[A] = MEM[A] ^ MEM[B].
void
Inst_DS__DS_XOR_SRC2_B32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_WRITE_SRC2_B32 class methods ---
Inst_DS__DS_WRITE_SRC2_B32::Inst_DS__DS_WRITE_SRC2_B32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_write_src2_b32")
{
setFlag(MemoryRef);
setFlag(Store);
} // Inst_DS__DS_WRITE_SRC2_B32
Inst_DS__DS_WRITE_SRC2_B32::~Inst_DS__DS_WRITE_SRC2_B32()
{
} // ~Inst_DS__DS_WRITE_SRC2_B32
// --- description from .arch file ---
// 32b:
// A = ADDR_BASE;
// B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
// --- {offset1[6],offset1[6:0],offset0});
// MEM[A] = MEM[B].
// Write dword.
void
Inst_DS__DS_WRITE_SRC2_B32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MIN_SRC2_F32 class methods ---
Inst_DS__DS_MIN_SRC2_F32::Inst_DS__DS_MIN_SRC2_F32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_min_src2_f32")
{
setFlag(F32);
} // Inst_DS__DS_MIN_SRC2_F32
Inst_DS__DS_MIN_SRC2_F32::~Inst_DS__DS_MIN_SRC2_F32()
{
} // ~Inst_DS__DS_MIN_SRC2_F32
// --- description from .arch file ---
// 32b:
// A = ADDR_BASE;
// B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
// --- {offset1[6],offset1[6:0],offset0});
// MEM[A] = (MEM[B] < MEM[A]) ? MEM[B] : MEM[A].
// Float, handles NaN/INF/denorm.
void
Inst_DS__DS_MIN_SRC2_F32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MAX_SRC2_F32 class methods ---
Inst_DS__DS_MAX_SRC2_F32::Inst_DS__DS_MAX_SRC2_F32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_max_src2_f32")
{
setFlag(F32);
} // Inst_DS__DS_MAX_SRC2_F32
Inst_DS__DS_MAX_SRC2_F32::~Inst_DS__DS_MAX_SRC2_F32()
{
} // ~Inst_DS__DS_MAX_SRC2_F32
// --- description from .arch file ---
// 32b:
// A = ADDR_BASE;
// B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
// --- {offset1[6],offset1[6:0],offset0});
// MEM[A] = (MEM[B] > MEM[A]) ? MEM[B] : MEM[A].
// Float, handles NaN/INF/denorm.
void
Inst_DS__DS_MAX_SRC2_F32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_ADD_SRC2_F32 class methods ---
Inst_DS__DS_ADD_SRC2_F32::Inst_DS__DS_ADD_SRC2_F32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_add_src2_f32")
{
setFlag(F32);
} // Inst_DS__DS_ADD_SRC2_F32
Inst_DS__DS_ADD_SRC2_F32::~Inst_DS__DS_ADD_SRC2_F32()
{
} // ~Inst_DS__DS_ADD_SRC2_F32
// --- description from .arch file ---
// 32b:
// A = ADDR_BASE;
// B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
// --- {offset1[6],offset1[6:0],offset0});
// MEM[A] = MEM[B] + MEM[A].
// Float, handles NaN/INF/denorm.
void
Inst_DS__DS_ADD_SRC2_F32::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_GWS_SEMA_RELEASE_ALL class methods ---
Inst_DS__DS_GWS_SEMA_RELEASE_ALL::Inst_DS__DS_GWS_SEMA_RELEASE_ALL(
InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_gws_sema_release_all")
{
} // Inst_DS__DS_GWS_SEMA_RELEASE_ALL
Inst_DS__DS_GWS_SEMA_RELEASE_ALL::~Inst_DS__DS_GWS_SEMA_RELEASE_ALL()
{
} // ~Inst_DS__DS_GWS_SEMA_RELEASE_ALL
// --- description from .arch file ---
// GDS Only: The GWS resource (rid) indicated will process this opcode by
// updating the counter and labeling the specified resource as a semaphore.
// //Determine the GWS resource to work on
// rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0];
// //Incr the state counter of the resource
// state.counter[rid] = state.wave_in_queue;
// state.type = SEMAPHORE;
// return rd_done; //release calling wave
// This action will release ALL queued waves; it will have no effect if no
// --- waves are present.
void
Inst_DS__DS_GWS_SEMA_RELEASE_ALL::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_GWS_INIT class methods ---
Inst_DS__DS_GWS_INIT::Inst_DS__DS_GWS_INIT(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_gws_init")
{
} // Inst_DS__DS_GWS_INIT
Inst_DS__DS_GWS_INIT::~Inst_DS__DS_GWS_INIT()
{
} // ~Inst_DS__DS_GWS_INIT
// --- description from .arch file ---
// GDS Only: Initialize a barrier or semaphore resource.
// //Determine the GWS resource to work on
// rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0];
// //Get the value to use in init
// index = find_first_valid(vector mask)
// value = DATA[thread: index]
// //Set the state of the resource
// state.counter[rid] = lsb(value); //limit #waves
// state.flag[rid] = 0;
// return rd_done; //release calling wave
void
Inst_DS__DS_GWS_INIT::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_GWS_SEMA_V class methods ---
Inst_DS__DS_GWS_SEMA_V::Inst_DS__DS_GWS_SEMA_V(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_gws_sema_v")
{
} // Inst_DS__DS_GWS_SEMA_V
Inst_DS__DS_GWS_SEMA_V::~Inst_DS__DS_GWS_SEMA_V()
{
} // ~Inst_DS__DS_GWS_SEMA_V
// --- description from .arch file ---
// GDS Only: The GWS resource indicated will process this opcode by
// updating the counter and labeling the resource as a semaphore.
// //Determine the GWS resource to work on
// rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0];
// //Incr the state counter of the resource
// state.counter[rid]++;
// state.type = SEMAPHORE;
// return rd_done; //release calling wave
// This action will release one wave if any are queued in this resource.
void
Inst_DS__DS_GWS_SEMA_V::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_GWS_SEMA_BR class methods ---
Inst_DS__DS_GWS_SEMA_BR::Inst_DS__DS_GWS_SEMA_BR(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_gws_sema_br")
{
} // Inst_DS__DS_GWS_SEMA_BR
Inst_DS__DS_GWS_SEMA_BR::~Inst_DS__DS_GWS_SEMA_BR()
{
} // ~Inst_DS__DS_GWS_SEMA_BR
// --- description from .arch file ---
// GDS Only: The GWS resource indicated will process this opcode by
// updating the counter by the bulk release delivered count and labeling
// the resource as a semaphore.
// //Determine the GWS resource to work on
// rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0];
// index = find first valid (vector mask)
// count = DATA[thread: index];
// //Add count to the resource state counter
// state.counter[rid] += count;
// state.type = SEMAPHORE;
// return rd_done; //release calling wave
// This action will release count number of waves, immediately if queued,
// or as they arrive from the noted resource.
void
Inst_DS__DS_GWS_SEMA_BR::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_GWS_SEMA_P class methods ---
Inst_DS__DS_GWS_SEMA_P::Inst_DS__DS_GWS_SEMA_P(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_gws_sema_p")
{
} // Inst_DS__DS_GWS_SEMA_P
Inst_DS__DS_GWS_SEMA_P::~Inst_DS__DS_GWS_SEMA_P()
{
} // ~Inst_DS__DS_GWS_SEMA_P
// --- description from .arch file ---
// GDS Only: The GWS resource indicated will process this opcode by
// queueing it until counter enables a release and then decrementing the
// counter of the resource as a semaphore.
// //Determine the GWS resource to work on
// rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0];
// state.type = SEMAPHORE;
// ENQUEUE until(state[rid].counter > 0)
// state[rid].counter--;
// return rd_done
void
Inst_DS__DS_GWS_SEMA_P::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_GWS_BARRIER class methods ---
Inst_DS__DS_GWS_BARRIER::Inst_DS__DS_GWS_BARRIER(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_gws_barrier")
{
} // Inst_DS__DS_GWS_BARRIER
Inst_DS__DS_GWS_BARRIER::~Inst_DS__DS_GWS_BARRIER()
{
} // ~Inst_DS__DS_GWS_BARRIER
// --- description from .arch file ---
// GDS Only: The GWS resource indicated will process this opcode by
// queueing it until barrier is satisfied. The number of waves needed is
// passed in as DATA of first valid thread.
// //Determine the GWS resource to work on
// rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + OFFSET0[5:0];
// index = find first valid (vector mask);
// value = DATA[thread: index];
// // Input Decision Machine
// state.type[rid] = BARRIER;
// if(state[rid].counter <= 0) {
// thread[rid].flag = state[rid].flag;
// ENQUEUE;
// state[rid].flag = !state.flag;
// state[rid].counter = value;
// return rd_done;
// } else {
// state[rid].counter--;
// thread.flag = state[rid].flag;
// ENQUEUE;
// }
// Since the waves deliver the count for the next barrier, this function
// can have a different size barrier for each occurrence.
// // Release Machine
// if(state.type == BARRIER) {
// if(state.flag != thread.flag) {
// return rd_done;
// }
// }
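// Note on the pseudocode above: the release machine frees a queued wave
// once state.flag differs from the flag the wave captured at enqueue
// time, and the flag is flipped whenever the counter is rearmed, so
// consecutive barrier generations cannot release each other's waves.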
void
Inst_DS__DS_GWS_BARRIER::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_CONSUME class methods ---
Inst_DS__DS_CONSUME::Inst_DS__DS_CONSUME(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_consume")
{
} // Inst_DS__DS_CONSUME
Inst_DS__DS_CONSUME::~Inst_DS__DS_CONSUME()
{
} // ~Inst_DS__DS_CONSUME
// --- description from .arch file ---
// LDS & GDS. Subtract (count_bits(exec_mask)) from the value stored in DS
// memory at (M0.base + instr_offset). Return the pre-operation value to
// VGPRs.
void
Inst_DS__DS_CONSUME::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_APPEND class methods ---
Inst_DS__DS_APPEND::Inst_DS__DS_APPEND(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_append")
{
} // Inst_DS__DS_APPEND
Inst_DS__DS_APPEND::~Inst_DS__DS_APPEND()
{
} // ~Inst_DS__DS_APPEND
// --- description from .arch file ---
// LDS & GDS. Add (count_bits(exec_mask)) to the value stored in DS memory
// at (M0.base + instr_offset). Return the pre-operation value to VGPRs.
void
Inst_DS__DS_APPEND::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
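// --- illustrative sketch (not part of the original file) ---
// DS_APPEND adds count_bits(exec_mask) to the counter at
// (M0.base + instr_offset) and returns the pre-op value; DS_CONSUME is
// the subtracting twin. A hypothetical scalar form of that
// read-modify-write, with a plain integer standing in for the DS
// memory location:
[[maybe_unused]] static uint32_t
dsAppendSketch(uint32_t &counter, uint64_t execMask)
{
uint32_t preOp = counter;
for (uint64_t m = execMask; m; m &= m - 1)
++counter; // i.e. counter += popcount(execMask); CONSUME subtracts
return preOp; // pre-op value is returned to the VGPR destination
}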
// --- Inst_DS__DS_ORDERED_COUNT class methods ---
Inst_DS__DS_ORDERED_COUNT::Inst_DS__DS_ORDERED_COUNT(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_ordered_count")
{
} // Inst_DS__DS_ORDERED_COUNT
Inst_DS__DS_ORDERED_COUNT::~Inst_DS__DS_ORDERED_COUNT()
{
} // ~Inst_DS__DS_ORDERED_COUNT
// --- description from .arch file ---
// GDS-only. Add (count_bits(exec_mask)) to one of 4 dedicated
// ordered-count counters (aka 'packers'). Additional bits of instr.offset
// field are overloaded to hold packer-id, 'last'.
void
Inst_DS__DS_ORDERED_COUNT::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_ADD_SRC2_U64 class methods ---
Inst_DS__DS_ADD_SRC2_U64::Inst_DS__DS_ADD_SRC2_U64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_add_src2_u64")
{
} // Inst_DS__DS_ADD_SRC2_U64
Inst_DS__DS_ADD_SRC2_U64::~Inst_DS__DS_ADD_SRC2_U64()
{
} // ~Inst_DS__DS_ADD_SRC2_U64
// --- description from .arch file ---
// 64b:
// A = ADDR_BASE;
// B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
// --- {offset1[6],offset1[6:0],offset0});
// MEM[A] = MEM[A] + MEM[B].
void
Inst_DS__DS_ADD_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_SUB_SRC2_U64 class methods ---
Inst_DS__DS_SUB_SRC2_U64::Inst_DS__DS_SUB_SRC2_U64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_sub_src2_u64")
{
} // Inst_DS__DS_SUB_SRC2_U64
Inst_DS__DS_SUB_SRC2_U64::~Inst_DS__DS_SUB_SRC2_U64()
{
} // ~Inst_DS__DS_SUB_SRC2_U64
// --- description from .arch file ---
// 64b:
// A = ADDR_BASE;
// B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
// --- {offset1[6],offset1[6:0],offset0});
// MEM[A] = MEM[A] - MEM[B].
void
Inst_DS__DS_SUB_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_RSUB_SRC2_U64 class methods ---
Inst_DS__DS_RSUB_SRC2_U64::Inst_DS__DS_RSUB_SRC2_U64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_rsub_src2_u64")
{
} // Inst_DS__DS_RSUB_SRC2_U64
Inst_DS__DS_RSUB_SRC2_U64::~Inst_DS__DS_RSUB_SRC2_U64()
{
} // ~Inst_DS__DS_RSUB_SRC2_U64
// --- description from .arch file ---
// 64b:
// A = ADDR_BASE;
// B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
// --- {offset1[6],offset1[6:0],offset0});
// MEM[A] = MEM[B] - MEM[A].
void
Inst_DS__DS_RSUB_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_INC_SRC2_U64 class methods ---
Inst_DS__DS_INC_SRC2_U64::Inst_DS__DS_INC_SRC2_U64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_inc_src2_u64")
{
} // Inst_DS__DS_INC_SRC2_U64
Inst_DS__DS_INC_SRC2_U64::~Inst_DS__DS_INC_SRC2_U64()
{
} // ~Inst_DS__DS_INC_SRC2_U64
// --- description from .arch file ---
// 64b:
// A = ADDR_BASE;
// B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
// --- {offset1[6],offset1[6:0],offset0});
// MEM[A] = (MEM[A] >= MEM[B] ? 0 : MEM[A] + 1).
void
Inst_DS__DS_INC_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_DEC_SRC2_U64 class methods ---
Inst_DS__DS_DEC_SRC2_U64::Inst_DS__DS_DEC_SRC2_U64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_dec_src2_u64")
{
} // Inst_DS__DS_DEC_SRC2_U64
Inst_DS__DS_DEC_SRC2_U64::~Inst_DS__DS_DEC_SRC2_U64()
{
} // ~Inst_DS__DS_DEC_SRC2_U64
// --- description from .arch file ---
// 64b:
// A = ADDR_BASE;
// B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
// --- {offset1[6],offset1[6:0],offset0});
// MEM[A] = (MEM[A] == 0 || MEM[A] > MEM[B] ? MEM[B] : MEM[A] - 1).
// Uint decrement.
void
Inst_DS__DS_DEC_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MIN_SRC2_I64 class methods ---
Inst_DS__DS_MIN_SRC2_I64::Inst_DS__DS_MIN_SRC2_I64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_min_src2_i64")
{
} // Inst_DS__DS_MIN_SRC2_I64
Inst_DS__DS_MIN_SRC2_I64::~Inst_DS__DS_MIN_SRC2_I64()
{
} // ~Inst_DS__DS_MIN_SRC2_I64
// --- description from .arch file ---
// 64b:
// A = ADDR_BASE;
// B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
// --- {offset1[6],offset1[6:0],offset0});
// MEM[A] = min(MEM[A], MEM[B]).
void
Inst_DS__DS_MIN_SRC2_I64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MAX_SRC2_I64 class methods ---
Inst_DS__DS_MAX_SRC2_I64::Inst_DS__DS_MAX_SRC2_I64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_max_src2_i64")
{
} // Inst_DS__DS_MAX_SRC2_I64
Inst_DS__DS_MAX_SRC2_I64::~Inst_DS__DS_MAX_SRC2_I64()
{
} // ~Inst_DS__DS_MAX_SRC2_I64
// --- description from .arch file ---
// 64b:
// A = ADDR_BASE;
// B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
// --- {offset1[6],offset1[6:0],offset0});
// MEM[A] = max(MEM[A], MEM[B]).
void
Inst_DS__DS_MAX_SRC2_I64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MIN_SRC2_U64 class methods ---
Inst_DS__DS_MIN_SRC2_U64::Inst_DS__DS_MIN_SRC2_U64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_min_src2_u64")
{
} // Inst_DS__DS_MIN_SRC2_U64
Inst_DS__DS_MIN_SRC2_U64::~Inst_DS__DS_MIN_SRC2_U64()
{
} // ~Inst_DS__DS_MIN_SRC2_U64
// --- description from .arch file ---
// 64b:
// A = ADDR_BASE;
// B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
// --- {offset1[6],offset1[6:0],offset0});
// MEM[A] = min(MEM[A], MEM[B]).
void
Inst_DS__DS_MIN_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MAX_SRC2_U64 class methods ---
Inst_DS__DS_MAX_SRC2_U64::Inst_DS__DS_MAX_SRC2_U64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_max_src2_u64")
{
} // Inst_DS__DS_MAX_SRC2_U64
Inst_DS__DS_MAX_SRC2_U64::~Inst_DS__DS_MAX_SRC2_U64()
{
} // ~Inst_DS__DS_MAX_SRC2_U64
// --- description from .arch file ---
// 64b:
// A = ADDR_BASE;
// B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
// --- {offset1[6],offset1[6:0],offset0});
// MEM[A] = max(MEM[A], MEM[B]).
void
Inst_DS__DS_MAX_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_AND_SRC2_B64 class methods ---
Inst_DS__DS_AND_SRC2_B64::Inst_DS__DS_AND_SRC2_B64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_and_src2_b64")
{
} // Inst_DS__DS_AND_SRC2_B64
Inst_DS__DS_AND_SRC2_B64::~Inst_DS__DS_AND_SRC2_B64()
{
} // ~Inst_DS__DS_AND_SRC2_B64
// --- description from .arch file ---
// 64b:
// A = ADDR_BASE;
// B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
// --- {offset1[6],offset1[6:0],offset0});
// MEM[A] = MEM[A] & MEM[B].
void
Inst_DS__DS_AND_SRC2_B64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_OR_SRC2_B64 class methods ---
Inst_DS__DS_OR_SRC2_B64::Inst_DS__DS_OR_SRC2_B64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_or_src2_b64")
{
} // Inst_DS__DS_OR_SRC2_B64
Inst_DS__DS_OR_SRC2_B64::~Inst_DS__DS_OR_SRC2_B64()
{
} // ~Inst_DS__DS_OR_SRC2_B64
// --- description from .arch file ---
// 64b:
// A = ADDR_BASE;
// B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
// --- {offset1[6],offset1[6:0],offset0});
// MEM[A] = MEM[A] | MEM[B].
void
Inst_DS__DS_OR_SRC2_B64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_XOR_SRC2_B64 class methods ---
Inst_DS__DS_XOR_SRC2_B64::Inst_DS__DS_XOR_SRC2_B64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_xor_src2_b64")
{
} // Inst_DS__DS_XOR_SRC2_B64
Inst_DS__DS_XOR_SRC2_B64::~Inst_DS__DS_XOR_SRC2_B64()
{
} // ~Inst_DS__DS_XOR_SRC2_B64
// --- description from .arch file ---
// 64b:
// A = ADDR_BASE;
// B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
// --- {offset1[6],offset1[6:0],offset0});
// MEM[A] = MEM[A] ^ MEM[B].
void
Inst_DS__DS_XOR_SRC2_B64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_WRITE_SRC2_B64 class methods ---
Inst_DS__DS_WRITE_SRC2_B64::Inst_DS__DS_WRITE_SRC2_B64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_write_src2_b64")
{
setFlag(MemoryRef);
setFlag(Store);
} // Inst_DS__DS_WRITE_SRC2_B64
Inst_DS__DS_WRITE_SRC2_B64::~Inst_DS__DS_WRITE_SRC2_B64()
{
} // ~Inst_DS__DS_WRITE_SRC2_B64
// --- description from .arch file ---
// 64b:
// A = ADDR_BASE;
// B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
// --- {offset1[6],offset1[6:0],offset0});
// MEM[A] = MEM[B].
// Write qword.
void
Inst_DS__DS_WRITE_SRC2_B64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MIN_SRC2_F64 class methods ---
Inst_DS__DS_MIN_SRC2_F64::Inst_DS__DS_MIN_SRC2_F64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_min_src2_f64")
{
setFlag(F64);
} // Inst_DS__DS_MIN_SRC2_F64
Inst_DS__DS_MIN_SRC2_F64::~Inst_DS__DS_MIN_SRC2_F64()
{
} // ~Inst_DS__DS_MIN_SRC2_F64
// --- description from .arch file ---
// 64b:
// A = ADDR_BASE;
// B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
// --- {offset1[6],offset1[6:0],offset0});
// MEM[A] = (MEM[B] < MEM[A]) ? MEM[B] : MEM[A].
// Float, handles NaN/INF/denorm.
void
Inst_DS__DS_MIN_SRC2_F64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_MAX_SRC2_F64 class methods ---
Inst_DS__DS_MAX_SRC2_F64::Inst_DS__DS_MAX_SRC2_F64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_max_src2_f64")
{
setFlag(F64);
} // Inst_DS__DS_MAX_SRC2_F64
Inst_DS__DS_MAX_SRC2_F64::~Inst_DS__DS_MAX_SRC2_F64()
{
} // ~Inst_DS__DS_MAX_SRC2_F64
// --- description from .arch file ---
// 64b:
// A = ADDR_BASE;
// B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
// --- {offset1[6],offset1[6:0],offset0});
// MEM[A] = (MEM[B] > MEM[A]) ? MEM[B] : MEM[A].
// Float, handles NaN/INF/denorm.
void
Inst_DS__DS_MAX_SRC2_F64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_WRITE_B96 class methods ---
Inst_DS__DS_WRITE_B96::Inst_DS__DS_WRITE_B96(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_write_b96")
{
setFlag(MemoryRef);
setFlag(Store);
} // Inst_DS__DS_WRITE_B96
Inst_DS__DS_WRITE_B96::~Inst_DS__DS_WRITE_B96()
{
} // ~Inst_DS__DS_WRITE_B96
// --- description from .arch file ---
// {MEM[ADDR + 8], MEM[ADDR + 4], MEM[ADDR]} = DATA[95:0].
// Tri-dword write.
void
Inst_DS__DS_WRITE_B96::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_WRITE_B128 class methods ---
Inst_DS__DS_WRITE_B128::Inst_DS__DS_WRITE_B128(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_write_b128")
{
setFlag(MemoryRef);
setFlag(Store);
} // Inst_DS__DS_WRITE_B128
Inst_DS__DS_WRITE_B128::~Inst_DS__DS_WRITE_B128()
{
} // ~Inst_DS__DS_WRITE_B128
// --- description from .arch file ---
// {MEM[ADDR + 12], MEM[ADDR + 8], MEM[ADDR + 4], MEM[ADDR]} = DATA[127:0].
// Quad-dword write.
void
Inst_DS__DS_WRITE_B128::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_READ_B96 class methods ---
Inst_DS__DS_READ_B96::Inst_DS__DS_READ_B96(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_read_b96")
{
setFlag(MemoryRef);
setFlag(Load);
} // Inst_DS__DS_READ_B96
Inst_DS__DS_READ_B96::~Inst_DS__DS_READ_B96()
{
} // ~Inst_DS__DS_READ_B96
// --- description from .arch file ---
// Tri-dword read.
void
Inst_DS__DS_READ_B96::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_DS__DS_READ_B128 class methods ---
Inst_DS__DS_READ_B128::Inst_DS__DS_READ_B128(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_read_b128")
{
setFlag(MemoryRef);
setFlag(Load);
} // Inst_DS__DS_READ_B128
Inst_DS__DS_READ_B128::~Inst_DS__DS_READ_B128()
{
} // ~Inst_DS__DS_READ_B128
// --- description from .arch file ---
// Quad-dword read.
void
Inst_DS__DS_READ_B128::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MUBUF__BUFFER_LOAD_FORMAT_X class methods ---
Inst_MUBUF__BUFFER_LOAD_FORMAT_X
::Inst_MUBUF__BUFFER_LOAD_FORMAT_X(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_load_format_x")
{
setFlag(MemoryRef);
setFlag(Load);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_LOAD_FORMAT_X
Inst_MUBUF__BUFFER_LOAD_FORMAT_X::~Inst_MUBUF__BUFFER_LOAD_FORMAT_X()
{
} // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_X
// --- description from .arch file ---
// Untyped buffer load 1 dword with format conversion.
void
Inst_MUBUF__BUFFER_LOAD_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MUBUF__BUFFER_LOAD_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MUBUF__BUFFER_LOAD_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MUBUF__BUFFER_LOAD_FORMAT_XY class methods ---
Inst_MUBUF__BUFFER_LOAD_FORMAT_XY
::Inst_MUBUF__BUFFER_LOAD_FORMAT_XY(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_load_format_xy")
{
setFlag(MemoryRef);
setFlag(Load);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_LOAD_FORMAT_XY
Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XY()
{
} // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XY
// --- description from .arch file ---
// Untyped buffer load 2 dwords with format conversion.
void
Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ class methods ---
Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ
::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_load_format_xyz")
{
setFlag(MemoryRef);
setFlag(Load);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ
Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ()
{
} // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ
// --- description from .arch file ---
// Untyped buffer load 3 dwords with format conversion.
void
Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW class methods ---
Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW
::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_load_format_xyzw")
{
setFlag(MemoryRef);
setFlag(Load);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW
Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW()
{
} // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW
// --- description from .arch file ---
// Untyped buffer load 4 dwords with format conversion.
void
Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MUBUF__BUFFER_STORE_FORMAT_X class methods ---
Inst_MUBUF__BUFFER_STORE_FORMAT_X
::Inst_MUBUF__BUFFER_STORE_FORMAT_X(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_store_format_x")
{
setFlag(MemoryRef);
setFlag(Store);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_STORE_FORMAT_X
Inst_MUBUF__BUFFER_STORE_FORMAT_X::~Inst_MUBUF__BUFFER_STORE_FORMAT_X()
{
} // ~Inst_MUBUF__BUFFER_STORE_FORMAT_X
// --- description from .arch file ---
// Untyped buffer store 1 dword with format conversion.
void
Inst_MUBUF__BUFFER_STORE_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MUBUF__BUFFER_STORE_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MUBUF__BUFFER_STORE_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MUBUF__BUFFER_STORE_FORMAT_XY class methods ---
Inst_MUBUF__BUFFER_STORE_FORMAT_XY
::Inst_MUBUF__BUFFER_STORE_FORMAT_XY(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_store_format_xy")
{
setFlag(MemoryRef);
setFlag(Store);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_STORE_FORMAT_XY
Inst_MUBUF__BUFFER_STORE_FORMAT_XY::~Inst_MUBUF__BUFFER_STORE_FORMAT_XY()
{
} // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XY
// --- description from .arch file ---
// Untyped buffer store 2 dwords with format conversion.
void
Inst_MUBUF__BUFFER_STORE_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MUBUF__BUFFER_STORE_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MUBUF__BUFFER_STORE_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ class methods ---
Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ
::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_store_format_xyz")
{
setFlag(MemoryRef);
setFlag(Store);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ
Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ()
{
} // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ
// --- description from .arch file ---
// Untyped buffer store 3 dwords with format conversion.
void
Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW class methods ---
Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_store_format_xyzw")
{
setFlag(MemoryRef);
setFlag(Store);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW()
{
} // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
// --- description from .arch file ---
// Untyped buffer store 4 dwords with format conversion.
void
Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X class methods ---
Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_load_format_d16_x")
{
setFlag(MemoryRef);
setFlag(Load);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X()
{
} // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
// --- description from .arch file ---
// Untyped buffer load 1 dword with format conversion.
void
Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY class methods ---
Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_load_format_d16_xy")
{
setFlag(MemoryRef);
setFlag(Load);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY()
{
} // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
// --- description from .arch file ---
// Untyped buffer load 2 dwords with format conversion.
void
Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::initiateAcc(
GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::completeAcc(
GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ class methods ---
Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_load_format_d16_xyz")
{
setFlag(MemoryRef);
setFlag(Load);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ()
{
} // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
// --- description from .arch file ---
// Untyped buffer load 3 dwords with format conversion.
void
Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::initiateAcc(
GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::completeAcc(
GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW class methods ---
Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_load_format_d16_xyzw")
{
setFlag(MemoryRef);
setFlag(Load);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW()
{
} // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
// --- description from .arch file ---
// Untyped buffer load 4 dwords with format conversion.
void
Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::initiateAcc(
GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::completeAcc(
GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X class methods ---
Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_store_format_d16_x")
{
setFlag(MemoryRef);
setFlag(Store);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X()
{
} // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
// --- description from .arch file ---
// Untyped buffer store 1 dword with format conversion.
void
Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::initiateAcc(
GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::completeAcc(
GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY class methods ---
Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_store_format_d16_xy")
{
setFlag(MemoryRef);
setFlag(Store);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY()
{
} // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
// --- description from .arch file ---
// Untyped buffer store 2 dwords with format conversion.
void
Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::initiateAcc(
GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::completeAcc(
GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ class methods ---
Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_store_format_d16_xyz")
{
setFlag(MemoryRef);
setFlag(Store);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ()
{
} // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
// --- description from .arch file ---
// Untyped buffer store 3 dwords with format conversion.
void
Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::initiateAcc(
GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::completeAcc(
GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW class methods ---
Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_store_format_d16_xyzw")
{
setFlag(MemoryRef);
setFlag(Store);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW()
{
} // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
// --- description from .arch file ---
// Untyped buffer store 4 dwords with format conversion.
void
Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::initiateAcc(
GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::completeAcc(
GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MUBUF__BUFFER_LOAD_UBYTE class methods ---
Inst_MUBUF__BUFFER_LOAD_UBYTE
::Inst_MUBUF__BUFFER_LOAD_UBYTE(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_load_ubyte")
{
setFlag(MemoryRef);
setFlag(Load);
if (instData.LDS) {
setFlag(GroupSegment);
} else {
setFlag(GlobalSegment);
}
} // Inst_MUBUF__BUFFER_LOAD_UBYTE
Inst_MUBUF__BUFFER_LOAD_UBYTE::~Inst_MUBUF__BUFFER_LOAD_UBYTE()
{
} // ~Inst_MUBUF__BUFFER_LOAD_UBYTE
// --- description from .arch file ---
// Untyped buffer load unsigned byte (zero extend to VGPR destination).
void
Inst_MUBUF__BUFFER_LOAD_UBYTE::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
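// If every lane is masked off, undo the VMEM issue accounting and
// retire the instruction without sending a zero-lane request downstream.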
if (gpuDynInst->exec_mask.none()) {
wf->decVMemInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
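// SRSRC names an aligned group of four SGPRs, so scale it to an
// absolute SGPR index before reading the 128-bit resource descriptor.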
ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
rsrcDesc.read();
offset.read();
int inst_offset = instData.OFFSET;
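// calcAddr takes the per-lane offset operand first and the index
// operand second. When IDXEN is set the index arrives in VADDR (with
// the offset, if any, in VADDR+1), hence the swapped (addr1, addr0)
// argument order in the IDXEN cases below.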
if (!instData.IDXEN && !instData.OFFEN) {
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr0, addr1, rsrcDesc, offset, inst_offset);
} else if (!instData.IDXEN && instData.OFFEN) {
addr0.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr0, addr1, rsrcDesc, offset, inst_offset);
} else if (instData.IDXEN && !instData.OFFEN) {
addr0.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr1, addr0, rsrcDesc, offset, inst_offset);
} else {
addr0.read();
addr1.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr1, addr0, rsrcDesc, offset, inst_offset);
}
gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
} // execute
void
Inst_MUBUF__BUFFER_LOAD_UBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemRead<VecElemU8>(gpuDynInst);
} // initiateAcc
void
Inst_MUBUF__BUFFER_LOAD_UBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
{
VecOperandU32 vdst(gpuDynInst, extData.VDATA);
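// Lanes the address calculation flagged as out-of-bounds read back 0,
// matching the buffer hardware's OOB-load behavior.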
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
if (!oobMask[lane]) {
vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU8*>(
gpuDynInst->d_data))[lane]);
} else {
vdst[lane] = 0;
}
}
}
vdst.write();
} // completeAcc
// --- Inst_MUBUF__BUFFER_LOAD_SBYTE class methods ---
Inst_MUBUF__BUFFER_LOAD_SBYTE
::Inst_MUBUF__BUFFER_LOAD_SBYTE(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_load_sbyte")
{
setFlag(MemoryRef);
setFlag(Load);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_LOAD_SBYTE
Inst_MUBUF__BUFFER_LOAD_SBYTE::~Inst_MUBUF__BUFFER_LOAD_SBYTE()
{
} // ~Inst_MUBUF__BUFFER_LOAD_SBYTE
// --- description from .arch file ---
// Untyped buffer load signed byte (sign extend to VGPR destination).
void
Inst_MUBUF__BUFFER_LOAD_SBYTE::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MUBUF__BUFFER_LOAD_SBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MUBUF__BUFFER_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MUBUF__BUFFER_LOAD_USHORT class methods ---
Inst_MUBUF__BUFFER_LOAD_USHORT
::Inst_MUBUF__BUFFER_LOAD_USHORT(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_load_ushort")
{
setFlag(MemoryRef);
setFlag(Load);
if (instData.LDS) {
setFlag(GroupSegment);
} else {
setFlag(GlobalSegment);
}
} // Inst_MUBUF__BUFFER_LOAD_USHORT
Inst_MUBUF__BUFFER_LOAD_USHORT::~Inst_MUBUF__BUFFER_LOAD_USHORT()
{
} // ~Inst_MUBUF__BUFFER_LOAD_USHORT
// --- description from .arch file ---
// Untyped buffer load unsigned short (zero extend to VGPR destination).
void
Inst_MUBUF__BUFFER_LOAD_USHORT::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
wf->decVMemInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
rsrcDesc.read();
offset.read();
int inst_offset = instData.OFFSET;
if (!instData.IDXEN && !instData.OFFEN) {
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr0, addr1, rsrcDesc, offset, inst_offset);
} else if (!instData.IDXEN && instData.OFFEN) {
addr0.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr0, addr1, rsrcDesc, offset, inst_offset);
} else if (instData.IDXEN && !instData.OFFEN) {
addr0.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr1, addr0, rsrcDesc, offset, inst_offset);
} else {
addr0.read();
addr1.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr1, addr0, rsrcDesc, offset, inst_offset);
}
gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
} // execute
void
Inst_MUBUF__BUFFER_LOAD_USHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemRead<VecElemU16>(gpuDynInst);
} // initiateAcc
void
Inst_MUBUF__BUFFER_LOAD_USHORT::completeAcc(GPUDynInstPtr gpuDynInst)
{
VecOperandU32 vdst(gpuDynInst, extData.VDATA);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
if (!oobMask[lane]) {
vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU16*>(
gpuDynInst->d_data))[lane]);
} else {
vdst[lane] = 0;
}
}
}
vdst.write();
} // completeAcc
// --- Inst_MUBUF__BUFFER_LOAD_SSHORT class methods ---
Inst_MUBUF__BUFFER_LOAD_SSHORT
::Inst_MUBUF__BUFFER_LOAD_SSHORT(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_load_sshort")
{
setFlag(MemoryRef);
setFlag(Load);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_LOAD_SSHORT
Inst_MUBUF__BUFFER_LOAD_SSHORT::~Inst_MUBUF__BUFFER_LOAD_SSHORT()
{
} // ~Inst_MUBUF__BUFFER_LOAD_SSHORT
// --- description from .arch file ---
// Untyped buffer load signed short (sign extend to VGPR destination).
void
Inst_MUBUF__BUFFER_LOAD_SSHORT::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MUBUF__BUFFER_LOAD_SSHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MUBUF__BUFFER_LOAD_SSHORT::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MUBUF__BUFFER_LOAD_DWORD class methods ---
Inst_MUBUF__BUFFER_LOAD_DWORD
::Inst_MUBUF__BUFFER_LOAD_DWORD(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_load_dword")
{
setFlag(MemoryRef);
setFlag(Load);
if (instData.LDS) {
setFlag(GroupSegment);
} else {
setFlag(GlobalSegment);
}
} // Inst_MUBUF__BUFFER_LOAD_DWORD
Inst_MUBUF__BUFFER_LOAD_DWORD::~Inst_MUBUF__BUFFER_LOAD_DWORD()
{
} // ~Inst_MUBUF__BUFFER_LOAD_DWORD
// --- description from .arch file ---
// Untyped buffer load dword.
void
Inst_MUBUF__BUFFER_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
wf->decVMemInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
rsrcDesc.read();
offset.read();
int inst_offset = instData.OFFSET;
if (!instData.IDXEN && !instData.OFFEN) {
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr0, addr1, rsrcDesc, offset, inst_offset);
} else if (!instData.IDXEN && instData.OFFEN) {
addr0.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr0, addr1, rsrcDesc, offset, inst_offset);
} else if (instData.IDXEN && !instData.OFFEN) {
addr0.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr1, addr0, rsrcDesc, offset, inst_offset);
} else {
addr0.read();
addr1.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr1, addr0, rsrcDesc, offset, inst_offset);
}
gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
} // execute
void
Inst_MUBUF__BUFFER_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemRead<VecElemU32>(gpuDynInst);
} // initiateAcc
void
Inst_MUBUF__BUFFER_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
{
VecOperandU32 vdst(gpuDynInst, extData.VDATA);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
if (!oobMask[lane]) {
vdst[lane] = (reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane];
} else {
vdst[lane] = 0;
}
}
}
vdst.write();
} // completeAcc
// --- Inst_MUBUF__BUFFER_LOAD_DWORDX2 class methods ---
Inst_MUBUF__BUFFER_LOAD_DWORDX2
::Inst_MUBUF__BUFFER_LOAD_DWORDX2(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_load_dwordx2")
{
setFlag(MemoryRef);
setFlag(Load);
if (instData.LDS) {
setFlag(GroupSegment);
} else {
setFlag(GlobalSegment);
}
} // Inst_MUBUF__BUFFER_LOAD_DWORDX2
Inst_MUBUF__BUFFER_LOAD_DWORDX2::~Inst_MUBUF__BUFFER_LOAD_DWORDX2()
{
} // ~Inst_MUBUF__BUFFER_LOAD_DWORDX2
// --- description from .arch file ---
// Untyped buffer load 2 dwords.
void
Inst_MUBUF__BUFFER_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
wf->decVMemInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
rsrcDesc.read();
offset.read();
int inst_offset = instData.OFFSET;
if (!instData.IDXEN && !instData.OFFEN) {
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr0, addr1, rsrcDesc, offset, inst_offset);
} else if (!instData.IDXEN && instData.OFFEN) {
addr0.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr0, addr1, rsrcDesc, offset, inst_offset);
} else if (instData.IDXEN && !instData.OFFEN) {
addr0.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr1, addr0, rsrcDesc, offset, inst_offset);
} else {
addr0.read();
addr1.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr1, addr0, rsrcDesc, offset, inst_offset);
}
gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
} // execute
void
Inst_MUBUF__BUFFER_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
{
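// Reads two consecutive dwords per lane into d_data; completeAcc()
// then de-interleaves them into VDATA and VDATA+1.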
initMemRead<2>(gpuDynInst);
} // initiateAcc
void
Inst_MUBUF__BUFFER_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
{
VecOperandU32 vdst0(gpuDynInst, extData.VDATA);
VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
if (!oobMask[lane]) {
vdst0[lane] = (reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 2];
vdst1[lane] = (reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 2 + 1];
} else {
vdst0[lane] = 0;
vdst1[lane] = 0;
}
}
}
vdst0.write();
vdst1.write();
} // completeAcc
// --- Inst_MUBUF__BUFFER_LOAD_DWORDX3 class methods ---
Inst_MUBUF__BUFFER_LOAD_DWORDX3
::Inst_MUBUF__BUFFER_LOAD_DWORDX3(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_load_dwordx3")
{
setFlag(MemoryRef);
setFlag(Load);
if (instData.LDS) {
setFlag(GroupSegment);
} else {
setFlag(GlobalSegment);
}
} // Inst_MUBUF__BUFFER_LOAD_DWORDX3
Inst_MUBUF__BUFFER_LOAD_DWORDX3::~Inst_MUBUF__BUFFER_LOAD_DWORDX3()
{
} // ~Inst_MUBUF__BUFFER_LOAD_DWORDX3
// --- description from .arch file ---
// Untyped buffer load 3 dwords.
void
Inst_MUBUF__BUFFER_LOAD_DWORDX3::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
wf->decVMemInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
rsrcDesc.read();
offset.read();
int inst_offset = instData.OFFSET;
if (!instData.IDXEN && !instData.OFFEN) {
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr0, addr1, rsrcDesc, offset, inst_offset);
} else if (!instData.IDXEN && instData.OFFEN) {
addr0.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr0, addr1, rsrcDesc, offset, inst_offset);
} else if (instData.IDXEN && !instData.OFFEN) {
addr0.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr1, addr0, rsrcDesc, offset, inst_offset);
} else {
addr0.read();
addr1.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr1, addr0, rsrcDesc, offset, inst_offset);
}
gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
} // execute
void
Inst_MUBUF__BUFFER_LOAD_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemRead<3>(gpuDynInst);
} // initiateAcc
void
Inst_MUBUF__BUFFER_LOAD_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst)
{
VecOperandU32 vdst0(gpuDynInst, extData.VDATA);
VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1);
VecOperandU32 vdst2(gpuDynInst, extData.VDATA + 2);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
if (!oobMask[lane]) {
vdst0[lane] = (reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 3];
vdst1[lane] = (reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 3 + 1];
vdst2[lane] = (reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 3 + 2];
} else {
vdst0[lane] = 0;
vdst1[lane] = 0;
vdst2[lane] = 0;
}
}
}
vdst0.write();
vdst1.write();
vdst2.write();
} // completeAcc
// --- Inst_MUBUF__BUFFER_LOAD_DWORDX4 class methods ---
Inst_MUBUF__BUFFER_LOAD_DWORDX4
::Inst_MUBUF__BUFFER_LOAD_DWORDX4(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_load_dwordx4")
{
setFlag(MemoryRef);
setFlag(Load);
if (instData.LDS) {
setFlag(GroupSegment);
} else {
setFlag(GlobalSegment);
}
} // Inst_MUBUF__BUFFER_LOAD_DWORDX4
Inst_MUBUF__BUFFER_LOAD_DWORDX4::~Inst_MUBUF__BUFFER_LOAD_DWORDX4()
{
} // ~Inst_MUBUF__BUFFER_LOAD_DWORDX4
// --- description from .arch file ---
// Untyped buffer load 4 dwords.
void
Inst_MUBUF__BUFFER_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
wf->decVMemInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
rsrcDesc.read();
offset.read();
int inst_offset = instData.OFFSET;
if (!instData.IDXEN && !instData.OFFEN) {
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr0, addr1, rsrcDesc, offset, inst_offset);
} else if (!instData.IDXEN && instData.OFFEN) {
addr0.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr0, addr1, rsrcDesc, offset, inst_offset);
} else if (instData.IDXEN && !instData.OFFEN) {
addr0.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr1, addr0, rsrcDesc, offset, inst_offset);
} else {
addr0.read();
addr1.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr1, addr0, rsrcDesc, offset, inst_offset);
}
gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
} // execute
void
Inst_MUBUF__BUFFER_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemRead<4>(gpuDynInst);
} // initiateAcc
void
Inst_MUBUF__BUFFER_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
{
VecOperandU32 vdst0(gpuDynInst, extData.VDATA);
VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1);
VecOperandU32 vdst2(gpuDynInst, extData.VDATA + 2);
VecOperandU32 vdst3(gpuDynInst, extData.VDATA + 3);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
if (!oobMask[lane]) {
vdst0[lane] = (reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 4];
vdst1[lane] = (reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 4 + 1];
vdst2[lane] = (reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 4 + 2];
vdst3[lane] = (reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 4 + 3];
} else {
vdst0[lane] = 0;
vdst1[lane] = 0;
vdst2[lane] = 0;
vdst3[lane] = 0;
}
}
}
vdst0.write();
vdst1.write();
vdst2.write();
vdst3.write();
} // completeAcc
// --- Inst_MUBUF__BUFFER_STORE_BYTE class methods ---
Inst_MUBUF__BUFFER_STORE_BYTE
::Inst_MUBUF__BUFFER_STORE_BYTE(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_store_byte")
{
setFlag(MemoryRef);
setFlag(Store);
if (instData.LDS) {
setFlag(GroupSegment);
} else {
setFlag(GlobalSegment);
}
} // Inst_MUBUF__BUFFER_STORE_BYTE
Inst_MUBUF__BUFFER_STORE_BYTE::~Inst_MUBUF__BUFFER_STORE_BYTE()
{
} // ~Inst_MUBUF__BUFFER_STORE_BYTE
// --- description from .arch file ---
// Untyped buffer store byte.
void
Inst_MUBUF__BUFFER_STORE_BYTE::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
wf->decVMemInstsIssued();
wf->decExpInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
ConstVecOperandI8 data(gpuDynInst, extData.VDATA);
rsrcDesc.read();
offset.read();
data.read();
int inst_offset = instData.OFFSET;
if (!instData.IDXEN && !instData.OFFEN) {
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr0, addr1, rsrcDesc, offset, inst_offset);
} else if (!instData.IDXEN && instData.OFFEN) {
addr0.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr0, addr1, rsrcDesc, offset, inst_offset);
} else if (instData.IDXEN && !instData.OFFEN) {
addr0.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr1, addr0, rsrcDesc, offset, inst_offset);
} else {
addr0.read();
addr1.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr1, addr0, rsrcDesc, offset, inst_offset);
}
gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
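// Stage each active lane's byte into d_data now; initiateAcc() hands
// the staged buffer to the memory system via initMemWrite.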
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
(reinterpret_cast<VecElemI8*>(gpuDynInst->d_data))[lane]
= data[lane];
}
}
} // execute
void
Inst_MUBUF__BUFFER_STORE_BYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemWrite<VecElemI8>(gpuDynInst);
} // initiateAcc
void
Inst_MUBUF__BUFFER_STORE_BYTE::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MUBUF__BUFFER_STORE_SHORT class methods ---
Inst_MUBUF__BUFFER_STORE_SHORT
::Inst_MUBUF__BUFFER_STORE_SHORT(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_store_short")
{
setFlag(MemoryRef);
setFlag(Store);
if (instData.LDS) {
setFlag(GroupSegment);
} else {
setFlag(GlobalSegment);
}
} // Inst_MUBUF__BUFFER_STORE_SHORT
Inst_MUBUF__BUFFER_STORE_SHORT::~Inst_MUBUF__BUFFER_STORE_SHORT()
{
} // ~Inst_MUBUF__BUFFER_STORE_SHORT
// --- description from .arch file ---
// Untyped buffer store short.
void
Inst_MUBUF__BUFFER_STORE_SHORT::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
wf->decVMemInstsIssued();
wf->decExpInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
ConstVecOperandI16 data(gpuDynInst, extData.VDATA);
rsrcDesc.read();
offset.read();
data.read();
int inst_offset = instData.OFFSET;
if (!instData.IDXEN && !instData.OFFEN) {
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr0, addr1, rsrcDesc, offset, inst_offset);
} else if (!instData.IDXEN && instData.OFFEN) {
addr0.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr0, addr1, rsrcDesc, offset, inst_offset);
} else if (instData.IDXEN && !instData.OFFEN) {
addr0.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr1, addr0, rsrcDesc, offset, inst_offset);
} else {
addr0.read();
addr1.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr1, addr0, rsrcDesc, offset, inst_offset);
}
gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
(reinterpret_cast<VecElemI16*>(gpuDynInst->d_data))[lane]
= data[lane];
}
}
} // execute
void
Inst_MUBUF__BUFFER_STORE_SHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemWrite<VecElemI16>(gpuDynInst);
} // initiateAcc
void
Inst_MUBUF__BUFFER_STORE_SHORT::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MUBUF__BUFFER_STORE_DWORD class methods ---
Inst_MUBUF__BUFFER_STORE_DWORD
::Inst_MUBUF__BUFFER_STORE_DWORD(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_store_dword")
{
setFlag(MemoryRef);
setFlag(Store);
if (instData.LDS) {
setFlag(GroupSegment);
} else {
setFlag(GlobalSegment);
}
} // Inst_MUBUF__BUFFER_STORE_DWORD
Inst_MUBUF__BUFFER_STORE_DWORD::~Inst_MUBUF__BUFFER_STORE_DWORD()
{
} // ~Inst_MUBUF__BUFFER_STORE_DWORD
// --- description from .arch file ---
// Untyped buffer store dword.
void
Inst_MUBUF__BUFFER_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
wf->decVMemInstsIssued();
wf->decExpInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
ConstVecOperandU32 data(gpuDynInst, extData.VDATA);
rsrcDesc.read();
offset.read();
data.read();
int inst_offset = instData.OFFSET;
if (!instData.IDXEN && !instData.OFFEN) {
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr0, addr1, rsrcDesc, offset, inst_offset);
} else if (!instData.IDXEN && instData.OFFEN) {
addr0.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr0, addr1, rsrcDesc, offset, inst_offset);
} else if (instData.IDXEN && !instData.OFFEN) {
addr0.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr1, addr0, rsrcDesc, offset, inst_offset);
} else {
addr0.read();
addr1.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr1, addr0, rsrcDesc, offset, inst_offset);
}
gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
(reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane]
= data[lane];
}
}
} // execute
void
Inst_MUBUF__BUFFER_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemWrite<VecElemU32>(gpuDynInst);
} // initiateAcc
void
Inst_MUBUF__BUFFER_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MUBUF__BUFFER_STORE_DWORDX2 class methods ---
Inst_MUBUF__BUFFER_STORE_DWORDX2
::Inst_MUBUF__BUFFER_STORE_DWORDX2(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_store_dwordx2")
{
setFlag(MemoryRef);
setFlag(Store);
if (instData.LDS) {
setFlag(GroupSegment);
} else {
setFlag(GlobalSegment);
}
} // Inst_MUBUF__BUFFER_STORE_DWORDX2
Inst_MUBUF__BUFFER_STORE_DWORDX2::~Inst_MUBUF__BUFFER_STORE_DWORDX2()
{
} // ~Inst_MUBUF__BUFFER_STORE_DWORDX2
// --- description from .arch file ---
// Untyped buffer store 2 dwords.
void
Inst_MUBUF__BUFFER_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
wf->decVMemInstsIssued();
wf->decExpInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
ConstVecOperandU32 data0(gpuDynInst, extData.VDATA);
ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1);
rsrcDesc.read();
offset.read();
data0.read();
data1.read();
int inst_offset = instData.OFFSET;
if (!instData.IDXEN && !instData.OFFEN) {
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr0, addr1, rsrcDesc, offset, inst_offset);
} else if (!instData.IDXEN && instData.OFFEN) {
addr0.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr0, addr1, rsrcDesc, offset, inst_offset);
} else if (instData.IDXEN && !instData.OFFEN) {
addr0.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr1, addr0, rsrcDesc, offset, inst_offset);
} else {
addr0.read();
addr1.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr1, addr0, rsrcDesc, offset, inst_offset);
}
gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
// Pack two dwords per lane at stride 2 so that initMemWrite<2>, which
// reads d_data back at lane * 2, picks up both components.
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
(reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 2]
= data0[lane];
(reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 2 + 1]
= data1[lane];
}
}
} // execute
void
Inst_MUBUF__BUFFER_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemWrite<2>(gpuDynInst);
} // initiateAcc
void
Inst_MUBUF__BUFFER_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MUBUF__BUFFER_STORE_DWORDX3 class methods ---
Inst_MUBUF__BUFFER_STORE_DWORDX3
::Inst_MUBUF__BUFFER_STORE_DWORDX3(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_store_dwordx3")
{
setFlag(MemoryRef);
setFlag(Store);
if (instData.LDS) {
setFlag(GroupSegment);
} else {
setFlag(GlobalSegment);
}
} // Inst_MUBUF__BUFFER_STORE_DWORDX3
Inst_MUBUF__BUFFER_STORE_DWORDX3::~Inst_MUBUF__BUFFER_STORE_DWORDX3()
{
} // ~Inst_MUBUF__BUFFER_STORE_DWORDX3
// --- description from .arch file ---
// Untyped buffer store 3 dwords.
void
Inst_MUBUF__BUFFER_STORE_DWORDX3::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
wf->decVMemInstsIssued();
wf->decExpInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
ConstVecOperandU32 data0(gpuDynInst, extData.VDATA);
ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1);
ConstVecOperandU32 data2(gpuDynInst, extData.VDATA + 2);
rsrcDesc.read();
offset.read();
data0.read();
data1.read();
data2.read();
int inst_offset = instData.OFFSET;
if (!instData.IDXEN && !instData.OFFEN) {
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr0, addr1, rsrcDesc, offset, inst_offset);
} else if (!instData.IDXEN && instData.OFFEN) {
addr0.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr0, addr1, rsrcDesc, offset, inst_offset);
} else if (instData.IDXEN && !instData.OFFEN) {
addr0.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr1, addr0, rsrcDesc, offset, inst_offset);
} else {
addr0.read();
addr1.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr1, addr0, rsrcDesc, offset, inst_offset);
}
gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
// Pack three dwords per lane at stride 3 to match initMemWrite<3>,
// which reads d_data back at lane * 3.
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
(reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 3]
= data0[lane];
(reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 3 + 1]
= data1[lane];
(reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 3 + 2]
= data2[lane];
}
}
} // execute
void
Inst_MUBUF__BUFFER_STORE_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemWrite<3>(gpuDynInst);
} // initiateAcc
void
Inst_MUBUF__BUFFER_STORE_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MUBUF__BUFFER_STORE_DWORDX4 class methods ---
Inst_MUBUF__BUFFER_STORE_DWORDX4
::Inst_MUBUF__BUFFER_STORE_DWORDX4(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_store_dwordx4")
{
setFlag(MemoryRef);
setFlag(Store);
if (instData.LDS) {
setFlag(GroupSegment);
} else {
setFlag(GlobalSegment);
}
} // Inst_MUBUF__BUFFER_STORE_DWORDX4
Inst_MUBUF__BUFFER_STORE_DWORDX4::~Inst_MUBUF__BUFFER_STORE_DWORDX4()
{
} // ~Inst_MUBUF__BUFFER_STORE_DWORDX4
// --- description from .arch file ---
// Untyped buffer store 4 dwords.
void
Inst_MUBUF__BUFFER_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
wf->decVMemInstsIssued();
wf->decExpInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
ConstVecOperandU32 data0(gpuDynInst, extData.VDATA);
ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1);
ConstVecOperandU32 data2(gpuDynInst, extData.VDATA + 2);
ConstVecOperandU32 data3(gpuDynInst, extData.VDATA + 3);
rsrcDesc.read();
offset.read();
data0.read();
data1.read();
data2.read();
data3.read();
int inst_offset = instData.OFFSET;
if (!instData.IDXEN && !instData.OFFEN) {
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr0, addr1, rsrcDesc, offset, inst_offset);
} else if (!instData.IDXEN && instData.OFFEN) {
addr0.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr0, addr1, rsrcDesc, offset, inst_offset);
} else if (instData.IDXEN && !instData.OFFEN) {
addr0.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr1, addr0, rsrcDesc, offset, inst_offset);
} else {
addr0.read();
addr1.read();
calcAddr<ConstVecOperandU32, ConstVecOperandU32,
ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
addr1, addr0, rsrcDesc, offset, inst_offset);
}
gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
(reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 4]
= data0[lane];
(reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 4 + 1]
= data1[lane];
(reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 4 + 2]
= data2[lane];
(reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 4 + 3]
= data3[lane];
}
}
} // execute
void
Inst_MUBUF__BUFFER_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemWrite<4>(gpuDynInst);
} // initiateAcc
void
Inst_MUBUF__BUFFER_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MUBUF__BUFFER_STORE_LDS_DWORD class methods ---
Inst_MUBUF__BUFFER_STORE_LDS_DWORD
::Inst_MUBUF__BUFFER_STORE_LDS_DWORD(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_store_lds_dword")
{
setFlag(Store);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_STORE_LDS_DWORD
Inst_MUBUF__BUFFER_STORE_LDS_DWORD::~Inst_MUBUF__BUFFER_STORE_LDS_DWORD()
{
} // ~Inst_MUBUF__BUFFER_STORE_LDS_DWORD
// --- description from .arch file ---
// Store one DWORD from LDS memory to system memory without utilizing
// VGPRs.
void
Inst_MUBUF__BUFFER_STORE_LDS_DWORD::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MUBUF__BUFFER_WBINVL1 class methods ---
Inst_MUBUF__BUFFER_WBINVL1::Inst_MUBUF__BUFFER_WBINVL1(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_wbinvl1")
{
setFlag(MemoryRef);
setFlag(MemSync);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_WBINVL1
Inst_MUBUF__BUFFER_WBINVL1::~Inst_MUBUF__BUFFER_WBINVL1()
{
} // ~Inst_MUBUF__BUFFER_WBINVL1
// --- description from .arch file ---
// Write back and invalidate the shader L1.
// Always returns ACK to shader.
void
Inst_MUBUF__BUFFER_WBINVL1::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
wf->decVMemInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
} // execute
void
Inst_MUBUF__BUFFER_WBINVL1::initiateAcc(GPUDynInstPtr gpuDynInst)
{
// TODO: Fix it for gfx10. Once we have the new gfx10 cache model, we
// need to precisely communicate the writeback-invalidate operation to
// the new gfx10 coalescer rather than sending AcquireRelease markers.
// The SICoalescer would need to be updated appropriately as well.
injectGlobalMemFence(gpuDynInst);
} // initiateAcc
void
Inst_MUBUF__BUFFER_WBINVL1::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MUBUF__BUFFER_WBINVL1_VOL class methods ---
Inst_MUBUF__BUFFER_WBINVL1_VOL
::Inst_MUBUF__BUFFER_WBINVL1_VOL(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_wbinvl1_vol")
{
// This instruction is the same as buffer_wbinvl1 except that it only
// invalidates L1 shader lines with MTYPE SC and GC. Since the Hermes
// L1 (TCP) does not differentiate between its cache lines, this
// instruction currently behaves (and is implemented) exactly like
// buffer_wbinvl1.
setFlag(MemoryRef);
setFlag(MemSync);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_WBINVL1_VOL
Inst_MUBUF__BUFFER_WBINVL1_VOL::~Inst_MUBUF__BUFFER_WBINVL1_VOL()
{
} // ~Inst_MUBUF__BUFFER_WBINVL1_VOL
// --- description from .arch file ---
// Write back and invalidate the shader L1 only for lines that are marked
// --- volatile.
// Always returns ACK to shader.
void
Inst_MUBUF__BUFFER_WBINVL1_VOL::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none()) {
wf->decVMemInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
} // execute
void
Inst_MUBUF__BUFFER_WBINVL1_VOL::initiateAcc(GPUDynInstPtr gpuDynInst)
{
injectGlobalMemFence(gpuDynInst);
} // initiateAcc
void
Inst_MUBUF__BUFFER_WBINVL1_VOL::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MUBUF__BUFFER_ATOMIC_SWAP class methods ---
Inst_MUBUF__BUFFER_ATOMIC_SWAP
::Inst_MUBUF__BUFFER_ATOMIC_SWAP(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_atomic_swap")
{
setFlag(AtomicExch);
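// GLC=1 asks for the pre-op memory value to be returned to the data
// VGPRs, so it selects the returning flavor of the atomic.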
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_ATOMIC_SWAP
Inst_MUBUF__BUFFER_ATOMIC_SWAP::~Inst_MUBUF__BUFFER_ATOMIC_SWAP()
{
} // ~Inst_MUBUF__BUFFER_ATOMIC_SWAP
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = DATA;
// RETURN_DATA = tmp.
void
Inst_MUBUF__BUFFER_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP class methods ---
Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP
::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_atomic_cmpswap")
{
setFlag(AtomicCAS);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP
Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP()
{
} // ~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// src = DATA[0];
// cmp = DATA[1];
// MEM[ADDR] = (tmp == cmp) ? src : tmp;
// RETURN_DATA[0] = tmp.
void
Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MUBUF__BUFFER_ATOMIC_ADD class methods ---
Inst_MUBUF__BUFFER_ATOMIC_ADD
::Inst_MUBUF__BUFFER_ATOMIC_ADD(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_atomic_add")
{
setFlag(AtomicAdd);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_ATOMIC_ADD
Inst_MUBUF__BUFFER_ATOMIC_ADD::~Inst_MUBUF__BUFFER_ATOMIC_ADD()
{
} // ~Inst_MUBUF__BUFFER_ATOMIC_ADD
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] += DATA;
// RETURN_DATA = tmp.
void
Inst_MUBUF__BUFFER_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
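// --- implementation note (sketch) ---
// None of the MUBUF atomics in this file are implemented yet; execute()
// simply panics. A minimal sketch of one possible implementation,
// mirroring the buffer_store_dword path above and the initAtomicAccess<>
// helper used by the flat atomics in this ISA (the helper choice and the
// elided operand handling are assumptions, not a finished
// implementation):
//
//     void
//     Inst_MUBUF__BUFFER_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst)
//     {
//         // read VDATA/SRSRC/SOFFSET (and VADDR, per IDXEN/OFFEN) as in
//         // buffer_store_dword, then resolve per-lane addresses
//         calcAddr<...>(gpuDynInst, ...);
//         // stage the per-lane addend in d_data at stride 1
//         gpuDynInst->computeUnit()->globalMemoryPipe
//             .issueRequest(gpuDynInst);
//     } // execute
//
//     void
//     Inst_MUBUF__BUFFER_ATOMIC_ADD::initiateAcc(GPUDynInstPtr gpuDynInst)
//     {
//         initAtomicAccess<VecElemU32>(gpuDynInst);
//     } // initiateAcc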
// --- Inst_MUBUF__BUFFER_ATOMIC_SUB class methods ---
Inst_MUBUF__BUFFER_ATOMIC_SUB
::Inst_MUBUF__BUFFER_ATOMIC_SUB(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_atomic_sub")
{
setFlag(AtomicSub);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_ATOMIC_SUB
Inst_MUBUF__BUFFER_ATOMIC_SUB::~Inst_MUBUF__BUFFER_ATOMIC_SUB()
{
} // ~Inst_MUBUF__BUFFER_ATOMIC_SUB
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] -= DATA;
// RETURN_DATA = tmp.
void
Inst_MUBUF__BUFFER_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MUBUF__BUFFER_ATOMIC_SMIN class methods ---
Inst_MUBUF__BUFFER_ATOMIC_SMIN
::Inst_MUBUF__BUFFER_ATOMIC_SMIN(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_atomic_smin")
{
setFlag(AtomicMin);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_ATOMIC_SMIN
Inst_MUBUF__BUFFER_ATOMIC_SMIN::~Inst_MUBUF__BUFFER_ATOMIC_SMIN()
{
} // ~Inst_MUBUF__BUFFER_ATOMIC_SMIN
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
// RETURN_DATA = tmp.
void
Inst_MUBUF__BUFFER_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MUBUF__BUFFER_ATOMIC_UMIN class methods ---
Inst_MUBUF__BUFFER_ATOMIC_UMIN
::Inst_MUBUF__BUFFER_ATOMIC_UMIN(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_atomic_umin")
{
setFlag(AtomicMin);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_ATOMIC_UMIN
Inst_MUBUF__BUFFER_ATOMIC_UMIN::~Inst_MUBUF__BUFFER_ATOMIC_UMIN()
{
} // ~Inst_MUBUF__BUFFER_ATOMIC_UMIN
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
// RETURN_DATA = tmp.
void
Inst_MUBUF__BUFFER_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MUBUF__BUFFER_ATOMIC_SMAX class methods ---
Inst_MUBUF__BUFFER_ATOMIC_SMAX
::Inst_MUBUF__BUFFER_ATOMIC_SMAX(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_atomic_smax")
{
setFlag(AtomicMax);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_ATOMIC_SMAX
Inst_MUBUF__BUFFER_ATOMIC_SMAX::~Inst_MUBUF__BUFFER_ATOMIC_SMAX()
{
} // ~Inst_MUBUF__BUFFER_ATOMIC_SMAX
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
// RETURN_DATA = tmp.
void
Inst_MUBUF__BUFFER_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MUBUF__BUFFER_ATOMIC_UMAX class methods ---
Inst_MUBUF__BUFFER_ATOMIC_UMAX
::Inst_MUBUF__BUFFER_ATOMIC_UMAX(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_atomic_umax")
{
setFlag(AtomicMax);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_ATOMIC_UMAX
Inst_MUBUF__BUFFER_ATOMIC_UMAX::~Inst_MUBUF__BUFFER_ATOMIC_UMAX()
{
} // ~Inst_MUBUF__BUFFER_ATOMIC_UMAX
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
// RETURN_DATA = tmp.
void
Inst_MUBUF__BUFFER_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MUBUF__BUFFER_ATOMIC_AND class methods ---
Inst_MUBUF__BUFFER_ATOMIC_AND
::Inst_MUBUF__BUFFER_ATOMIC_AND(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_atomic_and")
{
setFlag(AtomicAnd);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_ATOMIC_AND
Inst_MUBUF__BUFFER_ATOMIC_AND::~Inst_MUBUF__BUFFER_ATOMIC_AND()
{
} // ~Inst_MUBUF__BUFFER_ATOMIC_AND
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] &= DATA;
// RETURN_DATA = tmp.
void
Inst_MUBUF__BUFFER_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MUBUF__BUFFER_ATOMIC_OR class methods ---
Inst_MUBUF__BUFFER_ATOMIC_OR
::Inst_MUBUF__BUFFER_ATOMIC_OR(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_atomic_or")
{
setFlag(AtomicOr);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_ATOMIC_OR
Inst_MUBUF__BUFFER_ATOMIC_OR::~Inst_MUBUF__BUFFER_ATOMIC_OR()
{
} // ~Inst_MUBUF__BUFFER_ATOMIC_OR
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] |= DATA;
// RETURN_DATA = tmp.
void
Inst_MUBUF__BUFFER_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MUBUF__BUFFER_ATOMIC_XOR class methods ---
Inst_MUBUF__BUFFER_ATOMIC_XOR
::Inst_MUBUF__BUFFER_ATOMIC_XOR(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_atomic_xor")
{
setFlag(AtomicXor);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_ATOMIC_XOR
Inst_MUBUF__BUFFER_ATOMIC_XOR::~Inst_MUBUF__BUFFER_ATOMIC_XOR()
{
} // ~Inst_MUBUF__BUFFER_ATOMIC_XOR
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] ^= DATA;
// RETURN_DATA = tmp.
void
Inst_MUBUF__BUFFER_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MUBUF__BUFFER_ATOMIC_INC class methods ---
Inst_MUBUF__BUFFER_ATOMIC_INC
::Inst_MUBUF__BUFFER_ATOMIC_INC(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_atomic_inc")
{
setFlag(AtomicInc);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_ATOMIC_INC
Inst_MUBUF__BUFFER_ATOMIC_INC::~Inst_MUBUF__BUFFER_ATOMIC_INC()
{
} // ~Inst_MUBUF__BUFFER_ATOMIC_INC
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
// RETURN_DATA = tmp.
void
Inst_MUBUF__BUFFER_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MUBUF__BUFFER_ATOMIC_DEC class methods ---
Inst_MUBUF__BUFFER_ATOMIC_DEC
::Inst_MUBUF__BUFFER_ATOMIC_DEC(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_atomic_dec")
{
setFlag(AtomicDec);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_ATOMIC_DEC
Inst_MUBUF__BUFFER_ATOMIC_DEC::~Inst_MUBUF__BUFFER_ATOMIC_DEC()
{
} // ~Inst_MUBUF__BUFFER_ATOMIC_DEC
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
// (unsigned compare);
// RETURN_DATA = tmp.
void
Inst_MUBUF__BUFFER_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2 class methods ---
Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2
::Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_atomic_swap_x2")
{
setFlag(AtomicExch);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2
Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2::~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2()
{
} // ~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] = DATA[0:1];
// RETURN_DATA[0:1] = tmp.
void
Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 class methods ---
Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_atomic_cmpswap_x2")
{
setFlag(AtomicCAS);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2()
{
} // ~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// src = DATA[0:1];
// cmp = DATA[2:3];
// MEM[ADDR] = (tmp == cmp) ? src : tmp;
// RETURN_DATA[0:1] = tmp.
void
Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MUBUF__BUFFER_ATOMIC_ADD_X2 class methods ---
Inst_MUBUF__BUFFER_ATOMIC_ADD_X2
::Inst_MUBUF__BUFFER_ATOMIC_ADD_X2(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_atomic_add_x2")
{
setFlag(AtomicAdd);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_ATOMIC_ADD_X2
Inst_MUBUF__BUFFER_ATOMIC_ADD_X2::~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2()
{
} // ~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] += DATA[0:1];
// RETURN_DATA[0:1] = tmp.
void
Inst_MUBUF__BUFFER_ATOMIC_ADD_X2::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MUBUF__BUFFER_ATOMIC_SUB_X2 class methods ---
Inst_MUBUF__BUFFER_ATOMIC_SUB_X2
::Inst_MUBUF__BUFFER_ATOMIC_SUB_X2(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_atomic_sub_x2")
{
setFlag(AtomicSub);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_ATOMIC_SUB_X2
Inst_MUBUF__BUFFER_ATOMIC_SUB_X2::~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2()
{
} // ~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] -= DATA[0:1];
// RETURN_DATA[0:1] = tmp.
void
Inst_MUBUF__BUFFER_ATOMIC_SUB_X2::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2 class methods ---
Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2
::Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_atomic_smin_x2")
{
setFlag(AtomicMin);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2
Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2::~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2()
{
} // ~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
// RETURN_DATA[0:1] = tmp.
void
Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2 class methods ---
Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2
::Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_atomic_umin_x2")
{
setFlag(AtomicMin);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2
Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2::~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2()
{
} // ~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
// RETURN_DATA[0:1] = tmp.
void
Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2 class methods ---
Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2
::Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_atomic_smax_x2")
{
setFlag(AtomicMax);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2
Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2::~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2()
{
} // ~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
// RETURN_DATA[0:1] = tmp.
void
Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2 class methods ---
Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2
::Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_atomic_umax_x2")
{
setFlag(AtomicMax);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2
Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2::~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2()
{
} // ~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
// RETURN_DATA[0:1] = tmp.
void
Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MUBUF__BUFFER_ATOMIC_AND_X2 class methods ---
Inst_MUBUF__BUFFER_ATOMIC_AND_X2
::Inst_MUBUF__BUFFER_ATOMIC_AND_X2(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_atomic_and_x2")
{
setFlag(AtomicAnd);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_ATOMIC_AND_X2
Inst_MUBUF__BUFFER_ATOMIC_AND_X2::~Inst_MUBUF__BUFFER_ATOMIC_AND_X2()
{
} // ~Inst_MUBUF__BUFFER_ATOMIC_AND_X2
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] &= DATA[0:1];
// RETURN_DATA[0:1] = tmp.
void
Inst_MUBUF__BUFFER_ATOMIC_AND_X2::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MUBUF__BUFFER_ATOMIC_OR_X2 class methods ---
Inst_MUBUF__BUFFER_ATOMIC_OR_X2
::Inst_MUBUF__BUFFER_ATOMIC_OR_X2(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_atomic_or_x2")
{
setFlag(AtomicOr);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_ATOMIC_OR_X2
Inst_MUBUF__BUFFER_ATOMIC_OR_X2::~Inst_MUBUF__BUFFER_ATOMIC_OR_X2()
{
} // ~Inst_MUBUF__BUFFER_ATOMIC_OR_X2
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] |= DATA[0:1];
// RETURN_DATA[0:1] = tmp.
void
Inst_MUBUF__BUFFER_ATOMIC_OR_X2::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MUBUF__BUFFER_ATOMIC_XOR_X2 class methods ---
Inst_MUBUF__BUFFER_ATOMIC_XOR_X2
::Inst_MUBUF__BUFFER_ATOMIC_XOR_X2(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_atomic_xor_x2")
{
setFlag(AtomicXor);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_ATOMIC_XOR_X2
Inst_MUBUF__BUFFER_ATOMIC_XOR_X2::~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2()
{
} // ~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] ^= DATA[0:1];
// RETURN_DATA[0:1] = tmp.
void
Inst_MUBUF__BUFFER_ATOMIC_XOR_X2::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MUBUF__BUFFER_ATOMIC_INC_X2 class methods ---
Inst_MUBUF__BUFFER_ATOMIC_INC_X2
::Inst_MUBUF__BUFFER_ATOMIC_INC_X2(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_atomic_inc_x2")
{
setFlag(AtomicInc);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_ATOMIC_INC_X2
Inst_MUBUF__BUFFER_ATOMIC_INC_X2::~Inst_MUBUF__BUFFER_ATOMIC_INC_X2()
{
} // ~Inst_MUBUF__BUFFER_ATOMIC_INC_X2
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
// RETURN_DATA[0:1] = tmp.
void
Inst_MUBUF__BUFFER_ATOMIC_INC_X2::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MUBUF__BUFFER_ATOMIC_DEC_X2 class methods ---
Inst_MUBUF__BUFFER_ATOMIC_DEC_X2
::Inst_MUBUF__BUFFER_ATOMIC_DEC_X2(InFmt_MUBUF *iFmt)
: Inst_MUBUF(iFmt, "buffer_atomic_dec_x2")
{
setFlag(AtomicDec);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MUBUF__BUFFER_ATOMIC_DEC_X2
Inst_MUBUF__BUFFER_ATOMIC_DEC_X2::~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2()
{
} // ~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
// (unsigned compare);
// RETURN_DATA[0:1] = tmp.
void
Inst_MUBUF__BUFFER_ATOMIC_DEC_X2::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_X class methods ---
Inst_MTBUF__TBUFFER_LOAD_FORMAT_X
::Inst_MTBUF__TBUFFER_LOAD_FORMAT_X(InFmt_MTBUF *iFmt)
: Inst_MTBUF(iFmt, "tbuffer_load_format_x")
{
setFlag(MemoryRef);
setFlag(Load);
setFlag(GlobalSegment);
} // Inst_MTBUF__TBUFFER_LOAD_FORMAT_X
Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_X()
{
} // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_X
// --- description from .arch file ---
// Typed buffer load 1 dword with format conversion.
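// Note: MTBUF (typed buffer) instructions take their data and numeric
// formats from DFMT/NFMT fields in the instruction encoding, overriding
// the format in the buffer resource descriptor. None of them are
// implemented in this model; execute() panics if one is decoded.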
void
Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY class methods ---
Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY
::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY(InFmt_MTBUF *iFmt)
: Inst_MTBUF(iFmt, "tbuffer_load_format_xy")
{
setFlag(MemoryRef);
setFlag(Load);
setFlag(GlobalSegment);
} // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY
Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY()
{
} // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY
// --- description from .arch file ---
// Typed buffer load 2 dwords with format conversion.
void
Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ class methods ---
Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ
::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ(InFmt_MTBUF *iFmt)
: Inst_MTBUF(iFmt, "tbuffer_load_format_xyz")
{
setFlag(MemoryRef);
setFlag(Load);
setFlag(GlobalSegment);
} // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ
Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ()
{
} // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ
// --- description from .arch file ---
// Typed buffer load 3 dwords with format conversion.
void
Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW class methods ---
Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW
::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW(InFmt_MTBUF *iFmt)
: Inst_MTBUF(iFmt, "tbuffer_load_format_xyzw")
{
setFlag(MemoryRef);
setFlag(Load);
setFlag(GlobalSegment);
} // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW
Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW
::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW()
{
} // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW
// --- description from .arch file ---
// Typed buffer load 4 dwords with format conversion.
void
Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MTBUF__TBUFFER_STORE_FORMAT_X class methods ---
Inst_MTBUF__TBUFFER_STORE_FORMAT_X
::Inst_MTBUF__TBUFFER_STORE_FORMAT_X(InFmt_MTBUF *iFmt)
: Inst_MTBUF(iFmt, "tbuffer_store_format_x")
{
setFlag(MemoryRef);
setFlag(Store);
setFlag(GlobalSegment);
} // Inst_MTBUF__TBUFFER_STORE_FORMAT_X
Inst_MTBUF__TBUFFER_STORE_FORMAT_X::~Inst_MTBUF__TBUFFER_STORE_FORMAT_X()
{
} // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_X
// --- description from .arch file ---
// Typed buffer store 1 dword with format conversion.
void
Inst_MTBUF__TBUFFER_STORE_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MTBUF__TBUFFER_STORE_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MTBUF__TBUFFER_STORE_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MTBUF__TBUFFER_STORE_FORMAT_XY class methods ---
Inst_MTBUF__TBUFFER_STORE_FORMAT_XY
::Inst_MTBUF__TBUFFER_STORE_FORMAT_XY(InFmt_MTBUF *iFmt)
: Inst_MTBUF(iFmt, "tbuffer_store_format_xy")
{
setFlag(MemoryRef);
setFlag(Store);
setFlag(GlobalSegment);
} // Inst_MTBUF__TBUFFER_STORE_FORMAT_XY
Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XY()
{
} // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XY
// --- description from .arch file ---
// Typed buffer store 2 dwords with format conversion.
void
Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ class methods ---
Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ
::Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ(InFmt_MTBUF *iFmt)
: Inst_MTBUF(iFmt, "tbuffer_store_format_xyz")
{
setFlag(MemoryRef);
setFlag(Store);
setFlag(GlobalSegment);
} // Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ
Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ
::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ()
{
} // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ
// --- description from .arch file ---
// Typed buffer store 3 dwords with format conversion.
void
Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW class methods ---
Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW
::Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW(InFmt_MTBUF *iFmt)
: Inst_MTBUF(iFmt, "tbuffer_store_format_xyzw")
{
setFlag(MemoryRef);
setFlag(Store);
setFlag(GlobalSegment);
} // Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW
Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW
::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW()
{
} // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW
// --- description from .arch file ---
// Typed buffer store 4 dwords with format conversion.
void
Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::initiateAcc(
GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::completeAcc(
GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X class methods ---
Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X
::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X(InFmt_MTBUF *iFmt)
: Inst_MTBUF(iFmt, "tbuffer_load_format_d16_x")
{
setFlag(MemoryRef);
setFlag(Load);
setFlag(GlobalSegment);
} // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X
Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::
~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X()
{
} // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X
// --- description from .arch file ---
// Typed buffer load 1 dword with format conversion.
void
Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::initiateAcc(
GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::completeAcc(
GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY class methods ---
Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY
::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY(InFmt_MTBUF *iFmt)
: Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xy")
{
setFlag(MemoryRef);
setFlag(Load);
setFlag(GlobalSegment);
} // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY
Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY
::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY()
{
} // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY
// --- description from .arch file ---
// Typed buffer load 2 dwords with format conversion.
void
Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::initiateAcc(
GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::completeAcc(
GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ class methods ---
Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ
::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ(
InFmt_MTBUF *iFmt)
: Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xyz")
{
setFlag(MemoryRef);
setFlag(Load);
setFlag(GlobalSegment);
} // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ
Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ
::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ()
{
} // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ
// --- description from .arch file ---
// Typed buffer load 3 dwords with format conversion.
void
Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::initiateAcc(
GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::completeAcc(
GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW class methods ---
Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW
::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW(
InFmt_MTBUF *iFmt)
: Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xyzw")
{
setFlag(MemoryRef);
setFlag(Load);
setFlag(GlobalSegment);
} // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW
Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW
::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW()
{
} // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW
// --- description from .arch file ---
// Typed buffer load 4 dwords with format conversion.
void
Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::initiateAcc(
GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::completeAcc(
GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X class methods ---
Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X
::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X(InFmt_MTBUF *iFmt)
: Inst_MTBUF(iFmt, "tbuffer_store_format_d16_x")
{
setFlag(MemoryRef);
setFlag(Store);
setFlag(GlobalSegment);
} // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X
Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X
::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X()
{
} // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X
// --- description from .arch file ---
// Typed buffer store 1 dword with format conversion.
void
Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::initiateAcc(
GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::completeAcc(
GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY class methods ---
Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY
::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY(InFmt_MTBUF *iFmt)
: Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xy")
{
setFlag(MemoryRef);
setFlag(Store);
setFlag(GlobalSegment);
} // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY
Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY
::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY()
{
} // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY
// --- description from .arch file ---
// Typed buffer store 2 dwords with format conversion.
void
Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::initiateAcc(
GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::completeAcc(
GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ class methods ---
Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ
::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ(InFmt_MTBUF *iFmt)
: Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xyz")
{
setFlag(MemoryRef);
setFlag(Store);
setFlag(GlobalSegment);
} // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ
Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ
::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ()
{
} // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ
// --- description from .arch file ---
// Typed buffer store 3 dwords with format conversion.
void
Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::initiateAcc(
GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::completeAcc(
GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW class methods ---
Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW
::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW(InFmt_MTBUF *iFmt)
: Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xyzw")
{
setFlag(MemoryRef);
setFlag(Store);
setFlag(GlobalSegment);
} // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW
Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW
::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW()
{
} // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW
// --- description from .arch file ---
// Typed buffer store 4 dwords with format conversion.
void
Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::execute(
GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::initiateAcc(
GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::completeAcc(
GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MIMG__IMAGE_LOAD class methods ---
Inst_MIMG__IMAGE_LOAD::Inst_MIMG__IMAGE_LOAD(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_load")
{
setFlag(MemoryRef);
setFlag(Load);
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_LOAD
Inst_MIMG__IMAGE_LOAD::~Inst_MIMG__IMAGE_LOAD()
{
} // ~Inst_MIMG__IMAGE_LOAD
// --- description from .arch file ---
// Image memory load with format conversion specified in T#. No sampler.
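// Note: MIMG instructions address memory through an image resource
// descriptor (T#) held in SGPRs rather than a buffer descriptor. Like
// the typed-buffer instructions above, none are implemented in this
// model; execute() panics if one is decoded.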
void
Inst_MIMG__IMAGE_LOAD::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MIMG__IMAGE_LOAD::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MIMG__IMAGE_LOAD::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MIMG__IMAGE_LOAD_MIP class methods ---
Inst_MIMG__IMAGE_LOAD_MIP::Inst_MIMG__IMAGE_LOAD_MIP(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_load_mip")
{
setFlag(MemoryRef);
setFlag(Load);
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_LOAD_MIP
Inst_MIMG__IMAGE_LOAD_MIP::~Inst_MIMG__IMAGE_LOAD_MIP()
{
} // ~Inst_MIMG__IMAGE_LOAD_MIP
// --- description from .arch file ---
// Image memory load with user-supplied mip level. No sampler.
void
Inst_MIMG__IMAGE_LOAD_MIP::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MIMG__IMAGE_LOAD_MIP::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MIMG__IMAGE_LOAD_MIP::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MIMG__IMAGE_LOAD_PCK class methods ---
Inst_MIMG__IMAGE_LOAD_PCK::Inst_MIMG__IMAGE_LOAD_PCK(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_load_pck")
{
setFlag(MemoryRef);
setFlag(Load);
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_LOAD_PCK
Inst_MIMG__IMAGE_LOAD_PCK::~Inst_MIMG__IMAGE_LOAD_PCK()
{
} // ~Inst_MIMG__IMAGE_LOAD_PCK
// --- description from .arch file ---
// Image memory load with no format conversion. No sampler.
void
Inst_MIMG__IMAGE_LOAD_PCK::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MIMG__IMAGE_LOAD_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MIMG__IMAGE_LOAD_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MIMG__IMAGE_LOAD_PCK_SGN class methods ---
Inst_MIMG__IMAGE_LOAD_PCK_SGN::Inst_MIMG__IMAGE_LOAD_PCK_SGN(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_load_pck_sgn")
{
setFlag(MemoryRef);
setFlag(Load);
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_LOAD_PCK_SGN
Inst_MIMG__IMAGE_LOAD_PCK_SGN::~Inst_MIMG__IMAGE_LOAD_PCK_SGN()
{
} // ~Inst_MIMG__IMAGE_LOAD_PCK_SGN
// --- description from .arch file ---
// Image memory load with no format conversion and with sign extension. No
// --- sampler.
void
Inst_MIMG__IMAGE_LOAD_PCK_SGN::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MIMG__IMAGE_LOAD_PCK_SGN::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MIMG__IMAGE_LOAD_PCK_SGN::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MIMG__IMAGE_LOAD_MIP_PCK class methods ---
Inst_MIMG__IMAGE_LOAD_MIP_PCK::Inst_MIMG__IMAGE_LOAD_MIP_PCK(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_load_mip_pck")
{
setFlag(MemoryRef);
setFlag(Load);
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_LOAD_MIP_PCK
Inst_MIMG__IMAGE_LOAD_MIP_PCK::~Inst_MIMG__IMAGE_LOAD_MIP_PCK()
{
} // ~Inst_MIMG__IMAGE_LOAD_MIP_PCK
// --- description from .arch file ---
// Image memory load with user-supplied mip level, no format conversion. No
// --- sampler.
void
Inst_MIMG__IMAGE_LOAD_MIP_PCK::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MIMG__IMAGE_LOAD_MIP_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MIMG__IMAGE_LOAD_MIP_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN class methods ---
Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_load_mip_pck_sgn")
{
setFlag(MemoryRef);
setFlag(Load);
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN
Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::~Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN()
{
} // ~Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN
// --- description from .arch file ---
// Image memory load with user-supplied mip level, no format conversion and
// --- with sign extension. No sampler.
void
Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MIMG__IMAGE_STORE class methods ---
Inst_MIMG__IMAGE_STORE::Inst_MIMG__IMAGE_STORE(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_store")
{
setFlag(MemoryRef);
setFlag(Store);
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_STORE
Inst_MIMG__IMAGE_STORE::~Inst_MIMG__IMAGE_STORE()
{
} // ~Inst_MIMG__IMAGE_STORE
// --- description from .arch file ---
// Image memory store with format conversion specified in T#. No sampler.
void
Inst_MIMG__IMAGE_STORE::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MIMG__IMAGE_STORE::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MIMG__IMAGE_STORE::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MIMG__IMAGE_STORE_MIP class methods ---
Inst_MIMG__IMAGE_STORE_MIP::Inst_MIMG__IMAGE_STORE_MIP(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_store_mip")
{
setFlag(MemoryRef);
setFlag(Store);
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_STORE_MIP
Inst_MIMG__IMAGE_STORE_MIP::~Inst_MIMG__IMAGE_STORE_MIP()
{
} // ~Inst_MIMG__IMAGE_STORE_MIP
// --- description from .arch file ---
// Image memory store with format conversion specified in T# to user
// specified mip level. No sampler.
void
Inst_MIMG__IMAGE_STORE_MIP::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MIMG__IMAGE_STORE_MIP::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MIMG__IMAGE_STORE_MIP::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MIMG__IMAGE_STORE_PCK class methods ---
Inst_MIMG__IMAGE_STORE_PCK::Inst_MIMG__IMAGE_STORE_PCK(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_store_pck")
{
setFlag(MemoryRef);
setFlag(Store);
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_STORE_PCK
Inst_MIMG__IMAGE_STORE_PCK::~Inst_MIMG__IMAGE_STORE_PCK()
{
} // ~Inst_MIMG__IMAGE_STORE_PCK
// --- description from .arch file ---
// Image memory store of packed data without format conversion. No sampler.
void
Inst_MIMG__IMAGE_STORE_PCK::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MIMG__IMAGE_STORE_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MIMG__IMAGE_STORE_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MIMG__IMAGE_STORE_MIP_PCK class methods ---
Inst_MIMG__IMAGE_STORE_MIP_PCK::Inst_MIMG__IMAGE_STORE_MIP_PCK(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_store_mip_pck")
{
setFlag(MemoryRef);
setFlag(Store);
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_STORE_MIP_PCK
Inst_MIMG__IMAGE_STORE_MIP_PCK::~Inst_MIMG__IMAGE_STORE_MIP_PCK()
{
} // ~Inst_MIMG__IMAGE_STORE_MIP_PCK
// --- description from .arch file ---
// Image memory store of packed data without format conversion to
// user-supplied mip level. No sampler.
void
Inst_MIMG__IMAGE_STORE_MIP_PCK::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_MIMG__IMAGE_STORE_MIP_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_MIMG__IMAGE_STORE_MIP_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_MIMG__IMAGE_GET_RESINFO class methods ---
Inst_MIMG__IMAGE_GET_RESINFO::Inst_MIMG__IMAGE_GET_RESINFO(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_get_resinfo")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GET_RESINFO
Inst_MIMG__IMAGE_GET_RESINFO::~Inst_MIMG__IMAGE_GET_RESINFO()
{
} // ~Inst_MIMG__IMAGE_GET_RESINFO
// --- description from .arch file ---
// return resource info for a given mip level specified in the address
// vgpr. No sampler. Returns 4 integer values into VGPRs 3-0:
// {num_mip_levels, depth, height, width}.
void
Inst_MIMG__IMAGE_GET_RESINFO::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_ATOMIC_SWAP class methods ---
Inst_MIMG__IMAGE_ATOMIC_SWAP::Inst_MIMG__IMAGE_ATOMIC_SWAP(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_atomic_swap")
{
setFlag(AtomicExch);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_ATOMIC_SWAP
Inst_MIMG__IMAGE_ATOMIC_SWAP::~Inst_MIMG__IMAGE_ATOMIC_SWAP()
{
} // ~Inst_MIMG__IMAGE_ATOMIC_SWAP
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = DATA;
// RETURN_DATA = tmp.
void
Inst_MIMG__IMAGE_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_ATOMIC_CMPSWAP class methods ---
Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::Inst_MIMG__IMAGE_ATOMIC_CMPSWAP(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_atomic_cmpswap")
{
setFlag(AtomicCAS);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_ATOMIC_CMPSWAP
Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::~Inst_MIMG__IMAGE_ATOMIC_CMPSWAP()
{
} // ~Inst_MIMG__IMAGE_ATOMIC_CMPSWAP
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// src = DATA[0];
// cmp = DATA[1];
// MEM[ADDR] = (tmp == cmp) ? src : tmp;
// RETURN_DATA[0] = tmp.
void
Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_ATOMIC_ADD class methods ---
Inst_MIMG__IMAGE_ATOMIC_ADD::Inst_MIMG__IMAGE_ATOMIC_ADD(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_atomic_add")
{
setFlag(AtomicAdd);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_ATOMIC_ADD
Inst_MIMG__IMAGE_ATOMIC_ADD::~Inst_MIMG__IMAGE_ATOMIC_ADD()
{
} // ~Inst_MIMG__IMAGE_ATOMIC_ADD
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] += DATA;
// RETURN_DATA = tmp.
void
Inst_MIMG__IMAGE_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_ATOMIC_SUB class methods ---
Inst_MIMG__IMAGE_ATOMIC_SUB::Inst_MIMG__IMAGE_ATOMIC_SUB(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_atomic_sub")
{
setFlag(AtomicSub);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_ATOMIC_SUB
Inst_MIMG__IMAGE_ATOMIC_SUB::~Inst_MIMG__IMAGE_ATOMIC_SUB()
{
} // ~Inst_MIMG__IMAGE_ATOMIC_SUB
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] -= DATA;
// RETURN_DATA = tmp.
void
Inst_MIMG__IMAGE_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_ATOMIC_SMIN class methods ---
Inst_MIMG__IMAGE_ATOMIC_SMIN::Inst_MIMG__IMAGE_ATOMIC_SMIN(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_atomic_smin")
{
setFlag(AtomicMin);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_ATOMIC_SMIN
Inst_MIMG__IMAGE_ATOMIC_SMIN::~Inst_MIMG__IMAGE_ATOMIC_SMIN()
{
} // ~Inst_MIMG__IMAGE_ATOMIC_SMIN
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
// RETURN_DATA = tmp.
void
Inst_MIMG__IMAGE_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_ATOMIC_UMIN class methods ---
Inst_MIMG__IMAGE_ATOMIC_UMIN::Inst_MIMG__IMAGE_ATOMIC_UMIN(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_atomic_umin")
{
setFlag(AtomicMin);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_ATOMIC_UMIN
Inst_MIMG__IMAGE_ATOMIC_UMIN::~Inst_MIMG__IMAGE_ATOMIC_UMIN()
{
} // ~Inst_MIMG__IMAGE_ATOMIC_UMIN
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
// RETURN_DATA = tmp.
void
Inst_MIMG__IMAGE_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_ATOMIC_SMAX class methods ---
Inst_MIMG__IMAGE_ATOMIC_SMAX::Inst_MIMG__IMAGE_ATOMIC_SMAX(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_atomic_smax")
{
setFlag(AtomicMax);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_ATOMIC_SMAX
Inst_MIMG__IMAGE_ATOMIC_SMAX::~Inst_MIMG__IMAGE_ATOMIC_SMAX()
{
} // ~Inst_MIMG__IMAGE_ATOMIC_SMAX
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
// RETURN_DATA = tmp.
void
Inst_MIMG__IMAGE_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_ATOMIC_UMAX class methods ---
Inst_MIMG__IMAGE_ATOMIC_UMAX::Inst_MIMG__IMAGE_ATOMIC_UMAX(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_atomic_umax")
{
setFlag(AtomicMax);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_ATOMIC_UMAX
Inst_MIMG__IMAGE_ATOMIC_UMAX::~Inst_MIMG__IMAGE_ATOMIC_UMAX()
{
} // ~Inst_MIMG__IMAGE_ATOMIC_UMAX
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
// RETURN_DATA = tmp.
void
Inst_MIMG__IMAGE_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_ATOMIC_AND class methods ---
Inst_MIMG__IMAGE_ATOMIC_AND::Inst_MIMG__IMAGE_ATOMIC_AND(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_atomic_and")
{
setFlag(AtomicAnd);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_ATOMIC_AND
Inst_MIMG__IMAGE_ATOMIC_AND::~Inst_MIMG__IMAGE_ATOMIC_AND()
{
} // ~Inst_MIMG__IMAGE_ATOMIC_AND
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] &= DATA;
// RETURN_DATA = tmp.
void
Inst_MIMG__IMAGE_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_ATOMIC_OR class methods ---
Inst_MIMG__IMAGE_ATOMIC_OR::Inst_MIMG__IMAGE_ATOMIC_OR(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_atomic_or")
{
setFlag(AtomicOr);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_ATOMIC_OR
Inst_MIMG__IMAGE_ATOMIC_OR::~Inst_MIMG__IMAGE_ATOMIC_OR()
{
} // ~Inst_MIMG__IMAGE_ATOMIC_OR
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] |= DATA;
// RETURN_DATA = tmp.
void
Inst_MIMG__IMAGE_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_ATOMIC_XOR class methods ---
Inst_MIMG__IMAGE_ATOMIC_XOR::Inst_MIMG__IMAGE_ATOMIC_XOR(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_atomic_xor")
{
setFlag(AtomicXor);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_ATOMIC_XOR
Inst_MIMG__IMAGE_ATOMIC_XOR::~Inst_MIMG__IMAGE_ATOMIC_XOR()
{
} // ~Inst_MIMG__IMAGE_ATOMIC_XOR
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] ^= DATA;
// RETURN_DATA = tmp.
void
Inst_MIMG__IMAGE_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_ATOMIC_INC class methods ---
Inst_MIMG__IMAGE_ATOMIC_INC::Inst_MIMG__IMAGE_ATOMIC_INC(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_atomic_inc")
{
setFlag(AtomicInc);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_ATOMIC_INC
Inst_MIMG__IMAGE_ATOMIC_INC::~Inst_MIMG__IMAGE_ATOMIC_INC()
{
} // ~Inst_MIMG__IMAGE_ATOMIC_INC
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
// RETURN_DATA = tmp.
void
Inst_MIMG__IMAGE_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_ATOMIC_DEC class methods ---
Inst_MIMG__IMAGE_ATOMIC_DEC::Inst_MIMG__IMAGE_ATOMIC_DEC(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_atomic_dec")
{
setFlag(AtomicDec);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_ATOMIC_DEC
Inst_MIMG__IMAGE_ATOMIC_DEC::~Inst_MIMG__IMAGE_ATOMIC_DEC()
{
} // ~Inst_MIMG__IMAGE_ATOMIC_DEC
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
// (unsigned compare); RETURN_DATA = tmp.
void
Inst_MIMG__IMAGE_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE class methods ---
Inst_MIMG__IMAGE_SAMPLE::Inst_MIMG__IMAGE_SAMPLE(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample")
{
} // Inst_MIMG__IMAGE_SAMPLE
Inst_MIMG__IMAGE_SAMPLE::~Inst_MIMG__IMAGE_SAMPLE()
{
} // ~Inst_MIMG__IMAGE_SAMPLE
// --- description from .arch file ---
// sample texture map.
void
Inst_MIMG__IMAGE_SAMPLE::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_CL class methods ---
Inst_MIMG__IMAGE_SAMPLE_CL::Inst_MIMG__IMAGE_SAMPLE_CL(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_cl")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_CL
Inst_MIMG__IMAGE_SAMPLE_CL::~Inst_MIMG__IMAGE_SAMPLE_CL()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_CL
// --- description from .arch file ---
// sample texture map, with LOD clamp specified in shader.
void
Inst_MIMG__IMAGE_SAMPLE_CL::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_D class methods ---
Inst_MIMG__IMAGE_SAMPLE_D::Inst_MIMG__IMAGE_SAMPLE_D(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_d")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_D
Inst_MIMG__IMAGE_SAMPLE_D::~Inst_MIMG__IMAGE_SAMPLE_D()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_D
// --- description from .arch file ---
// sample texture map, with user derivatives
void
Inst_MIMG__IMAGE_SAMPLE_D::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_D_CL class methods ---
Inst_MIMG__IMAGE_SAMPLE_D_CL::Inst_MIMG__IMAGE_SAMPLE_D_CL(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_d_cl")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_D_CL
Inst_MIMG__IMAGE_SAMPLE_D_CL::~Inst_MIMG__IMAGE_SAMPLE_D_CL()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_D_CL
// --- description from .arch file ---
// sample texture map, with LOD clamp specified in shader, with user
// --- derivatives.
void
Inst_MIMG__IMAGE_SAMPLE_D_CL::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_L class methods ---
Inst_MIMG__IMAGE_SAMPLE_L::Inst_MIMG__IMAGE_SAMPLE_L(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_l")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_L
Inst_MIMG__IMAGE_SAMPLE_L::~Inst_MIMG__IMAGE_SAMPLE_L()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_L
// --- description from .arch file ---
// sample texture map, with user LOD.
void
Inst_MIMG__IMAGE_SAMPLE_L::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_B class methods ---
Inst_MIMG__IMAGE_SAMPLE_B::Inst_MIMG__IMAGE_SAMPLE_B(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_b")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_B
Inst_MIMG__IMAGE_SAMPLE_B::~Inst_MIMG__IMAGE_SAMPLE_B()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_B
// --- description from .arch file ---
// sample texture map, with lod bias.
void
Inst_MIMG__IMAGE_SAMPLE_B::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_B_CL class methods ---
Inst_MIMG__IMAGE_SAMPLE_B_CL::Inst_MIMG__IMAGE_SAMPLE_B_CL(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_b_cl")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_B_CL
Inst_MIMG__IMAGE_SAMPLE_B_CL::~Inst_MIMG__IMAGE_SAMPLE_B_CL()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_B_CL
// --- description from .arch file ---
// sample texture map, with LOD clamp specified in shader, with lod bias.
void
Inst_MIMG__IMAGE_SAMPLE_B_CL::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_LZ class methods ---
Inst_MIMG__IMAGE_SAMPLE_LZ::Inst_MIMG__IMAGE_SAMPLE_LZ(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_lz")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_LZ
Inst_MIMG__IMAGE_SAMPLE_LZ::~Inst_MIMG__IMAGE_SAMPLE_LZ()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_LZ
// --- description from .arch file ---
// sample texture map, from level 0.
void
Inst_MIMG__IMAGE_SAMPLE_LZ::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_C class methods ---
Inst_MIMG__IMAGE_SAMPLE_C::Inst_MIMG__IMAGE_SAMPLE_C(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_c")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_C
Inst_MIMG__IMAGE_SAMPLE_C::~Inst_MIMG__IMAGE_SAMPLE_C()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_C
// --- description from .arch file ---
// sample texture map, with PCF.
void
Inst_MIMG__IMAGE_SAMPLE_C::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_C_CL class methods ---
Inst_MIMG__IMAGE_SAMPLE_C_CL::Inst_MIMG__IMAGE_SAMPLE_C_CL(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_c_cl")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_C_CL
Inst_MIMG__IMAGE_SAMPLE_C_CL::~Inst_MIMG__IMAGE_SAMPLE_C_CL()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_C_CL
// --- description from .arch file ---
// SAMPLE_C, with LOD clamp specified in shader.
void
Inst_MIMG__IMAGE_SAMPLE_C_CL::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_C_D class methods ---
Inst_MIMG__IMAGE_SAMPLE_C_D::Inst_MIMG__IMAGE_SAMPLE_C_D(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_c_d")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_C_D
Inst_MIMG__IMAGE_SAMPLE_C_D::~Inst_MIMG__IMAGE_SAMPLE_C_D()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_C_D
// --- description from .arch file ---
// SAMPLE_C, with user derivatives.
void
Inst_MIMG__IMAGE_SAMPLE_C_D::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_C_D_CL class methods ---
Inst_MIMG__IMAGE_SAMPLE_C_D_CL::Inst_MIMG__IMAGE_SAMPLE_C_D_CL(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_c_d_cl")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_C_D_CL
Inst_MIMG__IMAGE_SAMPLE_C_D_CL::~Inst_MIMG__IMAGE_SAMPLE_C_D_CL()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_C_D_CL
// --- description from .arch file ---
// SAMPLE_C, with LOD clamp specified in shader, with user derivatives.
void
Inst_MIMG__IMAGE_SAMPLE_C_D_CL::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_C_L class methods ---
Inst_MIMG__IMAGE_SAMPLE_C_L::Inst_MIMG__IMAGE_SAMPLE_C_L(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_c_l")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_C_L
Inst_MIMG__IMAGE_SAMPLE_C_L::~Inst_MIMG__IMAGE_SAMPLE_C_L()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_C_L
// --- description from .arch file ---
// SAMPLE_C, with user LOD.
void
Inst_MIMG__IMAGE_SAMPLE_C_L::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_C_B class methods ---
Inst_MIMG__IMAGE_SAMPLE_C_B::Inst_MIMG__IMAGE_SAMPLE_C_B(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_c_b")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_C_B
Inst_MIMG__IMAGE_SAMPLE_C_B::~Inst_MIMG__IMAGE_SAMPLE_C_B()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_C_B
// --- description from .arch file ---
// SAMPLE_C, with lod bias.
void
Inst_MIMG__IMAGE_SAMPLE_C_B::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_C_B_CL class methods ---
Inst_MIMG__IMAGE_SAMPLE_C_B_CL::Inst_MIMG__IMAGE_SAMPLE_C_B_CL(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_c_b_cl")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_C_B_CL
Inst_MIMG__IMAGE_SAMPLE_C_B_CL::~Inst_MIMG__IMAGE_SAMPLE_C_B_CL()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_C_B_CL
// --- description from .arch file ---
// SAMPLE_C, with LOD clamp specified in shader, with lod bias.
void
Inst_MIMG__IMAGE_SAMPLE_C_B_CL::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_C_LZ class methods ---
Inst_MIMG__IMAGE_SAMPLE_C_LZ::Inst_MIMG__IMAGE_SAMPLE_C_LZ(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_c_lz")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_C_LZ
Inst_MIMG__IMAGE_SAMPLE_C_LZ::~Inst_MIMG__IMAGE_SAMPLE_C_LZ()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_C_LZ
// --- description from .arch file ---
// SAMPLE_C, from level 0.
void
Inst_MIMG__IMAGE_SAMPLE_C_LZ::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_O class methods ---
Inst_MIMG__IMAGE_SAMPLE_O::Inst_MIMG__IMAGE_SAMPLE_O(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_O
Inst_MIMG__IMAGE_SAMPLE_O::~Inst_MIMG__IMAGE_SAMPLE_O()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_O
// --- description from .arch file ---
// sample texture map, with user offsets.
void
Inst_MIMG__IMAGE_SAMPLE_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_CL_O class methods ---
Inst_MIMG__IMAGE_SAMPLE_CL_O::Inst_MIMG__IMAGE_SAMPLE_CL_O(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_cl_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_CL_O
Inst_MIMG__IMAGE_SAMPLE_CL_O::~Inst_MIMG__IMAGE_SAMPLE_CL_O()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_CL_O
// --- description from .arch file ---
// SAMPLE_O with LOD clamp specified in shader.
void
Inst_MIMG__IMAGE_SAMPLE_CL_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_D_O class methods ---
Inst_MIMG__IMAGE_SAMPLE_D_O::Inst_MIMG__IMAGE_SAMPLE_D_O(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_d_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_D_O
Inst_MIMG__IMAGE_SAMPLE_D_O::~Inst_MIMG__IMAGE_SAMPLE_D_O()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_D_O
// --- description from .arch file ---
// SAMPLE_O, with user derivatives.
void
Inst_MIMG__IMAGE_SAMPLE_D_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_D_CL_O class methods ---
Inst_MIMG__IMAGE_SAMPLE_D_CL_O::Inst_MIMG__IMAGE_SAMPLE_D_CL_O(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_d_cl_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_D_CL_O
Inst_MIMG__IMAGE_SAMPLE_D_CL_O::~Inst_MIMG__IMAGE_SAMPLE_D_CL_O()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_D_CL_O
// --- description from .arch file ---
// SAMPLE_O, with LOD clamp specified in shader, with user derivatives.
void
Inst_MIMG__IMAGE_SAMPLE_D_CL_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_L_O class methods ---
Inst_MIMG__IMAGE_SAMPLE_L_O::Inst_MIMG__IMAGE_SAMPLE_L_O(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_l_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_L_O
Inst_MIMG__IMAGE_SAMPLE_L_O::~Inst_MIMG__IMAGE_SAMPLE_L_O()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_L_O
// --- description from .arch file ---
// SAMPLE_O, with user LOD.
void
Inst_MIMG__IMAGE_SAMPLE_L_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_B_O class methods ---
Inst_MIMG__IMAGE_SAMPLE_B_O::Inst_MIMG__IMAGE_SAMPLE_B_O(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_b_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_B_O
Inst_MIMG__IMAGE_SAMPLE_B_O::~Inst_MIMG__IMAGE_SAMPLE_B_O()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_B_O
// --- description from .arch file ---
// SAMPLE_O, with lod bias.
void
Inst_MIMG__IMAGE_SAMPLE_B_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_B_CL_O class methods ---
Inst_MIMG__IMAGE_SAMPLE_B_CL_O::Inst_MIMG__IMAGE_SAMPLE_B_CL_O(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_b_cl_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_B_CL_O
Inst_MIMG__IMAGE_SAMPLE_B_CL_O::~Inst_MIMG__IMAGE_SAMPLE_B_CL_O()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_B_CL_O
// --- description from .arch file ---
// SAMPLE_O, with LOD clamp specified in shader, with lod bias.
void
Inst_MIMG__IMAGE_SAMPLE_B_CL_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_LZ_O class methods ---
Inst_MIMG__IMAGE_SAMPLE_LZ_O::Inst_MIMG__IMAGE_SAMPLE_LZ_O(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_lz_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_LZ_O
Inst_MIMG__IMAGE_SAMPLE_LZ_O::~Inst_MIMG__IMAGE_SAMPLE_LZ_O()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_LZ_O
// --- description from .arch file ---
// SAMPLE_O, from level 0.
void
Inst_MIMG__IMAGE_SAMPLE_LZ_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_C_O class methods ---
Inst_MIMG__IMAGE_SAMPLE_C_O::Inst_MIMG__IMAGE_SAMPLE_C_O(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_c_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_C_O
Inst_MIMG__IMAGE_SAMPLE_C_O::~Inst_MIMG__IMAGE_SAMPLE_C_O()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_C_O
// --- description from .arch file ---
// SAMPLE_C with user specified offsets.
void
Inst_MIMG__IMAGE_SAMPLE_C_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_C_CL_O class methods ---
Inst_MIMG__IMAGE_SAMPLE_C_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_CL_O(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_c_cl_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_C_CL_O
Inst_MIMG__IMAGE_SAMPLE_C_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_CL_O()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_C_CL_O
// --- description from .arch file ---
// SAMPLE_C_O, with LOD clamp specified in shader.
void
Inst_MIMG__IMAGE_SAMPLE_C_CL_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_C_D_O class methods ---
Inst_MIMG__IMAGE_SAMPLE_C_D_O::Inst_MIMG__IMAGE_SAMPLE_C_D_O(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_c_d_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_C_D_O
Inst_MIMG__IMAGE_SAMPLE_C_D_O::~Inst_MIMG__IMAGE_SAMPLE_C_D_O()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_C_D_O
// --- description from .arch file ---
// SAMPLE_C_O, with user derivatives.
void
Inst_MIMG__IMAGE_SAMPLE_C_D_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O class methods ---
Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_c_d_cl_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O
Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O
// --- description from .arch file ---
// SAMPLE_C_O, with LOD clamp specified in shader, with user derivatives.
void
Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_C_L_O class methods ---
Inst_MIMG__IMAGE_SAMPLE_C_L_O::Inst_MIMG__IMAGE_SAMPLE_C_L_O(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_c_l_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_C_L_O
Inst_MIMG__IMAGE_SAMPLE_C_L_O::~Inst_MIMG__IMAGE_SAMPLE_C_L_O()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_C_L_O
// --- description from .arch file ---
// SAMPLE_C_O, with user LOD.
void
Inst_MIMG__IMAGE_SAMPLE_C_L_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_C_B_O class methods ---
Inst_MIMG__IMAGE_SAMPLE_C_B_O::Inst_MIMG__IMAGE_SAMPLE_C_B_O(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_c_b_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_C_B_O
Inst_MIMG__IMAGE_SAMPLE_C_B_O::~Inst_MIMG__IMAGE_SAMPLE_C_B_O()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_C_B_O
// --- description from .arch file ---
// SAMPLE_C_O, with lod bias.
void
Inst_MIMG__IMAGE_SAMPLE_C_B_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O class methods ---
Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_c_b_cl_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O
Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O
// --- description from .arch file ---
// SAMPLE_C_O, with LOD clamp specified in shader, with lod bias.
void
Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_C_LZ_O class methods ---
Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::Inst_MIMG__IMAGE_SAMPLE_C_LZ_O(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_c_lz_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_C_LZ_O
Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::~Inst_MIMG__IMAGE_SAMPLE_C_LZ_O()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_C_LZ_O
// --- description from .arch file ---
// SAMPLE_C_O, from level 0.
void
Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4 class methods ---
Inst_MIMG__IMAGE_GATHER4::Inst_MIMG__IMAGE_GATHER4(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_gather4")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4
Inst_MIMG__IMAGE_GATHER4::~Inst_MIMG__IMAGE_GATHER4()
{
} // ~Inst_MIMG__IMAGE_GATHER4
// --- description from .arch file ---
// gather 4 single component elements (2x2).
void
Inst_MIMG__IMAGE_GATHER4::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_CL class methods ---
Inst_MIMG__IMAGE_GATHER4_CL::Inst_MIMG__IMAGE_GATHER4_CL(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_gather4_cl")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_CL
Inst_MIMG__IMAGE_GATHER4_CL::~Inst_MIMG__IMAGE_GATHER4_CL()
{
} // ~Inst_MIMG__IMAGE_GATHER4_CL
// --- description from .arch file ---
// gather 4 single component elements (2x2) with user LOD clamp.
void
Inst_MIMG__IMAGE_GATHER4_CL::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_L class methods ---
Inst_MIMG__IMAGE_GATHER4_L::Inst_MIMG__IMAGE_GATHER4_L(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_gather4_l")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_L
Inst_MIMG__IMAGE_GATHER4_L::~Inst_MIMG__IMAGE_GATHER4_L()
{
} // ~Inst_MIMG__IMAGE_GATHER4_L
// --- description from .arch file ---
// gather 4 single component elements (2x2) with user LOD.
void
Inst_MIMG__IMAGE_GATHER4_L::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_B class methods ---
Inst_MIMG__IMAGE_GATHER4_B::Inst_MIMG__IMAGE_GATHER4_B(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_gather4_b")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_B
Inst_MIMG__IMAGE_GATHER4_B::~Inst_MIMG__IMAGE_GATHER4_B()
{
} // ~Inst_MIMG__IMAGE_GATHER4_B
// --- description from .arch file ---
// gather 4 single component elements (2x2) with user bias.
void
Inst_MIMG__IMAGE_GATHER4_B::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_B_CL class methods ---
Inst_MIMG__IMAGE_GATHER4_B_CL::Inst_MIMG__IMAGE_GATHER4_B_CL(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_gather4_b_cl")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_B_CL
Inst_MIMG__IMAGE_GATHER4_B_CL::~Inst_MIMG__IMAGE_GATHER4_B_CL()
{
} // ~Inst_MIMG__IMAGE_GATHER4_B_CL
// --- description from .arch file ---
// gather 4 single component elements (2x2) with user bias and clamp.
void
Inst_MIMG__IMAGE_GATHER4_B_CL::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_LZ class methods ---
Inst_MIMG__IMAGE_GATHER4_LZ::Inst_MIMG__IMAGE_GATHER4_LZ(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_gather4_lz")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_LZ
Inst_MIMG__IMAGE_GATHER4_LZ::~Inst_MIMG__IMAGE_GATHER4_LZ()
{
} // ~Inst_MIMG__IMAGE_GATHER4_LZ
// --- description from .arch file ---
// gather 4 single component elements (2x2) at level 0.
void
Inst_MIMG__IMAGE_GATHER4_LZ::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_C class methods ---
Inst_MIMG__IMAGE_GATHER4_C::Inst_MIMG__IMAGE_GATHER4_C(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_gather4_c")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_C
Inst_MIMG__IMAGE_GATHER4_C::~Inst_MIMG__IMAGE_GATHER4_C()
{
} // ~Inst_MIMG__IMAGE_GATHER4_C
// --- description from .arch file ---
// gather 4 single component elements (2x2) with PCF.
void
Inst_MIMG__IMAGE_GATHER4_C::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_C_CL class methods ---
Inst_MIMG__IMAGE_GATHER4_C_CL::Inst_MIMG__IMAGE_GATHER4_C_CL(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_gather4_c_cl")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_C_CL
Inst_MIMG__IMAGE_GATHER4_C_CL::~Inst_MIMG__IMAGE_GATHER4_C_CL()
{
} // ~Inst_MIMG__IMAGE_GATHER4_C_CL
// --- description from .arch file ---
// gather 4 single component elements (2x2) with user LOD clamp and PCF.
void
Inst_MIMG__IMAGE_GATHER4_C_CL::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_C_L class methods ---
Inst_MIMG__IMAGE_GATHER4_C_L::Inst_MIMG__IMAGE_GATHER4_C_L(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_gather4_c_l")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_C_L
Inst_MIMG__IMAGE_GATHER4_C_L::~Inst_MIMG__IMAGE_GATHER4_C_L()
{
} // ~Inst_MIMG__IMAGE_GATHER4_C_L
// --- description from .arch file ---
// gather 4 single component elements (2x2) with user LOD and PCF.
void
Inst_MIMG__IMAGE_GATHER4_C_L::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_C_B class methods ---
Inst_MIMG__IMAGE_GATHER4_C_B::Inst_MIMG__IMAGE_GATHER4_C_B(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_gather4_c_b")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_C_B
Inst_MIMG__IMAGE_GATHER4_C_B::~Inst_MIMG__IMAGE_GATHER4_C_B()
{
} // ~Inst_MIMG__IMAGE_GATHER4_C_B
// --- description from .arch file ---
// gather 4 single component elements (2x2) with user bias and PCF.
void
Inst_MIMG__IMAGE_GATHER4_C_B::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_C_B_CL class methods ---
Inst_MIMG__IMAGE_GATHER4_C_B_CL::Inst_MIMG__IMAGE_GATHER4_C_B_CL(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_gather4_c_b_cl")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_C_B_CL
Inst_MIMG__IMAGE_GATHER4_C_B_CL::~Inst_MIMG__IMAGE_GATHER4_C_B_CL()
{
} // ~Inst_MIMG__IMAGE_GATHER4_C_B_CL
// --- description from .arch file ---
// gather 4 single component elements (2x2) with user bias, clamp and PCF.
void
Inst_MIMG__IMAGE_GATHER4_C_B_CL::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_C_LZ class methods ---
Inst_MIMG__IMAGE_GATHER4_C_LZ::Inst_MIMG__IMAGE_GATHER4_C_LZ(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_gather4_c_lz")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_C_LZ
Inst_MIMG__IMAGE_GATHER4_C_LZ::~Inst_MIMG__IMAGE_GATHER4_C_LZ()
{
} // ~Inst_MIMG__IMAGE_GATHER4_C_LZ
// --- description from .arch file ---
// gather 4 single component elements (2x2) at level 0, with PCF.
void
Inst_MIMG__IMAGE_GATHER4_C_LZ::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_O class methods ---
Inst_MIMG__IMAGE_GATHER4_O::Inst_MIMG__IMAGE_GATHER4_O(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_gather4_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_O
Inst_MIMG__IMAGE_GATHER4_O::~Inst_MIMG__IMAGE_GATHER4_O()
{
} // ~Inst_MIMG__IMAGE_GATHER4_O
// --- description from .arch file ---
// GATHER4, with user offsets.
void
Inst_MIMG__IMAGE_GATHER4_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_CL_O class methods ---
Inst_MIMG__IMAGE_GATHER4_CL_O::Inst_MIMG__IMAGE_GATHER4_CL_O(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_gather4_cl_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_CL_O
Inst_MIMG__IMAGE_GATHER4_CL_O::~Inst_MIMG__IMAGE_GATHER4_CL_O()
{
} // ~Inst_MIMG__IMAGE_GATHER4_CL_O
// --- description from .arch file ---
// GATHER4_CL, with user offsets.
void
Inst_MIMG__IMAGE_GATHER4_CL_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_L_O class methods ---
Inst_MIMG__IMAGE_GATHER4_L_O::Inst_MIMG__IMAGE_GATHER4_L_O(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_gather4_l_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_L_O
Inst_MIMG__IMAGE_GATHER4_L_O::~Inst_MIMG__IMAGE_GATHER4_L_O()
{
} // ~Inst_MIMG__IMAGE_GATHER4_L_O
// --- description from .arch file ---
// GATHER4_L, with user offsets.
void
Inst_MIMG__IMAGE_GATHER4_L_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_B_O class methods ---
Inst_MIMG__IMAGE_GATHER4_B_O::Inst_MIMG__IMAGE_GATHER4_B_O(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_gather4_b_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_B_O
Inst_MIMG__IMAGE_GATHER4_B_O::~Inst_MIMG__IMAGE_GATHER4_B_O()
{
} // ~Inst_MIMG__IMAGE_GATHER4_B_O
// --- description from .arch file ---
// GATHER4_B, with user offsets.
void
Inst_MIMG__IMAGE_GATHER4_B_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_B_CL_O class methods ---
Inst_MIMG__IMAGE_GATHER4_B_CL_O::Inst_MIMG__IMAGE_GATHER4_B_CL_O(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_gather4_b_cl_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_B_CL_O
Inst_MIMG__IMAGE_GATHER4_B_CL_O::~Inst_MIMG__IMAGE_GATHER4_B_CL_O()
{
} // ~Inst_MIMG__IMAGE_GATHER4_B_CL_O
// --- description from .arch file ---
// GATHER4_B_CL, with user offsets.
void
Inst_MIMG__IMAGE_GATHER4_B_CL_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_LZ_O class methods ---
Inst_MIMG__IMAGE_GATHER4_LZ_O::Inst_MIMG__IMAGE_GATHER4_LZ_O(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_gather4_lz_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_LZ_O
Inst_MIMG__IMAGE_GATHER4_LZ_O::~Inst_MIMG__IMAGE_GATHER4_LZ_O()
{
} // ~Inst_MIMG__IMAGE_GATHER4_LZ_O
// --- description from .arch file ---
// GATHER4_LZ, with user offsets.
void
Inst_MIMG__IMAGE_GATHER4_LZ_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_C_O class methods ---
Inst_MIMG__IMAGE_GATHER4_C_O::Inst_MIMG__IMAGE_GATHER4_C_O(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_gather4_c_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_C_O
Inst_MIMG__IMAGE_GATHER4_C_O::~Inst_MIMG__IMAGE_GATHER4_C_O()
{
} // ~Inst_MIMG__IMAGE_GATHER4_C_O
// --- description from .arch file ---
// GATHER4_C, with user offsets.
void
Inst_MIMG__IMAGE_GATHER4_C_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_C_CL_O class methods ---
Inst_MIMG__IMAGE_GATHER4_C_CL_O::Inst_MIMG__IMAGE_GATHER4_C_CL_O(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_gather4_c_cl_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_C_CL_O
Inst_MIMG__IMAGE_GATHER4_C_CL_O::~Inst_MIMG__IMAGE_GATHER4_C_CL_O()
{
} // ~Inst_MIMG__IMAGE_GATHER4_C_CL_O
// --- description from .arch file ---
// GATHER4_C_CL, with user offsets.
void
Inst_MIMG__IMAGE_GATHER4_C_CL_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_C_L_O class methods ---
Inst_MIMG__IMAGE_GATHER4_C_L_O::Inst_MIMG__IMAGE_GATHER4_C_L_O(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_gather4_c_l_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_C_L_O
Inst_MIMG__IMAGE_GATHER4_C_L_O::~Inst_MIMG__IMAGE_GATHER4_C_L_O()
{
} // ~Inst_MIMG__IMAGE_GATHER4_C_L_O
// --- description from .arch file ---
// GATHER4_C_L, with user offsets.
void
Inst_MIMG__IMAGE_GATHER4_C_L_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_C_B_O class methods ---
Inst_MIMG__IMAGE_GATHER4_C_B_O::Inst_MIMG__IMAGE_GATHER4_C_B_O(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_gather4_c_b_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_C_B_O
Inst_MIMG__IMAGE_GATHER4_C_B_O::~Inst_MIMG__IMAGE_GATHER4_C_B_O()
{
} // ~Inst_MIMG__IMAGE_GATHER4_C_B_O
// --- description from .arch file ---
// GATHER4_C_B, with user offsets.
void
Inst_MIMG__IMAGE_GATHER4_C_B_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_C_B_CL_O class methods ---
Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::Inst_MIMG__IMAGE_GATHER4_C_B_CL_O(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_gather4_c_b_cl_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_C_B_CL_O
Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::~Inst_MIMG__IMAGE_GATHER4_C_B_CL_O()
{
} // ~Inst_MIMG__IMAGE_GATHER4_C_B_CL_O
// --- description from .arch file ---
// GATHER4_C_B_CL, with user offsets.
void
Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_C_LZ_O class methods ---
Inst_MIMG__IMAGE_GATHER4_C_LZ_O::Inst_MIMG__IMAGE_GATHER4_C_LZ_O(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_gather4_c_lz_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_C_LZ_O
Inst_MIMG__IMAGE_GATHER4_C_LZ_O::~Inst_MIMG__IMAGE_GATHER4_C_LZ_O()
{
} // ~Inst_MIMG__IMAGE_GATHER4_C_LZ_O
// --- description from .arch file ---
// GATHER4_C_LZ, with user offsets.
void
Inst_MIMG__IMAGE_GATHER4_C_LZ_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GET_LOD class methods ---
Inst_MIMG__IMAGE_GET_LOD::Inst_MIMG__IMAGE_GET_LOD(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_get_lod")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GET_LOD
Inst_MIMG__IMAGE_GET_LOD::~Inst_MIMG__IMAGE_GET_LOD()
{
} // ~Inst_MIMG__IMAGE_GET_LOD
// --- description from .arch file ---
// Return calculated LOD. Vdata gets 2 32bit integer values: { rawLOD,
// --- clampedLOD }.
void
Inst_MIMG__IMAGE_GET_LOD::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_CD class methods ---
Inst_MIMG__IMAGE_SAMPLE_CD::Inst_MIMG__IMAGE_SAMPLE_CD(InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_cd")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_CD
Inst_MIMG__IMAGE_SAMPLE_CD::~Inst_MIMG__IMAGE_SAMPLE_CD()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_CD
// --- description from .arch file ---
// sample texture map, with user derivatives (LOD per quad)
void
Inst_MIMG__IMAGE_SAMPLE_CD::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_CD_CL class methods ---
Inst_MIMG__IMAGE_SAMPLE_CD_CL::Inst_MIMG__IMAGE_SAMPLE_CD_CL(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_cd_cl")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_CD_CL
Inst_MIMG__IMAGE_SAMPLE_CD_CL::~Inst_MIMG__IMAGE_SAMPLE_CD_CL()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_CD_CL
// --- description from .arch file ---
// sample texture map, with LOD clamp specified in shader, with user
// --- derivatives (LOD per quad).
void
Inst_MIMG__IMAGE_SAMPLE_CD_CL::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_C_CD class methods ---
Inst_MIMG__IMAGE_SAMPLE_C_CD::Inst_MIMG__IMAGE_SAMPLE_C_CD(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_c_cd")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_C_CD
Inst_MIMG__IMAGE_SAMPLE_C_CD::~Inst_MIMG__IMAGE_SAMPLE_C_CD()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_C_CD
// --- description from .arch file ---
// SAMPLE_C, with user derivatives (LOD per quad).
void
Inst_MIMG__IMAGE_SAMPLE_C_CD::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_C_CD_CL class methods ---
Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::Inst_MIMG__IMAGE_SAMPLE_C_CD_CL(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_c_cd_cl")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_C_CD_CL
Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL
// --- description from .arch file ---
// SAMPLE_C, with LOD clamp specified in shader, with user derivatives
// (LOD per quad).
void
Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_CD_O class methods ---
Inst_MIMG__IMAGE_SAMPLE_CD_O::Inst_MIMG__IMAGE_SAMPLE_CD_O(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_cd_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_CD_O
Inst_MIMG__IMAGE_SAMPLE_CD_O::~Inst_MIMG__IMAGE_SAMPLE_CD_O()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_CD_O
// --- description from .arch file ---
// SAMPLE_O, with user derivatives (LOD per quad).
void
Inst_MIMG__IMAGE_SAMPLE_CD_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_CD_CL_O class methods ---
Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::Inst_MIMG__IMAGE_SAMPLE_CD_CL_O(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_cd_cl_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_CD_CL_O
Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::~Inst_MIMG__IMAGE_SAMPLE_CD_CL_O()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_CD_CL_O
// --- description from .arch file ---
// SAMPLE_O, with LOD clamp specified in shader, with user derivatives
// (LOD per quad).
void
Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_C_CD_O class methods ---
Inst_MIMG__IMAGE_SAMPLE_C_CD_O::Inst_MIMG__IMAGE_SAMPLE_C_CD_O(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_c_cd_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_C_CD_O
Inst_MIMG__IMAGE_SAMPLE_C_CD_O::~Inst_MIMG__IMAGE_SAMPLE_C_CD_O()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_O
// --- description from .arch file ---
// SAMPLE_C_O, with user derivatives (LOD per quad).
void
Inst_MIMG__IMAGE_SAMPLE_C_CD_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O class methods ---
Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O(
InFmt_MIMG *iFmt)
: Inst_MIMG(iFmt, "image_sample_c_cd_cl_o")
{
setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O
Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O
// --- description from .arch file ---
// SAMPLE_C_O, with LOD clamp specified in shader, with user derivatives
// (LOD per quad).
void
Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_EXP__EXP class methods ---
Inst_EXP__EXP::Inst_EXP__EXP(InFmt_EXP *iFmt)
: Inst_EXP(iFmt, "exp")
{
} // Inst_EXP__EXP
Inst_EXP__EXP::~Inst_EXP__EXP()
{
} // ~Inst_EXP__EXP
// --- description from .arch file ---
// Export through SX.
void
Inst_EXP__EXP::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_FLAT__FLAT_LOAD_UBYTE class methods ---
Inst_FLAT__FLAT_LOAD_UBYTE::Inst_FLAT__FLAT_LOAD_UBYTE(InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_load_ubyte")
{
setFlag(MemoryRef);
setFlag(Load);
} // Inst_FLAT__FLAT_LOAD_UBYTE
Inst_FLAT__FLAT_LOAD_UBYTE::~Inst_FLAT__FLAT_LOAD_UBYTE()
{
} // ~Inst_FLAT__FLAT_LOAD_UBYTE
// --- description from .arch file ---
// Untyped buffer load unsigned byte (zero extend to VGPR destination).
void
Inst_FLAT__FLAT_LOAD_UBYTE::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
addr.read();
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
issueRequestHelper(gpuDynInst);
} // execute
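// Note on the early-out above: a FLAT instruction is counted against
// both the VMEM and LGKM issue counters when scheduled, since a flat
// address may resolve to either global memory or LDS. If the exec
// mask is all zero no request is ever sent, so both counters are
// rolled back here; the flat store variants below also roll back the
// export counter (decExpInstsIssued).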
void
Inst_FLAT__FLAT_LOAD_UBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemRead<VecElemU8>(gpuDynInst);
} // initiateAcc
void
Inst_FLAT__FLAT_LOAD_UBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
{
VecOperandU32 vdst(gpuDynInst, extData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU8*>(
gpuDynInst->d_data))[lane]);
}
}
vdst.write();
} // completeAcc
// --- Inst_FLAT__FLAT_LOAD_SBYTE class methods ---
Inst_FLAT__FLAT_LOAD_SBYTE::Inst_FLAT__FLAT_LOAD_SBYTE(InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_load_sbyte")
{
setFlag(MemoryRef);
setFlag(Load);
} // Inst_FLAT__FLAT_LOAD_SBYTE
Inst_FLAT__FLAT_LOAD_SBYTE::~Inst_FLAT__FLAT_LOAD_SBYTE()
{
} // ~Inst_FLAT__FLAT_LOAD_SBYTE
// --- description from .arch file ---
// Untyped buffer load signed byte (sign extend to VGPR destination).
void
Inst_FLAT__FLAT_LOAD_SBYTE::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_FLAT__FLAT_LOAD_SBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_FLAT__FLAT_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
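// flat_load_sbyte is currently a stub (execute() panics). A
// sign-extending implementation would mirror FLAT_LOAD_UBYTE above,
// reading the staged bytes through the signed element type. A sketch
// only, assuming the usual d_data staging and the signed typedefs
// (VecElemI8/VecElemI32) that parallel the unsigned ones used above:
//
//     VecOperandU32 vdst(gpuDynInst, extData.VDST);
//     for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
//         if (gpuDynInst->exec_mask[lane]) {
//             // cast to signed byte first so the widening conversion
//             // sign extends into the 32-bit destination
//             vdst[lane] = (VecElemI32)((reinterpret_cast<VecElemI8*>(
//                 gpuDynInst->d_data))[lane]);
//         }
//     }
//     vdst.write();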
// --- Inst_FLAT__FLAT_LOAD_USHORT class methods ---
Inst_FLAT__FLAT_LOAD_USHORT::Inst_FLAT__FLAT_LOAD_USHORT(InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_load_ushort")
{
setFlag(MemoryRef);
setFlag(Load);
} // Inst_FLAT__FLAT_LOAD_USHORT
Inst_FLAT__FLAT_LOAD_USHORT::~Inst_FLAT__FLAT_LOAD_USHORT()
{
} // ~Inst_FLAT__FLAT_LOAD_USHORT
// --- description from .arch file ---
// Untyped buffer load unsigned short (zero extend to VGPR destination).
void
Inst_FLAT__FLAT_LOAD_USHORT::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
addr.read();
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
issueRequestHelper(gpuDynInst);
} // execute
void
Inst_FLAT__FLAT_LOAD_USHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemRead<VecElemU16>(gpuDynInst);
} // initiateAcc
void
Inst_FLAT__FLAT_LOAD_USHORT::completeAcc(GPUDynInstPtr gpuDynInst)
{
VecOperandU32 vdst(gpuDynInst, extData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU16*>(
gpuDynInst->d_data))[lane]);
}
}
vdst.write();
} // completeAcc
// --- Inst_FLAT__FLAT_LOAD_SSHORT class methods ---
Inst_FLAT__FLAT_LOAD_SSHORT::Inst_FLAT__FLAT_LOAD_SSHORT(InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_load_sshort")
{
setFlag(MemoryRef);
setFlag(Load);
} // Inst_FLAT__FLAT_LOAD_SSHORT
Inst_FLAT__FLAT_LOAD_SSHORT::~Inst_FLAT__FLAT_LOAD_SSHORT()
{
} // ~Inst_FLAT__FLAT_LOAD_SSHORT
// --- description from .arch file ---
// Untyped buffer load signed short (sign extend to VGPR destination).
void
Inst_FLAT__FLAT_LOAD_SSHORT::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
void
Inst_FLAT__FLAT_LOAD_SSHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
{
} // initiateAcc
void
Inst_FLAT__FLAT_LOAD_SSHORT::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
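// flat_load_sshort is likewise a stub; a working version would follow
// the FLAT_LOAD_USHORT pattern above but cast through a signed 16-bit
// element type so the loaded value is sign extended into the
// destination VGPR (see the flat_load_sbyte sketch above).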
// --- Inst_FLAT__FLAT_LOAD_DWORD class methods ---
Inst_FLAT__FLAT_LOAD_DWORD::Inst_FLAT__FLAT_LOAD_DWORD(InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_load_dword")
{
setFlag(MemoryRef);
setFlag(Load);
} // Inst_FLAT__FLAT_LOAD_DWORD
Inst_FLAT__FLAT_LOAD_DWORD::~Inst_FLAT__FLAT_LOAD_DWORD()
{
} // ~Inst_FLAT__FLAT_LOAD_DWORD
// --- description from .arch file ---
// Untyped buffer load dword.
void
Inst_FLAT__FLAT_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
addr.read();
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
issueRequestHelper(gpuDynInst);
} // execute
void
Inst_FLAT__FLAT_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemRead<VecElemU32>(gpuDynInst);
} // initiateAcc
void
Inst_FLAT__FLAT_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
{
VecOperandU32 vdst(gpuDynInst, extData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
vdst[lane] = (reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane];
}
}
vdst.write();
} // completeAcc
// --- Inst_FLAT__FLAT_LOAD_DWORDX2 class methods ---
Inst_FLAT__FLAT_LOAD_DWORDX2::Inst_FLAT__FLAT_LOAD_DWORDX2(
InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_load_dwordx2")
{
setFlag(MemoryRef);
setFlag(Load);
} // Inst_FLAT__FLAT_LOAD_DWORDX2
Inst_FLAT__FLAT_LOAD_DWORDX2::~Inst_FLAT__FLAT_LOAD_DWORDX2()
{
} // ~Inst_FLAT__FLAT_LOAD_DWORDX2
// --- description from .arch file ---
// Untyped buffer load 2 dwords.
void
Inst_FLAT__FLAT_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
addr.read();
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
issueRequestHelper(gpuDynInst);
} // execute
void
Inst_FLAT__FLAT_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemRead<VecElemU64>(gpuDynInst);
} // initiateAcc
void
Inst_FLAT__FLAT_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
{
VecOperandU64 vdst(gpuDynInst, extData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
vdst[lane] = (reinterpret_cast<VecElemU64*>(
gpuDynInst->d_data))[lane];
}
}
vdst.write();
} // completeAcc
// --- Inst_FLAT__FLAT_LOAD_DWORDX3 class methods ---
Inst_FLAT__FLAT_LOAD_DWORDX3::Inst_FLAT__FLAT_LOAD_DWORDX3(
InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_load_dwordx3")
{
setFlag(MemoryRef);
setFlag(Load);
} // Inst_FLAT__FLAT_LOAD_DWORDX3
Inst_FLAT__FLAT_LOAD_DWORDX3::~Inst_FLAT__FLAT_LOAD_DWORDX3()
{
} // ~Inst_FLAT__FLAT_LOAD_DWORDX3
// --- description from .arch file ---
// Untyped buffer load 3 dwords.
void
Inst_FLAT__FLAT_LOAD_DWORDX3::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
addr.read();
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
issueRequestHelper(gpuDynInst);
} // execute
void
Inst_FLAT__FLAT_LOAD_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemRead<3>(gpuDynInst);
} // initiateAcc
void
Inst_FLAT__FLAT_LOAD_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst)
{
VecOperandU32 vdst0(gpuDynInst, extData.VDST);
VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
vdst0[lane] = (reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 3];
vdst1[lane] = (reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 3 + 1];
vdst2[lane] = (reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 3 + 2];
}
}
vdst0.write();
vdst1.write();
vdst2.write();
} // completeAcc
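// Note on the indexing above: d_data for a dwordx3 load is packed
// lane-major, so dword i of lane l lives at flat index l * 3 + i.
// The dwordx4 variant below uses the same layout with stride 4.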
// --- Inst_FLAT__FLAT_LOAD_DWORDX4 class methods ---
Inst_FLAT__FLAT_LOAD_DWORDX4::Inst_FLAT__FLAT_LOAD_DWORDX4(
InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_load_dwordx4")
{
setFlag(MemoryRef);
setFlag(Load);
} // Inst_FLAT__FLAT_LOAD_DWORDX4
Inst_FLAT__FLAT_LOAD_DWORDX4::~Inst_FLAT__FLAT_LOAD_DWORDX4()
{
} // ~Inst_FLAT__FLAT_LOAD_DWORDX4
// --- description from .arch file ---
// Untyped buffer load 4 dwords.
void
Inst_FLAT__FLAT_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
addr.read();
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
issueRequestHelper(gpuDynInst);
} // execute
void
Inst_FLAT__FLAT_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemRead<4>(gpuDynInst);
} // initiateAcc
void
Inst_FLAT__FLAT_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
{
VecOperandU32 vdst0(gpuDynInst, extData.VDST);
VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2);
VecOperandU32 vdst3(gpuDynInst, extData.VDST + 3);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
vdst0[lane] = (reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 4];
vdst1[lane] = (reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 4 + 1];
vdst2[lane] = (reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 4 + 2];
vdst3[lane] = (reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 4 + 3];
}
}
vdst0.write();
vdst1.write();
vdst2.write();
vdst3.write();
} // completeAcc
// --- Inst_FLAT__FLAT_STORE_BYTE class methods ---
Inst_FLAT__FLAT_STORE_BYTE::Inst_FLAT__FLAT_STORE_BYTE(InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_store_byte")
{
setFlag(MemoryRef);
setFlag(Store);
} // Inst_FLAT__FLAT_STORE_BYTE
Inst_FLAT__FLAT_STORE_BYTE::~Inst_FLAT__FLAT_STORE_BYTE()
{
} // ~Inst_FLAT__FLAT_STORE_BYTE
// --- description from .arch file ---
// Untyped buffer store byte.
void
Inst_FLAT__FLAT_STORE_BYTE::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
wf->decExpInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
ConstVecOperandU8 data(gpuDynInst, extData.DATA);
addr.read();
data.read();
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
(reinterpret_cast<VecElemU8*>(gpuDynInst->d_data))[lane]
= data[lane];
}
}
issueRequestHelper(gpuDynInst);
} // execute
void
Inst_FLAT__FLAT_STORE_BYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemWrite<VecElemU8>(gpuDynInst);
} // initiateAcc
void
Inst_FLAT__FLAT_STORE_BYTE::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
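// Stores need no completeAcc work: the lane data is packed into
// d_data during execute(), written to memory by initMemWrite in
// initiateAcc(), and nothing is returned to the VGPRs. The remaining
// flat store variants follow the same split.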
// --- Inst_FLAT__FLAT_STORE_SHORT class methods ---
Inst_FLAT__FLAT_STORE_SHORT::Inst_FLAT__FLAT_STORE_SHORT(InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_store_short")
{
setFlag(MemoryRef);
setFlag(Store);
} // Inst_FLAT__FLAT_STORE_SHORT
Inst_FLAT__FLAT_STORE_SHORT::~Inst_FLAT__FLAT_STORE_SHORT()
{
} // ~Inst_FLAT__FLAT_STORE_SHORT
// --- description from .arch file ---
// Untyped buffer store short.
void
Inst_FLAT__FLAT_STORE_SHORT::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
wf->decExpInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
ConstVecOperandU16 data(gpuDynInst, extData.DATA);
addr.read();
data.read();
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
(reinterpret_cast<VecElemU16*>(gpuDynInst->d_data))[lane]
= data[lane];
}
}
issueRequestHelper(gpuDynInst);
} // execute
void
Inst_FLAT__FLAT_STORE_SHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemWrite<VecElemU16>(gpuDynInst);
} // initiateAcc
void
Inst_FLAT__FLAT_STORE_SHORT::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_FLAT__FLAT_STORE_DWORD class methods ---
Inst_FLAT__FLAT_STORE_DWORD::Inst_FLAT__FLAT_STORE_DWORD(InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_store_dword")
{
setFlag(MemoryRef);
setFlag(Store);
} // Inst_FLAT__FLAT_STORE_DWORD
Inst_FLAT__FLAT_STORE_DWORD::~Inst_FLAT__FLAT_STORE_DWORD()
{
} // ~Inst_FLAT__FLAT_STORE_DWORD
// --- description from .arch file ---
// Untyped buffer store dword.
void
Inst_FLAT__FLAT_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
wf->decExpInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
ConstVecOperandU32 data(gpuDynInst, extData.DATA);
addr.read();
data.read();
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
(reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane]
= data[lane];
}
}
issueRequestHelper(gpuDynInst);
} // execute
void
Inst_FLAT__FLAT_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemWrite<VecElemU32>(gpuDynInst);
} // initiateAcc
void
Inst_FLAT__FLAT_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_FLAT__FLAT_STORE_DWORDX2 class methods ---
Inst_FLAT__FLAT_STORE_DWORDX2::Inst_FLAT__FLAT_STORE_DWORDX2(
InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_store_dwordx2")
{
setFlag(MemoryRef);
setFlag(Store);
} // Inst_FLAT__FLAT_STORE_DWORDX2
Inst_FLAT__FLAT_STORE_DWORDX2::~Inst_FLAT__FLAT_STORE_DWORDX2()
{
} // ~Inst_FLAT__FLAT_STORE_DWORDX2
// --- description from .arch file ---
// Untyped buffer store 2 dwords.
void
Inst_FLAT__FLAT_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
wf->decExpInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
ConstVecOperandU64 data(gpuDynInst, extData.DATA);
addr.read();
data.read();
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
(reinterpret_cast<VecElemU64*>(gpuDynInst->d_data))[lane]
= data[lane];
}
}
issueRequestHelper(gpuDynInst);
} // execute
void
Inst_FLAT__FLAT_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemWrite<VecElemU64>(gpuDynInst);
} // initiateAcc
void
Inst_FLAT__FLAT_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_FLAT__FLAT_STORE_DWORDX3 class methods ---
Inst_FLAT__FLAT_STORE_DWORDX3::Inst_FLAT__FLAT_STORE_DWORDX3(
InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_store_dwordx3")
{
setFlag(MemoryRef);
setFlag(Store);
} // Inst_FLAT__FLAT_STORE_DWORDX3
Inst_FLAT__FLAT_STORE_DWORDX3::~Inst_FLAT__FLAT_STORE_DWORDX3()
{
} // ~Inst_FLAT__FLAT_STORE_DWORDX3
// --- description from .arch file ---
// Untyped buffer store 3 dwords.
void
Inst_FLAT__FLAT_STORE_DWORDX3::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
wf->decExpInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
ConstVecOperandU32 data0(gpuDynInst, extData.DATA);
ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1);
ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2);
addr.read();
data0.read();
data1.read();
data2.read();
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
(reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 3] = data0[lane];
(reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 3 + 1] = data1[lane];
(reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 3 + 2] = data2[lane];
}
}
issueRequestHelper(gpuDynInst);
} // execute
void
Inst_FLAT__FLAT_STORE_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemWrite<3>(gpuDynInst);
} // initiateAcc
void
Inst_FLAT__FLAT_STORE_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_FLAT__FLAT_STORE_DWORDX4 class methods ---
Inst_FLAT__FLAT_STORE_DWORDX4::Inst_FLAT__FLAT_STORE_DWORDX4(
InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_store_dwordx4")
{
setFlag(MemoryRef);
setFlag(Store);
} // Inst_FLAT__FLAT_STORE_DWORDX4
Inst_FLAT__FLAT_STORE_DWORDX4::~Inst_FLAT__FLAT_STORE_DWORDX4()
{
} // ~Inst_FLAT__FLAT_STORE_DWORDX4
// --- description from .arch file ---
// Untyped buffer store 4 dwords.
void
Inst_FLAT__FLAT_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
wf->decExpInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
ConstVecOperandU32 data0(gpuDynInst, extData.DATA);
ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1);
ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2);
ConstVecOperandU32 data3(gpuDynInst, extData.DATA + 3);
addr.read();
data0.read();
data1.read();
data2.read();
data3.read();
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
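// Pack the four dwords of each active lane into d_data in
// lane-major order: lane i occupies indices [4*i, 4*i + 3].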
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
(reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 4] = data0[lane];
(reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 4 + 1] = data1[lane];
(reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 4 + 2] = data2[lane];
(reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane * 4 + 3] = data3[lane];
}
}
issueRequestHelper(gpuDynInst);
} // execute
void
Inst_FLAT__FLAT_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initMemWrite<4>(gpuDynInst);
} // initiateAcc
void
Inst_FLAT__FLAT_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
{
} // completeAcc
// --- Inst_FLAT__FLAT_ATOMIC_SWAP class methods ---
Inst_FLAT__FLAT_ATOMIC_SWAP::Inst_FLAT__FLAT_ATOMIC_SWAP(InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_atomic_swap")
{
setFlag(AtomicExch);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
} // Inst_FLAT__FLAT_ATOMIC_SWAP
Inst_FLAT__FLAT_ATOMIC_SWAP::~Inst_FLAT__FLAT_ATOMIC_SWAP()
{
} // ~Inst_FLAT__FLAT_ATOMIC_SWAP
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = DATA;
// RETURN_DATA = tmp.
void
Inst_FLAT__FLAT_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
ConstVecOperandU32 data(gpuDynInst, extData.DATA);
addr.read();
data.read();
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
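// a_data carries each active lane's operand, i.e. the value to be
// swapped into memory; the old memory value comes back in d_data
// and is written to VDST in completeAcc() when the op returns.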
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
(reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
= data[lane];
}
}
issueRequestHelper(gpuDynInst);
} // execute
void
Inst_FLAT__FLAT_ATOMIC_SWAP::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initAtomicAccess<VecElemU32>(gpuDynInst);
} // initiateAcc
void
Inst_FLAT__FLAT_ATOMIC_SWAP::completeAcc(GPUDynInstPtr gpuDynInst)
{
if (isAtomicRet()) {
VecOperandU32 vdst(gpuDynInst, extData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
vdst[lane] = (reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane];
}
}
vdst.write();
}
} // completeAcc
// --- Inst_FLAT__FLAT_ATOMIC_CMPSWAP class methods ---
Inst_FLAT__FLAT_ATOMIC_CMPSWAP
::Inst_FLAT__FLAT_ATOMIC_CMPSWAP(InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_atomic_cmpswap")
{
setFlag(AtomicCAS);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
} // Inst_FLAT__FLAT_ATOMIC_CMPSWAP
Inst_FLAT__FLAT_ATOMIC_CMPSWAP::~Inst_FLAT__FLAT_ATOMIC_CMPSWAP()
{
} // ~Inst_FLAT__FLAT_ATOMIC_CMPSWAP
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// src = DATA[0];
// cmp = DATA[1];
// MEM[ADDR] = (tmp == cmp) ? src : tmp;
// RETURN_DATA[0] = tmp.
void
Inst_FLAT__FLAT_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
ConstVecOperandU32 data(gpuDynInst, extData.DATA);
ConstVecOperandU32 cmp(gpuDynInst, extData.DATA + 1);
addr.read();
data.read();
cmp.read();
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
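// For the CAS request, x_data holds the swap value (DATA) and
// a_data holds the compare value (DATA + 1); the original memory
// value is returned through d_data.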
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
(reinterpret_cast<VecElemU32*>(gpuDynInst->x_data))[lane]
= data[lane];
(reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
= cmp[lane];
}
}
issueRequestHelper(gpuDynInst);
} // execute
void
Inst_FLAT__FLAT_ATOMIC_CMPSWAP::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initAtomicAccess<VecElemU32>(gpuDynInst);
} // initiateAcc
void
Inst_FLAT__FLAT_ATOMIC_CMPSWAP::completeAcc(GPUDynInstPtr gpuDynInst)
{
if (isAtomicRet()) {
VecOperandU32 vdst(gpuDynInst, extData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
vdst[lane] = (reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane];
}
}
vdst.write();
}
} // completeAcc
// --- Inst_FLAT__FLAT_ATOMIC_ADD class methods ---
Inst_FLAT__FLAT_ATOMIC_ADD::Inst_FLAT__FLAT_ATOMIC_ADD(InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_atomic_add")
{
setFlag(AtomicAdd);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
} // Inst_FLAT__FLAT_ATOMIC_ADD
Inst_FLAT__FLAT_ATOMIC_ADD::~Inst_FLAT__FLAT_ATOMIC_ADD()
{
} // ~Inst_FLAT__FLAT_ATOMIC_ADD
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] += DATA;
// RETURN_DATA = tmp.
void
Inst_FLAT__FLAT_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
ConstVecOperandU32 data(gpuDynInst, extData.DATA);
addr.read();
data.read();
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
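// a_data carries each active lane's 32-bit addend for the RMW.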
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
(reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
= data[lane];
}
}
issueRequestHelper(gpuDynInst);
} // execute
void
Inst_FLAT__FLAT_ATOMIC_ADD::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initAtomicAccess<VecElemU32>(gpuDynInst);
} // initiateAcc
void
Inst_FLAT__FLAT_ATOMIC_ADD::completeAcc(GPUDynInstPtr gpuDynInst)
{
if (isAtomicRet()) {
VecOperandU32 vdst(gpuDynInst, extData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
vdst[lane] = (reinterpret_cast<VecElemU32*>(
gpuDynInst->d_data))[lane];
}
}
vdst.write();
}
} // completeAcc
// --- Inst_FLAT__FLAT_ATOMIC_SUB class methods ---
Inst_FLAT__FLAT_ATOMIC_SUB::Inst_FLAT__FLAT_ATOMIC_SUB(InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_atomic_sub")
{
setFlag(AtomicSub);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
} // Inst_FLAT__FLAT_ATOMIC_SUB
Inst_FLAT__FLAT_ATOMIC_SUB::~Inst_FLAT__FLAT_ATOMIC_SUB()
{
} // ~Inst_FLAT__FLAT_ATOMIC_SUB
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] -= DATA;
// RETURN_DATA = tmp.
void
Inst_FLAT__FLAT_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
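// Editorial note: this op is a stub. A hypothetical implementation
// (untested sketch) could mirror Inst_FLAT__FLAT_ATOMIC_ADD above:
//
//     ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
//     ConstVecOperandU32 data(gpuDynInst, extData.DATA);
//     addr.read();
//     data.read();
//     calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
//     for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
//         if (gpuDynInst->exec_mask[lane]) {
//             (reinterpret_cast<VecElemU32*>(
//                 gpuDynInst->a_data))[lane] = data[lane];
//         }
//     }
//     issueRequestHelper(gpuDynInst);
//
// with initiateAcc() calling initAtomicAccess<VecElemU32>() and
// completeAcc() writing d_data back to VDST when isAtomicRet();
// the AtomicSub flag already set in the constructor should select
// the subtract RMW in the memory system. The same pattern applies
// to the other unimplemented 32-bit atomics below
// (smin/umin/smax/umax/and/or/xor/inc/dec).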
// --- Inst_FLAT__FLAT_ATOMIC_SMIN class methods ---
Inst_FLAT__FLAT_ATOMIC_SMIN::Inst_FLAT__FLAT_ATOMIC_SMIN(InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_atomic_smin")
{
setFlag(AtomicMin);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
} // Inst_FLAT__FLAT_ATOMIC_SMIN
Inst_FLAT__FLAT_ATOMIC_SMIN::~Inst_FLAT__FLAT_ATOMIC_SMIN()
{
} // ~Inst_FLAT__FLAT_ATOMIC_SMIN
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
// RETURN_DATA = tmp.
void
Inst_FLAT__FLAT_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_FLAT__FLAT_ATOMIC_UMIN class methods ---
Inst_FLAT__FLAT_ATOMIC_UMIN::Inst_FLAT__FLAT_ATOMIC_UMIN(InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_atomic_umin")
{
setFlag(AtomicMin);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
} // Inst_FLAT__FLAT_ATOMIC_UMIN
Inst_FLAT__FLAT_ATOMIC_UMIN::~Inst_FLAT__FLAT_ATOMIC_UMIN()
{
} // ~Inst_FLAT__FLAT_ATOMIC_UMIN
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
// RETURN_DATA = tmp.
void
Inst_FLAT__FLAT_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_FLAT__FLAT_ATOMIC_SMAX class methods ---
Inst_FLAT__FLAT_ATOMIC_SMAX::Inst_FLAT__FLAT_ATOMIC_SMAX(InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_atomic_smax")
{
setFlag(AtomicMax);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
} // Inst_FLAT__FLAT_ATOMIC_SMAX
Inst_FLAT__FLAT_ATOMIC_SMAX::~Inst_FLAT__FLAT_ATOMIC_SMAX()
{
} // ~Inst_FLAT__FLAT_ATOMIC_SMAX
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
// RETURN_DATA = tmp.
void
Inst_FLAT__FLAT_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_FLAT__FLAT_ATOMIC_UMAX class methods ---
Inst_FLAT__FLAT_ATOMIC_UMAX::Inst_FLAT__FLAT_ATOMIC_UMAX(InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_atomic_umax")
{
setFlag(AtomicMax);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
} // Inst_FLAT__FLAT_ATOMIC_UMAX
Inst_FLAT__FLAT_ATOMIC_UMAX::~Inst_FLAT__FLAT_ATOMIC_UMAX()
{
} // ~Inst_FLAT__FLAT_ATOMIC_UMAX
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
// RETURN_DATA = tmp.
void
Inst_FLAT__FLAT_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_FLAT__FLAT_ATOMIC_AND class methods ---
Inst_FLAT__FLAT_ATOMIC_AND::Inst_FLAT__FLAT_ATOMIC_AND(InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_atomic_and")
{
setFlag(AtomicAnd);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
} // Inst_FLAT__FLAT_ATOMIC_AND
Inst_FLAT__FLAT_ATOMIC_AND::~Inst_FLAT__FLAT_ATOMIC_AND()
{
} // ~Inst_FLAT__FLAT_ATOMIC_AND
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] &= DATA;
// RETURN_DATA = tmp.
void
Inst_FLAT__FLAT_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_FLAT__FLAT_ATOMIC_OR class methods ---
Inst_FLAT__FLAT_ATOMIC_OR::Inst_FLAT__FLAT_ATOMIC_OR(InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_atomic_or")
{
setFlag(AtomicOr);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
} // Inst_FLAT__FLAT_ATOMIC_OR
Inst_FLAT__FLAT_ATOMIC_OR::~Inst_FLAT__FLAT_ATOMIC_OR()
{
} // ~Inst_FLAT__FLAT_ATOMIC_OR
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] |= DATA;
// RETURN_DATA = tmp.
void
Inst_FLAT__FLAT_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_FLAT__FLAT_ATOMIC_XOR class methods ---
Inst_FLAT__FLAT_ATOMIC_XOR::Inst_FLAT__FLAT_ATOMIC_XOR(InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_atomic_xor")
{
setFlag(AtomicXor);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
} // Inst_FLAT__FLAT_ATOMIC_XOR
Inst_FLAT__FLAT_ATOMIC_XOR::~Inst_FLAT__FLAT_ATOMIC_XOR()
{
} // ~Inst_FLAT__FLAT_ATOMIC_XOR
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] ^= DATA;
// RETURN_DATA = tmp.
void
Inst_FLAT__FLAT_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_FLAT__FLAT_ATOMIC_INC class methods ---
Inst_FLAT__FLAT_ATOMIC_INC::Inst_FLAT__FLAT_ATOMIC_INC(InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_atomic_inc")
{
setFlag(AtomicInc);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
} // Inst_FLAT__FLAT_ATOMIC_INC
Inst_FLAT__FLAT_ATOMIC_INC::~Inst_FLAT__FLAT_ATOMIC_INC()
{
} // ~Inst_FLAT__FLAT_ATOMIC_INC
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
// RETURN_DATA = tmp.
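// Worked example: with DATA = 3 the stored value cycles
// 0 -> 1 -> 2 -> 3 -> 0, wrapping to zero once tmp >= DATA.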
void
Inst_FLAT__FLAT_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_FLAT__FLAT_ATOMIC_DEC class methods ---
Inst_FLAT__FLAT_ATOMIC_DEC::Inst_FLAT__FLAT_ATOMIC_DEC(InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_atomic_dec")
{
setFlag(AtomicDec);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
} // Inst_FLAT__FLAT_ATOMIC_DEC
Inst_FLAT__FLAT_ATOMIC_DEC::~Inst_FLAT__FLAT_ATOMIC_DEC()
{
} // ~Inst_FLAT__FLAT_ATOMIC_DEC
// --- description from .arch file ---
// 32b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
// (unsigned compare);
// RETURN_DATA = tmp.
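// Worked example: with DATA = 3 the stored value cycles
// 3 -> 2 -> 1 -> 0 -> 3, reloading DATA when tmp == 0 or tmp > DATA.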
void
Inst_FLAT__FLAT_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_FLAT__FLAT_ATOMIC_SWAP_X2 class methods ---
Inst_FLAT__FLAT_ATOMIC_SWAP_X2::Inst_FLAT__FLAT_ATOMIC_SWAP_X2(
InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_atomic_swap_x2")
{
setFlag(AtomicExch);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
} // Inst_FLAT__FLAT_ATOMIC_SWAP_X2
Inst_FLAT__FLAT_ATOMIC_SWAP_X2::~Inst_FLAT__FLAT_ATOMIC_SWAP_X2()
{
} // ~Inst_FLAT__FLAT_ATOMIC_SWAP_X2
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] = DATA[0:1];
// RETURN_DATA[0:1] = tmp.
void
Inst_FLAT__FLAT_ATOMIC_SWAP_X2::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
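// Editorial note: the unimplemented 64-bit atomics in this group
// could follow the implemented Inst_FLAT__FLAT_ATOMIC_ADD_X2
// pattern below (hypothetical, untested sketch): read
// ConstVecOperandU64 ADDR and DATA, call calcAddr(), copy each
// active lane's 64-bit operand into a_data, issue via
// issueRequestHelper(), use initAtomicAccess<VecElemU64>() in
// initiateAcc(), and write d_data to VDST in completeAcc() when
// isAtomicRet().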
// --- Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2 class methods ---
Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2(
InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_atomic_cmpswap_x2")
{
setFlag(AtomicCAS);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
} // Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2
Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::~Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2()
{
} // ~Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// src = DATA[0:1];
// cmp = DATA[2:3];
// MEM[ADDR] = (tmp == cmp) ? src : tmp;
// RETURN_DATA[0:1] = tmp.
void
Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
ConstVecOperandU64 data(gpuDynInst, extData.DATA);
ConstVecOperandU64 cmp(gpuDynInst, extData.DATA + 2);
addr.read();
data.read();
cmp.read();
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
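// x_data holds the 64-bit swap value (DATA[0:1]) and a_data the
// compare value (DATA[2:3]); the original memory value returns in
// d_data.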
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
(reinterpret_cast<VecElemU64*>(gpuDynInst->x_data))[lane]
= data[lane];
(reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
= cmp[lane];
}
}
issueRequestHelper(gpuDynInst);
} // execute
void
Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initAtomicAccess<VecElemU64>(gpuDynInst);
} // initiateAcc
void
Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::completeAcc(GPUDynInstPtr gpuDynInst)
{
if (isAtomicRet()) {
VecOperandU64 vdst(gpuDynInst, extData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
vdst[lane] = (reinterpret_cast<VecElemU64*>(
gpuDynInst->d_data))[lane];
}
}
vdst.write();
}
} // completeAcc
// --- Inst_FLAT__FLAT_ATOMIC_ADD_X2 class methods ---
Inst_FLAT__FLAT_ATOMIC_ADD_X2::Inst_FLAT__FLAT_ATOMIC_ADD_X2(
InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_atomic_add_x2")
{
setFlag(AtomicAdd);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
} // Inst_FLAT__FLAT_ATOMIC_ADD_X2
Inst_FLAT__FLAT_ATOMIC_ADD_X2::~Inst_FLAT__FLAT_ATOMIC_ADD_X2()
{
} // ~Inst_FLAT__FLAT_ATOMIC_ADD_X2
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] += DATA[0:1];
// RETURN_DATA[0:1] = tmp.
void
Inst_FLAT__FLAT_ATOMIC_ADD_X2::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
return;
}
gpuDynInst->execUnitId = wf->execUnitId;
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
ConstVecOperandU64 data(gpuDynInst, extData.DATA);
addr.read();
data.read();
calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
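// a_data carries each active lane's 64-bit addend for the RMW.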
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
(reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
= data[lane];
}
}
issueRequestHelper(gpuDynInst);
} // execute
void
Inst_FLAT__FLAT_ATOMIC_ADD_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
{
initAtomicAccess<VecElemU64>(gpuDynInst);
} // initiateAcc
void
Inst_FLAT__FLAT_ATOMIC_ADD_X2::completeAcc(GPUDynInstPtr gpuDynInst)
{
if (isAtomicRet()) {
VecOperandU64 vdst(gpuDynInst, extData.VDST);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
vdst[lane] = (reinterpret_cast<VecElemU64*>(
gpuDynInst->d_data))[lane];
}
}
vdst.write();
}
} // completeAcc
// --- Inst_FLAT__FLAT_ATOMIC_SUB_X2 class methods ---
Inst_FLAT__FLAT_ATOMIC_SUB_X2::Inst_FLAT__FLAT_ATOMIC_SUB_X2(
InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_atomic_sub_x2")
{
setFlag(AtomicSub);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
} // Inst_FLAT__FLAT_ATOMIC_SUB_X2
Inst_FLAT__FLAT_ATOMIC_SUB_X2::~Inst_FLAT__FLAT_ATOMIC_SUB_X2()
{
} // ~Inst_FLAT__FLAT_ATOMIC_SUB_X2
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] -= DATA[0:1];
// RETURN_DATA[0:1] = tmp.
void
Inst_FLAT__FLAT_ATOMIC_SUB_X2::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_FLAT__FLAT_ATOMIC_SMIN_X2 class methods ---
Inst_FLAT__FLAT_ATOMIC_SMIN_X2::Inst_FLAT__FLAT_ATOMIC_SMIN_X2(
InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_atomic_smin_x2")
{
setFlag(AtomicMin);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
} // Inst_FLAT__FLAT_ATOMIC_SMIN_X2
Inst_FLAT__FLAT_ATOMIC_SMIN_X2::~Inst_FLAT__FLAT_ATOMIC_SMIN_X2()
{
} // ~Inst_FLAT__FLAT_ATOMIC_SMIN_X2
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
// RETURN_DATA[0:1] = tmp.
void
Inst_FLAT__FLAT_ATOMIC_SMIN_X2::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_FLAT__FLAT_ATOMIC_UMIN_X2 class methods ---
Inst_FLAT__FLAT_ATOMIC_UMIN_X2::Inst_FLAT__FLAT_ATOMIC_UMIN_X2(
InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_atomic_umin_x2")
{
setFlag(AtomicMin);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
} // Inst_FLAT__FLAT_ATOMIC_UMIN_X2
Inst_FLAT__FLAT_ATOMIC_UMIN_X2::~Inst_FLAT__FLAT_ATOMIC_UMIN_X2()
{
} // ~Inst_FLAT__FLAT_ATOMIC_UMIN_X2
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
// RETURN_DATA[0:1] = tmp.
void
Inst_FLAT__FLAT_ATOMIC_UMIN_X2::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_FLAT__FLAT_ATOMIC_SMAX_X2 class methods ---
Inst_FLAT__FLAT_ATOMIC_SMAX_X2::Inst_FLAT__FLAT_ATOMIC_SMAX_X2(
InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_atomic_smax_x2")
{
setFlag(AtomicMax);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
} // Inst_FLAT__FLAT_ATOMIC_SMAX_X2
Inst_FLAT__FLAT_ATOMIC_SMAX_X2::~Inst_FLAT__FLAT_ATOMIC_SMAX_X2()
{
} // ~Inst_FLAT__FLAT_ATOMIC_SMAX_X2
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
// RETURN_DATA[0:1] = tmp.
void
Inst_FLAT__FLAT_ATOMIC_SMAX_X2::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_FLAT__FLAT_ATOMIC_UMAX_X2 class methods ---
Inst_FLAT__FLAT_ATOMIC_UMAX_X2::Inst_FLAT__FLAT_ATOMIC_UMAX_X2(
InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_atomic_umax_x2")
{
setFlag(AtomicMax);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
} // Inst_FLAT__FLAT_ATOMIC_UMAX_X2
Inst_FLAT__FLAT_ATOMIC_UMAX_X2::~Inst_FLAT__FLAT_ATOMIC_UMAX_X2()
{
} // ~Inst_FLAT__FLAT_ATOMIC_UMAX_X2
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
// RETURN_DATA[0:1] = tmp.
void
Inst_FLAT__FLAT_ATOMIC_UMAX_X2::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_FLAT__FLAT_ATOMIC_AND_X2 class methods ---
Inst_FLAT__FLAT_ATOMIC_AND_X2::Inst_FLAT__FLAT_ATOMIC_AND_X2(
InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_atomic_and_x2")
{
setFlag(AtomicAnd);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
} // Inst_FLAT__FLAT_ATOMIC_AND_X2
Inst_FLAT__FLAT_ATOMIC_AND_X2::~Inst_FLAT__FLAT_ATOMIC_AND_X2()
{
} // ~Inst_FLAT__FLAT_ATOMIC_AND_X2
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] &= DATA[0:1];
// RETURN_DATA[0:1] = tmp.
void
Inst_FLAT__FLAT_ATOMIC_AND_X2::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_FLAT__FLAT_ATOMIC_OR_X2 class methods ---
Inst_FLAT__FLAT_ATOMIC_OR_X2::Inst_FLAT__FLAT_ATOMIC_OR_X2(
InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_atomic_or_x2")
{
setFlag(AtomicOr);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
} // Inst_FLAT__FLAT_ATOMIC_OR_X2
Inst_FLAT__FLAT_ATOMIC_OR_X2::~Inst_FLAT__FLAT_ATOMIC_OR_X2()
{
} // ~Inst_FLAT__FLAT_ATOMIC_OR_X2
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] |= DATA[0:1];
// RETURN_DATA[0:1] = tmp.
void
Inst_FLAT__FLAT_ATOMIC_OR_X2::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_FLAT__FLAT_ATOMIC_XOR_X2 class methods ---
Inst_FLAT__FLAT_ATOMIC_XOR_X2::Inst_FLAT__FLAT_ATOMIC_XOR_X2(
InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_atomic_xor_x2")
{
setFlag(AtomicXor);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
} // Inst_FLAT__FLAT_ATOMIC_XOR_X2
Inst_FLAT__FLAT_ATOMIC_XOR_X2::~Inst_FLAT__FLAT_ATOMIC_XOR_X2()
{
} // ~Inst_FLAT__FLAT_ATOMIC_XOR_X2
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] ^= DATA[0:1];
// RETURN_DATA[0:1] = tmp.
void
Inst_FLAT__FLAT_ATOMIC_XOR_X2::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_FLAT__FLAT_ATOMIC_INC_X2 class methods ---
Inst_FLAT__FLAT_ATOMIC_INC_X2::Inst_FLAT__FLAT_ATOMIC_INC_X2(
InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_atomic_inc_x2")
{
setFlag(AtomicInc);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
} // Inst_FLAT__FLAT_ATOMIC_INC_X2
Inst_FLAT__FLAT_ATOMIC_INC_X2::~Inst_FLAT__FLAT_ATOMIC_INC_X2()
{
} // ~Inst_FLAT__FLAT_ATOMIC_INC_X2
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
// RETURN_DATA[0:1] = tmp.
void
Inst_FLAT__FLAT_ATOMIC_INC_X2::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_FLAT__FLAT_ATOMIC_DEC_X2 class methods ---
Inst_FLAT__FLAT_ATOMIC_DEC_X2::Inst_FLAT__FLAT_ATOMIC_DEC_X2(
InFmt_FLAT *iFmt)
: Inst_FLAT(iFmt, "flat_atomic_dec_x2")
{
setFlag(AtomicDec);
if (instData.GLC) {
setFlag(AtomicReturn);
} else {
setFlag(AtomicNoReturn);
}
setFlag(MemoryRef);
} // Inst_FLAT__FLAT_ATOMIC_DEC_X2
Inst_FLAT__FLAT_ATOMIC_DEC_X2::~Inst_FLAT__FLAT_ATOMIC_DEC_X2()
{
} // ~Inst_FLAT__FLAT_ATOMIC_DEC_X2
// --- description from .arch file ---
// 64b:
// tmp = MEM[ADDR];
// MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
// (unsigned compare);
// RETURN_DATA[0:1] = tmp.
void
Inst_FLAT__FLAT_ATOMIC_DEC_X2::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
} // namespace VegaISA
} // namespace gem5