/*
* Copyright (c) 2015-2021 Advanced Micro Devices, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "arch/amdgpu/vega/insts/instructions.hh"
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <limits>
#include "arch/amdgpu/vega/insts/inst_util.hh"
#include "debug/VEGA.hh"
#include "debug/GPUSync.hh"
#include "dev/amdgpu/hwreg_defines.hh"
#include "gpu-compute/shader.hh"
namespace gem5
{
namespace VegaISA
{
// --- Inst_SOP2__S_ADD_U32 class methods ---
Inst_SOP2__S_ADD_U32::Inst_SOP2__S_ADD_U32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_add_u32")
{
setFlag(ALU);
} // Inst_SOP2__S_ADD_U32
Inst_SOP2__S_ADD_U32::~Inst_SOP2__S_ADD_U32()
{
} // ~Inst_SOP2__S_ADD_U32
// --- description from .arch file ---
// D.u = S0.u + S1.u;
// SCC = (S0.u + S1.u >= 0x100000000ULL ? 1 : 0) is an unsigned
// --- overflow/carry-out for S_ADDC_U32.
void
Inst_SOP2__S_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = src0.rawData() + src1.rawData();
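// Detect the unsigned carry-out by widening both operands to 64 bits;
// a sum of 0x100000000 (2^32) or more sets SCC.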
scc = ((ScalarRegU64)src0.rawData() + (ScalarRegU64)src1.rawData())
>= 0x100000000ULL ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_SUB_U32 class methods ---
Inst_SOP2__S_SUB_U32::Inst_SOP2__S_SUB_U32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_sub_u32")
{
setFlag(ALU);
} // Inst_SOP2__S_SUB_U32
Inst_SOP2__S_SUB_U32::~Inst_SOP2__S_SUB_U32()
{
} // ~Inst_SOP2__S_SUB_U32
// --- description from .arch file ---
// D.u = S0.u - S1.u;
// SCC = (S1.u > S0.u ? 1 : 0) is an unsigned overflow or carry-out for
// --- S_SUBB_U32.
void
Inst_SOP2__S_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = src0.rawData() - src1.rawData();
scc = (src1.rawData() > src0.rawData()) ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_ADD_I32 class methods ---
Inst_SOP2__S_ADD_I32::Inst_SOP2__S_ADD_I32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_add_i32")
{
setFlag(ALU);
} // Inst_SOP2__S_ADD_I32
Inst_SOP2__S_ADD_I32::~Inst_SOP2__S_ADD_I32()
{
} // ~Inst_SOP2__S_ADD_I32
// --- description from .arch file ---
// D.i = S0.i + S1.i;
// SCC = (S0.u[31] == S1.u[31] && S0.u[31] != D.u[31]) is a signed
// overflow.
// This opcode is not suitable for use with S_ADDC_U32 for implementing
// 64-bit operations.
void
Inst_SOP2__S_ADD_I32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = src0.rawData() + src1.rawData();
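// Signed overflow: both sources share a sign bit that differs from
// the sign bit of the result.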
scc = (bits(src0.rawData(), 31) == bits(src1.rawData(), 31)
&& bits(src0.rawData(), 31) != bits(sdst.rawData(), 31))
? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_SUB_I32 class methods ---
Inst_SOP2__S_SUB_I32::Inst_SOP2__S_SUB_I32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_sub_i32")
{
setFlag(ALU);
} // Inst_SOP2__S_SUB_I32
Inst_SOP2__S_SUB_I32::~Inst_SOP2__S_SUB_I32()
{
} // ~Inst_SOP2__S_SUB_I32
// --- description from .arch file ---
// D.i = S0.i - S1.i;
// SCC = (S0.u[31] != S1.u[31] && S0.u[31] != D.u[31]) is a signed
// overflow.
// CAUTION: The condition code behaviour for this opcode is inconsistent
// with V_SUB_I32; see V_SUB_I32 for further details.
// This opcode is not suitable for use with S_SUBB_U32 for implementing
// 64-bit operations.
void
Inst_SOP2__S_SUB_I32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = src0.rawData() - src1.rawData();
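// Signed overflow on subtraction: the sources have opposite sign
// bits and the result's sign differs from S0's.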
scc = (bits(src0.rawData(), 31) != bits(src1.rawData(), 31)
&& bits(src0.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_ADDC_U32 class methods ---
Inst_SOP2__S_ADDC_U32::Inst_SOP2__S_ADDC_U32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_addc_u32")
{
setFlag(ALU);
} // Inst_SOP2__S_ADDC_U32
Inst_SOP2__S_ADDC_U32::~Inst_SOP2__S_ADDC_U32()
{
} // ~Inst_SOP2__S_ADDC_U32
// --- description from .arch file ---
// D.u = S0.u + S1.u + SCC;
// SCC = (S0.u + S1.u + SCC >= 0x100000000ULL ? 1 : 0) is an unsigned
// overflow.
void
Inst_SOP2__S_ADDC_U32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
scc.read();
sdst = src0.rawData() + src1.rawData() + scc.rawData();
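// Widen all three terms, including the incoming SCC, to 64 bits so
// the carry-out comparison against 2^32 cannot wrap.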
scc = ((ScalarRegU64)src0.rawData() + (ScalarRegU64)src1.rawData()
+ (ScalarRegU64)scc.rawData()) >= 0x100000000ULL ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_SUBB_U32 class methods ---
Inst_SOP2__S_SUBB_U32::Inst_SOP2__S_SUBB_U32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_subb_u32")
{
setFlag(ALU);
} // Inst_SOP2__S_SUBB_U32
Inst_SOP2__S_SUBB_U32::~Inst_SOP2__S_SUBB_U32()
{
} // ~Inst_SOP2__S_SUBB_U32
// --- description from .arch file ---
// D.u = S0.u - S1.u - SCC;
// SCC = (S1.u + SCC > S0.u ? 1 : 0) is an unsigned overflow.
void
Inst_SOP2__S_SUBB_U32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
scc.read();
sdst = src0.rawData() - src1.rawData() - scc.rawData();
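// Perform the borrow comparison in 64 bits so that src1 == 0xffffffff
// with an incoming SCC of 1 is still reported as a borrow.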
scc = ((ScalarRegU64)src1.rawData() + (ScalarRegU64)scc.rawData())
> (ScalarRegU64)src0.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_MIN_I32 class methods ---
Inst_SOP2__S_MIN_I32::Inst_SOP2__S_MIN_I32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_min_i32")
{
setFlag(ALU);
} // Inst_SOP2__S_MIN_I32
Inst_SOP2__S_MIN_I32::~Inst_SOP2__S_MIN_I32()
{
} // ~Inst_SOP2__S_MIN_I32
// --- description from .arch file ---
// D.i = (S0.i < S1.i) ? S0.i : S1.i;
// SCC = 1 if S0 is chosen as the minimum value.
void
Inst_SOP2__S_MIN_I32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = std::min(src0.rawData(), src1.rawData());
scc = (src0.rawData() < src1.rawData()) ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_MIN_U32 class methods ---
Inst_SOP2__S_MIN_U32::Inst_SOP2__S_MIN_U32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_min_u32")
{
setFlag(ALU);
} // Inst_SOP2__S_MIN_U32
Inst_SOP2__S_MIN_U32::~Inst_SOP2__S_MIN_U32()
{
} // ~Inst_SOP2__S_MIN_U32
// --- description from .arch file ---
// D.u = (S0.u < S1.u) ? S0.u : S1.u;
// SCC = 1 if S0 is chosen as the minimum value.
void
Inst_SOP2__S_MIN_U32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = std::min(src0.rawData(), src1.rawData());
scc = (src0.rawData() < src1.rawData()) ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_MAX_I32 class methods ---
Inst_SOP2__S_MAX_I32::Inst_SOP2__S_MAX_I32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_max_i32")
{
setFlag(ALU);
} // Inst_SOP2__S_MAX_I32
Inst_SOP2__S_MAX_I32::~Inst_SOP2__S_MAX_I32()
{
} // ~Inst_SOP2__S_MAX_I32
// --- description from .arch file ---
// D.i = (S0.i > S1.i) ? S0.i : S1.i;
// SCC = 1 if S0 is chosen as the maximum value.
void
Inst_SOP2__S_MAX_I32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = std::max(src0.rawData(), src1.rawData());
scc = (src0.rawData() > src1.rawData()) ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_MAX_U32 class methods ---
Inst_SOP2__S_MAX_U32::Inst_SOP2__S_MAX_U32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_max_u32")
{
setFlag(ALU);
} // Inst_SOP2__S_MAX_U32
Inst_SOP2__S_MAX_U32::~Inst_SOP2__S_MAX_U32()
{
} // ~Inst_SOP2__S_MAX_U32
// --- description from .arch file ---
// D.u = (S0.u > S1.u) ? S0.u : S1.u;
// SCC = 1 if S0 is chosen as the maximum value.
void
Inst_SOP2__S_MAX_U32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = std::max(src0.rawData(), src1.rawData());
scc = (src0.rawData() > src1.rawData()) ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_CSELECT_B32 class methods ---
Inst_SOP2__S_CSELECT_B32::Inst_SOP2__S_CSELECT_B32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_cselect_b32")
{
setFlag(ALU);
} // Inst_SOP2__S_CSELECT_B32
Inst_SOP2__S_CSELECT_B32::~Inst_SOP2__S_CSELECT_B32()
{
} // ~Inst_SOP2__S_CSELECT_B32
// --- description from .arch file ---
// D.u = SCC ? S0.u : S1.u (conditional select).
void
Inst_SOP2__S_CSELECT_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
scc.read();
sdst = scc.rawData() ? src0.rawData() : src1.rawData();
sdst.write();
} // execute
// --- Inst_SOP2__S_CSELECT_B64 class methods ---
Inst_SOP2__S_CSELECT_B64::Inst_SOP2__S_CSELECT_B64(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_cselect_b64")
{
setFlag(ALU);
} // Inst_SOP2__S_CSELECT_B64
Inst_SOP2__S_CSELECT_B64::~Inst_SOP2__S_CSELECT_B64()
{
} // ~Inst_SOP2__S_CSELECT_B64
// --- description from .arch file ---
// D.u64 = SCC ? S0.u64 : S1.u64 (conditional select).
void
Inst_SOP2__S_CSELECT_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
scc.read();
sdst = scc.rawData() ? src0.rawData() : src1.rawData();
sdst.write();
} // execute
// --- Inst_SOP2__S_AND_B32 class methods ---
Inst_SOP2__S_AND_B32::Inst_SOP2__S_AND_B32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_and_b32")
{
setFlag(ALU);
} // Inst_SOP2__S_AND_B32
Inst_SOP2__S_AND_B32::~Inst_SOP2__S_AND_B32()
{
} // ~Inst_SOP2__S_AND_B32
// --- description from .arch file ---
// D.u = S0.u & S1.u;
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_AND_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = src0.rawData() & src1.rawData();
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_AND_B64 class methods ---
Inst_SOP2__S_AND_B64::Inst_SOP2__S_AND_B64(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_and_b64")
{
setFlag(ALU);
} // Inst_SOP2__S_AND_B64
Inst_SOP2__S_AND_B64::~Inst_SOP2__S_AND_B64()
{
} // ~Inst_SOP2__S_AND_B64
// --- description from .arch file ---
// D.u64 = S0.u64 & S1.u64;
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_AND_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = src0.rawData() & src1.rawData();
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_OR_B32 class methods ---
Inst_SOP2__S_OR_B32::Inst_SOP2__S_OR_B32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_or_b32")
{
setFlag(ALU);
} // Inst_SOP2__S_OR_B32
Inst_SOP2__S_OR_B32::~Inst_SOP2__S_OR_B32()
{
} // ~Inst_SOP2__S_OR_B32
// --- description from .arch file ---
// D.u = S0.u | S1.u;
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_OR_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = src0.rawData() | src1.rawData();
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_OR_B64 class methods ---
Inst_SOP2__S_OR_B64::Inst_SOP2__S_OR_B64(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_or_b64")
{
setFlag(ALU);
} // Inst_SOP2__S_OR_B64
Inst_SOP2__S_OR_B64::~Inst_SOP2__S_OR_B64()
{
} // ~Inst_SOP2__S_OR_B64
// --- description from .arch file ---
// D.u64 = S0.u64 | S1.u64;
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_OR_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = src0.rawData() | src1.rawData();
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_XOR_B32 class methods ---
Inst_SOP2__S_XOR_B32::Inst_SOP2__S_XOR_B32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_xor_b32")
{
setFlag(ALU);
} // Inst_SOP2__S_XOR_B32
Inst_SOP2__S_XOR_B32::~Inst_SOP2__S_XOR_B32()
{
} // ~Inst_SOP2__S_XOR_B32
// --- description from .arch file ---
// D.u = S0.u ^ S1.u;
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = src0.rawData() ^ src1.rawData();
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_XOR_B64 class methods ---
Inst_SOP2__S_XOR_B64::Inst_SOP2__S_XOR_B64(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_xor_b64")
{
setFlag(ALU);
} // Inst_SOP2__S_XOR_B64
Inst_SOP2__S_XOR_B64::~Inst_SOP2__S_XOR_B64()
{
} // ~Inst_SOP2__S_XOR_B64
// --- description from .arch file ---
// D.u64 = S0.u64 ^ S1.u64;
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_XOR_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = src0.rawData() ^ src1.rawData();
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_ANDN2_B32 class methods ---
Inst_SOP2__S_ANDN2_B32::Inst_SOP2__S_ANDN2_B32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_andn2_b32")
{
setFlag(ALU);
} // Inst_SOP2__S_ANDN2_B32
Inst_SOP2__S_ANDN2_B32::~Inst_SOP2__S_ANDN2_B32()
{
} // ~Inst_SOP2__S_ANDN2_B32
// --- description from .arch file ---
// D.u = S0.u & ~S1.u;
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_ANDN2_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = src0.rawData() &~ src1.rawData();
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_ANDN2_B64 class methods ---
Inst_SOP2__S_ANDN2_B64::Inst_SOP2__S_ANDN2_B64(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_andn2_b64")
{
setFlag(ALU);
} // Inst_SOP2__S_ANDN2_B64
Inst_SOP2__S_ANDN2_B64::~Inst_SOP2__S_ANDN2_B64()
{
} // ~Inst_SOP2__S_ANDN2_B64
// --- description from .arch file ---
// D.u64 = S0.u64 & ~S1.u64;
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_ANDN2_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = src0.rawData() &~ src1.rawData();
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_ORN2_B32 class methods ---
Inst_SOP2__S_ORN2_B32::Inst_SOP2__S_ORN2_B32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_orn2_b32")
{
setFlag(ALU);
} // Inst_SOP2__S_ORN2_B32
Inst_SOP2__S_ORN2_B32::~Inst_SOP2__S_ORN2_B32()
{
} // ~Inst_SOP2__S_ORN2_B32
// --- description from .arch file ---
// D.u = S0.u | ~S1.u;
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_ORN2_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = src0.rawData() |~ src1.rawData();
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_ORN2_B64 class methods ---
Inst_SOP2__S_ORN2_B64::Inst_SOP2__S_ORN2_B64(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_orn2_b64")
{
setFlag(ALU);
} // Inst_SOP2__S_ORN2_B64
Inst_SOP2__S_ORN2_B64::~Inst_SOP2__S_ORN2_B64()
{
} // ~Inst_SOP2__S_ORN2_B64
// --- description from .arch file ---
// D.u64 = S0.u64 | ~S1.u64;
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_ORN2_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = src0.rawData() |~ src1.rawData();
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_NAND_B32 class methods ---
Inst_SOP2__S_NAND_B32::Inst_SOP2__S_NAND_B32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_nand_b32")
{
setFlag(ALU);
} // Inst_SOP2__S_NAND_B32
Inst_SOP2__S_NAND_B32::~Inst_SOP2__S_NAND_B32()
{
} // ~Inst_SOP2__S_NAND_B32
// --- description from .arch file ---
// D.u = ~(S0.u & S1.u);
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_NAND_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = ~(src0.rawData() & src1.rawData());
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_NAND_B64 class methods ---
Inst_SOP2__S_NAND_B64::Inst_SOP2__S_NAND_B64(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_nand_b64")
{
setFlag(ALU);
} // Inst_SOP2__S_NAND_B64
Inst_SOP2__S_NAND_B64::~Inst_SOP2__S_NAND_B64()
{
} // ~Inst_SOP2__S_NAND_B64
// --- description from .arch file ---
// D.u64 = ~(S0.u64 & S1.u64);
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_NAND_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = ~(src0.rawData() & src1.rawData());
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_NOR_B32 class methods ---
Inst_SOP2__S_NOR_B32::Inst_SOP2__S_NOR_B32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_nor_b32")
{
setFlag(ALU);
} // Inst_SOP2__S_NOR_B32
Inst_SOP2__S_NOR_B32::~Inst_SOP2__S_NOR_B32()
{
} // ~Inst_SOP2__S_NOR_B32
// --- description from .arch file ---
// D.u = ~(S0.u | S1.u);
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_NOR_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = ~(src0.rawData() | src1.rawData());
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_NOR_B64 class methods ---
Inst_SOP2__S_NOR_B64::Inst_SOP2__S_NOR_B64(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_nor_b64")
{
setFlag(ALU);
} // Inst_SOP2__S_NOR_B64
Inst_SOP2__S_NOR_B64::~Inst_SOP2__S_NOR_B64()
{
} // ~Inst_SOP2__S_NOR_B64
// --- description from .arch file ---
// D.u64 = ~(S0.u64 | S1.u64);
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_NOR_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = ~(src0.rawData() | src1.rawData());
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_XNOR_B32 class methods ---
Inst_SOP2__S_XNOR_B32::Inst_SOP2__S_XNOR_B32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_xnor_b32")
{
setFlag(ALU);
} // Inst_SOP2__S_XNOR_B32
Inst_SOP2__S_XNOR_B32::~Inst_SOP2__S_XNOR_B32()
{
} // ~Inst_SOP2__S_XNOR_B32
// --- description from .arch file ---
// D.u = ~(S0.u ^ S1.u);
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_XNOR_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = ~(src0.rawData() ^ src1.rawData());
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_XNOR_B64 class methods ---
Inst_SOP2__S_XNOR_B64::Inst_SOP2__S_XNOR_B64(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_xnor_b64")
{
setFlag(ALU);
} // Inst_SOP2__S_XNOR_B64
Inst_SOP2__S_XNOR_B64::~Inst_SOP2__S_XNOR_B64()
{
} // ~Inst_SOP2__S_XNOR_B64
// --- description from .arch file ---
// D.u64 = ~(S0.u64 ^ S1.u64);
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_XNOR_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = ~(src0.rawData() ^ src1.rawData());
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_LSHL_B32 class methods ---
Inst_SOP2__S_LSHL_B32::Inst_SOP2__S_LSHL_B32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_lshl_b32")
{
setFlag(ALU);
} // Inst_SOP2__S_LSHL_B32
Inst_SOP2__S_LSHL_B32::~Inst_SOP2__S_LSHL_B32()
{
} // ~Inst_SOP2__S_LSHL_B32
// --- description from .arch file ---
// D.u = S0.u << S1.u[4:0];
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_LSHL_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = (src0.rawData() << bits(src1.rawData(), 4, 0));
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_LSHL_B64 class methods ---
Inst_SOP2__S_LSHL_B64::Inst_SOP2__S_LSHL_B64(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_lshl_b64")
{
setFlag(ALU);
} // Inst_SOP2__S_LSHL_B64
Inst_SOP2__S_LSHL_B64::~Inst_SOP2__S_LSHL_B64()
{
} // ~Inst_SOP2__S_LSHL_B64
// --- description from .arch file ---
// D.u64 = S0.u64 << S1.u[5:0];
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_LSHL_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = (src0.rawData() << bits(src1.rawData(), 5, 0));
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_LSHR_B32 class methods ---
Inst_SOP2__S_LSHR_B32::Inst_SOP2__S_LSHR_B32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_lshr_b32")
{
setFlag(ALU);
} // Inst_SOP2__S_LSHR_B32
Inst_SOP2__S_LSHR_B32::~Inst_SOP2__S_LSHR_B32()
{
} // ~Inst_SOP2__S_LSHR_B32
// --- description from .arch file ---
// D.u = S0.u >> S1.u[4:0];
// SCC = 1 if result is non-zero.
// The vacated bits are set to zero.
void
Inst_SOP2__S_LSHR_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0));
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_LSHR_B64 class methods ---
Inst_SOP2__S_LSHR_B64::Inst_SOP2__S_LSHR_B64(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_lshr_b64")
{
setFlag(ALU);
} // Inst_SOP2__S_LSHR_B64
Inst_SOP2__S_LSHR_B64::~Inst_SOP2__S_LSHR_B64()
{
} // ~Inst_SOP2__S_LSHR_B64
// --- description from .arch file ---
// D.u64 = S0.u64 >> S1.u[5:0];
// SCC = 1 if result is non-zero.
// The vacated bits are set to zero.
void
Inst_SOP2__S_LSHR_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0));
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_ASHR_I32 class methods ---
Inst_SOP2__S_ASHR_I32::Inst_SOP2__S_ASHR_I32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_ashr_i32")
{
setFlag(ALU);
} // Inst_SOP2__S_ASHR_I32
Inst_SOP2__S_ASHR_I32::~Inst_SOP2__S_ASHR_I32()
{
} // ~Inst_SOP2__S_ASHR_I32
// --- description from .arch file ---
// D.i = signext(S0.i) >> S1.u[4:0];
// SCC = 1 if result is non-zero.
// The vacated bits are set to the sign bit of the input value.
void
Inst_SOP2__S_ASHR_I32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
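// The source operand is signed, so the right shift below is an
// arithmetic shift: vacated bits are filled with the sign bit.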
sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0));
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_ASHR_I64 class methods ---
Inst_SOP2__S_ASHR_I64::Inst_SOP2__S_ASHR_I64(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_ashr_i64")
{
setFlag(ALU);
} // Inst_SOP2__S_ASHR_I64
Inst_SOP2__S_ASHR_I64::~Inst_SOP2__S_ASHR_I64()
{
} // ~Inst_SOP2__S_ASHR_I64
// --- description from .arch file ---
// D.i64 = signext(S0.i64) >> S1.u[5:0];
// SCC = 1 if result is non-zero.
// The vacated bits are set to the sign bit of the input value.
void
Inst_SOP2__S_ASHR_I64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandI64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0));
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_BFM_B32 class methods ---
Inst_SOP2__S_BFM_B32::Inst_SOP2__S_BFM_B32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_bfm_b32")
{
setFlag(ALU);
} // Inst_SOP2__S_BFM_B32
Inst_SOP2__S_BFM_B32::~Inst_SOP2__S_BFM_B32()
{
} // ~Inst_SOP2__S_BFM_B32
// --- description from .arch file ---
// D.u = ((1 << S0.u[4:0]) - 1) << S1.u[4:0] (bitfield mask).
void
Inst_SOP2__S_BFM_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
src0.read();
src1.read();
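// Build the bitfield mask: S0[4:0] gives the field width and S1[4:0]
// the starting bit, e.g. width 3 at offset 4 yields 0x70.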
sdst = ((1 << bits(src0.rawData(), 4, 0)) - 1)
<< bits(src1.rawData(), 4, 0);
sdst.write();
} // execute
// --- Inst_SOP2__S_BFM_B64 class methods ---
Inst_SOP2__S_BFM_B64::Inst_SOP2__S_BFM_B64(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_bfm_b64")
{
setFlag(ALU);
} // Inst_SOP2__S_BFM_B64
Inst_SOP2__S_BFM_B64::~Inst_SOP2__S_BFM_B64()
{
} // ~Inst_SOP2__S_BFM_B64
// --- description from .arch file ---
// D.u64 = ((1ULL << S0.u[5:0]) - 1) << S1.u[5:0] (bitfield mask).
void
Inst_SOP2__S_BFM_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
src0.read();
src1.read();
sdst = ((1ULL << bits(src0.rawData(), 5, 0)) - 1)
<< bits(src1.rawData(), 5, 0);
sdst.write();
} // execute
// --- Inst_SOP2__S_MUL_I32 class methods ---
Inst_SOP2__S_MUL_I32::Inst_SOP2__S_MUL_I32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_mul_i32")
{
setFlag(ALU);
} // Inst_SOP2__S_MUL_I32
Inst_SOP2__S_MUL_I32::~Inst_SOP2__S_MUL_I32()
{
} // ~Inst_SOP2__S_MUL_I32
// --- description from .arch file ---
// D.i = S0.i * S1.i.
void
Inst_SOP2__S_MUL_I32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
src0.read();
src1.read();
sdst = src0.rawData() * src1.rawData();
sdst.write();
} // execute
// --- Inst_SOP2__S_BFE_U32 class methods ---
Inst_SOP2__S_BFE_U32::Inst_SOP2__S_BFE_U32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_bfe_u32")
{
setFlag(ALU);
} // Inst_SOP2__S_BFE_U32
Inst_SOP2__S_BFE_U32::~Inst_SOP2__S_BFE_U32()
{
} // ~Inst_SOP2__S_BFE_U32
// --- description from .arch file ---
// Bit field extract. S0 is Data, S1[4:0] is field offset, S1[22:16] is
// field width.
// D.u = (S0.u>>S1.u[4:0]) & ((1<<S1.u[22:16])-1);
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_BFE_U32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
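// S1 packs both field parameters: bits [4:0] hold the offset and
// bits [22:16] the width; ((1 << width) - 1) masks off everything
// above the extracted field.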
sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0))
& ((1 << bits(src1.rawData(), 22, 16)) - 1);
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_BFE_I32 class methods ---
Inst_SOP2__S_BFE_I32::Inst_SOP2__S_BFE_I32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_bfe_i32")
{
setFlag(ALU);
} // Inst_SOP2__S_BFE_I32
Inst_SOP2__S_BFE_I32::~Inst_SOP2__S_BFE_I32()
{
} // ~Inst_SOP2__S_BFE_I32
// --- description from .arch file ---
// Bit field extract. S0 is Data, S1[4:0] is field offset, S1[22:16] is
// field width.
// D.i = (S0.i>>S1.u[4:0]) & ((1<<S1.u[22:16])-1);
// Sign-extend the result;
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_BFE_I32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0))
& ((1 << bits(src1.rawData(), 22, 16)) - 1);
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_BFE_U64 class methods ---
Inst_SOP2__S_BFE_U64::Inst_SOP2__S_BFE_U64(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_bfe_u64")
{
setFlag(ALU);
} // Inst_SOP2__S_BFE_U64
Inst_SOP2__S_BFE_U64::~Inst_SOP2__S_BFE_U64()
{
} // ~Inst_SOP2__S_BFE_U64
// --- description from .arch file ---
// Bit field extract. S0 is Data, S1[5:0] is field offset, S1[22:16] is
// field width.
// D.u64 = (S0.u64>>S1.u[5:0]) & ((1<<S1.u[22:16])-1);
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_BFE_U64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0))
& ((1 << bits(src1.rawData(), 22, 16)) - 1);
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_BFE_I64 class methods ---
Inst_SOP2__S_BFE_I64::Inst_SOP2__S_BFE_I64(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_bfe_i64")
{
setFlag(ALU);
} // Inst_SOP2__S_BFE_I64
Inst_SOP2__S_BFE_I64::~Inst_SOP2__S_BFE_I64()
{
} // ~Inst_SOP2__S_BFE_I64
// --- description from .arch file ---
// Bit field extract. S0 is Data, S1[5:0] is field offset, S1[22:16] is
// field width.
// D.i64 = (S0.i64>>S1.u[5:0]) & ((1<<S1.u[22:16])-1);
// Sign-extend result;
// SCC = 1 if result is non-zero.
void
Inst_SOP2__S_BFE_I64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandI64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0))
& ((1 << bits(src1.rawData(), 22, 16)) - 1);
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_CBRANCH_G_FORK class methods ---
Inst_SOP2__S_CBRANCH_G_FORK::Inst_SOP2__S_CBRANCH_G_FORK(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_cbranch_g_fork")
{
setFlag(Branch);
} // Inst_SOP2__S_CBRANCH_G_FORK
Inst_SOP2__S_CBRANCH_G_FORK::~Inst_SOP2__S_CBRANCH_G_FORK()
{
} // ~Inst_SOP2__S_CBRANCH_G_FORK
// --- description from .arch file ---
// mask_pass = S0.u64 & EXEC;
// mask_fail = ~S0.u64 & EXEC;
// if(mask_pass == EXEC)
// PC = S1.u64;
// elsif(mask_fail == EXEC)
// PC += 4;
// elsif(bitcount(mask_fail) < bitcount(mask_pass))
// EXEC = mask_fail;
// SGPR[CSP*4] = { S1.u64, mask_pass };
// CSP++;
// PC += 4;
// else
// EXEC = mask_pass;
// SGPR[CSP*4] = { PC + 4, mask_fail };
// CSP++;
// PC = S1.u64;
// end.
// Conditional branch using branch-stack.
// S0 = compare mask(vcc or any sgpr) and
// S1 = 64-bit byte address of target instruction.
// See also S_CBRANCH_JOIN.
void
Inst_SOP2__S_CBRANCH_G_FORK::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOP2__S_ABSDIFF_I32 class methods ---
Inst_SOP2__S_ABSDIFF_I32::Inst_SOP2__S_ABSDIFF_I32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_absdiff_i32")
{
setFlag(ALU);
} // Inst_SOP2__S_ABSDIFF_I32
Inst_SOP2__S_ABSDIFF_I32::~Inst_SOP2__S_ABSDIFF_I32()
{
} // ~Inst_SOP2__S_ABSDIFF_I32
// --- description from .arch file ---
// D.i = S0.i - S1.i;
// if(D.i < 0) then D.i = -D.i;
// SCC = 1 if result is non-zero.
// Compute the absolute value of difference between two values.
void
Inst_SOP2__S_ABSDIFF_I32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src0.read();
src1.read();
sdst = std::abs(src0.rawData() - src1.rawData());
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP2__S_RFE_RESTORE_B64 class methods ---
Inst_SOP2__S_RFE_RESTORE_B64::Inst_SOP2__S_RFE_RESTORE_B64(
InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_rfe_restore_b64")
{
} // Inst_SOP2__S_RFE_RESTORE_B64
Inst_SOP2__S_RFE_RESTORE_B64::~Inst_SOP2__S_RFE_RESTORE_B64()
{
} // ~Inst_SOP2__S_RFE_RESTORE_B64
// --- description from .arch file ---
// PRIV = 0;
// PC = S0.u64;
// INST_ATC = S1.u32[0].
// Return from exception handler and continue, possibly changing the
// --- instruction ATC mode.
// This instruction may only be used within a trap handler.
// Use this instruction when the main program may be in a different memory
// --- space than the trap handler.
void
Inst_SOP2__S_RFE_RESTORE_B64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOP2__S_MUL_HI_U32 class methods ---
Inst_SOP2__S_MUL_HI_U32::Inst_SOP2__S_MUL_HI_U32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_mul_hi_u32")
{
setFlag(ALU);
} // Inst_SOP2__S_MUL_HI_U32
Inst_SOP2__S_MUL_HI_U32::~Inst_SOP2__S_MUL_HI_U32()
{
} // ~Inst_SOP2__S_MUL_HI_U32
// --- description from .arch file ---
// D.u = (S0.u * S1.u) >> 32;
void
Inst_SOP2__S_MUL_HI_U32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
src0.read();
src1.read();
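// Multiply in 64 bits so that the upper 32 bits of the full product
// can be returned.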
VecElemU64 tmp_dst =
((VecElemU64)src0.rawData() * (VecElemU64)src1.rawData());
sdst = (tmp_dst >> 32);
sdst.write();
} // execute
// --- Inst_SOP2__S_MUL_HI_I32 class methods ---
Inst_SOP2__S_MUL_HI_I32::Inst_SOP2__S_MUL_HI_I32(InFmt_SOP2 *iFmt)
: Inst_SOP2(iFmt, "s_mul_hi_i32")
{
setFlag(ALU);
} // Inst_SOP2__S_MUL_HI_I32
Inst_SOP2__S_MUL_HI_I32::~Inst_SOP2__S_MUL_HI_I32()
{
} // ~Inst_SOP2__S_MUL_HI_I32
// --- description from .arch file ---
// D.i = (S0.i * S1.i) >> 32;
void
Inst_SOP2__S_MUL_HI_I32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
src0.read();
src1.read();
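// Widen both signed sources to 64 bits so the multiply produces the
// full 64-bit product; the upper 32 bits are the result.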
VecElemI64 tmp_src0 =
sext<std::numeric_limits<VecElemI64>::digits>(src0.rawData());
VecElemI64 tmp_src1 =
sext<std::numeric_limits<VecElemI64>::digits>(src1.rawData());
sdst = (VecElemI32)((tmp_src0 * tmp_src1) >> 32);
sdst.write();
} // execute
// --- Inst_SOPK__S_MOVK_I32 class methods ---
Inst_SOPK__S_MOVK_I32::Inst_SOPK__S_MOVK_I32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_movk_i32")
{
setFlag(ALU);
} // Inst_SOPK__S_MOVK_I32
Inst_SOPK__S_MOVK_I32::~Inst_SOPK__S_MOVK_I32()
{
} // ~Inst_SOPK__S_MOVK_I32
// --- description from .arch file ---
// D.i = signext(SIMM16) (sign extension).
void
Inst_SOPK__S_MOVK_I32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
sdst = simm16;
sdst.write();
} // execute
// --- Inst_SOPK__S_CMOVK_I32 class methods ---
Inst_SOPK__S_CMOVK_I32::Inst_SOPK__S_CMOVK_I32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_cmovk_i32")
{
setFlag(ALU);
} // Inst_SOPK__S_CMOVK_I32
Inst_SOPK__S_CMOVK_I32::~Inst_SOPK__S_CMOVK_I32()
{
} // ~Inst_SOPK__S_CMOVK_I32
// --- description from .arch file ---
// if(SCC) then D.i = signext(SIMM16);
// else NOP.
// Conditional move with sign extension.
void
Inst_SOPK__S_CMOVK_I32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);
scc.read();
if (scc.rawData()) {
sdst = simm16;
sdst.write();
}
} // execute
// --- Inst_SOPK__S_CMPK_EQ_I32 class methods ---
Inst_SOPK__S_CMPK_EQ_I32::Inst_SOPK__S_CMPK_EQ_I32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_cmpk_eq_i32")
{
setFlag(ALU);
} // Inst_SOPK__S_CMPK_EQ_I32
Inst_SOPK__S_CMPK_EQ_I32::~Inst_SOPK__S_CMPK_EQ_I32()
{
} // ~Inst_SOPK__S_CMPK_EQ_I32
// --- description from .arch file ---
// SCC = (S0.i == signext(SIMM16)).
void
Inst_SOPK__S_CMPK_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
scc = (src.rawData() == simm16) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPK__S_CMPK_LG_I32 class methods ---
Inst_SOPK__S_CMPK_LG_I32::Inst_SOPK__S_CMPK_LG_I32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_cmpk_lg_i32")
{
setFlag(ALU);
} // Inst_SOPK__S_CMPK_LG_I32
Inst_SOPK__S_CMPK_LG_I32::~Inst_SOPK__S_CMPK_LG_I32()
{
} // ~Inst_SOPK__S_CMPK_LG_I32
// --- description from .arch file ---
// SCC = (S0.i != signext(SIMM16)).
void
Inst_SOPK__S_CMPK_LG_I32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
scc = (src.rawData() != simm16) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPK__S_CMPK_GT_I32 class methods ---
Inst_SOPK__S_CMPK_GT_I32::Inst_SOPK__S_CMPK_GT_I32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_cmpk_gt_i32")
{
setFlag(ALU);
} // Inst_SOPK__S_CMPK_GT_I32
Inst_SOPK__S_CMPK_GT_I32::~Inst_SOPK__S_CMPK_GT_I32()
{
} // ~Inst_SOPK__S_CMPK_GT_I32
// --- description from .arch file ---
// SCC = (S0.i > signext(SIMM16)).
void
Inst_SOPK__S_CMPK_GT_I32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
scc = (src.rawData() > simm16) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPK__S_CMPK_GE_I32 class methods ---
Inst_SOPK__S_CMPK_GE_I32::Inst_SOPK__S_CMPK_GE_I32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_cmpk_ge_i32")
{
setFlag(ALU);
} // Inst_SOPK__S_CMPK_GE_I32
Inst_SOPK__S_CMPK_GE_I32::~Inst_SOPK__S_CMPK_GE_I32()
{
} // ~Inst_SOPK__S_CMPK_GE_I32
// --- description from .arch file ---
// SCC = (S0.i >= signext(SIMM16)).
void
Inst_SOPK__S_CMPK_GE_I32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
scc = (src.rawData() >= simm16) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPK__S_CMPK_LT_I32 class methods ---
Inst_SOPK__S_CMPK_LT_I32::Inst_SOPK__S_CMPK_LT_I32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_cmpk_lt_i32")
{
setFlag(ALU);
} // Inst_SOPK__S_CMPK_LT_I32
Inst_SOPK__S_CMPK_LT_I32::~Inst_SOPK__S_CMPK_LT_I32()
{
} // ~Inst_SOPK__S_CMPK_LT_I32
// --- description from .arch file ---
// SCC = (S0.i < signext(SIMM16)).
void
Inst_SOPK__S_CMPK_LT_I32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
scc = (src.rawData() < simm16) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPK__S_CMPK_LE_I32 class methods ---
Inst_SOPK__S_CMPK_LE_I32::Inst_SOPK__S_CMPK_LE_I32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_cmpk_le_i32")
{
setFlag(ALU);
} // Inst_SOPK__S_CMPK_LE_I32
Inst_SOPK__S_CMPK_LE_I32::~Inst_SOPK__S_CMPK_LE_I32()
{
} // ~Inst_SOPK__S_CMPK_LE_I32
// --- description from .arch file ---
// SCC = (S0.i <= signext(SIMM16)).
void
Inst_SOPK__S_CMPK_LE_I32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
scc = (src.rawData() <= simm16) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPK__S_CMPK_EQ_U32 class methods ---
Inst_SOPK__S_CMPK_EQ_U32::Inst_SOPK__S_CMPK_EQ_U32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_cmpk_eq_u32")
{
setFlag(ALU);
} // Inst_SOPK__S_CMPK_EQ_U32
Inst_SOPK__S_CMPK_EQ_U32::~Inst_SOPK__S_CMPK_EQ_U32()
{
} // ~Inst_SOPK__S_CMPK_EQ_U32
// --- description from .arch file ---
// SCC = (S0.u == SIMM16).
void
Inst_SOPK__S_CMPK_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
scc = (src.rawData() == simm16) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPK__S_CMPK_LG_U32 class methods ---
Inst_SOPK__S_CMPK_LG_U32::Inst_SOPK__S_CMPK_LG_U32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_cmpk_lg_u32")
{
setFlag(ALU);
} // Inst_SOPK__S_CMPK_LG_U32
Inst_SOPK__S_CMPK_LG_U32::~Inst_SOPK__S_CMPK_LG_U32()
{
} // ~Inst_SOPK__S_CMPK_LG_U32
// --- description from .arch file ---
// SCC = (S0.u != SIMM16).
void
Inst_SOPK__S_CMPK_LG_U32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
scc = (src.rawData() != simm16) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPK__S_CMPK_GT_U32 class methods ---
Inst_SOPK__S_CMPK_GT_U32::Inst_SOPK__S_CMPK_GT_U32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_cmpk_gt_u32")
{
setFlag(ALU);
} // Inst_SOPK__S_CMPK_GT_U32
Inst_SOPK__S_CMPK_GT_U32::~Inst_SOPK__S_CMPK_GT_U32()
{
} // ~Inst_SOPK__S_CMPK_GT_U32
// --- description from .arch file ---
// SCC = (S0.u > SIMM16).
void
Inst_SOPK__S_CMPK_GT_U32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
scc = (src.rawData() > simm16) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPK__S_CMPK_GE_U32 class methods ---
Inst_SOPK__S_CMPK_GE_U32::Inst_SOPK__S_CMPK_GE_U32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_cmpk_ge_u32")
{
setFlag(ALU);
} // Inst_SOPK__S_CMPK_GE_U32
Inst_SOPK__S_CMPK_GE_U32::~Inst_SOPK__S_CMPK_GE_U32()
{
} // ~Inst_SOPK__S_CMPK_GE_U32
// --- description from .arch file ---
// SCC = (S0.u >= SIMM16).
void
Inst_SOPK__S_CMPK_GE_U32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
scc = (src.rawData() >= simm16) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPK__S_CMPK_LT_U32 class methods ---
Inst_SOPK__S_CMPK_LT_U32::Inst_SOPK__S_CMPK_LT_U32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_cmpk_lt_u32")
{
setFlag(ALU);
} // Inst_SOPK__S_CMPK_LT_U32
Inst_SOPK__S_CMPK_LT_U32::~Inst_SOPK__S_CMPK_LT_U32()
{
} // ~Inst_SOPK__S_CMPK_LT_U32
// --- description from .arch file ---
// SCC = (S0.u < SIMM16).
void
Inst_SOPK__S_CMPK_LT_U32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
scc = (src.rawData() < simm16) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPK__S_CMPK_LE_U32 class methods ---
Inst_SOPK__S_CMPK_LE_U32::Inst_SOPK__S_CMPK_LE_U32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_cmpk_le_u32")
{
setFlag(ALU);
} // Inst_SOPK__S_CMPK_LE_U32
Inst_SOPK__S_CMPK_LE_U32::~Inst_SOPK__S_CMPK_LE_U32()
{
} // ~Inst_SOPK__S_CMPK_LE_U32
// --- description from .arch file ---
// SCC = (S0.u <= SIMM16).
void
Inst_SOPK__S_CMPK_LE_U32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
scc = (src.rawData() <= simm16) ? 1 : 0;
scc.write();
} // execute
// --- Inst_SOPK__S_ADDK_I32 class methods ---
Inst_SOPK__S_ADDK_I32::Inst_SOPK__S_ADDK_I32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_addk_i32")
{
setFlag(ALU);
} // Inst_SOPK__S_ADDK_I32
Inst_SOPK__S_ADDK_I32::~Inst_SOPK__S_ADDK_I32()
{
} // ~Inst_SOPK__S_ADDK_I32
// --- description from .arch file ---
// D.i = D.i + signext(SIMM16);
// SCC = overflow.
void
Inst_SOPK__S_ADDK_I32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegI16 simm16 = instData.SIMM16;
ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
sdst = src.rawData() + (ScalarRegI32)sext<16>(simm16);
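// Overflow check: bit 15 of the immediate is its sign bit, matching
// bit 31 of the sign-extended value used in the addition.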
scc = (bits(src.rawData(), 31) == bits(simm16, 15)
&& bits(src.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOPK__S_MULK_I32 class methods ---
Inst_SOPK__S_MULK_I32::Inst_SOPK__S_MULK_I32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_mulk_i32")
{
setFlag(ALU);
} // Inst_SOPK__S_MULK_I32
Inst_SOPK__S_MULK_I32::~Inst_SOPK__S_MULK_I32()
{
} // ~Inst_SOPK__S_MULK_I32
// --- description from .arch file ---
// D.i = D.i * signext(SIMM16).
void
Inst_SOPK__S_MULK_I32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegI16 simm16 = instData.SIMM16;
ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
src.read();
sdst = src.rawData() * (ScalarRegI32)sext<16>(simm16);
sdst.write();
} // execute
// --- Inst_SOPK__S_CBRANCH_I_FORK class methods ---
Inst_SOPK__S_CBRANCH_I_FORK::Inst_SOPK__S_CBRANCH_I_FORK(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_cbranch_i_fork")
{
setFlag(Branch);
} // Inst_SOPK__S_CBRANCH_I_FORK
Inst_SOPK__S_CBRANCH_I_FORK::~Inst_SOPK__S_CBRANCH_I_FORK()
{
} // ~Inst_SOPK__S_CBRANCH_I_FORK
// --- description from .arch file ---
// mask_pass = S0.u64 & EXEC;
// mask_fail = ~S0.u64 & EXEC;
// target_addr = PC + signext(SIMM16 * 4) + 4;
// if(mask_pass == EXEC)
// PC = target_addr;
// elsif(mask_fail == EXEC)
// PC += 4;
// elsif(bitcount(mask_fail) < bitcount(mask_pass))
// EXEC = mask_fail;
// SGPR[CSP*4] = { target_addr, mask_pass };
// CSP++;
// PC += 4;
// else
// EXEC = mask_pass;
// SGPR[CSP*4] = { PC + 4, mask_fail };
// CSP++;
// PC = target_addr;
// end.
// Conditional branch using branch-stack.
// S0 = compare mask(vcc or any sgpr), and
// SIMM16 = signed DWORD branch offset relative to next instruction.
// See also S_CBRANCH_JOIN.
void
Inst_SOPK__S_CBRANCH_I_FORK::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOPK__S_GETREG_B32 class methods ---
Inst_SOPK__S_GETREG_B32::Inst_SOPK__S_GETREG_B32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_getreg_b32")
{
setFlag(ALU);
} // Inst_SOPK__S_GETREG_B32
Inst_SOPK__S_GETREG_B32::~Inst_SOPK__S_GETREG_B32()
{
} // ~Inst_SOPK__S_GETREG_B32
// --- description from .arch file ---
// D.u = hardware-reg. Read some or all of a hardware register into the
// LSBs of D.
// SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size
// is 1..32.
void
Inst_SOPK__S_GETREG_B32::execute(GPUDynInstPtr gpuDynInst)
{
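// SIMM16 encodes {size[4:0], offset[4:0], hwRegId[5:0]}; the size
// field stores (width - 1), hence the "+ 1" when decoding below.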
ScalarRegI16 simm16 = instData.SIMM16;
ScalarRegU32 hwregId = simm16 & 0x3f;
ScalarRegU32 offset = (simm16 >> 6) & 31;
ScalarRegU32 size = ((simm16 >> 11) & 31) + 1;
ScalarRegU32 hwreg =
gpuDynInst->computeUnit()->shader->getHwReg(hwregId);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
sdst.read();
// Extract the selected field of the hardware register into the
// low bits of SDST.
ScalarRegU32 mask = (((1U << size) - 1U) << offset);
sdst = (hwreg & mask) >> offset;
sdst.write();
} // execute
// --- Inst_SOPK__S_SETREG_B32 class methods ---
Inst_SOPK__S_SETREG_B32::Inst_SOPK__S_SETREG_B32(InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_setreg_b32")
{
setFlag(ALU);
} // Inst_SOPK__S_SETREG_B32
Inst_SOPK__S_SETREG_B32::~Inst_SOPK__S_SETREG_B32()
{
} // ~Inst_SOPK__S_SETREG_B32
// --- description from .arch file ---
// hardware-reg = S0.u. Write some or all of the LSBs of D into a hardware
// register.
// SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size
// is 1..32.
void
Inst_SOPK__S_SETREG_B32::execute(GPUDynInstPtr gpuDynInst)
{
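// SIMM16 is decoded exactly as in s_getreg_b32; the selected field of
// the hardware register is then read-modify-written from SDST.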
ScalarRegI16 simm16 = instData.SIMM16;
ScalarRegU32 hwregId = simm16 & 0x3f;
ScalarRegU32 offset = (simm16 >> 6) & 31;
ScalarRegU32 size = ((simm16 >> 11) & 31) + 1;
ScalarRegU32 hwreg =
gpuDynInst->computeUnit()->shader->getHwReg(hwregId);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
sdst.read();
// Store value from SDST to part of the hardware register.
ScalarRegU32 mask = (((1U << size) - 1U) << offset);
hwreg = ((hwreg & ~mask) | ((sdst.rawData() << offset) & mask));
gpuDynInst->computeUnit()->shader->setHwReg(hwregId, hwreg);
// Writes to the single-precision round/denorm fields of the MODE
// register are accepted with a warning but do not change the
// simulated FP behavior.
if (hwregId==HW_REG_MODE && size==2
&& (offset==4 || offset==0)) {
warn_once("Be cautious that s_setreg_b32 has no real effect "
"on FP modes: %s\n", gpuDynInst->disassemble());
return;
}
// Any other hardware register write is unimplemented.
panicUnimplemented();
} // execute
// --- Inst_SOPK__S_SETREG_IMM32_B32 class methods ---
Inst_SOPK__S_SETREG_IMM32_B32::Inst_SOPK__S_SETREG_IMM32_B32(
InFmt_SOPK *iFmt)
: Inst_SOPK(iFmt, "s_setreg_imm32_b32")
{
setFlag(ALU);
} // Inst_SOPK__S_SETREG_IMM32_B32
Inst_SOPK__S_SETREG_IMM32_B32::~Inst_SOPK__S_SETREG_IMM32_B32()
{
} // ~Inst_SOPK__S_SETREG_IMM32_B32
// --- description from .arch file ---
// Write some or all of the LSBs of IMM32 into a hardware register; this
// --- instruction requires a 32-bit literal constant.
// SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size
// is 1..32.
void
Inst_SOPK__S_SETREG_IMM32_B32::execute(GPUDynInstPtr gpuDynInst)
{
ScalarRegI16 simm16 = instData.SIMM16;
ScalarRegU32 hwregId = simm16 & 0x3f;
ScalarRegU32 offset = (simm16 >> 6) & 31;
ScalarRegU32 size = ((simm16 >> 11) & 31) + 1;
ScalarRegU32 hwreg =
gpuDynInst->computeUnit()->shader->getHwReg(hwregId);
ScalarRegI32 simm32 = extData.imm_u32;
// Store value from SIMM32 to part of the hardware register.
ScalarRegU32 mask = (((1U << size) - 1U) << offset);
hwreg = ((hwreg & ~mask) | ((simm32 << offset) & mask));
gpuDynInst->computeUnit()->shader->setHwReg(hwregId, hwreg);
// Writes to the single-precision round/denorm fields of the MODE
// register are accepted with a warning but do not change the
// simulated FP behavior.
if (hwregId==HW_REG_MODE && size==2
&& (offset==4 || offset==0)) {
warn_once("Be cautious that s_setreg_imm32_b32 has no real effect "
"on FP modes: %s\n", gpuDynInst->disassemble());
return;
}
// Any other hardware register write is unimplemented.
panicUnimplemented();
} // execute
// --- Inst_SOP1__S_MOV_B32 class methods ---
Inst_SOP1__S_MOV_B32::Inst_SOP1__S_MOV_B32(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_mov_b32")
{
setFlag(ALU);
} // Inst_SOP1__S_MOV_B32
Inst_SOP1__S_MOV_B32::~Inst_SOP1__S_MOV_B32()
{
} // ~Inst_SOP1__S_MOV_B32
// --- description from .arch file ---
// D.u = S0.u.
void
Inst_SOP1__S_MOV_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
src.read();
sdst = src.rawData();
sdst.write();
} // execute
// --- Inst_SOP1__S_MOV_B64 class methods ---
Inst_SOP1__S_MOV_B64::Inst_SOP1__S_MOV_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_mov_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_MOV_B64
Inst_SOP1__S_MOV_B64::~Inst_SOP1__S_MOV_B64()
{
} // ~Inst_SOP1__S_MOV_B64
// --- description from .arch file ---
// D.u64 = S0.u64.
void
Inst_SOP1__S_MOV_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
src.read();
sdst = src.rawData();
sdst.write();
} // execute
// --- Inst_SOP1__S_CMOV_B32 class methods ---
Inst_SOP1__S_CMOV_B32::Inst_SOP1__S_CMOV_B32(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_cmov_b32")
{
setFlag(ALU);
} // Inst_SOP1__S_CMOV_B32
Inst_SOP1__S_CMOV_B32::~Inst_SOP1__S_CMOV_B32()
{
} // ~Inst_SOP1__S_CMOV_B32
// --- description from .arch file ---
// (SCC) then D.u = S0.u;
// else NOP.
// Conditional move.
void
Inst_SOP1__S_CMOV_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
scc.read();
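// commit the move only when SCC is set; otherwise the destination is
// left untouched (the 64-bit variant below behaves the same way)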
if (scc.rawData()) {
sdst = src.rawData();
sdst.write();
}
} // execute
// --- Inst_SOP1__S_CMOV_B64 class methods ---
Inst_SOP1__S_CMOV_B64::Inst_SOP1__S_CMOV_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_cmov_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_CMOV_B64
Inst_SOP1__S_CMOV_B64::~Inst_SOP1__S_CMOV_B64()
{
} // ~Inst_SOP1__S_CMOV_B64
// --- description from .arch file ---
// if(SCC) then D.u64 = S0.u64;
// else NOP.
// Conditional move.
void
Inst_SOP1__S_CMOV_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
scc.read();
if (scc.rawData()) {
sdst = src.rawData();
sdst.write();
}
} // execute
// --- Inst_SOP1__S_NOT_B32 class methods ---
Inst_SOP1__S_NOT_B32::Inst_SOP1__S_NOT_B32(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_not_b32")
{
setFlag(ALU);
} // Inst_SOP1__S_NOT_B32
Inst_SOP1__S_NOT_B32::~Inst_SOP1__S_NOT_B32()
{
} // ~Inst_SOP1__S_NOT_B32
// --- description from .arch file ---
// D.u = ~S0.u;
// SCC = 1 if result is non-zero.
// Bitwise negation.
void
Inst_SOP1__S_NOT_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
sdst = ~src.rawData();
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP1__S_NOT_B64 class methods ---
Inst_SOP1__S_NOT_B64::Inst_SOP1__S_NOT_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_not_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_NOT_B64
Inst_SOP1__S_NOT_B64::~Inst_SOP1__S_NOT_B64()
{
} // ~Inst_SOP1__S_NOT_B64
// --- description from .arch file ---
// D.u64 = ~S0.u64;
// SCC = 1 if result is non-zero.
// Bitwise negation.
void
Inst_SOP1__S_NOT_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
sdst = ~src.rawData();
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP1__S_WQM_B32 class methods ---
Inst_SOP1__S_WQM_B32::Inst_SOP1__S_WQM_B32(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_wqm_b32")
{
setFlag(ALU);
} // Inst_SOP1__S_WQM_B32
Inst_SOP1__S_WQM_B32::~Inst_SOP1__S_WQM_B32()
{
} // ~Inst_SOP1__S_WQM_B32
// --- description from .arch file ---
// D[i] = (S0[(i & ~3):(i | 3)] != 0);
// Computes whole quad mode for an active/valid mask.
// SCC = 1 if result is non-zero.
void
Inst_SOP1__S_WQM_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
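// wholeQuadMode sets all four bits of each aligned group of four lanes
// (a quad) whenever any bit of that group is set in the source mask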
sdst = wholeQuadMode(src.rawData());
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP1__S_WQM_B64 class methods ---
Inst_SOP1__S_WQM_B64::Inst_SOP1__S_WQM_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_wqm_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_WQM_B64
Inst_SOP1__S_WQM_B64::~Inst_SOP1__S_WQM_B64()
{
} // ~Inst_SOP1__S_WQM_B64
// --- description from .arch file ---
// D[i] = (S0[(i & ~3):(i | 3)] != 0);
// Computes whole quad mode for an active/valid mask.
// SCC = 1 if result is non-zero.
void
Inst_SOP1__S_WQM_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
sdst = wholeQuadMode(src.rawData());
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP1__S_BREV_B32 class methods ---
Inst_SOP1__S_BREV_B32::Inst_SOP1__S_BREV_B32(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_brev_b32")
{
setFlag(ALU);
} // Inst_SOP1__S_BREV_B32
Inst_SOP1__S_BREV_B32::~Inst_SOP1__S_BREV_B32()
{
} // ~Inst_SOP1__S_BREV_B32
// --- description from .arch file ---
// D.u[31:0] = S0.u[0:31] (reverse bits).
void
Inst_SOP1__S_BREV_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
src.read();
sdst = reverseBits(src.rawData());
sdst.write();
} // execute
// --- Inst_SOP1__S_BREV_B64 class methods ---
Inst_SOP1__S_BREV_B64::Inst_SOP1__S_BREV_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_brev_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_BREV_B64
Inst_SOP1__S_BREV_B64::~Inst_SOP1__S_BREV_B64()
{
} // ~Inst_SOP1__S_BREV_B64
// --- description from .arch file ---
// D.u64[63:0] = S0.u64[0:63] (reverse bits).
void
Inst_SOP1__S_BREV_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
src.read();
sdst = reverseBits(src.rawData());
sdst.write();
} // execute
// --- Inst_SOP1__S_BCNT0_I32_B32 class methods ---
Inst_SOP1__S_BCNT0_I32_B32::Inst_SOP1__S_BCNT0_I32_B32(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_bcnt0_i32_b32")
{
setFlag(ALU);
} // Inst_SOP1__S_BCNT0_I32_B32
Inst_SOP1__S_BCNT0_I32_B32::~Inst_SOP1__S_BCNT0_I32_B32()
{
} // ~Inst_SOP1__S_BCNT0_I32_B32
// --- description from .arch file ---
// D.i = CountZeroBits(S0.u);
// SCC = 1 if result is non-zero.
void
Inst_SOP1__S_BCNT0_I32_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
sdst = countZeroBits(src.rawData());
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP1__S_BCNT0_I32_B64 class methods ---
Inst_SOP1__S_BCNT0_I32_B64::Inst_SOP1__S_BCNT0_I32_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_bcnt0_i32_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_BCNT0_I32_B64
Inst_SOP1__S_BCNT0_I32_B64::~Inst_SOP1__S_BCNT0_I32_B64()
{
} // ~Inst_SOP1__S_BCNT0_I32_B64
// --- description from .arch file ---
// D.i = CountZeroBits(S0.u64);
// SCC = 1 if result is non-zero.
void
Inst_SOP1__S_BCNT0_I32_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
sdst = countZeroBits(src.rawData());
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP1__S_BCNT1_I32_B32 class methods ---
Inst_SOP1__S_BCNT1_I32_B32::Inst_SOP1__S_BCNT1_I32_B32(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_bcnt1_i32_b32")
{
setFlag(ALU);
} // Inst_SOP1__S_BCNT1_I32_B32
Inst_SOP1__S_BCNT1_I32_B32::~Inst_SOP1__S_BCNT1_I32_B32()
{
} // ~Inst_SOP1__S_BCNT1_I32_B32
// --- description from .arch file ---
// D.i = CountOneBits(S0.u);
// SCC = 1 if result is non-zero.
void
Inst_SOP1__S_BCNT1_I32_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
sdst = popCount(src.rawData());
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP1__S_BCNT1_I32_B64 class methods ---
Inst_SOP1__S_BCNT1_I32_B64::Inst_SOP1__S_BCNT1_I32_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_bcnt1_i32_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_BCNT1_I32_B64
Inst_SOP1__S_BCNT1_I32_B64::~Inst_SOP1__S_BCNT1_I32_B64()
{
} // ~Inst_SOP1__S_BCNT1_I32_B64
// --- description from .arch file ---
// D.i = CountOneBits(S0.u64);
// SCC = 1 if result is non-zero.
void
Inst_SOP1__S_BCNT1_I32_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
sdst = popCount(src.rawData());
scc = sdst.rawData() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP1__S_FF0_I32_B32 class methods ---
Inst_SOP1__S_FF0_I32_B32::Inst_SOP1__S_FF0_I32_B32(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_ff0_i32_b32")
{
setFlag(ALU);
} // Inst_SOP1__S_FF0_I32_B32
Inst_SOP1__S_FF0_I32_B32::~Inst_SOP1__S_FF0_I32_B32()
{
} // ~Inst_SOP1__S_FF0_I32_B32
// --- description from .arch file ---
// D.i = FindFirstZero(S0.u);
// If no zeros are found, return -1.
// Returns the bit position of the first zero from the LSB.
void
Inst_SOP1__S_FF0_I32_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
src.read();
sdst = findFirstZero(src.rawData());
sdst.write();
} // execute
// --- Inst_SOP1__S_FF0_I32_B64 class methods ---
Inst_SOP1__S_FF0_I32_B64::Inst_SOP1__S_FF0_I32_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_ff0_i32_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_FF0_I32_B64
Inst_SOP1__S_FF0_I32_B64::~Inst_SOP1__S_FF0_I32_B64()
{
} // ~Inst_SOP1__S_FF0_I32_B64
// --- description from .arch file ---
// D.i = FindFirstZero(S0.u64);
// If no zeros are found, return -1.
// Returns the bit position of the first zero from the LSB.
void
Inst_SOP1__S_FF0_I32_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
src.read();
sdst = findFirstZero(src.rawData());
sdst.write();
} // execute
// --- Inst_SOP1__S_FF1_I32_B32 class methods ---
Inst_SOP1__S_FF1_I32_B32::Inst_SOP1__S_FF1_I32_B32(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_ff1_i32_b32")
{
setFlag(ALU);
} // Inst_SOP1__S_FF1_I32_B32
Inst_SOP1__S_FF1_I32_B32::~Inst_SOP1__S_FF1_I32_B32()
{
} // ~Inst_SOP1__S_FF1_I32_B32
// --- description from .arch file ---
// D.i = FindFirstOne(S0.u);
// If no ones are found, return -1.
// Returns the bit position of the first one from the LSB.
void
Inst_SOP1__S_FF1_I32_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
src.read();
sdst = findFirstOne(src.rawData());
sdst.write();
} // execute
// --- Inst_SOP1__S_FF1_I32_B64 class methods ---
Inst_SOP1__S_FF1_I32_B64::Inst_SOP1__S_FF1_I32_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_ff1_i32_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_FF1_I32_B64
Inst_SOP1__S_FF1_I32_B64::~Inst_SOP1__S_FF1_I32_B64()
{
} // ~Inst_SOP1__S_FF1_I32_B64
// --- description from .arch file ---
// D.i = FindFirstOne(S0.u64);
// If no ones are found, return -1.
// Returns the bit position of the first one from the LSB.
void
Inst_SOP1__S_FF1_I32_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
src.read();
sdst = findFirstOne(src.rawData());
sdst.write();
} // execute
// --- Inst_SOP1__S_FLBIT_I32_B32 class methods ---
Inst_SOP1__S_FLBIT_I32_B32::Inst_SOP1__S_FLBIT_I32_B32(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_flbit_i32_b32")
{
setFlag(ALU);
} // Inst_SOP1__S_FLBIT_I32_B32
Inst_SOP1__S_FLBIT_I32_B32::~Inst_SOP1__S_FLBIT_I32_B32()
{
} // ~Inst_SOP1__S_FLBIT_I32_B32
// --- description from .arch file ---
// D.i = FindFirstOne(S0.u);
// If no ones are found, return -1.
// Counts how many zeros before the first one starting from the MSB.
void
Inst_SOP1__S_FLBIT_I32_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
src.read();
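// countZeroBitsMsb counts the zero bits above the most-significant
// one, i.e. leading zeros; per the description above the result is -1
// when the source has no set bits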
sdst = countZeroBitsMsb(src.rawData());
sdst.write();
} // execute
// --- Inst_SOP1__S_FLBIT_I32_B64 class methods ---
Inst_SOP1__S_FLBIT_I32_B64::Inst_SOP1__S_FLBIT_I32_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_flbit_i32_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_FLBIT_I32_B64
Inst_SOP1__S_FLBIT_I32_B64::~Inst_SOP1__S_FLBIT_I32_B64()
{
} // ~Inst_SOP1__S_FLBIT_I32_B64
// --- description from .arch file ---
// D.i = FindFirstOne(S0.u64);
// If no ones are found, return -1.
// Counts how many zeros before the first one starting from the MSB.
void
Inst_SOP1__S_FLBIT_I32_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
src.read();
sdst = countZeroBitsMsb(src.rawData());
sdst.write();
} // execute
// --- Inst_SOP1__S_FLBIT_I32 class methods ---
Inst_SOP1__S_FLBIT_I32::Inst_SOP1__S_FLBIT_I32(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_flbit_i32")
{
setFlag(ALU);
} // Inst_SOP1__S_FLBIT_I32
Inst_SOP1__S_FLBIT_I32::~Inst_SOP1__S_FLBIT_I32()
{
} // ~Inst_SOP1__S_FLBIT_I32
// --- description from .arch file ---
// D.i = FirstOppositeSignBit(S0.i);
// If S0.i == 0 or S0.i == -1 (all bits are the same), return -1.
// Counts how many bits in a row (from MSB to LSB) are the same as the
// sign bit.
void
Inst_SOP1__S_FLBIT_I32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
src.read();
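// firstOppositeSignBit returns the number of leading bits that match
// the sign bit (counting from the MSB), or -1 when all bits are the
// same (source is 0 or -1)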
sdst = firstOppositeSignBit(src.rawData());
sdst.write();
} // execute
// --- Inst_SOP1__S_FLBIT_I32_I64 class methods ---
Inst_SOP1__S_FLBIT_I32_I64::Inst_SOP1__S_FLBIT_I32_I64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_flbit_i32_i64")
{
setFlag(ALU);
} // Inst_SOP1__S_FLBIT_I32_I64
Inst_SOP1__S_FLBIT_I32_I64::~Inst_SOP1__S_FLBIT_I32_I64()
{
} // ~Inst_SOP1__S_FLBIT_I32_I64
// --- description from .arch file ---
// D.i = FirstOppositeSignBit(S0.i64);
// If S0.i == 0 or S0.i == -1 (all bits are the same), return -1.
// Counts how many bits in a row (from MSB to LSB) are the same as the
// sign bit.
void
Inst_SOP1__S_FLBIT_I32_I64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI64 src(gpuDynInst, instData.SSRC0);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
src.read();
sdst = firstOppositeSignBit(src.rawData());
sdst.write();
} // execute
// --- Inst_SOP1__S_SEXT_I32_I8 class methods ---
Inst_SOP1__S_SEXT_I32_I8::Inst_SOP1__S_SEXT_I32_I8(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_sext_i32_i8")
{
setFlag(ALU);
} // Inst_SOP1__S_SEXT_I32_I8
Inst_SOP1__S_SEXT_I32_I8::~Inst_SOP1__S_SEXT_I32_I8()
{
} // ~Inst_SOP1__S_SEXT_I32_I8
// --- description from .arch file ---
// D.i = signext(S0.i[7:0]) (sign extension).
void
Inst_SOP1__S_SEXT_I32_I8::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
src.read();
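// note: std::numeric_limits<ScalarRegI8>::digits is 7, excluding the
// sign bit; if sext<N> expects the full field width with the sign at
// bit N - 1 (as the helper in base/bitfield.hh appears to), the
// intended argument here and in s_sext_i32_i16 below would be
// digits + 1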
sdst = sext<std::numeric_limits<ScalarRegI8>::digits>(
bits(src.rawData(), 7, 0));
sdst.write();
} // execute
// --- Inst_SOP1__S_SEXT_I32_I16 class methods ---
Inst_SOP1__S_SEXT_I32_I16::Inst_SOP1__S_SEXT_I32_I16(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_sext_i32_i16")
{
setFlag(ALU);
} // Inst_SOP1__S_SEXT_I32_I16
Inst_SOP1__S_SEXT_I32_I16::~Inst_SOP1__S_SEXT_I32_I16()
{
} // ~Inst_SOP1__S_SEXT_I32_I16
// --- description from .arch file ---
// D.i = signext(S0.i[15:0]) (sign extension).
void
Inst_SOP1__S_SEXT_I32_I16::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
src.read();
sdst = sext<std::numeric_limits<ScalarRegI16>::digits>(
bits(src.rawData(), 15, 0));
sdst.write();
} // execute
// --- Inst_SOP1__S_BITSET0_B32 class methods ---
Inst_SOP1__S_BITSET0_B32::Inst_SOP1__S_BITSET0_B32(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_bitset0_b32")
{
setFlag(ALU);
} // Inst_SOP1__S_BITSET0_B32
Inst_SOP1__S_BITSET0_B32::~Inst_SOP1__S_BITSET0_B32()
{
} // ~Inst_SOP1__S_BITSET0_B32
// --- description from .arch file ---
// D.u[S0.u[4:0]] = 0.
void
Inst_SOP1__S_BITSET0_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
src.read();
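// clear the destination bit selected by the low five bits of the
// source; the operand's setBit is relied on to leave the remaining
// destination bits unchanged (the other bitset variants below follow
// the same pattern)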
sdst.setBit(bits(src.rawData(), 4, 0), 0);
sdst.write();
} // execute
// --- Inst_SOP1__S_BITSET0_B64 class methods ---
Inst_SOP1__S_BITSET0_B64::Inst_SOP1__S_BITSET0_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_bitset0_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_BITSET0_B64
Inst_SOP1__S_BITSET0_B64::~Inst_SOP1__S_BITSET0_B64()
{
} // ~Inst_SOP1__S_BITSET0_B64
// --- description from .arch file ---
// D.u64[S0.u[5:0]] = 0.
void
Inst_SOP1__S_BITSET0_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
src.read();
sdst.setBit(bits(src.rawData(), 5, 0), 0);
sdst.write();
} // execute
// --- Inst_SOP1__S_BITSET1_B32 class methods ---
Inst_SOP1__S_BITSET1_B32::Inst_SOP1__S_BITSET1_B32(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_bitset1_b32")
{
setFlag(ALU);
} // Inst_SOP1__S_BITSET1_B32
Inst_SOP1__S_BITSET1_B32::~Inst_SOP1__S_BITSET1_B32()
{
} // ~Inst_SOP1__S_BITSET1_B32
// --- description from .arch file ---
// D.u[S0.u[4:0]] = 1.
void
Inst_SOP1__S_BITSET1_B32::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
src.read();
sdst.setBit(bits(src.rawData(), 4, 0), 1);
sdst.write();
} // execute
// --- Inst_SOP1__S_BITSET1_B64 class methods ---
Inst_SOP1__S_BITSET1_B64::Inst_SOP1__S_BITSET1_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_bitset1_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_BITSET1_B64
Inst_SOP1__S_BITSET1_B64::~Inst_SOP1__S_BITSET1_B64()
{
} // ~Inst_SOP1__S_BITSET1_B64
// --- description from .arch file ---
// D.u64[S0.u[5:0]] = 1.
void
Inst_SOP1__S_BITSET1_B64::execute(GPUDynInstPtr gpuDynInst)
{
ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
src.read();
sdst.setBit(bits(src.rawData(), 5, 0), 1);
sdst.write();
} // execute
// --- Inst_SOP1__S_GETPC_B64 class methods ---
Inst_SOP1__S_GETPC_B64::Inst_SOP1__S_GETPC_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_getpc_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_GETPC_B64
Inst_SOP1__S_GETPC_B64::~Inst_SOP1__S_GETPC_B64()
{
} // ~Inst_SOP1__S_GETPC_B64
// --- description from .arch file ---
// D.u64 = PC + 4.
// Destination receives the byte address of the next instruction.
void
Inst_SOP1__S_GETPC_B64::execute(GPUDynInstPtr gpuDynInst)
{
Addr pc = gpuDynInst->pc();
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
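// the saved value is the byte address of the next instruction; the
// s_getpc_b64 encoding is four bytes long, hence PC + 4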
sdst = pc + 4;
sdst.write();
} // execute
// --- Inst_SOP1__S_SETPC_B64 class methods ---
Inst_SOP1__S_SETPC_B64::Inst_SOP1__S_SETPC_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_setpc_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_SETPC_B64
Inst_SOP1__S_SETPC_B64::~Inst_SOP1__S_SETPC_B64()
{
} // ~Inst_SOP1__S_SETPC_B64
// --- description from .arch file ---
// PC = S0.u64.
// S0.u64 is a byte address of the instruction to jump to.
void
Inst_SOP1__S_SETPC_B64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
src.read();
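// redirect this wavefront's PC to the byte address in the source
// register; no destination register is written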
wf->pc(src.rawData());
} // execute
// --- Inst_SOP1__S_SWAPPC_B64 class methods ---
Inst_SOP1__S_SWAPPC_B64::Inst_SOP1__S_SWAPPC_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_swappc_b64")
{
setFlag(ALU);
} // Inst_SOP1__S_SWAPPC_B64
Inst_SOP1__S_SWAPPC_B64::~Inst_SOP1__S_SWAPPC_B64()
{
} // ~Inst_SOP1__S_SWAPPC_B64
// --- description from .arch file ---
// D.u64 = PC + 4; PC = S0.u64.
// S0.u64 is a byte address of the instruction to jump to.
void
Inst_SOP1__S_SWAPPC_B64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
Addr pc = gpuDynInst->pc();
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
src.read();
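// save the return address (the instruction following this one) before
// redirecting the wavefront to the target address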
sdst = pc + 4;
wf->pc(src.rawData());
sdst.write();
} // execute
// --- Inst_SOP1__S_RFE_B64 class methods ---
Inst_SOP1__S_RFE_B64::Inst_SOP1__S_RFE_B64(InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_rfe_b64")
{
} // Inst_SOP1__S_RFE_B64
Inst_SOP1__S_RFE_B64::~Inst_SOP1__S_RFE_B64()
{
} // ~Inst_SOP1__S_RFE_B64
// --- description from .arch file ---
// PRIV = 0;
// PC = S0.u64.
// Return from exception handler and continue.
// This instruction may only be used within a trap handler.
void
Inst_SOP1__S_RFE_B64::execute(GPUDynInstPtr gpuDynInst)
{
panicUnimplemented();
} // execute
// --- Inst_SOP1__S_AND_SAVEEXEC_B64 class methods ---
Inst_SOP1__S_AND_SAVEEXEC_B64::Inst_SOP1__S_AND_SAVEEXEC_B64(
InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_and_saveexec_b64")
{
setFlag(ALU);
setFlag(ReadsEXEC);
setFlag(WritesEXEC);
} // Inst_SOP1__S_AND_SAVEEXEC_B64
Inst_SOP1__S_AND_SAVEEXEC_B64::~Inst_SOP1__S_AND_SAVEEXEC_B64()
{
} // ~Inst_SOP1__S_AND_SAVEEXEC_B64
// --- description from .arch file ---
// D.u64 = EXEC;
// EXEC = S0.u64 & EXEC;
// SCC = 1 if the new value of EXEC is non-zero.
void
Inst_SOP1__S_AND_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
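// save the incoming EXEC mask in the destination, then replace EXEC
// with (S0 & EXEC); SCC records whether any lanes remain active. The
// or/xor/andn2 saveexec variants below follow the same pattern with a
// different combining operation.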
sdst = wf->execMask().to_ullong();
wf->execMask() = src.rawData() & wf->execMask().to_ullong();
scc = wf->execMask().any() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP1__S_OR_SAVEEXEC_B64 class methods ---
Inst_SOP1__S_OR_SAVEEXEC_B64::Inst_SOP1__S_OR_SAVEEXEC_B64(
InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_or_saveexec_b64")
{
setFlag(ALU);
setFlag(ReadsEXEC);
setFlag(WritesEXEC);
} // Inst_SOP1__S_OR_SAVEEXEC_B64
Inst_SOP1__S_OR_SAVEEXEC_B64::~Inst_SOP1__S_OR_SAVEEXEC_B64()
{
} // ~Inst_SOP1__S_OR_SAVEEXEC_B64
// --- description from .arch file ---
// D.u64 = EXEC;
// EXEC = S0.u64 | EXEC;
// SCC = 1 if the new value of EXEC is non-zero.
void
Inst_SOP1__S_OR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
sdst = wf->execMask().to_ullong();
wf->execMask() = src.rawData() | wf->execMask().to_ullong();
scc = wf->execMask().any() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP1__S_XOR_SAVEEXEC_B64 class methods ---
Inst_SOP1__S_XOR_SAVEEXEC_B64::Inst_SOP1__S_XOR_SAVEEXEC_B64(
InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_xor_saveexec_b64")
{
setFlag(ALU);
setFlag(ReadsEXEC);
setFlag(WritesEXEC);
} // Inst_SOP1__S_XOR_SAVEEXEC_B64
Inst_SOP1__S_XOR_SAVEEXEC_B64::~Inst_SOP1__S_XOR_SAVEEXEC_B64()
{
} // ~Inst_SOP1__S_XOR_SAVEEXEC_B64
// --- description from .arch file ---
// D.u64 = EXEC;
// EXEC = S0.u64 ^ EXEC;
// SCC = 1 if the new value of EXEC is non-zero.
void
Inst_SOP1__S_XOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
sdst = wf->execMask().to_ullong();
wf->execMask() = src.rawData() ^ wf->execMask().to_ullong();
scc = wf->execMask().any() ? 1 : 0;
sdst.write();
scc.write();
} // execute
// --- Inst_SOP1__S_ANDN2_SAVEEXEC_B64 class methods ---
Inst_SOP1__S_ANDN2_SAVEEXEC_B64::Inst_SOP1__S_ANDN2_SAVEEXEC_B64(
InFmt_SOP1 *iFmt)
: Inst_SOP1(iFmt, "s_andn2_saveexec_b64")
{
setFlag(ALU);
setFlag(ReadsEXEC);
setFlag(WritesEXEC);
} // Inst_SOP1__S_ANDN2_SAVEEXEC_B64
Inst_SOP1__S_ANDN2_SAVEEXEC_B64::~Inst_SOP1__S_ANDN2_SAVEEXEC_B64()
{
} // ~Inst_SOP1__S_ANDN2_SAVEEXEC_B64
// --- description from .arch file ---
// D.u64 = EXEC;
// EXEC = S0.u64 & ~EXEC;
// SCC = 1 if the new value of EXEC is non-zero.
void
Inst_SOP1__S_ANDN2_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
ScalarOperandU32 scc(gpuDynInst, REG_SCC);
src.read();
sdst = wf->execMask().to_ullong();
wf->execMask() = src.rawData() & ~wf->execMask().to_ullong();
scc = wf->execMask().any() ?