| /* |
| * Copyright (c) 2015-2021 Advanced Micro Devices, Inc. |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright notice, |
| * this list of conditions and the following disclaimer. |
| * |
| * 2. Redistributions in binary form must reproduce the above copyright notice, |
| * this list of conditions and the following disclaimer in the documentation |
| * and/or other materials provided with the distribution. |
| * |
| * 3. Neither the name of the copyright holder nor the names of its |
| * contributors may be used to endorse or promote products derived from this |
| * software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
| * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| * POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include "arch/amdgpu/vega/insts/instructions.hh" |
| |
| #include <cmath> |
| |
| #include "arch/amdgpu/vega/insts/inst_util.hh" |
| #include "debug/VEGA.hh" |
| #include "debug/GPUSync.hh" |
| #include "dev/amdgpu/hwreg_defines.hh" |
| #include "gpu-compute/shader.hh" |
| |
| namespace gem5 |
| { |
| |
| namespace VegaISA |
| { |
| // --- Inst_SOP2__S_ADD_U32 class methods --- |
| |
| Inst_SOP2__S_ADD_U32::Inst_SOP2__S_ADD_U32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_add_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_ADD_U32 |
| |
| Inst_SOP2__S_ADD_U32::~Inst_SOP2__S_ADD_U32() |
| { |
| } // ~Inst_SOP2__S_ADD_U32 |
| |
| // --- description from .arch file --- |
| // D.u = S0.u + S1.u; |
| // SCC = (S0.u + S1.u >= 0x800000000ULL ? 1 : 0) is an unsigned |
| // --- overflow/carry-out for S_ADDC_U32. |
| void |
| Inst_SOP2__S_ADD_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = src0.rawData() + src1.rawData(); |
| scc = ((ScalarRegU64)src0.rawData() + (ScalarRegU64)src1.rawData()) |
| >= 0x100000000ULL ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_SUB_U32 class methods --- |
| |
| Inst_SOP2__S_SUB_U32::Inst_SOP2__S_SUB_U32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_sub_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_SUB_U32 |
| |
| Inst_SOP2__S_SUB_U32::~Inst_SOP2__S_SUB_U32() |
| { |
| } // ~Inst_SOP2__S_SUB_U32 |
| |
| // --- description from .arch file --- |
| // D.u = S0.u - S1.u; |
| // SCC = (S1.u > S0.u ? 1 : 0) is an unsigned overflow or carry-out for |
| // --- S_SUBB_U32. |
| void |
| Inst_SOP2__S_SUB_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = src0.rawData() - src1.rawData(); |
| scc = (src1.rawData() > src0.rawData()) ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_ADD_I32 class methods --- |
| |
| Inst_SOP2__S_ADD_I32::Inst_SOP2__S_ADD_I32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_add_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_ADD_I32 |
| |
| Inst_SOP2__S_ADD_I32::~Inst_SOP2__S_ADD_I32() |
| { |
| } // ~Inst_SOP2__S_ADD_I32 |
| |
| // --- description from .arch file --- |
| // D.i = S0.i + S1.i; |
| // SCC = (S0.u[31] == S1.u[31] && S0.u[31] != D.u[31]) is a signed |
| // overflow. |
| // This opcode is not suitable for use with S_ADDC_U32 for implementing |
| // 64-bit operations. |
| void |
| Inst_SOP2__S_ADD_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = src0.rawData() + src1.rawData(); |
| scc = (bits(src0.rawData(), 31) == bits(src1.rawData(), 31) |
| && bits(src0.rawData(), 31) != bits(sdst.rawData(), 31)) |
| ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_SUB_I32 class methods --- |
| |
| Inst_SOP2__S_SUB_I32::Inst_SOP2__S_SUB_I32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_sub_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_SUB_I32 |
| |
| Inst_SOP2__S_SUB_I32::~Inst_SOP2__S_SUB_I32() |
| { |
| } // ~Inst_SOP2__S_SUB_I32 |
| |
| // --- description from .arch file --- |
| // D.i = S0.i - S1.i; |
| // SCC = (S0.u[31] != S1.u[31] && S0.u[31] != D.u[31]) is a signed |
| // overflow. |
| // CAUTION: The condition code behaviour for this opcode is inconsistent |
| // with V_SUB_I32; see V_SUB_I32 for further details. |
| // This opcode is not suitable for use with S_SUBB_U32 for implementing |
| // 64-bit operations. |
| void |
| Inst_SOP2__S_SUB_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = src0.rawData() - src1.rawData(); |
| scc = (bits(src0.rawData(), 31) != bits(src1.rawData(), 31) |
| && bits(src0.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_ADDC_U32 class methods --- |
| |
| Inst_SOP2__S_ADDC_U32::Inst_SOP2__S_ADDC_U32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_addc_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_ADDC_U32 |
| |
| Inst_SOP2__S_ADDC_U32::~Inst_SOP2__S_ADDC_U32() |
| { |
| } // ~Inst_SOP2__S_ADDC_U32 |
| |
| // --- description from .arch file --- |
| // D.u = S0.u + S1.u + SCC; |
| // SCC = (S0.u + S1.u + SCC >= 0x800000000ULL ? 1 : 0) is an unsigned |
| // overflow. |
| void |
| Inst_SOP2__S_ADDC_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| scc.read(); |
| |
| sdst = src0.rawData() + src1.rawData() + scc.rawData(); |
| scc = ((ScalarRegU64)src0.rawData() + (ScalarRegU64)src1.rawData() |
| + (ScalarRegU64)scc.rawData()) >= 0x100000000ULL ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_SUBB_U32 class methods --- |
| |
| Inst_SOP2__S_SUBB_U32::Inst_SOP2__S_SUBB_U32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_subb_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_SUBB_U32 |
| |
| Inst_SOP2__S_SUBB_U32::~Inst_SOP2__S_SUBB_U32() |
| { |
| } // ~Inst_SOP2__S_SUBB_U32 |
| |
| // --- description from .arch file --- |
| // D.u = S0.u - S1.u - SCC; |
| // SCC = (S1.u + SCC > S0.u ? 1 : 0) is an unsigned overflow. |
| void |
| Inst_SOP2__S_SUBB_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| scc.read(); |
| |
| sdst = src0.rawData() - src1.rawData() - scc.rawData(); |
| scc = (src1.rawData() + scc.rawData()) > src0.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_MIN_I32 class methods --- |
| |
| Inst_SOP2__S_MIN_I32::Inst_SOP2__S_MIN_I32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_min_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_MIN_I32 |
| |
| Inst_SOP2__S_MIN_I32::~Inst_SOP2__S_MIN_I32() |
| { |
| } // ~Inst_SOP2__S_MIN_I32 |
| |
| // --- description from .arch file --- |
| // D.i = (S0.i < S1.i) ? S0.i : S1.i; |
| // SCC = 1 if S0 is chosen as the minimum value. |
| void |
| Inst_SOP2__S_MIN_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = std::min(src0.rawData(), src1.rawData()); |
| scc = (src0.rawData() < src1.rawData()) ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_MIN_U32 class methods --- |
| |
| Inst_SOP2__S_MIN_U32::Inst_SOP2__S_MIN_U32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_min_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_MIN_U32 |
| |
| Inst_SOP2__S_MIN_U32::~Inst_SOP2__S_MIN_U32() |
| { |
| } // ~Inst_SOP2__S_MIN_U32 |
| |
| // --- description from .arch file --- |
| // D.u = (S0.u < S1.u) ? S0.u : S1.u; |
| // SCC = 1 if S0 is chosen as the minimum value. |
| void |
| Inst_SOP2__S_MIN_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = std::min(src0.rawData(), src1.rawData()); |
| scc = (src0.rawData() < src1.rawData()) ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_MAX_I32 class methods --- |
| |
| Inst_SOP2__S_MAX_I32::Inst_SOP2__S_MAX_I32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_max_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_MAX_I32 |
| |
| Inst_SOP2__S_MAX_I32::~Inst_SOP2__S_MAX_I32() |
| { |
| } // ~Inst_SOP2__S_MAX_I32 |
| |
| // --- description from .arch file --- |
| // D.i = (S0.i > S1.i) ? S0.i : S1.i; |
| // SCC = 1 if S0 is chosen as the maximum value. |
| void |
| Inst_SOP2__S_MAX_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = std::max(src0.rawData(), src1.rawData()); |
| scc = (src0.rawData() > src1.rawData()) ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_MAX_U32 class methods --- |
| |
| Inst_SOP2__S_MAX_U32::Inst_SOP2__S_MAX_U32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_max_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_MAX_U32 |
| |
| Inst_SOP2__S_MAX_U32::~Inst_SOP2__S_MAX_U32() |
| { |
| } // ~Inst_SOP2__S_MAX_U32 |
| |
| // --- description from .arch file --- |
| // D.u = (S0.u > S1.u) ? S0.u : S1.u; |
| // SCC = 1 if S0 is chosen as the maximum value. |
| void |
| Inst_SOP2__S_MAX_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = std::max(src0.rawData(), src1.rawData()); |
| scc = (src0.rawData() > src1.rawData()) ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_CSELECT_B32 class methods --- |
| |
| Inst_SOP2__S_CSELECT_B32::Inst_SOP2__S_CSELECT_B32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_cselect_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_CSELECT_B32 |
| |
| Inst_SOP2__S_CSELECT_B32::~Inst_SOP2__S_CSELECT_B32() |
| { |
| } // ~Inst_SOP2__S_CSELECT_B32 |
| |
| // --- description from .arch file --- |
| // D.u = SCC ? S0.u : S1.u (conditional select). |
| void |
| Inst_SOP2__S_CSELECT_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| scc.read(); |
| |
| sdst = scc.rawData() ? src0.rawData() : src1.rawData(); |
| |
| sdst.write(); |
| } // execute |
| // --- Inst_SOP2__S_CSELECT_B64 class methods --- |
| |
| Inst_SOP2__S_CSELECT_B64::Inst_SOP2__S_CSELECT_B64(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_cselect_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_CSELECT_B64 |
| |
| Inst_SOP2__S_CSELECT_B64::~Inst_SOP2__S_CSELECT_B64() |
| { |
| } // ~Inst_SOP2__S_CSELECT_B64 |
| |
| // --- description from .arch file --- |
| // D.u64 = SCC ? S0.u64 : S1.u64 (conditional select). |
| void |
| Inst_SOP2__S_CSELECT_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| scc.read(); |
| |
| sdst = scc.rawData() ? src0.rawData() : src1.rawData(); |
| |
| sdst.write(); |
| } // execute |
| // --- Inst_SOP2__S_AND_B32 class methods --- |
| |
| Inst_SOP2__S_AND_B32::Inst_SOP2__S_AND_B32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_and_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_AND_B32 |
| |
| Inst_SOP2__S_AND_B32::~Inst_SOP2__S_AND_B32() |
| { |
| } // ~Inst_SOP2__S_AND_B32 |
| |
| // --- description from .arch file --- |
| // D.u = S0.u & S1.u; |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_AND_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = src0.rawData() & src1.rawData(); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_AND_B64 class methods --- |
| |
| Inst_SOP2__S_AND_B64::Inst_SOP2__S_AND_B64(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_and_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_AND_B64 |
| |
| Inst_SOP2__S_AND_B64::~Inst_SOP2__S_AND_B64() |
| { |
| } // ~Inst_SOP2__S_AND_B64 |
| |
| // --- description from .arch file --- |
| // D.u64 = S0.u64 & S1.u64; |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_AND_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = src0.rawData() & src1.rawData(); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_OR_B32 class methods --- |
| |
| Inst_SOP2__S_OR_B32::Inst_SOP2__S_OR_B32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_or_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_OR_B32 |
| |
| Inst_SOP2__S_OR_B32::~Inst_SOP2__S_OR_B32() |
| { |
| } // ~Inst_SOP2__S_OR_B32 |
| |
| // --- description from .arch file --- |
| // D.u = S0.u | S1.u; |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_OR_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = src0.rawData() | src1.rawData(); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_OR_B64 class methods --- |
| |
| Inst_SOP2__S_OR_B64::Inst_SOP2__S_OR_B64(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_or_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_OR_B64 |
| |
| Inst_SOP2__S_OR_B64::~Inst_SOP2__S_OR_B64() |
| { |
| } // ~Inst_SOP2__S_OR_B64 |
| |
| // --- description from .arch file --- |
| // D.u64 = S0.u64 | S1.u64; |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_OR_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = src0.rawData() | src1.rawData(); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_XOR_B32 class methods --- |
| |
| Inst_SOP2__S_XOR_B32::Inst_SOP2__S_XOR_B32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_xor_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_XOR_B32 |
| |
| Inst_SOP2__S_XOR_B32::~Inst_SOP2__S_XOR_B32() |
| { |
| } // ~Inst_SOP2__S_XOR_B32 |
| |
| // --- description from .arch file --- |
| // D.u = S0.u ^ S1.u; |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_XOR_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = src0.rawData() ^ src1.rawData(); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_XOR_B64 class methods --- |
| |
| Inst_SOP2__S_XOR_B64::Inst_SOP2__S_XOR_B64(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_xor_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_XOR_B64 |
| |
| Inst_SOP2__S_XOR_B64::~Inst_SOP2__S_XOR_B64() |
| { |
| } // ~Inst_SOP2__S_XOR_B64 |
| |
| // --- description from .arch file --- |
| // D.u64 = S0.u64 ^ S1.u64; |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_XOR_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = src0.rawData() ^ src1.rawData(); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_ANDN2_B32 class methods --- |
| |
| Inst_SOP2__S_ANDN2_B32::Inst_SOP2__S_ANDN2_B32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_andn2_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_ANDN2_B32 |
| |
| Inst_SOP2__S_ANDN2_B32::~Inst_SOP2__S_ANDN2_B32() |
| { |
| } // ~Inst_SOP2__S_ANDN2_B32 |
| |
| // --- description from .arch file --- |
| // D.u = S0.u & ~S1.u; |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_ANDN2_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = src0.rawData() &~ src1.rawData(); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_ANDN2_B64 class methods --- |
| |
| Inst_SOP2__S_ANDN2_B64::Inst_SOP2__S_ANDN2_B64(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_andn2_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_ANDN2_B64 |
| |
| Inst_SOP2__S_ANDN2_B64::~Inst_SOP2__S_ANDN2_B64() |
| { |
| } // ~Inst_SOP2__S_ANDN2_B64 |
| |
| // --- description from .arch file --- |
| // D.u64 = S0.u64 & ~S1.u64; |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_ANDN2_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = src0.rawData() &~ src1.rawData(); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_ORN2_B32 class methods --- |
| |
| Inst_SOP2__S_ORN2_B32::Inst_SOP2__S_ORN2_B32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_orn2_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_ORN2_B32 |
| |
| Inst_SOP2__S_ORN2_B32::~Inst_SOP2__S_ORN2_B32() |
| { |
| } // ~Inst_SOP2__S_ORN2_B32 |
| |
| // --- description from .arch file --- |
| // D.u = S0.u | ~S1.u; |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_ORN2_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = src0.rawData() |~ src1.rawData(); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_ORN2_B64 class methods --- |
| |
| Inst_SOP2__S_ORN2_B64::Inst_SOP2__S_ORN2_B64(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_orn2_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_ORN2_B64 |
| |
| Inst_SOP2__S_ORN2_B64::~Inst_SOP2__S_ORN2_B64() |
| { |
| } // ~Inst_SOP2__S_ORN2_B64 |
| |
| // --- description from .arch file --- |
| // D.u64 = S0.u64 | ~S1.u64; |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_ORN2_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = src0.rawData() |~ src1.rawData(); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_NAND_B32 class methods --- |
| |
| Inst_SOP2__S_NAND_B32::Inst_SOP2__S_NAND_B32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_nand_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_NAND_B32 |
| |
| Inst_SOP2__S_NAND_B32::~Inst_SOP2__S_NAND_B32() |
| { |
| } // ~Inst_SOP2__S_NAND_B32 |
| |
| // --- description from .arch file --- |
| // D.u = ~(S0.u & S1.u); |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_NAND_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = ~(src0.rawData() & src1.rawData()); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_NAND_B64 class methods --- |
| |
| Inst_SOP2__S_NAND_B64::Inst_SOP2__S_NAND_B64(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_nand_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_NAND_B64 |
| |
| Inst_SOP2__S_NAND_B64::~Inst_SOP2__S_NAND_B64() |
| { |
| } // ~Inst_SOP2__S_NAND_B64 |
| |
| // --- description from .arch file --- |
| // D.u64 = ~(S0.u64 & S1.u64); |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_NAND_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = ~(src0.rawData() & src1.rawData()); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_NOR_B32 class methods --- |
| |
| Inst_SOP2__S_NOR_B32::Inst_SOP2__S_NOR_B32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_nor_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_NOR_B32 |
| |
| Inst_SOP2__S_NOR_B32::~Inst_SOP2__S_NOR_B32() |
| { |
| } // ~Inst_SOP2__S_NOR_B32 |
| |
| // --- description from .arch file --- |
| // D.u = ~(S0.u | S1.u); |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_NOR_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = ~(src0.rawData() | src1.rawData()); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_NOR_B64 class methods --- |
| |
| Inst_SOP2__S_NOR_B64::Inst_SOP2__S_NOR_B64(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_nor_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_NOR_B64 |
| |
| Inst_SOP2__S_NOR_B64::~Inst_SOP2__S_NOR_B64() |
| { |
| } // ~Inst_SOP2__S_NOR_B64 |
| |
| // --- description from .arch file --- |
| // D.u64 = ~(S0.u64 | S1.u64); |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_NOR_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = ~(src0.rawData() | src1.rawData()); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_XNOR_B32 class methods --- |
| |
| Inst_SOP2__S_XNOR_B32::Inst_SOP2__S_XNOR_B32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_xnor_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_XNOR_B32 |
| |
| Inst_SOP2__S_XNOR_B32::~Inst_SOP2__S_XNOR_B32() |
| { |
| } // ~Inst_SOP2__S_XNOR_B32 |
| |
| // --- description from .arch file --- |
| // D.u = ~(S0.u ^ S1.u); |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_XNOR_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = ~(src0.rawData() ^ src1.rawData()); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_XNOR_B64 class methods --- |
| |
| Inst_SOP2__S_XNOR_B64::Inst_SOP2__S_XNOR_B64(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_xnor_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_XNOR_B64 |
| |
| Inst_SOP2__S_XNOR_B64::~Inst_SOP2__S_XNOR_B64() |
| { |
| } // ~Inst_SOP2__S_XNOR_B64 |
| |
| // --- description from .arch file --- |
| // D.u64 = ~(S0.u64 ^ S1.u64); |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_XNOR_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = ~(src0.rawData() ^ src1.rawData()); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_LSHL_B32 class methods --- |
| |
| Inst_SOP2__S_LSHL_B32::Inst_SOP2__S_LSHL_B32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_lshl_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_LSHL_B32 |
| |
| Inst_SOP2__S_LSHL_B32::~Inst_SOP2__S_LSHL_B32() |
| { |
| } // ~Inst_SOP2__S_LSHL_B32 |
| |
| // --- description from .arch file --- |
| // D.u = S0.u << S1.u[4:0]; |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_LSHL_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = (src0.rawData() << bits(src1.rawData(), 4, 0)); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_LSHL_B64 class methods --- |
| |
| Inst_SOP2__S_LSHL_B64::Inst_SOP2__S_LSHL_B64(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_lshl_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_LSHL_B64 |
| |
| Inst_SOP2__S_LSHL_B64::~Inst_SOP2__S_LSHL_B64() |
| { |
| } // ~Inst_SOP2__S_LSHL_B64 |
| |
| // --- description from .arch file --- |
| // D.u64 = S0.u64 << S1.u[5:0]; |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_LSHL_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = (src0.rawData() << bits(src1.rawData(), 5, 0)); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_LSHR_B32 class methods --- |
| |
| Inst_SOP2__S_LSHR_B32::Inst_SOP2__S_LSHR_B32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_lshr_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_LSHR_B32 |
| |
| Inst_SOP2__S_LSHR_B32::~Inst_SOP2__S_LSHR_B32() |
| { |
| } // ~Inst_SOP2__S_LSHR_B32 |
| |
| // --- description from .arch file --- |
| // D.u = S0.u >> S1.u[4:0]; |
| // SCC = 1 if result is non-zero. |
| // The vacated bits are set to zero. |
| void |
| Inst_SOP2__S_LSHR_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0)); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_LSHR_B64 class methods --- |
| |
| Inst_SOP2__S_LSHR_B64::Inst_SOP2__S_LSHR_B64(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_lshr_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_LSHR_B64 |
| |
| Inst_SOP2__S_LSHR_B64::~Inst_SOP2__S_LSHR_B64() |
| { |
| } // ~Inst_SOP2__S_LSHR_B64 |
| |
| // --- description from .arch file --- |
| // D.u64 = S0.u64 >> S1.u[5:0]; |
| // SCC = 1 if result is non-zero. |
| // The vacated bits are set to zero. |
| void |
| Inst_SOP2__S_LSHR_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0)); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_ASHR_I32 class methods --- |
| |
| Inst_SOP2__S_ASHR_I32::Inst_SOP2__S_ASHR_I32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_ashr_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_ASHR_I32 |
| |
| Inst_SOP2__S_ASHR_I32::~Inst_SOP2__S_ASHR_I32() |
| { |
| } // ~Inst_SOP2__S_ASHR_I32 |
| |
| // --- description from .arch file --- |
| // D.i = signext(S0.i) >> S1.u[4:0]; |
| // SCC = 1 if result is non-zero. |
| // The vacated bits are set to the sign bit of the input value. |
| void |
| Inst_SOP2__S_ASHR_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0)); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_ASHR_I64 class methods --- |
| |
| Inst_SOP2__S_ASHR_I64::Inst_SOP2__S_ASHR_I64(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_ashr_i64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_ASHR_I64 |
| |
| Inst_SOP2__S_ASHR_I64::~Inst_SOP2__S_ASHR_I64() |
| { |
| } // ~Inst_SOP2__S_ASHR_I64 |
| |
| // --- description from .arch file --- |
| // D.i64 = signext(S0.i64) >> S1.u[5:0]; |
| // SCC = 1 if result is non-zero. |
| // The vacated bits are set to the sign bit of the input value. |
| void |
| Inst_SOP2__S_ASHR_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandI64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0)); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_BFM_B32 class methods --- |
| |
| Inst_SOP2__S_BFM_B32::Inst_SOP2__S_BFM_B32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_bfm_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_BFM_B32 |
| |
| Inst_SOP2__S_BFM_B32::~Inst_SOP2__S_BFM_B32() |
| { |
| } // ~Inst_SOP2__S_BFM_B32 |
| |
| // --- description from .arch file --- |
| // D.u = ((1 << S0.u[4:0]) - 1) << S1.u[4:0] (bitfield mask). |
| void |
| Inst_SOP2__S_BFM_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = ((1 << bits(src0.rawData(), 4, 0)) - 1) |
| << bits(src1.rawData(), 4, 0); |
| |
| sdst.write(); |
| } // execute |
| // --- Inst_SOP2__S_BFM_B64 class methods --- |
| |
| Inst_SOP2__S_BFM_B64::Inst_SOP2__S_BFM_B64(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_bfm_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_BFM_B64 |
| |
| Inst_SOP2__S_BFM_B64::~Inst_SOP2__S_BFM_B64() |
| { |
| } // ~Inst_SOP2__S_BFM_B64 |
| |
| // --- description from .arch file --- |
| // D.u64 = ((1ULL << S0.u[5:0]) - 1) << S1.u[5:0] (bitfield mask). |
| void |
| Inst_SOP2__S_BFM_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = ((1ULL << bits(src0.rawData(), 5, 0)) - 1) |
| << bits(src1.rawData(), 5, 0); |
| |
| sdst.write(); |
| } // execute |
| // --- Inst_SOP2__S_MUL_I32 class methods --- |
| |
| Inst_SOP2__S_MUL_I32::Inst_SOP2__S_MUL_I32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_mul_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_MUL_I32 |
| |
| Inst_SOP2__S_MUL_I32::~Inst_SOP2__S_MUL_I32() |
| { |
| } // ~Inst_SOP2__S_MUL_I32 |
| |
| // --- description from .arch file --- |
| // D.i = S0.i * S1.i. |
| void |
| Inst_SOP2__S_MUL_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = src0.rawData() * src1.rawData(); |
| |
| sdst.write(); |
| } // execute |
| // --- Inst_SOP2__S_BFE_U32 class methods --- |
| |
| Inst_SOP2__S_BFE_U32::Inst_SOP2__S_BFE_U32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_bfe_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_BFE_U32 |
| |
| Inst_SOP2__S_BFE_U32::~Inst_SOP2__S_BFE_U32() |
| { |
| } // ~Inst_SOP2__S_BFE_U32 |
| |
| // --- description from .arch file --- |
| // Bit field extract. S0 is Data, S1[4:0] is field offset, S1[22:16] is |
| // field width. |
| // D.u = (S0.u>>S1.u[4:0]) & ((1<<S1.u[22:16])-1); |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_BFE_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0)) |
| & ((1 << bits(src1.rawData(), 22, 16)) - 1); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_BFE_I32 class methods --- |
| |
| Inst_SOP2__S_BFE_I32::Inst_SOP2__S_BFE_I32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_bfe_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_BFE_I32 |
| |
| Inst_SOP2__S_BFE_I32::~Inst_SOP2__S_BFE_I32() |
| { |
| } // ~Inst_SOP2__S_BFE_I32 |
| |
| // --- description from .arch file --- |
| // Bit field extract. S0 is Data, S1[4:0] is field offset, S1[22:16] is |
| // field width. |
| // D.i = (S0.i>>S1.u[4:0]) & ((1<<S1.u[22:16])-1); |
| // Sign-extend the result; |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_BFE_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0)) |
| & ((1 << bits(src1.rawData(), 22, 16)) - 1); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_BFE_U64 class methods --- |
| |
| Inst_SOP2__S_BFE_U64::Inst_SOP2__S_BFE_U64(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_bfe_u64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_BFE_U64 |
| |
| Inst_SOP2__S_BFE_U64::~Inst_SOP2__S_BFE_U64() |
| { |
| } // ~Inst_SOP2__S_BFE_U64 |
| |
| // --- description from .arch file --- |
| // Bit field extract. S0 is Data, S1[5:0] is field offset, S1[22:16] is |
| // field width. |
| // D.u64 = (S0.u64>>S1.u[5:0]) & ((1<<S1.u[22:16])-1); |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_BFE_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0)) |
| & ((1 << bits(src1.rawData(), 22, 16)) - 1); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_BFE_I64 class methods --- |
| |
| Inst_SOP2__S_BFE_I64::Inst_SOP2__S_BFE_I64(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_bfe_i64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_BFE_I64 |
| |
| Inst_SOP2__S_BFE_I64::~Inst_SOP2__S_BFE_I64() |
| { |
| } // ~Inst_SOP2__S_BFE_I64 |
| |
| // --- description from .arch file --- |
| // Bit field extract. S0 is Data, S1[5:0] is field offset, S1[22:16] is |
| // field width. |
| // D.i64 = (S0.i64>>S1.u[5:0]) & ((1<<S1.u[22:16])-1); |
| // Sign-extend result; |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_BFE_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandI64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0)) |
| & ((1 << bits(src1.rawData(), 22, 16)) - 1); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_CBRANCH_G_FORK class methods --- |
| |
| Inst_SOP2__S_CBRANCH_G_FORK::Inst_SOP2__S_CBRANCH_G_FORK(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_cbranch_g_fork") |
| { |
| setFlag(Branch); |
| } // Inst_SOP2__S_CBRANCH_G_FORK |
| |
| Inst_SOP2__S_CBRANCH_G_FORK::~Inst_SOP2__S_CBRANCH_G_FORK() |
| { |
| } // ~Inst_SOP2__S_CBRANCH_G_FORK |
| |
| // --- description from .arch file --- |
| // mask_pass = S0.u64 & EXEC; |
| // mask_fail = ~S0.u64 & EXEC; |
| // if(mask_pass == EXEC) |
| // PC = S1.u64; |
| // elsif(mask_fail == EXEC) |
| // PC += 4; |
| // elsif(bitcount(mask_fail) < bitcount(mask_pass)) |
| // EXEC = mask_fail; |
| // SGPR[CSP*4] = { S1.u64, mask_pass }; |
| // CSP++; |
| // PC += 4; |
| // else |
| // EXEC = mask_pass; |
| // SGPR[CSP*4] = { PC + 4, mask_fail }; |
| // CSP++; |
| // PC = S1.u64; |
| // end. |
| // Conditional branch using branch-stack. |
| // S0 = compare mask(vcc or any sgpr) and |
| // S1 = 64-bit byte address of target instruction. |
| // See also S_CBRANCH_JOIN. |
| void |
| Inst_SOP2__S_CBRANCH_G_FORK::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } // execute |
| // --- Inst_SOP2__S_ABSDIFF_I32 class methods --- |
| |
| Inst_SOP2__S_ABSDIFF_I32::Inst_SOP2__S_ABSDIFF_I32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_absdiff_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_ABSDIFF_I32 |
| |
| Inst_SOP2__S_ABSDIFF_I32::~Inst_SOP2__S_ABSDIFF_I32() |
| { |
| } // ~Inst_SOP2__S_ABSDIFF_I32 |
| |
| // --- description from .arch file --- |
| // D.i = S0.i - S1.i; |
| // if(D.i < 0) then D.i = -D.i; |
| // SCC = 1 if result is non-zero. |
| // Compute the absolute value of difference between two values. |
| void |
| Inst_SOP2__S_ABSDIFF_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| sdst = std::abs(src0.rawData() - src1.rawData()); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP2__S_RFE_RESTORE_B64 class methods --- |
| |
| Inst_SOP2__S_RFE_RESTORE_B64::Inst_SOP2__S_RFE_RESTORE_B64( |
| InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_rfe_restore_b64") |
| { |
| } // Inst_SOP2__S_RFE_RESTORE_B64 |
| |
| Inst_SOP2__S_RFE_RESTORE_B64::~Inst_SOP2__S_RFE_RESTORE_B64() |
| { |
| } // ~Inst_SOP2__S_RFE_RESTORE_B64 |
| |
| // --- description from .arch file --- |
| // PRIV = 0; |
| // PC = S0.u64; |
| // INST_ATC = S1.u32[0]. |
| // Return from exception handler and continue, possibly changing the |
| // --- instruction ATC mode. |
| // This instruction may only be used within a trap handler. |
| // Use this instruction when the main program may be in a different memory |
| // --- space than the trap handler. |
| void |
| Inst_SOP2__S_RFE_RESTORE_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } // execute |
| // --- Inst_SOP2__S_MUL_HI_U32 class methods --- |
| |
| Inst_SOP2__S_MUL_HI_U32::Inst_SOP2__S_MUL_HI_U32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_mul_hi_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_MUL_HI_U32 |
| |
| Inst_SOP2__S_MUL_HI_U32::~Inst_SOP2__S_MUL_HI_U32() |
| { |
| } // ~Inst_SOP2__S_MUL_HI_U32 |
| |
| // --- description from .arch file --- |
| // D.u = (S0.u * S1.u) >> 32; |
| void |
| Inst_SOP2__S_MUL_HI_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| |
| src0.read(); |
| src1.read(); |
| |
| VecElemU64 tmp_dst = |
| ((VecElemU64)src0.rawData() * (VecElemU64)src1.rawData()); |
| sdst = (tmp_dst >> 32); |
| |
| sdst.write(); |
| } // execute |
| // --- Inst_SOP2__S_MUL_HI_I32 class methods --- |
| |
| Inst_SOP2__S_MUL_HI_I32::Inst_SOP2__S_MUL_HI_I32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_mul_hi_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_MUL_HI_I32 |
| |
| Inst_SOP2__S_MUL_HI_I32::~Inst_SOP2__S_MUL_HI_I32() |
| { |
| } // ~Inst_SOP2__S_MUL_HI_I32 |
| |
| // --- description from .arch file --- |
| // D.u = (S0.u * S1.u) >> 32; |
| void |
| Inst_SOP2__S_MUL_HI_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| |
| src0.read(); |
| src1.read(); |
| |
| VecElemI64 tmp_src0 = |
| sext<std::numeric_limits<VecElemI64>::digits>(src0.rawData()); |
| VecElemI64 tmp_src1 = |
| sext<std::numeric_limits<VecElemI64>::digits>(src1.rawData()); |
| sdst = (VecElemI32)((tmp_src0 * tmp_src1) >> 32); |
| |
| sdst.write(); |
| } // execute |
| // --- Inst_SOPK__S_MOVK_I32 class methods --- |
| |
| Inst_SOPK__S_MOVK_I32::Inst_SOPK__S_MOVK_I32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_movk_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_MOVK_I32 |
| |
| Inst_SOPK__S_MOVK_I32::~Inst_SOPK__S_MOVK_I32() |
| { |
| } // ~Inst_SOPK__S_MOVK_I32 |
| |
| // --- description from .arch file --- |
| // D.i = signext(SIMM16) (sign extension). |
| void |
| Inst_SOPK__S_MOVK_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| |
| sdst = simm16; |
| |
| sdst.write(); |
| } // execute |
| // --- Inst_SOPK__S_CMOVK_I32 class methods --- |
| |
| Inst_SOPK__S_CMOVK_I32::Inst_SOPK__S_CMOVK_I32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_cmovk_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_CMOVK_I32 |
| |
| Inst_SOPK__S_CMOVK_I32::~Inst_SOPK__S_CMOVK_I32() |
| { |
| } // ~Inst_SOPK__S_CMOVK_I32 |
| |
| // --- description from .arch file --- |
| // if(SCC) then D.i = signext(SIMM16); |
| // else NOP. |
| // Conditional move with sign extension. |
| void |
| Inst_SOPK__S_CMOVK_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| scc.read(); |
| |
| if (scc.rawData()) { |
| sdst = simm16; |
| sdst.write(); |
| } |
| } // execute |
| // --- Inst_SOPK__S_CMPK_EQ_I32 class methods --- |
| |
| Inst_SOPK__S_CMPK_EQ_I32::Inst_SOPK__S_CMPK_EQ_I32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_cmpk_eq_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_CMPK_EQ_I32 |
| |
| Inst_SOPK__S_CMPK_EQ_I32::~Inst_SOPK__S_CMPK_EQ_I32() |
| { |
| } // ~Inst_SOPK__S_CMPK_EQ_I32 |
| |
| // --- description from .arch file --- |
| // SCC = (S0.i == signext(SIMM16)). |
| void |
| Inst_SOPK__S_CMPK_EQ_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); |
| ConstScalarOperandI32 src(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| scc = (src.rawData() == simm16) ? 1 : 0; |
| |
| scc.write(); |
| } // execute |
| // --- Inst_SOPK__S_CMPK_LG_I32 class methods --- |
| |
| Inst_SOPK__S_CMPK_LG_I32::Inst_SOPK__S_CMPK_LG_I32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_cmpk_lg_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_CMPK_LG_I32 |
| |
| Inst_SOPK__S_CMPK_LG_I32::~Inst_SOPK__S_CMPK_LG_I32() |
| { |
| } // ~Inst_SOPK__S_CMPK_LG_I32 |
| |
| // --- description from .arch file --- |
| // SCC = (S0.i != signext(SIMM16)). |
| void |
| Inst_SOPK__S_CMPK_LG_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); |
| ConstScalarOperandI32 src(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| scc = (src.rawData() != simm16) ? 1 : 0; |
| |
| scc.write(); |
| } // execute |
| // --- Inst_SOPK__S_CMPK_GT_I32 class methods --- |
| |
| Inst_SOPK__S_CMPK_GT_I32::Inst_SOPK__S_CMPK_GT_I32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_cmpk_gt_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_CMPK_GT_I32 |
| |
| Inst_SOPK__S_CMPK_GT_I32::~Inst_SOPK__S_CMPK_GT_I32() |
| { |
| } // ~Inst_SOPK__S_CMPK_GT_I32 |
| |
| // --- description from .arch file --- |
| // SCC = (S0.i > signext(SIMM16)). |
| void |
| Inst_SOPK__S_CMPK_GT_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); |
| ConstScalarOperandI32 src(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| scc = (src.rawData() > simm16) ? 1 : 0; |
| |
| scc.write(); |
| } // execute |
| // --- Inst_SOPK__S_CMPK_GE_I32 class methods --- |
| |
| Inst_SOPK__S_CMPK_GE_I32::Inst_SOPK__S_CMPK_GE_I32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_cmpk_ge_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_CMPK_GE_I32 |
| |
| Inst_SOPK__S_CMPK_GE_I32::~Inst_SOPK__S_CMPK_GE_I32() |
| { |
| } // ~Inst_SOPK__S_CMPK_GE_I32 |
| |
| // --- description from .arch file --- |
| // SCC = (S0.i >= signext(SIMM16)). |
| void |
| Inst_SOPK__S_CMPK_GE_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); |
| ConstScalarOperandI32 src(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| scc = (src.rawData() >= simm16) ? 1 : 0; |
| |
| scc.write(); |
| } // execute |
| // --- Inst_SOPK__S_CMPK_LT_I32 class methods --- |
| |
| Inst_SOPK__S_CMPK_LT_I32::Inst_SOPK__S_CMPK_LT_I32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_cmpk_lt_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_CMPK_LT_I32 |
| |
| Inst_SOPK__S_CMPK_LT_I32::~Inst_SOPK__S_CMPK_LT_I32() |
| { |
| } // ~Inst_SOPK__S_CMPK_LT_I32 |
| |
| // --- description from .arch file --- |
| // SCC = (S0.i < signext(SIMM16)). |
| void |
| Inst_SOPK__S_CMPK_LT_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); |
| ConstScalarOperandI32 src(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| scc = (src.rawData() < simm16) ? 1 : 0; |
| |
| scc.write(); |
| } // execute |
| // --- Inst_SOPK__S_CMPK_LE_I32 class methods --- |
| |
| Inst_SOPK__S_CMPK_LE_I32::Inst_SOPK__S_CMPK_LE_I32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_cmpk_le_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_CMPK_LE_I32 |
| |
| Inst_SOPK__S_CMPK_LE_I32::~Inst_SOPK__S_CMPK_LE_I32() |
| { |
| } // ~Inst_SOPK__S_CMPK_LE_I32 |
| |
| // --- description from .arch file --- |
| // SCC = (S0.i <= signext(SIMM16)). |
| void |
| Inst_SOPK__S_CMPK_LE_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); |
| ConstScalarOperandI32 src(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| scc = (src.rawData() <= simm16) ? 1 : 0; |
| |
| scc.write(); |
| } // execute |
| // --- Inst_SOPK__S_CMPK_EQ_U32 class methods --- |
| |
| Inst_SOPK__S_CMPK_EQ_U32::Inst_SOPK__S_CMPK_EQ_U32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_cmpk_eq_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_CMPK_EQ_U32 |
| |
| Inst_SOPK__S_CMPK_EQ_U32::~Inst_SOPK__S_CMPK_EQ_U32() |
| { |
| } // ~Inst_SOPK__S_CMPK_EQ_U32 |
| |
| // --- description from .arch file --- |
| // SCC = (S0.u == SIMM16). |
| void |
| Inst_SOPK__S_CMPK_EQ_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; |
| ConstScalarOperandU32 src(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| scc = (src.rawData() == simm16) ? 1 : 0; |
| |
| scc.write(); |
| } // execute |
| // --- Inst_SOPK__S_CMPK_LG_U32 class methods --- |
| |
| Inst_SOPK__S_CMPK_LG_U32::Inst_SOPK__S_CMPK_LG_U32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_cmpk_lg_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_CMPK_LG_U32 |
| |
| Inst_SOPK__S_CMPK_LG_U32::~Inst_SOPK__S_CMPK_LG_U32() |
| { |
| } // ~Inst_SOPK__S_CMPK_LG_U32 |
| |
| // --- description from .arch file --- |
| // SCC = (S0.u != SIMM16). |
| void |
| Inst_SOPK__S_CMPK_LG_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; |
| ConstScalarOperandU32 src(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| scc = (src.rawData() != simm16) ? 1 : 0; |
| |
| scc.write(); |
| } // execute |
| // --- Inst_SOPK__S_CMPK_GT_U32 class methods --- |
| |
| Inst_SOPK__S_CMPK_GT_U32::Inst_SOPK__S_CMPK_GT_U32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_cmpk_gt_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_CMPK_GT_U32 |
| |
| Inst_SOPK__S_CMPK_GT_U32::~Inst_SOPK__S_CMPK_GT_U32() |
| { |
| } // ~Inst_SOPK__S_CMPK_GT_U32 |
| |
| // --- description from .arch file --- |
| // SCC = (S0.u > SIMM16). |
| void |
| Inst_SOPK__S_CMPK_GT_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; |
| ConstScalarOperandU32 src(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| scc = (src.rawData() > simm16) ? 1 : 0; |
| |
| scc.write(); |
| } // execute |
| // --- Inst_SOPK__S_CMPK_GE_U32 class methods --- |
| |
| Inst_SOPK__S_CMPK_GE_U32::Inst_SOPK__S_CMPK_GE_U32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_cmpk_ge_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_CMPK_GE_U32 |
| |
| Inst_SOPK__S_CMPK_GE_U32::~Inst_SOPK__S_CMPK_GE_U32() |
| { |
| } // ~Inst_SOPK__S_CMPK_GE_U32 |
| |
| // --- description from .arch file --- |
| // SCC = (S0.u >= SIMM16). |
| void |
| Inst_SOPK__S_CMPK_GE_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; |
| ConstScalarOperandU32 src(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| scc = (src.rawData() >= simm16) ? 1 : 0; |
| |
| scc.write(); |
| } // execute |
| // --- Inst_SOPK__S_CMPK_LT_U32 class methods --- |
| |
| Inst_SOPK__S_CMPK_LT_U32::Inst_SOPK__S_CMPK_LT_U32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_cmpk_lt_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_CMPK_LT_U32 |
| |
| Inst_SOPK__S_CMPK_LT_U32::~Inst_SOPK__S_CMPK_LT_U32() |
| { |
| } // ~Inst_SOPK__S_CMPK_LT_U32 |
| |
| // --- description from .arch file --- |
| // SCC = (S0.u < SIMM16). |
| void |
| Inst_SOPK__S_CMPK_LT_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; |
| ConstScalarOperandU32 src(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| scc = (src.rawData() < simm16) ? 1 : 0; |
| |
| scc.write(); |
| } // execute |
| // --- Inst_SOPK__S_CMPK_LE_U32 class methods --- |
| |
| Inst_SOPK__S_CMPK_LE_U32::Inst_SOPK__S_CMPK_LE_U32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_cmpk_le_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_CMPK_LE_U32 |
| |
| Inst_SOPK__S_CMPK_LE_U32::~Inst_SOPK__S_CMPK_LE_U32() |
| { |
| } // ~Inst_SOPK__S_CMPK_LE_U32 |
| |
| // --- description from .arch file --- |
| // SCC = (S0.u <= SIMM16). |
| void |
| Inst_SOPK__S_CMPK_LE_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; |
| ConstScalarOperandU32 src(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| scc = (src.rawData() <= simm16) ? 1 : 0; |
| |
| scc.write(); |
| } // execute |
| // --- Inst_SOPK__S_ADDK_I32 class methods --- |
| |
| Inst_SOPK__S_ADDK_I32::Inst_SOPK__S_ADDK_I32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_addk_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_ADDK_I32 |
| |
| Inst_SOPK__S_ADDK_I32::~Inst_SOPK__S_ADDK_I32() |
| { |
| } // ~Inst_SOPK__S_ADDK_I32 |
| |
| // --- description from .arch file --- |
| // D.i = D.i + signext(SIMM16); |
| // SCC = overflow. |
| void |
| Inst_SOPK__S_ADDK_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegI16 simm16 = instData.SIMM16; |
| ConstScalarOperandI32 src(gpuDynInst, instData.SDST); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = src.rawData() + (ScalarRegI32)sext<16>(simm16); |
| scc = (bits(src.rawData(), 31) == bits(simm16, 15) |
| && bits(src.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOPK__S_MULK_I32 class methods --- |
| |
| Inst_SOPK__S_MULK_I32::Inst_SOPK__S_MULK_I32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_mulk_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_MULK_I32 |
| |
| Inst_SOPK__S_MULK_I32::~Inst_SOPK__S_MULK_I32() |
| { |
| } // ~Inst_SOPK__S_MULK_I32 |
| |
| // --- description from .arch file --- |
| // D.i = D.i * signext(SIMM16). |
| void |
| Inst_SOPK__S_MULK_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegI16 simm16 = instData.SIMM16; |
| ConstScalarOperandI32 src(gpuDynInst, instData.SDST); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = src.rawData() * (ScalarRegI32)sext<16>(simm16); |
| |
| sdst.write(); |
| } // execute |
| // --- Inst_SOPK__S_CBRANCH_I_FORK class methods --- |
| |
| Inst_SOPK__S_CBRANCH_I_FORK::Inst_SOPK__S_CBRANCH_I_FORK(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_cbranch_i_fork") |
| { |
| setFlag(Branch); |
| } // Inst_SOPK__S_CBRANCH_I_FORK |
| |
| Inst_SOPK__S_CBRANCH_I_FORK::~Inst_SOPK__S_CBRANCH_I_FORK() |
| { |
| } // ~Inst_SOPK__S_CBRANCH_I_FORK |
| |
| // --- description from .arch file --- |
| // mask_pass = S0.u64 & EXEC; |
| // mask_fail = ~S0.u64 & EXEC; |
| // target_addr = PC + signext(SIMM16 * 4) + 4; |
| // if(mask_pass == EXEC) |
| // PC = target_addr; |
| // elsif(mask_fail == EXEC) |
| // PC += 4; |
| // elsif(bitcount(mask_fail) < bitcount(mask_pass)) |
| // EXEC = mask_fail; |
| // SGPR[CSP*4] = { target_addr, mask_pass }; |
| // CSP++; |
| // PC += 4; |
| // else |
| // EXEC = mask_pass; |
| // SGPR[CSP*4] = { PC + 4, mask_fail }; |
| // CSP++; |
| // PC = target_addr; |
| // end. |
| // Conditional branch using branch-stack. |
| // S0 = compare mask(vcc or any sgpr), and |
| // SIMM16 = signed DWORD branch offset relative to next instruction. |
| // See also S_CBRANCH_JOIN. |
| void |
| Inst_SOPK__S_CBRANCH_I_FORK::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } // execute |
| // --- Inst_SOPK__S_GETREG_B32 class methods --- |
| |
| Inst_SOPK__S_GETREG_B32::Inst_SOPK__S_GETREG_B32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_getreg_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_GETREG_B32 |
| |
| Inst_SOPK__S_GETREG_B32::~Inst_SOPK__S_GETREG_B32() |
| { |
| } // ~Inst_SOPK__S_GETREG_B32 |
| |
| // --- description from .arch file --- |
| // D.u = hardware-reg. Read some or all of a hardware register into the |
| // LSBs of D. |
| // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size |
| // is 1..32. |
| void |
| Inst_SOPK__S_GETREG_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegI16 simm16 = instData.SIMM16; |
| ScalarRegU32 hwregId = simm16 & 0x3f; |
| ScalarRegU32 offset = (simm16 >> 6) & 31; |
| ScalarRegU32 size = ((simm16 >> 11) & 31) + 1; |
| |
| ScalarRegU32 hwreg = |
| gpuDynInst->computeUnit()->shader->getHwReg(hwregId); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| sdst.read(); |
| |
| // Store value from hardware to part of the SDST. |
| ScalarRegU32 mask = (((1U << size) - 1U) << offset); |
| sdst = (hwreg & mask) >> offset; |
| sdst.write(); |
| } // execute |
| // --- Inst_SOPK__S_SETREG_B32 class methods --- |
| |
| Inst_SOPK__S_SETREG_B32::Inst_SOPK__S_SETREG_B32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_setreg_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_SETREG_B32 |
| |
| Inst_SOPK__S_SETREG_B32::~Inst_SOPK__S_SETREG_B32() |
| { |
| } // ~Inst_SOPK__S_SETREG_B32 |
| |
| // --- description from .arch file --- |
| // hardware-reg = S0.u. Write some or all of the LSBs of D into a hardware |
| // register. |
| // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size |
| // is 1..32. |
| void |
| Inst_SOPK__S_SETREG_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegI16 simm16 = instData.SIMM16; |
| ScalarRegU32 hwregId = simm16 & 0x3f; |
| ScalarRegU32 offset = (simm16 >> 6) & 31; |
| ScalarRegU32 size = ((simm16 >> 11) & 31) + 1; |
| |
| ScalarRegU32 hwreg = |
| gpuDynInst->computeUnit()->shader->getHwReg(hwregId); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| sdst.read(); |
| |
| // Store value from SDST to part of the hardware register. |
| ScalarRegU32 mask = (((1U << size) - 1U) << offset); |
| hwreg = ((hwreg & ~mask) | ((sdst.rawData() << offset) & mask)); |
| gpuDynInst->computeUnit()->shader->setHwReg(hwregId, hwreg); |
| |
| // set MODE register to control the behavior of single precision |
| // floating-point numbers: denormal mode or round mode |
| if (hwregId==1 && size==2 |
| && (offset==4 || offset==0)) { |
| warn_once("Be cautious that s_setreg_b32 has no real effect " |
| "on FP modes: %s\n", gpuDynInst->disassemble()); |
| return; |
| } |
| |
| // panic if not changing MODE of floating-point numbers |
| panicUnimplemented(); |
| } // execute |
| // --- Inst_SOPK__S_SETREG_IMM32_B32 class methods --- |
| |
| Inst_SOPK__S_SETREG_IMM32_B32::Inst_SOPK__S_SETREG_IMM32_B32( |
| InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_setreg_imm32_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_SETREG_IMM32_B32 |
| |
| Inst_SOPK__S_SETREG_IMM32_B32::~Inst_SOPK__S_SETREG_IMM32_B32() |
| { |
| } // ~Inst_SOPK__S_SETREG_IMM32_B32 |
| |
| // --- description from .arch file --- |
| // Write some or all of the LSBs of IMM32 into a hardware register; this |
| // --- instruction requires a 32-bit literal constant. |
| // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size |
| // is 1..32. |
| void |
| Inst_SOPK__S_SETREG_IMM32_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegI16 simm16 = instData.SIMM16; |
| ScalarRegU32 hwregId = simm16 & 0x3f; |
| ScalarRegU32 offset = (simm16 >> 6) & 31; |
| ScalarRegU32 size = ((simm16 >> 11) & 31) + 1; |
| |
| ScalarRegU32 hwreg = |
| gpuDynInst->computeUnit()->shader->getHwReg(hwregId); |
| ScalarRegI32 simm32 = extData.imm_u32; |
| |
| // Store value from SIMM32 to part of the hardware register. |
| ScalarRegU32 mask = (((1U << size) - 1U) << offset); |
| hwreg = ((hwreg & ~mask) | ((simm32 << offset) & mask)); |
| gpuDynInst->computeUnit()->shader->setHwReg(hwregId, hwreg); |
| |
| // set MODE register to control the behavior of single precision |
| // floating-point numbers: denormal mode or round mode |
| if (hwregId==HW_REG_MODE && size==2 |
| && (offset==4 || offset==0)) { |
| warn_once("Be cautious that s_setreg_imm32_b32 has no real effect " |
| "on FP modes: %s\n", gpuDynInst->disassemble()); |
| return; |
| } |
| |
| // panic if not changing modes of single-precision FPs |
| panicUnimplemented(); |
| } // execute |
| // --- Inst_SOP1__S_MOV_B32 class methods --- |
| |
| Inst_SOP1__S_MOV_B32::Inst_SOP1__S_MOV_B32(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_mov_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_MOV_B32 |
| |
| Inst_SOP1__S_MOV_B32::~Inst_SOP1__S_MOV_B32() |
| { |
| } // ~Inst_SOP1__S_MOV_B32 |
| |
| // --- description from .arch file --- |
| // D.u = S0.u. |
| void |
| Inst_SOP1__S_MOV_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = src.rawData(); |
| |
| sdst.write(); |
| } // execute |
| // --- Inst_SOP1__S_MOV_B64 class methods --- |
| |
| Inst_SOP1__S_MOV_B64::Inst_SOP1__S_MOV_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_mov_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_MOV_B64 |
| |
| Inst_SOP1__S_MOV_B64::~Inst_SOP1__S_MOV_B64() |
| { |
| } // ~Inst_SOP1__S_MOV_B64 |
| |
| // --- description from .arch file --- |
| // D.u64 = S0.u64. |
| void |
| Inst_SOP1__S_MOV_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = src.rawData(); |
| |
| sdst.write(); |
| } // execute |
| // --- Inst_SOP1__S_CMOV_B32 class methods --- |
| |
| Inst_SOP1__S_CMOV_B32::Inst_SOP1__S_CMOV_B32(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_cmov_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_CMOV_B32 |
| |
| Inst_SOP1__S_CMOV_B32::~Inst_SOP1__S_CMOV_B32() |
| { |
| } // ~Inst_SOP1__S_CMOV_B32 |
| |
| // --- description from .arch file --- |
| // (SCC) then D.u = S0.u; |
| // else NOP. |
| // Conditional move. |
| void |
| Inst_SOP1__S_CMOV_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| scc.read(); |
| |
| if (scc.rawData()) { |
| sdst = src.rawData(); |
| sdst.write(); |
| } |
| } // execute |
| // --- Inst_SOP1__S_CMOV_B64 class methods --- |
| |
| Inst_SOP1__S_CMOV_B64::Inst_SOP1__S_CMOV_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_cmov_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_CMOV_B64 |
| |
| Inst_SOP1__S_CMOV_B64::~Inst_SOP1__S_CMOV_B64() |
| { |
| } // ~Inst_SOP1__S_CMOV_B64 |
| |
| // --- description from .arch file --- |
| // if(SCC) then D.u64 = S0.u64; |
| // else NOP. |
| // Conditional move. |
| void |
| Inst_SOP1__S_CMOV_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| scc.read(); |
| |
| if (scc.rawData()) { |
| sdst = src.rawData(); |
| sdst.write(); |
| } |
| } // execute |
| // --- Inst_SOP1__S_NOT_B32 class methods --- |
| |
| Inst_SOP1__S_NOT_B32::Inst_SOP1__S_NOT_B32(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_not_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_NOT_B32 |
| |
| Inst_SOP1__S_NOT_B32::~Inst_SOP1__S_NOT_B32() |
| { |
| } // ~Inst_SOP1__S_NOT_B32 |
| |
| // --- description from .arch file --- |
| // D.u = ~S0.u; |
| // SCC = 1 if result is non-zero. |
| // Bitwise negation. |
| void |
| Inst_SOP1__S_NOT_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = ~src.rawData(); |
| |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP1__S_NOT_B64 class methods --- |
| |
| Inst_SOP1__S_NOT_B64::Inst_SOP1__S_NOT_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_not_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_NOT_B64 |
| |
| Inst_SOP1__S_NOT_B64::~Inst_SOP1__S_NOT_B64() |
| { |
| } // ~Inst_SOP1__S_NOT_B64 |
| |
| // --- description from .arch file --- |
| // D.u64 = ~S0.u64; |
| // SCC = 1 if result is non-zero. |
| // Bitwise negation. |
| void |
| Inst_SOP1__S_NOT_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = ~src.rawData(); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP1__S_WQM_B32 class methods --- |
| |
| Inst_SOP1__S_WQM_B32::Inst_SOP1__S_WQM_B32(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_wqm_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_WQM_B32 |
| |
| Inst_SOP1__S_WQM_B32::~Inst_SOP1__S_WQM_B32() |
| { |
| } // ~Inst_SOP1__S_WQM_B32 |
| |
| // --- description from .arch file --- |
| // D[i] = (S0[(i & ~3):(i | 3)] != 0); |
| // Computes whole quad mode for an active/valid mask. |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP1__S_WQM_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = wholeQuadMode(src.rawData()); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP1__S_WQM_B64 class methods --- |
| |
| Inst_SOP1__S_WQM_B64::Inst_SOP1__S_WQM_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_wqm_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_WQM_B64 |
| |
| Inst_SOP1__S_WQM_B64::~Inst_SOP1__S_WQM_B64() |
| { |
| } // ~Inst_SOP1__S_WQM_B64 |
| |
| // --- description from .arch file --- |
| // D[i] = (S0[(i & ~3):(i | 3)] != 0); |
| // Computes whole quad mode for an active/valid mask. |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP1__S_WQM_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = wholeQuadMode(src.rawData()); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP1__S_BREV_B32 class methods --- |
| |
| Inst_SOP1__S_BREV_B32::Inst_SOP1__S_BREV_B32(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_brev_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_BREV_B32 |
| |
| Inst_SOP1__S_BREV_B32::~Inst_SOP1__S_BREV_B32() |
| { |
| } // ~Inst_SOP1__S_BREV_B32 |
| |
| // --- description from .arch file --- |
| // D.u[31:0] = S0.u[0:31] (reverse bits). |
| void |
| Inst_SOP1__S_BREV_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = reverseBits(src.rawData()); |
| |
| sdst.write(); |
| } // execute |
| // --- Inst_SOP1__S_BREV_B64 class methods --- |
| |
| Inst_SOP1__S_BREV_B64::Inst_SOP1__S_BREV_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_brev_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_BREV_B64 |
| |
| Inst_SOP1__S_BREV_B64::~Inst_SOP1__S_BREV_B64() |
| { |
| } // ~Inst_SOP1__S_BREV_B64 |
| |
| // --- description from .arch file --- |
| // D.u64[63:0] = S0.u64[0:63] (reverse bits). |
| void |
| Inst_SOP1__S_BREV_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = reverseBits(src.rawData()); |
| |
| sdst.write(); |
| } // execute |
| // --- Inst_SOP1__S_BCNT0_I32_B32 class methods --- |
| |
| Inst_SOP1__S_BCNT0_I32_B32::Inst_SOP1__S_BCNT0_I32_B32(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_bcnt0_i32_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_BCNT0_I32_B32 |
| |
| Inst_SOP1__S_BCNT0_I32_B32::~Inst_SOP1__S_BCNT0_I32_B32() |
| { |
| } // ~Inst_SOP1__S_BCNT0_I32_B32 |
| |
| // --- description from .arch file --- |
| // D.i = CountZeroBits(S0.u); |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP1__S_BCNT0_I32_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = countZeroBits(src.rawData()); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP1__S_BCNT0_I32_B64 class methods --- |
| |
| Inst_SOP1__S_BCNT0_I32_B64::Inst_SOP1__S_BCNT0_I32_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_bcnt0_i32_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_BCNT0_I32_B64 |
| |
| Inst_SOP1__S_BCNT0_I32_B64::~Inst_SOP1__S_BCNT0_I32_B64() |
| { |
| } // ~Inst_SOP1__S_BCNT0_I32_B64 |
| |
| // --- description from .arch file --- |
| // D.i = CountZeroBits(S0.u64); |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP1__S_BCNT0_I32_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = countZeroBits(src.rawData()); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP1__S_BCNT1_I32_B32 class methods --- |
| |
| Inst_SOP1__S_BCNT1_I32_B32::Inst_SOP1__S_BCNT1_I32_B32(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_bcnt1_i32_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_BCNT1_I32_B32 |
| |
| Inst_SOP1__S_BCNT1_I32_B32::~Inst_SOP1__S_BCNT1_I32_B32() |
| { |
| } // ~Inst_SOP1__S_BCNT1_I32_B32 |
| |
| // --- description from .arch file --- |
| // D.i = CountOneBits(S0.u); |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP1__S_BCNT1_I32_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = popCount(src.rawData()); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP1__S_BCNT1_I32_B64 class methods --- |
| |
| Inst_SOP1__S_BCNT1_I32_B64::Inst_SOP1__S_BCNT1_I32_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_bcnt1_i32_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_BCNT1_I32_B64 |
| |
| Inst_SOP1__S_BCNT1_I32_B64::~Inst_SOP1__S_BCNT1_I32_B64() |
| { |
| } // ~Inst_SOP1__S_BCNT1_I32_B64 |
| |
| // --- description from .arch file --- |
| // D.i = CountOneBits(S0.u64); |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP1__S_BCNT1_I32_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = popCount(src.rawData()); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP1__S_FF0_I32_B32 class methods --- |
| |
| Inst_SOP1__S_FF0_I32_B32::Inst_SOP1__S_FF0_I32_B32(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_ff0_i32_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_FF0_I32_B32 |
| |
| Inst_SOP1__S_FF0_I32_B32::~Inst_SOP1__S_FF0_I32_B32() |
| { |
| } // ~Inst_SOP1__S_FF0_I32_B32 |
| |
| // --- description from .arch file --- |
| // D.i = FindFirstZero(S0.u); |
| // If no zeros are found, return -1. |
| // Returns the bit position of the first zero from the LSB. |
| void |
| Inst_SOP1__S_FF0_I32_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = findFirstZero(src.rawData()); |
| |
| sdst.write(); |
| } // execute |
| // --- Inst_SOP1__S_FF0_I32_B64 class methods --- |
| |
| Inst_SOP1__S_FF0_I32_B64::Inst_SOP1__S_FF0_I32_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_ff0_i32_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_FF0_I32_B64 |
| |
| Inst_SOP1__S_FF0_I32_B64::~Inst_SOP1__S_FF0_I32_B64() |
| { |
| } // ~Inst_SOP1__S_FF0_I32_B64 |
| |
| // --- description from .arch file --- |
| // D.i = FindFirstZero(S0.u64); |
| // If no zeros are found, return -1. |
| // Returns the bit position of the first zero from the LSB. |
| void |
| Inst_SOP1__S_FF0_I32_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = findFirstZero(src.rawData()); |
| |
| sdst.write(); |
| } // execute |
| // --- Inst_SOP1__S_FF1_I32_B32 class methods --- |
| |
| Inst_SOP1__S_FF1_I32_B32::Inst_SOP1__S_FF1_I32_B32(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_ff1_i32_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_FF1_I32_B32 |
| |
| Inst_SOP1__S_FF1_I32_B32::~Inst_SOP1__S_FF1_I32_B32() |
| { |
| } // ~Inst_SOP1__S_FF1_I32_B32 |
| |
| // --- description from .arch file --- |
| // D.i = FindFirstOne(S0.u); |
| // If no ones are found, return -1. |
| // Returns the bit position of the first one from the LSB. |
| void |
| Inst_SOP1__S_FF1_I32_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = findFirstOne(src.rawData()); |
| |
| sdst.write(); |
| } // execute |
| // --- Inst_SOP1__S_FF1_I32_B64 class methods --- |
| |
| Inst_SOP1__S_FF1_I32_B64::Inst_SOP1__S_FF1_I32_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_ff1_i32_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_FF1_I32_B64 |
| |
| Inst_SOP1__S_FF1_I32_B64::~Inst_SOP1__S_FF1_I32_B64() |
| { |
| } // ~Inst_SOP1__S_FF1_I32_B64 |
| |
| // --- description from .arch file --- |
| // D.i = FindFirstOne(S0.u64); |
| // If no ones are found, return -1. |
| // Returns the bit position of the first one from the LSB. |
| void |
| Inst_SOP1__S_FF1_I32_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = findFirstOne(src.rawData()); |
| |
| sdst.write(); |
| } // execute |
| // --- Inst_SOP1__S_FLBIT_I32_B32 class methods --- |
| |
| Inst_SOP1__S_FLBIT_I32_B32::Inst_SOP1__S_FLBIT_I32_B32(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_flbit_i32_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_FLBIT_I32_B32 |
| |
| Inst_SOP1__S_FLBIT_I32_B32::~Inst_SOP1__S_FLBIT_I32_B32() |
| { |
| } // ~Inst_SOP1__S_FLBIT_I32_B32 |
| |
| // --- description from .arch file --- |
| // D.i = FindFirstOne(S0.u); |
| // If no ones are found, return -1. |
| // Counts how many zeros before the first one starting from the MSB. |
| void |
| Inst_SOP1__S_FLBIT_I32_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = countZeroBitsMsb(src.rawData()); |
| |
| sdst.write(); |
| } // execute |
| // --- Inst_SOP1__S_FLBIT_I32_B64 class methods --- |
| |
| Inst_SOP1__S_FLBIT_I32_B64::Inst_SOP1__S_FLBIT_I32_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_flbit_i32_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_FLBIT_I32_B64 |
| |
| Inst_SOP1__S_FLBIT_I32_B64::~Inst_SOP1__S_FLBIT_I32_B64() |
| { |
| } // ~Inst_SOP1__S_FLBIT_I32_B64 |
| |
| // --- description from .arch file --- |
| // D.i = FindFirstOne(S0.u64); |
| // If no ones are found, return -1. |
| // Counts how many zeros before the first one starting from the MSB. |
| void |
| Inst_SOP1__S_FLBIT_I32_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = countZeroBitsMsb(src.rawData()); |
| |
| sdst.write(); |
| } // execute |
| // --- Inst_SOP1__S_FLBIT_I32 class methods --- |
| |
| Inst_SOP1__S_FLBIT_I32::Inst_SOP1__S_FLBIT_I32(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_flbit_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_FLBIT_I32 |
| |
| Inst_SOP1__S_FLBIT_I32::~Inst_SOP1__S_FLBIT_I32() |
| { |
| } // ~Inst_SOP1__S_FLBIT_I32 |
| |
| // --- description from .arch file --- |
| // D.i = FirstOppositeSignBit(S0.i); |
| // If S0.i == 0 or S0.i == -1 (all bits are the same), return -1. |
| // Counts how many bits in a row (from MSB to LSB) are the same as the |
| // sign bit. |
| void |
| Inst_SOP1__S_FLBIT_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = firstOppositeSignBit(src.rawData()); |
| |
| sdst.write(); |
| } // execute |
| // --- Inst_SOP1__S_FLBIT_I32_I64 class methods --- |
| |
| Inst_SOP1__S_FLBIT_I32_I64::Inst_SOP1__S_FLBIT_I32_I64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_flbit_i32_i64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_FLBIT_I32_I64 |
| |
| Inst_SOP1__S_FLBIT_I32_I64::~Inst_SOP1__S_FLBIT_I32_I64() |
| { |
| } // ~Inst_SOP1__S_FLBIT_I32_I64 |
| |
| // --- description from .arch file --- |
| // D.i = FirstOppositeSignBit(S0.i64); |
| // If S0.i == 0 or S0.i == -1 (all bits are the same), return -1. |
| // Counts how many bits in a row (from MSB to LSB) are the same as the |
| // sign bit. |
| void |
| Inst_SOP1__S_FLBIT_I32_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = firstOppositeSignBit(src.rawData()); |
| |
| sdst.write(); |
| } // execute |
| // --- Inst_SOP1__S_SEXT_I32_I8 class methods --- |
| |
| Inst_SOP1__S_SEXT_I32_I8::Inst_SOP1__S_SEXT_I32_I8(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_sext_i32_i8") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_SEXT_I32_I8 |
| |
| Inst_SOP1__S_SEXT_I32_I8::~Inst_SOP1__S_SEXT_I32_I8() |
| { |
| } // ~Inst_SOP1__S_SEXT_I32_I8 |
| |
| // --- description from .arch file --- |
| // D.i = signext(S0.i[7:0]) (sign extension). |
| void |
| Inst_SOP1__S_SEXT_I32_I8::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = sext<std::numeric_limits<ScalarRegI8>::digits>( |
| bits(src.rawData(), 7, 0)); |
| |
| sdst.write(); |
| } // execute |
| // --- Inst_SOP1__S_SEXT_I32_I16 class methods --- |
| |
| Inst_SOP1__S_SEXT_I32_I16::Inst_SOP1__S_SEXT_I32_I16(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_sext_i32_i16") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_SEXT_I32_I16 |
| |
| Inst_SOP1__S_SEXT_I32_I16::~Inst_SOP1__S_SEXT_I32_I16() |
| { |
| } // ~Inst_SOP1__S_SEXT_I32_I16 |
| |
| // --- description from .arch file --- |
| // D.i = signext(S0.i[15:0]) (sign extension). |
| void |
| Inst_SOP1__S_SEXT_I32_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = sext<std::numeric_limits<ScalarRegI16>::digits>( |
| bits(src.rawData(), 15, 0)); |
| |
| sdst.write(); |
| } // execute |
| // --- Inst_SOP1__S_BITSET0_B32 class methods --- |
| |
| Inst_SOP1__S_BITSET0_B32::Inst_SOP1__S_BITSET0_B32(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_bitset0_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_BITSET0_B32 |
| |
| Inst_SOP1__S_BITSET0_B32::~Inst_SOP1__S_BITSET0_B32() |
| { |
| } // ~Inst_SOP1__S_BITSET0_B32 |
| |
| // --- description from .arch file --- |
| // D.u[S0.u[4:0]] = 0. |
| void |
| Inst_SOP1__S_BITSET0_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst.setBit(bits(src.rawData(), 4, 0), 0); |
| |
| sdst.write(); |
| } // execute |
| // --- Inst_SOP1__S_BITSET0_B64 class methods --- |
| |
| Inst_SOP1__S_BITSET0_B64::Inst_SOP1__S_BITSET0_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_bitset0_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_BITSET0_B64 |
| |
| Inst_SOP1__S_BITSET0_B64::~Inst_SOP1__S_BITSET0_B64() |
| { |
| } // ~Inst_SOP1__S_BITSET0_B64 |
| |
| // --- description from .arch file --- |
| // D.u64[S0.u[5:0]] = 0. |
| void |
| Inst_SOP1__S_BITSET0_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst.setBit(bits(src.rawData(), 5, 0), 0); |
| |
| sdst.write(); |
| } // execute |
| // --- Inst_SOP1__S_BITSET1_B32 class methods --- |
| |
| Inst_SOP1__S_BITSET1_B32::Inst_SOP1__S_BITSET1_B32(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_bitset1_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_BITSET1_B32 |
| |
| Inst_SOP1__S_BITSET1_B32::~Inst_SOP1__S_BITSET1_B32() |
| { |
| } // ~Inst_SOP1__S_BITSET1_B32 |
| |
| // --- description from .arch file --- |
| // D.u[S0.u[4:0]] = 1. |
| void |
| Inst_SOP1__S_BITSET1_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst.setBit(bits(src.rawData(), 4, 0), 1); |
| |
| sdst.write(); |
| } // execute |
| // --- Inst_SOP1__S_BITSET1_B64 class methods --- |
| |
| Inst_SOP1__S_BITSET1_B64::Inst_SOP1__S_BITSET1_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_bitset1_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_BITSET1_B64 |
| |
| Inst_SOP1__S_BITSET1_B64::~Inst_SOP1__S_BITSET1_B64() |
| { |
| } // ~Inst_SOP1__S_BITSET1_B64 |
| |
| // --- description from .arch file --- |
| // D.u64[S0.u[5:0]] = 1. |
| void |
| Inst_SOP1__S_BITSET1_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst.setBit(bits(src.rawData(), 5, 0), 1); |
| |
| sdst.write(); |
| } // execute |
| // --- Inst_SOP1__S_GETPC_B64 class methods --- |
| |
| Inst_SOP1__S_GETPC_B64::Inst_SOP1__S_GETPC_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_getpc_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_GETPC_B64 |
| |
| Inst_SOP1__S_GETPC_B64::~Inst_SOP1__S_GETPC_B64() |
| { |
| } // ~Inst_SOP1__S_GETPC_B64 |
| |
| // --- description from .arch file --- |
| // D.u64 = PC + 4. |
| // Destination receives the byte address of the next instruction. |
| void |
| Inst_SOP1__S_GETPC_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Addr pc = gpuDynInst->pc(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| |
| sdst = pc + 4; |
| |
| sdst.write(); |
| } // execute |
| // --- Inst_SOP1__S_SETPC_B64 class methods --- |
| |
| Inst_SOP1__S_SETPC_B64::Inst_SOP1__S_SETPC_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_setpc_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_SETPC_B64 |
| |
| Inst_SOP1__S_SETPC_B64::~Inst_SOP1__S_SETPC_B64() |
| { |
| } // ~Inst_SOP1__S_SETPC_B64 |
| |
| // --- description from .arch file --- |
| // PC = S0.u64. |
| // S0.u64 is a byte address of the instruction to jump to. |
| void |
| Inst_SOP1__S_SETPC_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| |
| src.read(); |
| |
| wf->pc(src.rawData()); |
| } // execute |
| // --- Inst_SOP1__S_SWAPPC_B64 class methods --- |
| |
| Inst_SOP1__S_SWAPPC_B64::Inst_SOP1__S_SWAPPC_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_swappc_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_SWAPPC_B64 |
| |
| Inst_SOP1__S_SWAPPC_B64::~Inst_SOP1__S_SWAPPC_B64() |
| { |
| } // ~Inst_SOP1__S_SWAPPC_B64 |
| |
| // --- description from .arch file --- |
| // D.u64 = PC + 4; PC = S0.u64. |
| // S0.u64 is a byte address of the instruction to jump to. |
| void |
| Inst_SOP1__S_SWAPPC_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| Addr pc = gpuDynInst->pc(); |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = pc + 4; |
| |
| wf->pc(src.rawData()); |
| sdst.write(); |
| } // execute |
| // --- Inst_SOP1__S_RFE_B64 class methods --- |
| |
| Inst_SOP1__S_RFE_B64::Inst_SOP1__S_RFE_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_rfe_b64") |
| { |
| } // Inst_SOP1__S_RFE_B64 |
| |
| Inst_SOP1__S_RFE_B64::~Inst_SOP1__S_RFE_B64() |
| { |
| } // ~Inst_SOP1__S_RFE_B64 |
| |
| // --- description from .arch file --- |
| // PRIV = 0; |
| // PC = S0.u64. |
| // Return from exception handler and continue. |
| // This instruction may only be used within a trap handler. |
| void |
| Inst_SOP1__S_RFE_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } // execute |
| // --- Inst_SOP1__S_AND_SAVEEXEC_B64 class methods --- |
| |
| Inst_SOP1__S_AND_SAVEEXEC_B64::Inst_SOP1__S_AND_SAVEEXEC_B64( |
| InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_and_saveexec_b64") |
| { |
| setFlag(ALU); |
| setFlag(ReadsEXEC); |
| setFlag(WritesEXEC); |
| } // Inst_SOP1__S_AND_SAVEEXEC_B64 |
| |
| Inst_SOP1__S_AND_SAVEEXEC_B64::~Inst_SOP1__S_AND_SAVEEXEC_B64() |
| { |
| } // ~Inst_SOP1__S_AND_SAVEEXEC_B64 |
| |
| // --- description from .arch file --- |
| // D.u64 = EXEC; |
| // EXEC = S0.u64 & EXEC; |
| // SCC = 1 if the new value of EXEC is non-zero. |
| void |
| Inst_SOP1__S_AND_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = wf->execMask().to_ullong(); |
| wf->execMask() = src.rawData() & wf->execMask().to_ullong(); |
| scc = wf->execMask().any() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP1__S_OR_SAVEEXEC_B64 class methods --- |
| |
| Inst_SOP1__S_OR_SAVEEXEC_B64::Inst_SOP1__S_OR_SAVEEXEC_B64( |
| InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_or_saveexec_b64") |
| { |
| setFlag(ALU); |
| setFlag(ReadsEXEC); |
| setFlag(WritesEXEC); |
| } // Inst_SOP1__S_OR_SAVEEXEC_B64 |
| |
| Inst_SOP1__S_OR_SAVEEXEC_B64::~Inst_SOP1__S_OR_SAVEEXEC_B64() |
| { |
| } // ~Inst_SOP1__S_OR_SAVEEXEC_B64 |
| |
| // --- description from .arch file --- |
| // D.u64 = EXEC; |
| // EXEC = S0.u64 | EXEC; |
| // SCC = 1 if the new value of EXEC is non-zero. |
| void |
| Inst_SOP1__S_OR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = wf->execMask().to_ullong(); |
| wf->execMask() = src.rawData() | wf->execMask().to_ullong(); |
| scc = wf->execMask().any() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP1__S_XOR_SAVEEXEC_B64 class methods --- |
| |
| Inst_SOP1__S_XOR_SAVEEXEC_B64::Inst_SOP1__S_XOR_SAVEEXEC_B64( |
| InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_xor_saveexec_b64") |
| { |
| setFlag(ALU); |
| setFlag(ReadsEXEC); |
| setFlag(WritesEXEC); |
| } // Inst_SOP1__S_XOR_SAVEEXEC_B64 |
| |
| Inst_SOP1__S_XOR_SAVEEXEC_B64::~Inst_SOP1__S_XOR_SAVEEXEC_B64() |
| { |
| } // ~Inst_SOP1__S_XOR_SAVEEXEC_B64 |
| |
| // --- description from .arch file --- |
| // D.u64 = EXEC; |
| // EXEC = S0.u64 ^ EXEC; |
| // SCC = 1 if the new value of EXEC is non-zero. |
| void |
| Inst_SOP1__S_XOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = wf->execMask().to_ullong(); |
| wf->execMask() = src.rawData() ^ wf->execMask().to_ullong(); |
| scc = wf->execMask().any() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } // execute |
| // --- Inst_SOP1__S_ANDN2_SAVEEXEC_B64 class methods --- |
| |
| Inst_SOP1__S_ANDN2_SAVEEXEC_B64::Inst_SOP1__S_ANDN2_SAVEEXEC_B64( |
| InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_andn2_saveexec_b64") |
| { |
| setFlag(ALU); |
| setFlag(ReadsEXEC); |
| setFlag(WritesEXEC); |
| } // Inst_SOP1__S_ANDN2_SAVEEXEC_B64 |
| |
| Inst_SOP1__S_ANDN2_SAVEEXEC_B64::~Inst_SOP1__S_ANDN2_SAVEEXEC_B64() |
| { |
| } // ~Inst_SOP1__S_ANDN2_SAVEEXEC_B64 |
| |
| // --- description from .arch file --- |
| // D.u64 = EXEC; |
| // EXEC = S0.u64 & ~EXEC; |
| // SCC = 1 if the new value of EXEC is non-zero. |
| void |
| Inst_SOP1__S_ANDN2_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = wf->execMask().to_ullong(); |
| wf->execMask() = src.rawData() &~ wf->execMask().to_ullong(); |
| scc = wf->execMask().any() ? |