| /* |
| * Copyright (c) 2015-2021 Advanced Micro Devices, Inc. |
| * All rights reserved. |
| * |
| * For use for simulation and test purposes only |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright notice, |
| * this list of conditions and the following disclaimer. |
| * |
| * 2. Redistributions in binary form must reproduce the above copyright notice, |
| * this list of conditions and the following disclaimer in the documentation |
| * and/or other materials provided with the distribution. |
| * |
| * 3. Neither the name of the copyright holder nor the names of its |
| * contributors may be used to endorse or promote products derived from this |
| * software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
| * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| * POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
#include "arch/amdgpu/gcn3/insts/instructions.hh"

#include <cmath>
#include <cstdint>

#include "arch/amdgpu/gcn3/insts/inst_util.hh"
#include "debug/GCN3.hh"
#include "debug/GPUSync.hh"
#include "gpu-compute/shader.hh"
| |
| namespace gem5 |
| { |
| |
| namespace Gcn3ISA |
| { |
| |
| Inst_SOP2__S_ADD_U32::Inst_SOP2__S_ADD_U32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_add_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_ADD_U32 |
| |
| Inst_SOP2__S_ADD_U32::~Inst_SOP2__S_ADD_U32() |
| { |
| } // ~Inst_SOP2__S_ADD_U32 |
| |
| // D.u = S0.u + S1.u; |
| // SCC = (S0.u + S1.u >= 0x100000000ULL ? 1 : 0) is an unsigned |
| // overflow/carry-out. |
| void |
| Inst_SOP2__S_ADD_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = src0.rawData() + src1.rawData(); |
| scc = ((ScalarRegU64)src0.rawData() + (ScalarRegU64)src1.rawData()) |
| >= 0x100000000ULL ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_SUB_U32::Inst_SOP2__S_SUB_U32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_sub_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_SUB_U32 |
| |
| Inst_SOP2__S_SUB_U32::~Inst_SOP2__S_SUB_U32() |
| { |
| } // ~Inst_SOP2__S_SUB_U32 |
| |
| // D.u = S0.u - S1.u; |
| // SCC = (S1.u > S0.u ? 1 : 0) is an unsigned overflow or carry-out. |
| void |
| Inst_SOP2__S_SUB_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = src0.rawData() - src1.rawData(); |
| scc = (src1.rawData() > src0.rawData()) ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_ADD_I32::Inst_SOP2__S_ADD_I32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_add_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_ADD_I32 |
| |
| Inst_SOP2__S_ADD_I32::~Inst_SOP2__S_ADD_I32() |
| { |
| } // ~Inst_SOP2__S_ADD_I32 |
| |
| // D.i = S0.i + S1.i; |
| // SCC = (S0.u[31] == S1.u[31] && S0.u[31] != D.u[31]) is a signed |
| // overflow. |
| void |
| Inst_SOP2__S_ADD_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = src0.rawData() + src1.rawData(); |
| scc = (bits(src0.rawData(), 31) == bits(src1.rawData(), 31) |
| && bits(src0.rawData(), 31) != bits(sdst.rawData(), 31)) |
| ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_SUB_I32::Inst_SOP2__S_SUB_I32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_sub_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_SUB_I32 |
| |
| Inst_SOP2__S_SUB_I32::~Inst_SOP2__S_SUB_I32() |
| { |
| } // ~Inst_SOP2__S_SUB_I32 |
| |
| // D.i = S0.i - S1.i; |
| // SCC = (S0.u[31] != S1.u[31] && S0.u[31] != D.u[31]) is a signed |
| // overflow. |
| void |
| Inst_SOP2__S_SUB_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = src0.rawData() - src1.rawData(); |
| scc = (bits(src0.rawData(), 31) != bits(src1.rawData(), 31) |
| && bits(src0.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_ADDC_U32::Inst_SOP2__S_ADDC_U32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_addc_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_ADDC_U32 |
| |
| Inst_SOP2__S_ADDC_U32::~Inst_SOP2__S_ADDC_U32() |
| { |
| } // ~Inst_SOP2__S_ADDC_U32 |
| |
| // D.u = S0.u + S1.u + SCC; |
| // SCC = (S0.u + S1.u + SCC >= 0x100000000ULL ? 1 : 0) is an unsigned |
| // overflow. |
| void |
| Inst_SOP2__S_ADDC_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| scc.read(); |
| |
| sdst = src0.rawData() + src1.rawData() + scc.rawData(); |
| scc = ((ScalarRegU64)src0.rawData() + (ScalarRegU64)src1.rawData() |
| + (ScalarRegU64)scc.rawData()) >= 0x100000000ULL ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_SUBB_U32::Inst_SOP2__S_SUBB_U32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_subb_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_SUBB_U32 |
| |
| Inst_SOP2__S_SUBB_U32::~Inst_SOP2__S_SUBB_U32() |
| { |
| } // ~Inst_SOP2__S_SUBB_U32 |
| |
| // D.u = S0.u - S1.u - SCC; |
| // SCC = (S1.u + SCC > S0.u ? 1 : 0) is an unsigned overflow. |
| void |
| Inst_SOP2__S_SUBB_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| scc.read(); |
| |
| sdst = src0.rawData() - src1.rawData() - scc.rawData(); |
| scc = (src1.rawData() + scc.rawData()) > src0.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_MIN_I32::Inst_SOP2__S_MIN_I32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_min_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_MIN_I32 |
| |
| Inst_SOP2__S_MIN_I32::~Inst_SOP2__S_MIN_I32() |
| { |
| } // ~Inst_SOP2__S_MIN_I32 |
| |
| // D.i = (S0.i < S1.i) ? S0.i : S1.i; |
| // SCC = 1 if S0 is chosen as the minimum value. |
| void |
| Inst_SOP2__S_MIN_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = std::min(src0.rawData(), src1.rawData()); |
| scc = (src0.rawData() < src1.rawData()) ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_MIN_U32::Inst_SOP2__S_MIN_U32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_min_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_MIN_U32 |
| |
| Inst_SOP2__S_MIN_U32::~Inst_SOP2__S_MIN_U32() |
| { |
| } // ~Inst_SOP2__S_MIN_U32 |
| |
| // D.u = (S0.u < S1.u) ? S0.u : S1.u; |
| // SCC = 1 if S0 is chosen as the minimum value. |
| void |
| Inst_SOP2__S_MIN_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = std::min(src0.rawData(), src1.rawData()); |
| scc = (src0.rawData() < src1.rawData()) ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_MAX_I32::Inst_SOP2__S_MAX_I32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_max_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_MAX_I32 |
| |
| Inst_SOP2__S_MAX_I32::~Inst_SOP2__S_MAX_I32() |
| { |
| } // ~Inst_SOP2__S_MAX_I32 |
| |
| // D.i = (S0.i > S1.i) ? S0.i : S1.i; |
| // SCC = 1 if S0 is chosen as the maximum value. |
| void |
| Inst_SOP2__S_MAX_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = std::max(src0.rawData(), src1.rawData()); |
| scc = (src0.rawData() > src1.rawData()) ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_MAX_U32::Inst_SOP2__S_MAX_U32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_max_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_MAX_U32 |
| |
| Inst_SOP2__S_MAX_U32::~Inst_SOP2__S_MAX_U32() |
| { |
| } // ~Inst_SOP2__S_MAX_U32 |
| |
| // D.u = (S0.u > S1.u) ? S0.u : S1.u; |
| // SCC = 1 if S0 is chosen as the maximum value. |
| void |
| Inst_SOP2__S_MAX_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = std::max(src0.rawData(), src1.rawData()); |
| scc = (src0.rawData() > src1.rawData()) ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_CSELECT_B32::Inst_SOP2__S_CSELECT_B32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_cselect_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_CSELECT_B32 |
| |
| Inst_SOP2__S_CSELECT_B32::~Inst_SOP2__S_CSELECT_B32() |
| { |
| } // ~Inst_SOP2__S_CSELECT_B32 |
| |
| // D.u = SCC ? S0.u : S1.u (conditional select). |
| void |
| Inst_SOP2__S_CSELECT_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| scc.read(); |
| |
| sdst = scc.rawData() ? src0.rawData() : src1.rawData(); |
| |
| sdst.write(); |
| } |
| |
| Inst_SOP2__S_CSELECT_B64::Inst_SOP2__S_CSELECT_B64(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_cselect_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_CSELECT_B64 |
| |
| Inst_SOP2__S_CSELECT_B64::~Inst_SOP2__S_CSELECT_B64() |
| { |
| } // ~Inst_SOP2__S_CSELECT_B64 |
| |
| // D.u64 = SCC ? S0.u64 : S1.u64 (conditional select). |
| void |
| Inst_SOP2__S_CSELECT_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| scc.read(); |
| |
| sdst = scc.rawData() ? src0.rawData() : src1.rawData(); |
| |
| sdst.write(); |
| } |
| |
| Inst_SOP2__S_AND_B32::Inst_SOP2__S_AND_B32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_and_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_AND_B32 |
| |
| Inst_SOP2__S_AND_B32::~Inst_SOP2__S_AND_B32() |
| { |
| } // ~Inst_SOP2__S_AND_B32 |
| |
| // D.u = S0.u & S1.u; |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_AND_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = src0.rawData() & src1.rawData(); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_AND_B64::Inst_SOP2__S_AND_B64(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_and_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_AND_B64 |
| |
| Inst_SOP2__S_AND_B64::~Inst_SOP2__S_AND_B64() |
| { |
| } // ~Inst_SOP2__S_AND_B64 |
| |
| // D.u64 = S0.u64 & S1.u64; |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_AND_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = src0.rawData() & src1.rawData(); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_OR_B32::Inst_SOP2__S_OR_B32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_or_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_OR_B32 |
| |
| Inst_SOP2__S_OR_B32::~Inst_SOP2__S_OR_B32() |
| { |
| } // ~Inst_SOP2__S_OR_B32 |
| |
| // D.u = S0.u | S1.u; |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_OR_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = src0.rawData() | src1.rawData(); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_OR_B64::Inst_SOP2__S_OR_B64(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_or_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_OR_B64 |
| |
| Inst_SOP2__S_OR_B64::~Inst_SOP2__S_OR_B64() |
| { |
| } // ~Inst_SOP2__S_OR_B64 |
| |
| // D.u64 = S0.u64 | S1.u64; |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_OR_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = src0.rawData() | src1.rawData(); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_XOR_B32::Inst_SOP2__S_XOR_B32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_xor_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_XOR_B32 |
| |
| Inst_SOP2__S_XOR_B32::~Inst_SOP2__S_XOR_B32() |
| { |
| } // ~Inst_SOP2__S_XOR_B32 |
| |
| // D.u = S0.u ^ S1.u; |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_XOR_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = src0.rawData() ^ src1.rawData(); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_XOR_B64::Inst_SOP2__S_XOR_B64(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_xor_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_XOR_B64 |
| |
| Inst_SOP2__S_XOR_B64::~Inst_SOP2__S_XOR_B64() |
| { |
| } // ~Inst_SOP2__S_XOR_B64 |
| |
| // D.u64 = S0.u64 ^ S1.u64; |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_XOR_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = src0.rawData() ^ src1.rawData(); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_ANDN2_B32::Inst_SOP2__S_ANDN2_B32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_andn2_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_ANDN2_B32 |
| |
| Inst_SOP2__S_ANDN2_B32::~Inst_SOP2__S_ANDN2_B32() |
| { |
| } // ~Inst_SOP2__S_ANDN2_B32 |
| |
| // D.u = S0.u & ~S1.u; |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_ANDN2_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = src0.rawData() &~ src1.rawData(); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_ANDN2_B64::Inst_SOP2__S_ANDN2_B64(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_andn2_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_ANDN2_B64 |
| |
| Inst_SOP2__S_ANDN2_B64::~Inst_SOP2__S_ANDN2_B64() |
| { |
| } // ~Inst_SOP2__S_ANDN2_B64 |
| |
| // D.u64 = S0.u64 & ~S1.u64; |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_ANDN2_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = src0.rawData() &~ src1.rawData(); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_ORN2_B32::Inst_SOP2__S_ORN2_B32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_orn2_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_ORN2_B32 |
| |
| Inst_SOP2__S_ORN2_B32::~Inst_SOP2__S_ORN2_B32() |
| { |
| } // ~Inst_SOP2__S_ORN2_B32 |
| |
| // D.u = S0.u | ~S1.u; |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_ORN2_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = src0.rawData() |~ src1.rawData(); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_ORN2_B64::Inst_SOP2__S_ORN2_B64(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_orn2_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_ORN2_B64 |
| |
| Inst_SOP2__S_ORN2_B64::~Inst_SOP2__S_ORN2_B64() |
| { |
| } // ~Inst_SOP2__S_ORN2_B64 |
| |
| // D.u64 = S0.u64 | ~S1.u64; |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_ORN2_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = src0.rawData() |~ src1.rawData(); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_NAND_B32::Inst_SOP2__S_NAND_B32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_nand_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_NAND_B32 |
| |
| Inst_SOP2__S_NAND_B32::~Inst_SOP2__S_NAND_B32() |
| { |
| } // ~Inst_SOP2__S_NAND_B32 |
| |
| // D.u = ~(S0.u & S1.u); |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_NAND_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = ~(src0.rawData() & src1.rawData()); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_NAND_B64::Inst_SOP2__S_NAND_B64(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_nand_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_NAND_B64 |
| |
| Inst_SOP2__S_NAND_B64::~Inst_SOP2__S_NAND_B64() |
| { |
| } // ~Inst_SOP2__S_NAND_B64 |
| |
| // D.u64 = ~(S0.u64 & S1.u64); |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_NAND_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = ~(src0.rawData() & src1.rawData()); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_NOR_B32::Inst_SOP2__S_NOR_B32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_nor_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_NOR_B32 |
| |
| Inst_SOP2__S_NOR_B32::~Inst_SOP2__S_NOR_B32() |
| { |
| } // ~Inst_SOP2__S_NOR_B32 |
| |
| // D.u = ~(S0.u | S1.u); |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_NOR_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = ~(src0.rawData() | src1.rawData()); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_NOR_B64::Inst_SOP2__S_NOR_B64(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_nor_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_NOR_B64 |
| |
| Inst_SOP2__S_NOR_B64::~Inst_SOP2__S_NOR_B64() |
| { |
| } // ~Inst_SOP2__S_NOR_B64 |
| |
| // D.u64 = ~(S0.u64 | S1.u64); |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_NOR_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = ~(src0.rawData() | src1.rawData()); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_XNOR_B32::Inst_SOP2__S_XNOR_B32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_xnor_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_XNOR_B32 |
| |
| Inst_SOP2__S_XNOR_B32::~Inst_SOP2__S_XNOR_B32() |
| { |
| } // ~Inst_SOP2__S_XNOR_B32 |
| |
| // D.u = ~(S0.u ^ S1.u); |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_XNOR_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = ~(src0.rawData() ^ src1.rawData()); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_XNOR_B64::Inst_SOP2__S_XNOR_B64(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_xnor_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_XNOR_B64 |
| |
| Inst_SOP2__S_XNOR_B64::~Inst_SOP2__S_XNOR_B64() |
| { |
| } // ~Inst_SOP2__S_XNOR_B64 |
| |
| // D.u64 = ~(S0.u64 ^ S1.u64); |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_XNOR_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = ~(src0.rawData() ^ src1.rawData()); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_LSHL_B32::Inst_SOP2__S_LSHL_B32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_lshl_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_LSHL_B32 |
| |
| Inst_SOP2__S_LSHL_B32::~Inst_SOP2__S_LSHL_B32() |
| { |
| } // ~Inst_SOP2__S_LSHL_B32 |
| |
| // D.u = S0.u << S1.u[4:0]; |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_LSHL_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = (src0.rawData() << bits(src1.rawData(), 4, 0)); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_LSHL_B64::Inst_SOP2__S_LSHL_B64(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_lshl_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_LSHL_B64 |
| |
| Inst_SOP2__S_LSHL_B64::~Inst_SOP2__S_LSHL_B64() |
| { |
| } // ~Inst_SOP2__S_LSHL_B64 |
| |
| // D.u64 = S0.u64 << S1.u[5:0]; |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_LSHL_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = (src0.rawData() << bits(src1.rawData(), 5, 0)); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_LSHR_B32::Inst_SOP2__S_LSHR_B32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_lshr_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_LSHR_B32 |
| |
| Inst_SOP2__S_LSHR_B32::~Inst_SOP2__S_LSHR_B32() |
| { |
| } // ~Inst_SOP2__S_LSHR_B32 |
| |
| // D.u = S0.u >> S1.u[4:0]; |
| // SCC = 1 if result is non-zero. |
| // The vacated bits are set to zero. |
| void |
| Inst_SOP2__S_LSHR_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0)); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_LSHR_B64::Inst_SOP2__S_LSHR_B64(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_lshr_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_LSHR_B64 |
| |
| Inst_SOP2__S_LSHR_B64::~Inst_SOP2__S_LSHR_B64() |
| { |
| } // ~Inst_SOP2__S_LSHR_B64 |
| |
| // D.u64 = S0.u64 >> S1.u[5:0]; |
| // SCC = 1 if result is non-zero. |
| // The vacated bits are set to zero. |
| void |
| Inst_SOP2__S_LSHR_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0)); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_ASHR_I32::Inst_SOP2__S_ASHR_I32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_ashr_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_ASHR_I32 |
| |
| Inst_SOP2__S_ASHR_I32::~Inst_SOP2__S_ASHR_I32() |
| { |
| } // ~Inst_SOP2__S_ASHR_I32 |
| |
| // D.i = signext(S0.i) >> S1.u[4:0]; |
| // SCC = 1 if result is non-zero. |
| // The vacated bits are set to the sign bit of the input value. |
| void |
| Inst_SOP2__S_ASHR_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0)); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_ASHR_I64::Inst_SOP2__S_ASHR_I64(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_ashr_i64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_ASHR_I64 |
| |
| Inst_SOP2__S_ASHR_I64::~Inst_SOP2__S_ASHR_I64() |
| { |
| } // ~Inst_SOP2__S_ASHR_I64 |
| |
| // D.i64 = signext(S0.i64) >> S1.u[5:0]; |
| // SCC = 1 if result is non-zero. |
| // The vacated bits are set to the sign bit of the input value. |
| void |
| Inst_SOP2__S_ASHR_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandI64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0)); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_BFM_B32::Inst_SOP2__S_BFM_B32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_bfm_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_BFM_B32 |
| |
| Inst_SOP2__S_BFM_B32::~Inst_SOP2__S_BFM_B32() |
| { |
| } // ~Inst_SOP2__S_BFM_B32 |
| |
| // D.u = ((1 << S0.u[4:0]) - 1) << S1.u[4:0] (bitfield mask). |
| void |
| Inst_SOP2__S_BFM_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = ((1 << bits(src0.rawData(), 4, 0)) - 1) |
| << bits(src1.rawData(), 4, 0); |
| |
| sdst.write(); |
| } |
| |
| Inst_SOP2__S_BFM_B64::Inst_SOP2__S_BFM_B64(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_bfm_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_BFM_B64 |
| |
| Inst_SOP2__S_BFM_B64::~Inst_SOP2__S_BFM_B64() |
| { |
| } // ~Inst_SOP2__S_BFM_B64 |
| |
| // D.u64 = ((1ULL << S0.u[5:0]) - 1) << S1.u[5:0] (bitfield mask). |
| void |
| Inst_SOP2__S_BFM_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = ((1ULL << bits(src0.rawData(), 5, 0)) - 1) |
| << bits(src1.rawData(), 5, 0); |
| |
| sdst.write(); |
| } |
| |
| Inst_SOP2__S_MUL_I32::Inst_SOP2__S_MUL_I32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_mul_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_MUL_I32 |
| |
| Inst_SOP2__S_MUL_I32::~Inst_SOP2__S_MUL_I32() |
| { |
| } // ~Inst_SOP2__S_MUL_I32 |
| |
| // D.i = S0.i * S1.i. |
| void |
| Inst_SOP2__S_MUL_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = src0.rawData() * src1.rawData(); |
| |
| sdst.write(); |
| } |
| |
| Inst_SOP2__S_BFE_U32::Inst_SOP2__S_BFE_U32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_bfe_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_BFE_U32 |
| |
| Inst_SOP2__S_BFE_U32::~Inst_SOP2__S_BFE_U32() |
| { |
| } // ~Inst_SOP2__S_BFE_U32 |
| |
| // Bit field extract. S0 is Data, S1[4:0] is field offset, S1[22:16] is |
| // field width. |
| // D.u = (S0.u >> S1.u[4:0]) & ((1 << S1.u[22:16]) - 1); |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_BFE_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0)) |
| & ((1 << bits(src1.rawData(), 22, 16)) - 1); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_BFE_I32::Inst_SOP2__S_BFE_I32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_bfe_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_BFE_I32 |
| |
| Inst_SOP2__S_BFE_I32::~Inst_SOP2__S_BFE_I32() |
| { |
| } // ~Inst_SOP2__S_BFE_I32 |
| |
| // Bit field extract. S0 is Data, S1[4:0] is field offset, S1[22:16] is |
| // field width. |
| // D.i = (S0.i >> S1.u[4:0]) & ((1 << S1.u[22:16]) - 1); |
| // Sign-extend the result; |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_BFE_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0)) |
| & ((1 << bits(src1.rawData(), 22, 16)) - 1); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_BFE_U64::Inst_SOP2__S_BFE_U64(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_bfe_u64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_BFE_U64 |
| |
| Inst_SOP2__S_BFE_U64::~Inst_SOP2__S_BFE_U64() |
| { |
| } // ~Inst_SOP2__S_BFE_U64 |
| |
| // Bit field extract. S0 is Data, S1[5:0] is field offset, S1[22:16] is |
| // field width. |
| // D.u64 = (S0.u64 >> S1.u[5:0]) & ((1 << S1.u[22:16]) - 1); |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_BFE_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0)) |
| & ((1 << bits(src1.rawData(), 22, 16)) - 1); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_BFE_I64::Inst_SOP2__S_BFE_I64(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_bfe_i64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_BFE_I64 |
| |
| Inst_SOP2__S_BFE_I64::~Inst_SOP2__S_BFE_I64() |
| { |
| } // ~Inst_SOP2__S_BFE_I64 |
| |
| // Bit field extract. S0 is Data, S1[5:0] is field offset, S1[22:16] is |
| // field width. |
| // D.i64 = (S0.i64 >> S1.u[5:0]) & ((1 << S1.u[22:16]) - 1); |
| // Sign-extend result; |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP2__S_BFE_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandI64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0)) |
| & ((1 << bits(src1.rawData(), 22, 16)) - 1); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_CBRANCH_G_FORK::Inst_SOP2__S_CBRANCH_G_FORK(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_cbranch_g_fork") |
| { |
| setFlag(Branch); |
| } // Inst_SOP2__S_CBRANCH_G_FORK |
| |
| Inst_SOP2__S_CBRANCH_G_FORK::~Inst_SOP2__S_CBRANCH_G_FORK() |
| { |
| } // ~Inst_SOP2__S_CBRANCH_G_FORK |
| |
| // Conditional branch using branch-stack. |
| // S0 = compare mask(vcc or any sgpr) and |
| // S1 = 64-bit byte address of target instruction. |
| void |
| Inst_SOP2__S_CBRANCH_G_FORK::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SOP2__S_ABSDIFF_I32::Inst_SOP2__S_ABSDIFF_I32(InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_absdiff_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP2__S_ABSDIFF_I32 |
| |
| Inst_SOP2__S_ABSDIFF_I32::~Inst_SOP2__S_ABSDIFF_I32() |
| { |
| } // ~Inst_SOP2__S_ABSDIFF_I32 |
| |
| // D.i = S0.i - S1.i; |
| // if (D.i < 0) then D.i = -D.i; |
| // SCC = 1 if result is non-zero. |
| // Compute the absolute value of difference between two values. |
| void |
| Inst_SOP2__S_ABSDIFF_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| sdst = std::abs(src0.rawData() - src1.rawData()); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP2__S_RFE_RESTORE_B64::Inst_SOP2__S_RFE_RESTORE_B64( |
| InFmt_SOP2 *iFmt) |
| : Inst_SOP2(iFmt, "s_rfe_restore_b64") |
| { |
| } // Inst_SOP2__S_RFE_RESTORE_B64 |
| |
| Inst_SOP2__S_RFE_RESTORE_B64::~Inst_SOP2__S_RFE_RESTORE_B64() |
| { |
| } // ~Inst_SOP2__S_RFE_RESTORE_B64 |
| |
| // Return from exception handler and continue. |
| void |
| Inst_SOP2__S_RFE_RESTORE_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SOPK__S_MOVK_I32::Inst_SOPK__S_MOVK_I32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_movk_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_MOVK_I32 |
| |
| Inst_SOPK__S_MOVK_I32::~Inst_SOPK__S_MOVK_I32() |
| { |
| } // ~Inst_SOPK__S_MOVK_I32 |
| |
| // D.i = signext(SIMM16) (sign extension). |
| void |
| Inst_SOPK__S_MOVK_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| |
| sdst = simm16; |
| |
| sdst.write(); |
| } |
| |
| Inst_SOPK__S_CMOVK_I32::Inst_SOPK__S_CMOVK_I32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_cmovk_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_CMOVK_I32 |
| |
| Inst_SOPK__S_CMOVK_I32::~Inst_SOPK__S_CMOVK_I32() |
| { |
| } // ~Inst_SOPK__S_CMOVK_I32 |
| |
| // if (SCC) then D.i = signext(SIMM16); |
| // else NOP. |
| // Conditional move with sign extension. |
| void |
| Inst_SOPK__S_CMOVK_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| scc.read(); |
| |
| if (scc.rawData()) { |
| sdst = simm16; |
| sdst.write(); |
| } |
| } |
| |
| Inst_SOPK__S_CMPK_EQ_I32::Inst_SOPK__S_CMPK_EQ_I32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_cmpk_eq_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_CMPK_EQ_I32 |
| |
| Inst_SOPK__S_CMPK_EQ_I32::~Inst_SOPK__S_CMPK_EQ_I32() |
| { |
| } // ~Inst_SOPK__S_CMPK_EQ_I32 |
| |
| // SCC = (S0.i == signext(SIMM16)). |
| void |
| Inst_SOPK__S_CMPK_EQ_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); |
| ConstScalarOperandI32 src(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| scc = (src.rawData() == simm16) ? 1 : 0; |
| |
| scc.write(); |
| } |
| |
| Inst_SOPK__S_CMPK_LG_I32::Inst_SOPK__S_CMPK_LG_I32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_cmpk_lg_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_CMPK_LG_I32 |
| |
| Inst_SOPK__S_CMPK_LG_I32::~Inst_SOPK__S_CMPK_LG_I32() |
| { |
| } // ~Inst_SOPK__S_CMPK_LG_I32 |
| |
| // SCC = (S0.i != signext(SIMM16)). |
| void |
| Inst_SOPK__S_CMPK_LG_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); |
| ConstScalarOperandI32 src(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| scc = (src.rawData() != simm16) ? 1 : 0; |
| |
| scc.write(); |
| } |
| |
| Inst_SOPK__S_CMPK_GT_I32::Inst_SOPK__S_CMPK_GT_I32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_cmpk_gt_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_CMPK_GT_I32 |
| |
| Inst_SOPK__S_CMPK_GT_I32::~Inst_SOPK__S_CMPK_GT_I32() |
| { |
| } // ~Inst_SOPK__S_CMPK_GT_I32 |
| |
| // SCC = (S0.i > signext(SIMM16)). |
| void |
| Inst_SOPK__S_CMPK_GT_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); |
| ConstScalarOperandI32 src(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| scc = (src.rawData() > simm16) ? 1 : 0; |
| |
| scc.write(); |
| } |
| |
| Inst_SOPK__S_CMPK_GE_I32::Inst_SOPK__S_CMPK_GE_I32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_cmpk_ge_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_CMPK_GE_I32 |
| |
| Inst_SOPK__S_CMPK_GE_I32::~Inst_SOPK__S_CMPK_GE_I32() |
| { |
| } // ~Inst_SOPK__S_CMPK_GE_I32 |
| |
| // SCC = (S0.i >= signext(SIMM16)). |
| void |
| Inst_SOPK__S_CMPK_GE_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); |
| ConstScalarOperandI32 src(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| scc = (src.rawData() >= simm16) ? 1 : 0; |
| |
| scc.write(); |
| } |
| |
| Inst_SOPK__S_CMPK_LT_I32::Inst_SOPK__S_CMPK_LT_I32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_cmpk_lt_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_CMPK_LT_I32 |
| |
| Inst_SOPK__S_CMPK_LT_I32::~Inst_SOPK__S_CMPK_LT_I32() |
| { |
| } // ~Inst_SOPK__S_CMPK_LT_I32 |
| |
| // SCC = (S0.i < signext(SIMM16)). |
| void |
| Inst_SOPK__S_CMPK_LT_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); |
| ConstScalarOperandI32 src(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| scc = (src.rawData() < simm16) ? 1 : 0; |
| |
| scc.write(); |
| } |
| |
| Inst_SOPK__S_CMPK_LE_I32::Inst_SOPK__S_CMPK_LE_I32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_cmpk_le_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_CMPK_LE_I32 |
| |
| Inst_SOPK__S_CMPK_LE_I32::~Inst_SOPK__S_CMPK_LE_I32() |
| { |
| } // ~Inst_SOPK__S_CMPK_LE_I32 |
| |
| // SCC = (S0.i <= signext(SIMM16)). |
| void |
| Inst_SOPK__S_CMPK_LE_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); |
| ConstScalarOperandI32 src(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| scc = (src.rawData() <= simm16) ? 1 : 0; |
| |
| scc.write(); |
| } |
| |
| Inst_SOPK__S_CMPK_EQ_U32::Inst_SOPK__S_CMPK_EQ_U32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_cmpk_eq_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_CMPK_EQ_U32 |
| |
| Inst_SOPK__S_CMPK_EQ_U32::~Inst_SOPK__S_CMPK_EQ_U32() |
| { |
| } // ~Inst_SOPK__S_CMPK_EQ_U32 |
| |
| // SCC = (S0.u == SIMM16). |
| void |
| Inst_SOPK__S_CMPK_EQ_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; |
| ConstScalarOperandU32 src(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| scc = (src.rawData() == simm16) ? 1 : 0; |
| |
| scc.write(); |
| } |
| |
| Inst_SOPK__S_CMPK_LG_U32::Inst_SOPK__S_CMPK_LG_U32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_cmpk_lg_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_CMPK_LG_U32 |
| |
| Inst_SOPK__S_CMPK_LG_U32::~Inst_SOPK__S_CMPK_LG_U32() |
| { |
| } // ~Inst_SOPK__S_CMPK_LG_U32 |
| |
| // SCC = (S0.u != SIMM16). |
| void |
| Inst_SOPK__S_CMPK_LG_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; |
| ConstScalarOperandU32 src(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| scc = (src.rawData() != simm16) ? 1 : 0; |
| |
| scc.write(); |
| } |
| |
| Inst_SOPK__S_CMPK_GT_U32::Inst_SOPK__S_CMPK_GT_U32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_cmpk_gt_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_CMPK_GT_U32 |
| |
| Inst_SOPK__S_CMPK_GT_U32::~Inst_SOPK__S_CMPK_GT_U32() |
| { |
| } // ~Inst_SOPK__S_CMPK_GT_U32 |
| |
| // SCC = (S0.u > SIMM16). |
| void |
| Inst_SOPK__S_CMPK_GT_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; |
| ConstScalarOperandU32 src(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| scc = (src.rawData() > simm16) ? 1 : 0; |
| |
| scc.write(); |
| } |
| |
| Inst_SOPK__S_CMPK_GE_U32::Inst_SOPK__S_CMPK_GE_U32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_cmpk_ge_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_CMPK_GE_U32 |
| |
| Inst_SOPK__S_CMPK_GE_U32::~Inst_SOPK__S_CMPK_GE_U32() |
| { |
| } // ~Inst_SOPK__S_CMPK_GE_U32 |
| |
| // SCC = (S0.u >= SIMM16). |
| void |
| Inst_SOPK__S_CMPK_GE_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; |
| ConstScalarOperandU32 src(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| scc = (src.rawData() >= simm16) ? 1 : 0; |
| |
| scc.write(); |
| } |
| |
| Inst_SOPK__S_CMPK_LT_U32::Inst_SOPK__S_CMPK_LT_U32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_cmpk_lt_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_CMPK_LT_U32 |
| |
| Inst_SOPK__S_CMPK_LT_U32::~Inst_SOPK__S_CMPK_LT_U32() |
| { |
| } // ~Inst_SOPK__S_CMPK_LT_U32 |
| |
| // SCC = (S0.u < SIMM16). |
| void |
| Inst_SOPK__S_CMPK_LT_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; |
| ConstScalarOperandU32 src(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| scc = (src.rawData() < simm16) ? 1 : 0; |
| |
| scc.write(); |
| } |
| |
| Inst_SOPK__S_CMPK_LE_U32::Inst_SOPK__S_CMPK_LE_U32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_cmpk_le_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_CMPK_LE_U32 |
| |
| Inst_SOPK__S_CMPK_LE_U32::~Inst_SOPK__S_CMPK_LE_U32() |
| { |
| } // ~Inst_SOPK__S_CMPK_LE_U32 |
| |
| // SCC = (S0.u <= SIMM16). |
| void |
| Inst_SOPK__S_CMPK_LE_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; |
| ConstScalarOperandU32 src(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| scc = (src.rawData() <= simm16) ? 1 : 0; |
| |
| scc.write(); |
| } |
| |
| Inst_SOPK__S_ADDK_I32::Inst_SOPK__S_ADDK_I32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_addk_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_ADDK_I32 |
| |
| Inst_SOPK__S_ADDK_I32::~Inst_SOPK__S_ADDK_I32() |
| { |
| } // ~Inst_SOPK__S_ADDK_I32 |
| |
| // D.i = D.i + signext(SIMM16); |
| // SCC = overflow. |
| void |
| Inst_SOPK__S_ADDK_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegI16 simm16 = instData.SIMM16; |
| ConstScalarOperandI32 src(gpuDynInst, instData.SDST); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = src.rawData() + (ScalarRegI32)sext<16>(simm16); |
| scc = (bits(src.rawData(), 31) == bits(simm16, 15) |
| && bits(src.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOPK__S_MULK_I32::Inst_SOPK__S_MULK_I32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_mulk_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_MULK_I32 |
| |
| Inst_SOPK__S_MULK_I32::~Inst_SOPK__S_MULK_I32() |
| { |
| } // ~Inst_SOPK__S_MULK_I32 |
| |
| // D.i = D.i * signext(SIMM16). |
| void |
| Inst_SOPK__S_MULK_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegI16 simm16 = instData.SIMM16; |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| |
| sdst.read(); |
| |
| sdst = sdst.rawData() * (ScalarRegI32)sext<16>(simm16); |
| |
| sdst.write(); |
| } |
| |
| Inst_SOPK__S_CBRANCH_I_FORK::Inst_SOPK__S_CBRANCH_I_FORK(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_cbranch_i_fork") |
| { |
| setFlag(Branch); |
| } // Inst_SOPK__S_CBRANCH_I_FORK |
| |
| Inst_SOPK__S_CBRANCH_I_FORK::~Inst_SOPK__S_CBRANCH_I_FORK() |
| { |
| } // ~Inst_SOPK__S_CBRANCH_I_FORK |
| |
| // Conditional branch using branch-stack. |
| // S0 = compare mask(vcc or any sgpr), and |
| // SIMM16 = signed DWORD branch offset relative to next instruction. |
| void |
| Inst_SOPK__S_CBRANCH_I_FORK::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SOPK__S_GETREG_B32::Inst_SOPK__S_GETREG_B32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_getreg_b32") |
| { |
| } // Inst_SOPK__S_GETREG_B32 |
| |
| Inst_SOPK__S_GETREG_B32::~Inst_SOPK__S_GETREG_B32() |
| { |
| } // ~Inst_SOPK__S_GETREG_B32 |
| |
| // D.u = hardware-reg. Read some or all of a hardware register into the |
| // LSBs of D. |
| // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size |
| // is 1..32. |
| void |
| Inst_SOPK__S_GETREG_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SOPK__S_SETREG_B32::Inst_SOPK__S_SETREG_B32(InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_setreg_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_SETREG_B32 |
| |
| Inst_SOPK__S_SETREG_B32::~Inst_SOPK__S_SETREG_B32() |
| { |
| } // ~Inst_SOPK__S_SETREG_B32 |
| |
| // hardware-reg = S0.u. Write some or all of the LSBs of D into a hardware |
| // register. |
| // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size |
| // is 1..32. |
| void |
| Inst_SOPK__S_SETREG_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegI16 simm16 = instData.SIMM16; |
| ScalarRegU32 hwregId = simm16 & 0x3f; |
| ScalarRegU32 offset = (simm16 >> 6) & 31; |
| ScalarRegU32 size = ((simm16 >> 11) & 31) + 1; |
| |
| ScalarOperandU32 hwreg(gpuDynInst, hwregId); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| hwreg.read(); |
| sdst.read(); |
| |
| // Store value from SDST to part of the hardware register. |
| ScalarRegU32 mask = (((1U << size) - 1U) << offset); |
| hwreg = ((hwreg.rawData() & ~mask) |
| | ((sdst.rawData() << offset) & mask)); |
| hwreg.write(); |
| |
| // set MODE register to control the behavior of single precision |
| // floating-point numbers: denormal mode or round mode |
| if (hwregId==1 && size==2 |
| && (offset==4 || offset==0)) { |
| warn_once("Be cautious that s_setreg_b32 has no real effect " |
| "on FP modes: %s\n", gpuDynInst->disassemble()); |
| return; |
| } |
| |
| // panic if not changing MODE of floating-point numbers |
| panicUnimplemented(); |
| } |
| |
| Inst_SOPK__S_SETREG_IMM32_B32::Inst_SOPK__S_SETREG_IMM32_B32( |
| InFmt_SOPK *iFmt) |
| : Inst_SOPK(iFmt, "s_setreg_imm32_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPK__S_SETREG_IMM32_B32 |
| |
| Inst_SOPK__S_SETREG_IMM32_B32::~Inst_SOPK__S_SETREG_IMM32_B32() |
| { |
| } // ~Inst_SOPK__S_SETREG_IMM32_B32 |
| |
| // Write some or all of the LSBs of IMM32 into a hardware register; this |
| // instruction requires a 32-bit literal constant. |
| // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size |
| // is 1..32. |
| void |
| Inst_SOPK__S_SETREG_IMM32_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegI16 simm16 = instData.SIMM16; |
| ScalarRegU32 hwregId = simm16 & 0x3f; |
| ScalarRegU32 offset = (simm16 >> 6) & 31; |
| ScalarRegU32 size = ((simm16 >> 11) & 31) + 1; |
| |
| ScalarOperandU32 hwreg(gpuDynInst, hwregId); |
| ScalarRegU32 simm32 = extData.imm_u32; |
| hwreg.read(); |
| |
| ScalarRegU32 mask = (((1U << size) - 1U) << offset); |
| hwreg = ((hwreg.rawData() & ~mask) |
| | ((simm32 << offset) & mask)); |
| hwreg.write(); |
| |
| if (hwregId==1 && size==2 |
| && (offset==4 || offset==0)) { |
| warn_once("Be cautious that s_setreg_imm32_b32 has no real effect " |
| "on FP modes: %s\n", gpuDynInst->disassemble()); |
| return; |
| } |
| |
| // panic if not changing MODE of floating-point numbers |
| panicUnimplemented(); |
| } |
| |
| Inst_SOP1__S_MOV_B32::Inst_SOP1__S_MOV_B32(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_mov_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_MOV_B32 |
| |
| Inst_SOP1__S_MOV_B32::~Inst_SOP1__S_MOV_B32() |
| { |
| } // ~Inst_SOP1__S_MOV_B32 |
| |
| // D.u = S0.u. |
| void |
| Inst_SOP1__S_MOV_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = src.rawData(); |
| |
| sdst.write(); |
| } |
| |
| Inst_SOP1__S_MOV_B64::Inst_SOP1__S_MOV_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_mov_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_MOV_B64 |
| |
| Inst_SOP1__S_MOV_B64::~Inst_SOP1__S_MOV_B64() |
| { |
| } // ~Inst_SOP1__S_MOV_B64 |
| |
| // D.u64 = S0.u64. |
| void |
| Inst_SOP1__S_MOV_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = src.rawData(); |
| |
| sdst.write(); |
| } |
| |
| Inst_SOP1__S_CMOV_B32::Inst_SOP1__S_CMOV_B32(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_cmov_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_CMOV_B32 |
| |
| Inst_SOP1__S_CMOV_B32::~Inst_SOP1__S_CMOV_B32() |
| { |
| } // ~Inst_SOP1__S_CMOV_B32 |
| |
| // if (SCC) then D.u = S0.u; |
| // else NOP. |
| // Conditional move. |
| void |
| Inst_SOP1__S_CMOV_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| scc.read(); |
| |
| if (scc.rawData()) { |
| sdst = src.rawData(); |
| sdst.write(); |
| } |
| } |
| |
| Inst_SOP1__S_CMOV_B64::Inst_SOP1__S_CMOV_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_cmov_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_CMOV_B64 |
| |
| Inst_SOP1__S_CMOV_B64::~Inst_SOP1__S_CMOV_B64() |
| { |
| } // ~Inst_SOP1__S_CMOV_B64 |
| |
| // if (SCC) then D.u64 = S0.u64; |
| // else NOP. |
| // Conditional move. |
| void |
| Inst_SOP1__S_CMOV_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| scc.read(); |
| |
| if (scc.rawData()) { |
| sdst = src.rawData(); |
| sdst.write(); |
| } |
| } |
| |
| Inst_SOP1__S_NOT_B32::Inst_SOP1__S_NOT_B32(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_not_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_NOT_B32 |
| |
| Inst_SOP1__S_NOT_B32::~Inst_SOP1__S_NOT_B32() |
| { |
| } // ~Inst_SOP1__S_NOT_B32 |
| |
| // D.u = ~S0.u; |
| // SCC = 1 if result is non-zero. |
| // Bitwise negation. |
| void |
| Inst_SOP1__S_NOT_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = ~src.rawData(); |
| |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP1__S_NOT_B64::Inst_SOP1__S_NOT_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_not_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_NOT_B64 |
| |
| Inst_SOP1__S_NOT_B64::~Inst_SOP1__S_NOT_B64() |
| { |
| } // ~Inst_SOP1__S_NOT_B64 |
| |
| // D.u64 = ~S0.u64; |
| // SCC = 1 if result is non-zero. |
| // Bitwise negation. |
| void |
| Inst_SOP1__S_NOT_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = ~src.rawData(); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP1__S_WQM_B32::Inst_SOP1__S_WQM_B32(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_wqm_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_WQM_B32 |
| |
| Inst_SOP1__S_WQM_B32::~Inst_SOP1__S_WQM_B32() |
| { |
| } // ~Inst_SOP1__S_WQM_B32 |
| |
| // Computes whole quad mode for an active/valid mask. |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP1__S_WQM_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = wholeQuadMode(src.rawData()); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP1__S_WQM_B64::Inst_SOP1__S_WQM_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_wqm_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_WQM_B64 |
| |
| Inst_SOP1__S_WQM_B64::~Inst_SOP1__S_WQM_B64() |
| { |
| } // ~Inst_SOP1__S_WQM_B64 |
| |
| // Computes whole quad mode for an active/valid mask. |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP1__S_WQM_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = wholeQuadMode(src.rawData()); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP1__S_BREV_B32::Inst_SOP1__S_BREV_B32(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_brev_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_BREV_B32 |
| |
| Inst_SOP1__S_BREV_B32::~Inst_SOP1__S_BREV_B32() |
| { |
| } // ~Inst_SOP1__S_BREV_B32 |
| |
| // D.u[31:0] = S0.u[0:31] (reverse bits). |
| void |
| Inst_SOP1__S_BREV_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = reverseBits(src.rawData()); |
| |
| sdst.write(); |
| } |
| |
| Inst_SOP1__S_BREV_B64::Inst_SOP1__S_BREV_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_brev_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_BREV_B64 |
| |
| Inst_SOP1__S_BREV_B64::~Inst_SOP1__S_BREV_B64() |
| { |
| } // ~Inst_SOP1__S_BREV_B64 |
| |
| // D.u64[63:0] = S0.u64[0:63] (reverse bits). |
| void |
| Inst_SOP1__S_BREV_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = reverseBits(src.rawData()); |
| |
| sdst.write(); |
| } |
| |
| Inst_SOP1__S_BCNT0_I32_B32::Inst_SOP1__S_BCNT0_I32_B32(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_bcnt0_i32_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_BCNT0_I32_B32 |
| |
| Inst_SOP1__S_BCNT0_I32_B32::~Inst_SOP1__S_BCNT0_I32_B32() |
| { |
| } // ~Inst_SOP1__S_BCNT0_I32_B32 |
| |
| // D.i = CountZeroBits(S0.u); |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP1__S_BCNT0_I32_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = countZeroBits(src.rawData()); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP1__S_BCNT0_I32_B64::Inst_SOP1__S_BCNT0_I32_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_bcnt0_i32_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_BCNT0_I32_B64 |
| |
| Inst_SOP1__S_BCNT0_I32_B64::~Inst_SOP1__S_BCNT0_I32_B64() |
| { |
| } // ~Inst_SOP1__S_BCNT0_I32_B64 |
| |
| // D.i = CountZeroBits(S0.u64); |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP1__S_BCNT0_I32_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = countZeroBits(src.rawData()); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP1__S_BCNT1_I32_B32::Inst_SOP1__S_BCNT1_I32_B32(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_bcnt1_i32_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_BCNT1_I32_B32 |
| |
| Inst_SOP1__S_BCNT1_I32_B32::~Inst_SOP1__S_BCNT1_I32_B32() |
| { |
| } // ~Inst_SOP1__S_BCNT1_I32_B32 |
| |
| // D.i = CountOneBits(S0.u); |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP1__S_BCNT1_I32_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = popCount(src.rawData()); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP1__S_BCNT1_I32_B64::Inst_SOP1__S_BCNT1_I32_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_bcnt1_i32_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_BCNT1_I32_B64 |
| |
| Inst_SOP1__S_BCNT1_I32_B64::~Inst_SOP1__S_BCNT1_I32_B64() |
| { |
| } // ~Inst_SOP1__S_BCNT1_I32_B64 |
| |
| // D.i = CountOneBits(S0.u64); |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP1__S_BCNT1_I32_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = popCount(src.rawData()); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP1__S_FF0_I32_B32::Inst_SOP1__S_FF0_I32_B32(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_ff0_i32_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_FF0_I32_B32 |
| |
| Inst_SOP1__S_FF0_I32_B32::~Inst_SOP1__S_FF0_I32_B32() |
| { |
| } // ~Inst_SOP1__S_FF0_I32_B32 |
| |
| // D.i = FindFirstZero(S0.u); |
| // If no zeros are found, return -1. |
| // Returns the bit position of the first zero from the LSB. |
| void |
| Inst_SOP1__S_FF0_I32_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = findFirstZero(src.rawData()); |
| |
| sdst.write(); |
| } |
| |
| Inst_SOP1__S_FF0_I32_B64::Inst_SOP1__S_FF0_I32_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_ff0_i32_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_FF0_I32_B64 |
| |
| Inst_SOP1__S_FF0_I32_B64::~Inst_SOP1__S_FF0_I32_B64() |
| { |
| } // ~Inst_SOP1__S_FF0_I32_B64 |
| |
| // D.i = FindFirstZero(S0.u64); |
| // If no zeros are found, return -1. |
| // Returns the bit position of the first zero from the LSB. |
| void |
| Inst_SOP1__S_FF0_I32_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = findFirstZero(src.rawData()); |
| |
| sdst.write(); |
| } |
| |
| Inst_SOP1__S_FF1_I32_B32::Inst_SOP1__S_FF1_I32_B32(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_ff1_i32_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_FF1_I32_B32 |
| |
| Inst_SOP1__S_FF1_I32_B32::~Inst_SOP1__S_FF1_I32_B32() |
| { |
| } // ~Inst_SOP1__S_FF1_I32_B32 |
| |
| // D.i = FindFirstOne(S0.u); |
| // If no ones are found, return -1. |
| // Returns the bit position of the first one from the LSB. |
| void |
| Inst_SOP1__S_FF1_I32_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = findFirstOne(src.rawData()); |
| |
| sdst.write(); |
| } |
| |
| Inst_SOP1__S_FF1_I32_B64::Inst_SOP1__S_FF1_I32_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_ff1_i32_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_FF1_I32_B64 |
| |
| Inst_SOP1__S_FF1_I32_B64::~Inst_SOP1__S_FF1_I32_B64() |
| { |
| } // ~Inst_SOP1__S_FF1_I32_B64 |
| |
| // D.i = FindFirstOne(S0.u64); |
| // If no ones are found, return -1. |
| // Returns the bit position of the first one from the LSB. |
| void |
| Inst_SOP1__S_FF1_I32_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = findFirstOne(src.rawData()); |
| |
| sdst.write(); |
| } |
| |
| Inst_SOP1__S_FLBIT_I32_B32::Inst_SOP1__S_FLBIT_I32_B32(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_flbit_i32_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_FLBIT_I32_B32 |
| |
| Inst_SOP1__S_FLBIT_I32_B32::~Inst_SOP1__S_FLBIT_I32_B32() |
| { |
| } // ~Inst_SOP1__S_FLBIT_I32_B32 |
| |
| // D.i = FindFirstOne(S0.u); |
| // If no ones are found, return -1. |
| // Counts how many zeros before the first one starting from the MSB. |
| void |
| Inst_SOP1__S_FLBIT_I32_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = countZeroBitsMsb(src.rawData()); |
| |
| sdst.write(); |
| } |
| |
| Inst_SOP1__S_FLBIT_I32_B64::Inst_SOP1__S_FLBIT_I32_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_flbit_i32_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_FLBIT_I32_B64 |
| |
| Inst_SOP1__S_FLBIT_I32_B64::~Inst_SOP1__S_FLBIT_I32_B64() |
| { |
| } // ~Inst_SOP1__S_FLBIT_I32_B64 |
| |
| // D.i = FindFirstOne(S0.u64); |
| // If no ones are found, return -1. |
| // Counts how many zeros before the first one starting from the MSB. |
| void |
| Inst_SOP1__S_FLBIT_I32_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = countZeroBitsMsb(src.rawData()); |
| |
| sdst.write(); |
| } |
| |
| Inst_SOP1__S_FLBIT_I32::Inst_SOP1__S_FLBIT_I32(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_flbit_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_FLBIT_I32 |
| |
| Inst_SOP1__S_FLBIT_I32::~Inst_SOP1__S_FLBIT_I32() |
| { |
| } // ~Inst_SOP1__S_FLBIT_I32 |
| |
| // D.i = FirstOppositeSignBit(S0.i); |
| // If S0.i == 0 or S0.i == -1 (all bits are the same), return -1. |
| // Counts how many bits in a row (from MSB to LSB) are the same as the |
| // sign bit. |
| void |
| Inst_SOP1__S_FLBIT_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = firstOppositeSignBit(src.rawData()); |
| |
| sdst.write(); |
| } |
| |
| Inst_SOP1__S_FLBIT_I32_I64::Inst_SOP1__S_FLBIT_I32_I64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_flbit_i32_i64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_FLBIT_I32_I64 |
| |
| Inst_SOP1__S_FLBIT_I32_I64::~Inst_SOP1__S_FLBIT_I32_I64() |
| { |
| } // ~Inst_SOP1__S_FLBIT_I32_I64 |
| |
| // D.i = FirstOppositeSignBit(S0.i64); |
| // If S0.i == 0 or S0.i == -1 (all bits are the same), return -1. |
| // Counts how many bits in a row (from MSB to LSB) are the same as the |
| // sign bit. |
| void |
| Inst_SOP1__S_FLBIT_I32_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = firstOppositeSignBit(src.rawData()); |
| |
| sdst.write(); |
| } |
| |
| Inst_SOP1__S_SEXT_I32_I8::Inst_SOP1__S_SEXT_I32_I8(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_sext_i32_i8") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_SEXT_I32_I8 |
| |
| Inst_SOP1__S_SEXT_I32_I8::~Inst_SOP1__S_SEXT_I32_I8() |
| { |
| } // ~Inst_SOP1__S_SEXT_I32_I8 |
| |
| // D.i = signext(S0.i[7:0]) (sign extension). |
| void |
| Inst_SOP1__S_SEXT_I32_I8::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = sext<std::numeric_limits<ScalarRegI8>::digits>( |
| bits(src.rawData(), 7, 0)); |
| |
| sdst.write(); |
| } |
| |
| Inst_SOP1__S_SEXT_I32_I16::Inst_SOP1__S_SEXT_I32_I16(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_sext_i32_i16") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_SEXT_I32_I16 |
| |
| Inst_SOP1__S_SEXT_I32_I16::~Inst_SOP1__S_SEXT_I32_I16() |
| { |
| } // ~Inst_SOP1__S_SEXT_I32_I16 |
| |
| // D.i = signext(S0.i[15:0]) (sign extension). |
| void |
| Inst_SOP1__S_SEXT_I32_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = sext<std::numeric_limits<ScalarRegI16>::digits>( |
| bits(src.rawData(), 15, 0)); |
| |
| sdst.write(); |
| } |
| |
| Inst_SOP1__S_BITSET0_B32::Inst_SOP1__S_BITSET0_B32(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_bitset0_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_BITSET0_B32 |
| |
| Inst_SOP1__S_BITSET0_B32::~Inst_SOP1__S_BITSET0_B32() |
| { |
| } // ~Inst_SOP1__S_BITSET0_B32 |
| |
| // D.u[S0.u[4:0]] = 0. |
| void |
| Inst_SOP1__S_BITSET0_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst.setBit(bits(src.rawData(), 4, 0), 0); |
| |
| sdst.write(); |
| } |
| |
| Inst_SOP1__S_BITSET0_B64::Inst_SOP1__S_BITSET0_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_bitset0_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_BITSET0_B64 |
| |
| Inst_SOP1__S_BITSET0_B64::~Inst_SOP1__S_BITSET0_B64() |
| { |
| } // ~Inst_SOP1__S_BITSET0_B64 |
| |
| // D.u64[S0.u[5:0]] = 0. |
| void |
| Inst_SOP1__S_BITSET0_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst.setBit(bits(src.rawData(), 5, 0), 0); |
| |
| sdst.write(); |
| } |
| |
| Inst_SOP1__S_BITSET1_B32::Inst_SOP1__S_BITSET1_B32(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_bitset1_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_BITSET1_B32 |
| |
| Inst_SOP1__S_BITSET1_B32::~Inst_SOP1__S_BITSET1_B32() |
| { |
| } // ~Inst_SOP1__S_BITSET1_B32 |
| |
| // D.u[S0.u[4:0]] = 1. |
| void |
| Inst_SOP1__S_BITSET1_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst.setBit(bits(src.rawData(), 4, 0), 1); |
| |
| sdst.write(); |
| } |
| |
| Inst_SOP1__S_BITSET1_B64::Inst_SOP1__S_BITSET1_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_bitset1_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_BITSET1_B64 |
| |
| Inst_SOP1__S_BITSET1_B64::~Inst_SOP1__S_BITSET1_B64() |
| { |
| } // ~Inst_SOP1__S_BITSET1_B64 |
| |
| // D.u64[S0.u[5:0]] = 1. |
| void |
| Inst_SOP1__S_BITSET1_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst.setBit(bits(src.rawData(), 5, 0), 1); |
| |
| sdst.write(); |
| } |
| |
| Inst_SOP1__S_GETPC_B64::Inst_SOP1__S_GETPC_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_getpc_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_GETPC_B64 |
| |
| Inst_SOP1__S_GETPC_B64::~Inst_SOP1__S_GETPC_B64() |
| { |
| } // ~Inst_SOP1__S_GETPC_B64 |
| |
| // D.u64 = PC + 4. |
| // Destination receives the byte address of the next instruction. |
| void |
| Inst_SOP1__S_GETPC_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| Addr pc = wf->pc(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| |
| sdst = pc + 4; |
| |
| sdst.write(); |
| } |
| |
| Inst_SOP1__S_SETPC_B64::Inst_SOP1__S_SETPC_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_setpc_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_SETPC_B64 |
| |
| Inst_SOP1__S_SETPC_B64::~Inst_SOP1__S_SETPC_B64() |
| { |
| } // ~Inst_SOP1__S_SETPC_B64 |
| |
| // PC = S0.u64. |
| // S0.u64 is a byte address of the instruction to jump to. |
| void |
| Inst_SOP1__S_SETPC_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| |
| src.read(); |
| |
| wf->pc(src.rawData()); |
| } |
| |
| Inst_SOP1__S_SWAPPC_B64::Inst_SOP1__S_SWAPPC_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_swappc_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_SWAPPC_B64 |
| |
| Inst_SOP1__S_SWAPPC_B64::~Inst_SOP1__S_SWAPPC_B64() |
| { |
| } // ~Inst_SOP1__S_SWAPPC_B64 |
| |
| // D.u64 = PC + 4; PC = S0.u64. |
| // S0.u64 is a byte address of the instruction to jump to. |
| void |
| Inst_SOP1__S_SWAPPC_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| Addr pc = wf->pc(); |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = pc + 4; |
| |
| wf->pc(src.rawData()); |
| sdst.write(); |
| } |
| |
| Inst_SOP1__S_RFE_B64::Inst_SOP1__S_RFE_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_rfe_b64") |
| { |
| } // Inst_SOP1__S_RFE_B64 |
| |
| Inst_SOP1__S_RFE_B64::~Inst_SOP1__S_RFE_B64() |
| { |
| } // ~Inst_SOP1__S_RFE_B64 |
| |
| // Return from exception handler and continue. |
| void |
| Inst_SOP1__S_RFE_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SOP1__S_AND_SAVEEXEC_B64::Inst_SOP1__S_AND_SAVEEXEC_B64( |
| InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_and_saveexec_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_AND_SAVEEXEC_B64 |
| |
| Inst_SOP1__S_AND_SAVEEXEC_B64::~Inst_SOP1__S_AND_SAVEEXEC_B64() |
| { |
| } // ~Inst_SOP1__S_AND_SAVEEXEC_B64 |
| |
| // D.u64 = EXEC; |
| // EXEC = S0.u64 & EXEC; |
| // SCC = 1 if the new value of EXEC is non-zero. |
| void |
| Inst_SOP1__S_AND_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = wf->execMask().to_ullong(); |
| wf->execMask() = src.rawData() & wf->execMask().to_ullong(); |
| scc = wf->execMask().any() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP1__S_OR_SAVEEXEC_B64::Inst_SOP1__S_OR_SAVEEXEC_B64( |
| InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_or_saveexec_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_OR_SAVEEXEC_B64 |
| |
| Inst_SOP1__S_OR_SAVEEXEC_B64::~Inst_SOP1__S_OR_SAVEEXEC_B64() |
| { |
| } // ~Inst_SOP1__S_OR_SAVEEXEC_B64 |
| |
| // D.u64 = EXEC; |
| // EXEC = S0.u64 | EXEC; |
| // SCC = 1 if the new value of EXEC is non-zero. |
| void |
| Inst_SOP1__S_OR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = wf->execMask().to_ullong(); |
| wf->execMask() = src.rawData() | wf->execMask().to_ullong(); |
| scc = wf->execMask().any() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP1__S_XOR_SAVEEXEC_B64::Inst_SOP1__S_XOR_SAVEEXEC_B64( |
| InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_xor_saveexec_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_XOR_SAVEEXEC_B64 |
| |
| Inst_SOP1__S_XOR_SAVEEXEC_B64::~Inst_SOP1__S_XOR_SAVEEXEC_B64() |
| { |
| } // ~Inst_SOP1__S_XOR_SAVEEXEC_B64 |
| |
| // D.u64 = EXEC; |
| // EXEC = S0.u64 ^ EXEC; |
| // SCC = 1 if the new value of EXEC is non-zero. |
| void |
| Inst_SOP1__S_XOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = wf->execMask().to_ullong(); |
| wf->execMask() = src.rawData() ^ wf->execMask().to_ullong(); |
| scc = wf->execMask().any() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP1__S_ANDN2_SAVEEXEC_B64::Inst_SOP1__S_ANDN2_SAVEEXEC_B64( |
| InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_andn2_saveexec_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_ANDN2_SAVEEXEC_B64 |
| |
| Inst_SOP1__S_ANDN2_SAVEEXEC_B64::~Inst_SOP1__S_ANDN2_SAVEEXEC_B64() |
| { |
| } // ~Inst_SOP1__S_ANDN2_SAVEEXEC_B64 |
| |
| // D.u64 = EXEC; |
| // EXEC = S0.u64 & ~EXEC; |
| // SCC = 1 if the new value of EXEC is non-zero. |
| void |
| Inst_SOP1__S_ANDN2_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = wf->execMask().to_ullong(); |
| wf->execMask() = src.rawData() &~ wf->execMask().to_ullong(); |
| scc = wf->execMask().any() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP1__S_ORN2_SAVEEXEC_B64::Inst_SOP1__S_ORN2_SAVEEXEC_B64( |
| InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_orn2_saveexec_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_ORN2_SAVEEXEC_B64 |
| |
| Inst_SOP1__S_ORN2_SAVEEXEC_B64::~Inst_SOP1__S_ORN2_SAVEEXEC_B64() |
| { |
| } // ~Inst_SOP1__S_ORN2_SAVEEXEC_B64 |
| |
| // D.u64 = EXEC; |
| // EXEC = S0.u64 | ~EXEC; |
| // SCC = 1 if the new value of EXEC is non-zero. |
| void |
| Inst_SOP1__S_ORN2_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = wf->execMask().to_ullong(); |
| wf->execMask() = src.rawData() |~ wf->execMask().to_ullong(); |
| scc = wf->execMask().any() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP1__S_NAND_SAVEEXEC_B64::Inst_SOP1__S_NAND_SAVEEXEC_B64( |
| InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_nand_saveexec_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_NAND_SAVEEXEC_B64 |
| |
| Inst_SOP1__S_NAND_SAVEEXEC_B64::~Inst_SOP1__S_NAND_SAVEEXEC_B64() |
| { |
| } // ~Inst_SOP1__S_NAND_SAVEEXEC_B64 |
| |
| // D.u64 = EXEC; |
| // EXEC = ~(S0.u64 & EXEC); |
| // SCC = 1 if the new value of EXEC is non-zero. |
| void |
| Inst_SOP1__S_NAND_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = wf->execMask().to_ullong(); |
| wf->execMask() = ~(src.rawData() & wf->execMask().to_ullong()); |
| scc = wf->execMask().any() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP1__S_NOR_SAVEEXEC_B64::Inst_SOP1__S_NOR_SAVEEXEC_B64( |
| InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_nor_saveexec_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_NOR_SAVEEXEC_B64 |
| |
| Inst_SOP1__S_NOR_SAVEEXEC_B64::~Inst_SOP1__S_NOR_SAVEEXEC_B64() |
| { |
| } // ~Inst_SOP1__S_NOR_SAVEEXEC_B64 |
| |
| // D.u64 = EXEC; |
| // EXEC = ~(S0.u64 | EXEC); |
| // SCC = 1 if the new value of EXEC is non-zero. |
| void |
| Inst_SOP1__S_NOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = wf->execMask().to_ullong(); |
| wf->execMask() = ~(src.rawData() | wf->execMask().to_ullong()); |
| scc = wf->execMask().any() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP1__S_XNOR_SAVEEXEC_B64::Inst_SOP1__S_XNOR_SAVEEXEC_B64( |
| InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_xnor_saveexec_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_XNOR_SAVEEXEC_B64 |
| |
| Inst_SOP1__S_XNOR_SAVEEXEC_B64::~Inst_SOP1__S_XNOR_SAVEEXEC_B64() |
| { |
| } // ~Inst_SOP1__S_XNOR_SAVEEXEC_B64 |
| |
| // D.u64 = EXEC; |
| // EXEC = ~(S0.u64 ^ EXEC); |
| // SCC = 1 if the new value of EXEC is non-zero. |
| void |
| Inst_SOP1__S_XNOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = wf->execMask().to_ullong(); |
| wf->execMask() = ~(src.rawData() ^ wf->execMask().to_ullong()); |
| scc = wf->execMask().any() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP1__S_QUADMASK_B32::Inst_SOP1__S_QUADMASK_B32(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_quadmask_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_QUADMASK_B32 |
| |
| Inst_SOP1__S_QUADMASK_B32::~Inst_SOP1__S_QUADMASK_B32() |
| { |
| } // ~Inst_SOP1__S_QUADMASK_B32 |
| |
| // D.u = QuadMask(S0.u): |
| // D[0] = OR(S0[3:0]), D[1] = OR(S0[7:4]) ... D[31:8] = 0; |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP1__S_QUADMASK_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = quadMask(src.rawData()); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP1__S_QUADMASK_B64::Inst_SOP1__S_QUADMASK_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_quadmask_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_QUADMASK_B64 |
| |
| Inst_SOP1__S_QUADMASK_B64::~Inst_SOP1__S_QUADMASK_B64() |
| { |
| } // ~Inst_SOP1__S_QUADMASK_B64 |
| |
| // D.u64 = QuadMask(S0.u64): |
| // D[0] = OR(S0[3:0]), D[1] = OR(S0[7:4]) ... D[63:16] = 0; |
| // SCC = 1 if result is non-zero. |
| void |
| Inst_SOP1__S_QUADMASK_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = quadMask(src.rawData()); |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP1__S_MOVRELS_B32::Inst_SOP1__S_MOVRELS_B32(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_movrels_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_MOVRELS_B32 |
| |
| Inst_SOP1__S_MOVRELS_B32::~Inst_SOP1__S_MOVRELS_B32() |
| { |
| } // ~Inst_SOP1__S_MOVRELS_B32 |
| |
| // D.u = SGPR[S0.u + M0.u].u (move from relative source). |
| void |
| Inst_SOP1__S_MOVRELS_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 m0(gpuDynInst, REG_M0); |
| m0.read(); |
| ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0 + m0.rawData()); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = src.rawData(); |
| |
| sdst.write(); |
| } |
| |
| Inst_SOP1__S_MOVRELS_B64::Inst_SOP1__S_MOVRELS_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_movrels_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_MOVRELS_B64 |
| |
| Inst_SOP1__S_MOVRELS_B64::~Inst_SOP1__S_MOVRELS_B64() |
| { |
| } // ~Inst_SOP1__S_MOVRELS_B64 |
| |
| // D.u64 = SGPR[S0.u + M0.u].u64 (move from relative source). |
| // The index in M0.u must be even for this operation. |
| void |
| Inst_SOP1__S_MOVRELS_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 m0(gpuDynInst, REG_M0); |
| m0.read(); |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0 + m0.rawData()); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| |
| src.read(); |
| |
| sdst = src.rawData(); |
| |
| sdst.write(); |
| } |
| |
| Inst_SOP1__S_MOVRELD_B32::Inst_SOP1__S_MOVRELD_B32(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_movreld_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_MOVRELD_B32 |
| |
| Inst_SOP1__S_MOVRELD_B32::~Inst_SOP1__S_MOVRELD_B32() |
| { |
| } // ~Inst_SOP1__S_MOVRELD_B32 |
| |
| // SGPR[D.u + M0.u].u = S0.u (move to relative destination). |
| void |
| Inst_SOP1__S_MOVRELD_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 m0(gpuDynInst, REG_M0); |
| m0.read(); |
| ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDST + m0.rawData()); |
| |
| src.read(); |
| |
| sdst = src.rawData(); |
| |
| sdst.write(); |
| } |
| |
| Inst_SOP1__S_MOVRELD_B64::Inst_SOP1__S_MOVRELD_B64(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_movreld_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_MOVRELD_B64 |
| |
| Inst_SOP1__S_MOVRELD_B64::~Inst_SOP1__S_MOVRELD_B64() |
| { |
| } // ~Inst_SOP1__S_MOVRELD_B64 |
| |
| // SGPR[D.u + M0.u].u64 = S0.u64 (move to relative destination). |
| // The index in M0.u must be even for this operation. |
| void |
| Inst_SOP1__S_MOVRELD_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 m0(gpuDynInst, REG_M0); |
| m0.read(); |
| ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST + m0.rawData()); |
| |
| src.read(); |
| |
| sdst = src.rawData(); |
| |
| sdst.write(); |
| } |
| |
| Inst_SOP1__S_CBRANCH_JOIN::Inst_SOP1__S_CBRANCH_JOIN(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_cbranch_join") |
| { |
| setFlag(Branch); |
| } // Inst_SOP1__S_CBRANCH_JOIN |
| |
| Inst_SOP1__S_CBRANCH_JOIN::~Inst_SOP1__S_CBRANCH_JOIN() |
| { |
| } // ~Inst_SOP1__S_CBRANCH_JOIN |
| |
| // Conditional branch join point (end of conditional branch block). |
| void |
| Inst_SOP1__S_CBRANCH_JOIN::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SOP1__S_ABS_I32::Inst_SOP1__S_ABS_I32(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_abs_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_ABS_I32 |
| |
| Inst_SOP1__S_ABS_I32::~Inst_SOP1__S_ABS_I32() |
| { |
| } // ~Inst_SOP1__S_ABS_I32 |
| |
| // if (S.i < 0) then D.i = -S.i; |
| // else D.i = S.i; |
| // SCC = 1 if result is non-zero. |
| // Integer absolute value. |
| void |
| Inst_SOP1__S_ABS_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0); |
| ScalarOperandI32 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src.read(); |
| |
| sdst = std::abs(src.rawData()); |
| |
| scc = sdst.rawData() ? 1 : 0; |
| |
| sdst.write(); |
| scc.write(); |
| } |
| |
| Inst_SOP1__S_MOV_FED_B32::Inst_SOP1__S_MOV_FED_B32(InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_mov_fed_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOP1__S_MOV_FED_B32 |
| |
| Inst_SOP1__S_MOV_FED_B32::~Inst_SOP1__S_MOV_FED_B32() |
| { |
| } // ~Inst_SOP1__S_MOV_FED_B32 |
| |
| // D.u = S0.u. |
| void |
| Inst_SOP1__S_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SOP1__S_SET_GPR_IDX_IDX::Inst_SOP1__S_SET_GPR_IDX_IDX( |
| InFmt_SOP1 *iFmt) |
| : Inst_SOP1(iFmt, "s_set_gpr_idx_idx") |
| { |
| } // Inst_SOP1__S_SET_GPR_IDX_IDX |
| |
| Inst_SOP1__S_SET_GPR_IDX_IDX::~Inst_SOP1__S_SET_GPR_IDX_IDX() |
| { |
| } // ~Inst_SOP1__S_SET_GPR_IDX_IDX |
| |
| // M0[7:0] = S0.u[7:0]. |
| // Modify the index used in vector GPR indexing. |
| void |
| Inst_SOP1__S_SET_GPR_IDX_IDX::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SOPC__S_CMP_EQ_I32::Inst_SOPC__S_CMP_EQ_I32(InFmt_SOPC *iFmt) |
| : Inst_SOPC(iFmt, "s_cmp_eq_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPC__S_CMP_EQ_I32 |
| |
| Inst_SOPC__S_CMP_EQ_I32::~Inst_SOPC__S_CMP_EQ_I32() |
| { |
| } // ~Inst_SOPC__S_CMP_EQ_I32 |
| |
| // SCC = (S0.i == S1.i). |
| void |
| Inst_SOPC__S_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| scc = (src0.rawData() == src1.rawData()) ? 1 : 0; |
| |
| scc.write(); |
| } |
| |
| Inst_SOPC__S_CMP_LG_I32::Inst_SOPC__S_CMP_LG_I32(InFmt_SOPC *iFmt) |
| : Inst_SOPC(iFmt, "s_cmp_lg_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPC__S_CMP_LG_I32 |
| |
| Inst_SOPC__S_CMP_LG_I32::~Inst_SOPC__S_CMP_LG_I32() |
| { |
| } // ~Inst_SOPC__S_CMP_LG_I32 |
| |
| // SCC = (S0.i != S1.i). |
| void |
| Inst_SOPC__S_CMP_LG_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| scc = (src0.rawData() != src1.rawData()) ? 1 : 0; |
| |
| scc.write(); |
| } |
| |
| Inst_SOPC__S_CMP_GT_I32::Inst_SOPC__S_CMP_GT_I32(InFmt_SOPC *iFmt) |
| : Inst_SOPC(iFmt, "s_cmp_gt_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPC__S_CMP_GT_I32 |
| |
| Inst_SOPC__S_CMP_GT_I32::~Inst_SOPC__S_CMP_GT_I32() |
| { |
| } // ~Inst_SOPC__S_CMP_GT_I32 |
| |
| // SCC = (S0.i > S1.i). |
| void |
| Inst_SOPC__S_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| scc = (src0.rawData() > src1.rawData()) ? 1 : 0; |
| |
| scc.write(); |
| } |
| |
| Inst_SOPC__S_CMP_GE_I32::Inst_SOPC__S_CMP_GE_I32(InFmt_SOPC *iFmt) |
| : Inst_SOPC(iFmt, "s_cmp_ge_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPC__S_CMP_GE_I32 |
| |
| Inst_SOPC__S_CMP_GE_I32::~Inst_SOPC__S_CMP_GE_I32() |
| { |
| } // ~Inst_SOPC__S_CMP_GE_I32 |
| |
| // SCC = (S0.i >= S1.i). |
| void |
| Inst_SOPC__S_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| scc = (src0.rawData() >= src1.rawData()) ? 1 : 0; |
| |
| scc.write(); |
| } |
| |
| Inst_SOPC__S_CMP_LT_I32::Inst_SOPC__S_CMP_LT_I32(InFmt_SOPC *iFmt) |
| : Inst_SOPC(iFmt, "s_cmp_lt_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPC__S_CMP_LT_I32 |
| |
| Inst_SOPC__S_CMP_LT_I32::~Inst_SOPC__S_CMP_LT_I32() |
| { |
| } // ~Inst_SOPC__S_CMP_LT_I32 |
| |
| // SCC = (S0.i < S1.i). |
| void |
| Inst_SOPC__S_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| scc = (src0.rawData() < src1.rawData()) ? 1 : 0; |
| |
| scc.write(); |
| } |
| |
| Inst_SOPC__S_CMP_LE_I32::Inst_SOPC__S_CMP_LE_I32(InFmt_SOPC *iFmt) |
| : Inst_SOPC(iFmt, "s_cmp_le_i32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPC__S_CMP_LE_I32 |
| |
| Inst_SOPC__S_CMP_LE_I32::~Inst_SOPC__S_CMP_LE_I32() |
| { |
| } // ~Inst_SOPC__S_CMP_LE_I32 |
| |
| // SCC = (S0.i <= S1.i). |
| void |
| Inst_SOPC__S_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| scc = (src0.rawData() <= src1.rawData()) ? 1 : 0; |
| |
| scc.write(); |
| } |
| |
| Inst_SOPC__S_CMP_EQ_U32::Inst_SOPC__S_CMP_EQ_U32(InFmt_SOPC *iFmt) |
| : Inst_SOPC(iFmt, "s_cmp_eq_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPC__S_CMP_EQ_U32 |
| |
| Inst_SOPC__S_CMP_EQ_U32::~Inst_SOPC__S_CMP_EQ_U32() |
| { |
| } // ~Inst_SOPC__S_CMP_EQ_U32 |
| |
| // SCC = (S0.u == S1.u). |
| void |
| Inst_SOPC__S_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| scc = (src0.rawData() == src1.rawData()) ? 1 : 0; |
| |
| scc.write(); |
| } |
| |
| Inst_SOPC__S_CMP_LG_U32::Inst_SOPC__S_CMP_LG_U32(InFmt_SOPC *iFmt) |
| : Inst_SOPC(iFmt, "s_cmp_lg_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPC__S_CMP_LG_U32 |
| |
| Inst_SOPC__S_CMP_LG_U32::~Inst_SOPC__S_CMP_LG_U32() |
| { |
| } // ~Inst_SOPC__S_CMP_LG_U32 |
| |
| // SCC = (S0.u != S1.u). |
| void |
| Inst_SOPC__S_CMP_LG_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| scc = (src0.rawData() != src1.rawData()) ? 1 : 0; |
| |
| scc.write(); |
| } |
| |
| Inst_SOPC__S_CMP_GT_U32::Inst_SOPC__S_CMP_GT_U32(InFmt_SOPC *iFmt) |
| : Inst_SOPC(iFmt, "s_cmp_gt_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPC__S_CMP_GT_U32 |
| |
| Inst_SOPC__S_CMP_GT_U32::~Inst_SOPC__S_CMP_GT_U32() |
| { |
| } // ~Inst_SOPC__S_CMP_GT_U32 |
| |
| // SCC = (S0.u > S1.u). |
| void |
| Inst_SOPC__S_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| scc = (src0.rawData() > src1.rawData()) ? 1 : 0; |
| |
| scc.write(); |
| } |
| |
| Inst_SOPC__S_CMP_GE_U32::Inst_SOPC__S_CMP_GE_U32(InFmt_SOPC *iFmt) |
| : Inst_SOPC(iFmt, "s_cmp_ge_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPC__S_CMP_GE_U32 |
| |
| Inst_SOPC__S_CMP_GE_U32::~Inst_SOPC__S_CMP_GE_U32() |
| { |
| } // ~Inst_SOPC__S_CMP_GE_U32 |
| |
| // SCC = (S0.u >= S1.u). |
| void |
| Inst_SOPC__S_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| scc = (src0.rawData() >= src1.rawData()) ? 1 : 0; |
| |
| scc.write(); |
| } |
| |
| Inst_SOPC__S_CMP_LT_U32::Inst_SOPC__S_CMP_LT_U32(InFmt_SOPC *iFmt) |
| : Inst_SOPC(iFmt, "s_cmp_lt_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPC__S_CMP_LT_U32 |
| |
| Inst_SOPC__S_CMP_LT_U32::~Inst_SOPC__S_CMP_LT_U32() |
| { |
| } // ~Inst_SOPC__S_CMP_LT_U32 |
| |
| // SCC = (S0.u < S1.u). |
| void |
| Inst_SOPC__S_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| scc = (src0.rawData() < src1.rawData()) ? 1 : 0; |
| |
| scc.write(); |
| } |
| |
| Inst_SOPC__S_CMP_LE_U32::Inst_SOPC__S_CMP_LE_U32(InFmt_SOPC *iFmt) |
| : Inst_SOPC(iFmt, "s_cmp_le_u32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPC__S_CMP_LE_U32 |
| |
| Inst_SOPC__S_CMP_LE_U32::~Inst_SOPC__S_CMP_LE_U32() |
| { |
| } // ~Inst_SOPC__S_CMP_LE_U32 |
| |
| // SCC = (S0.u <= S1.u). |
| void |
| Inst_SOPC__S_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| scc = (src0.rawData() <= src1.rawData()) ? 1 : 0; |
| |
| scc.write(); |
| } |
| |
| Inst_SOPC__S_BITCMP0_B32::Inst_SOPC__S_BITCMP0_B32(InFmt_SOPC *iFmt) |
| : Inst_SOPC(iFmt, "s_bitcmp0_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPC__S_BITCMP0_B32 |
| |
| Inst_SOPC__S_BITCMP0_B32::~Inst_SOPC__S_BITCMP0_B32() |
| { |
| } // ~Inst_SOPC__S_BITCMP0_B32 |
| |
| // SCC = (S0.u[S1.u[4:0]] == 0). |
| void |
| Inst_SOPC__S_BITCMP0_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| scc = !bits(src0.rawData(), bits(src1.rawData(), 4, 0)) ? 1 : 0; |
| |
| scc.write(); |
| } |
| |
| Inst_SOPC__S_BITCMP1_B32::Inst_SOPC__S_BITCMP1_B32(InFmt_SOPC *iFmt) |
| : Inst_SOPC(iFmt, "s_bitcmp1_b32") |
| { |
| setFlag(ALU); |
| } // Inst_SOPC__S_BITCMP1_B32 |
| |
| Inst_SOPC__S_BITCMP1_B32::~Inst_SOPC__S_BITCMP1_B32() |
| { |
| } // ~Inst_SOPC__S_BITCMP1_B32 |
| |
| // SCC = (S0.u[S1.u[4:0]] == 1). |
| void |
| Inst_SOPC__S_BITCMP1_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| scc = bits(src0.rawData(), bits(src1.rawData(), 4, 0)) ? 1 : 0; |
| |
| scc.write(); |
| } |
| |
| Inst_SOPC__S_BITCMP0_B64::Inst_SOPC__S_BITCMP0_B64(InFmt_SOPC *iFmt) |
| : Inst_SOPC(iFmt, "s_bitcmp0_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOPC__S_BITCMP0_B64 |
| |
| Inst_SOPC__S_BITCMP0_B64::~Inst_SOPC__S_BITCMP0_B64() |
| { |
| } // ~Inst_SOPC__S_BITCMP0_B64 |
| |
| // SCC = (S0.u64[S1.u[5:0]] == 0). |
| void |
| Inst_SOPC__S_BITCMP0_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| scc = !bits(src0.rawData(), bits(src1.rawData(), 5, 0)) ? 1 : 0; |
| |
| scc.write(); |
| } |
| |
| Inst_SOPC__S_BITCMP1_B64::Inst_SOPC__S_BITCMP1_B64(InFmt_SOPC *iFmt) |
| : Inst_SOPC(iFmt, "s_bitcmp1_b64") |
| { |
| setFlag(ALU); |
| } // Inst_SOPC__S_BITCMP1_B64 |
| |
| Inst_SOPC__S_BITCMP1_B64::~Inst_SOPC__S_BITCMP1_B64() |
| { |
| } // ~Inst_SOPC__S_BITCMP1_B64 |
| |
| // SCC = (S0.u64[S1.u[5:0]] == 1). |
| void |
| Inst_SOPC__S_BITCMP1_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| scc = bits(src0.rawData(), bits(src1.rawData(), 5, 0)) ? 1 : 0; |
| |
| scc.write(); |
| } |
| |
| Inst_SOPC__S_SETVSKIP::Inst_SOPC__S_SETVSKIP(InFmt_SOPC *iFmt) |
| : Inst_SOPC(iFmt, "s_setvskip") |
| { |
| setFlag(UnconditionalJump); |
| } // Inst_SOPC__S_SETVSKIP |
| |
| Inst_SOPC__S_SETVSKIP::~Inst_SOPC__S_SETVSKIP() |
| { |
| } // ~Inst_SOPC__S_SETVSKIP |
| |
| // VSKIP = S0.u[S1.u[4:0]]. |
| // Enables and disables VSKIP mode. |
| // When VSKIP is enabled, no VOP*/M*BUF/MIMG/DS/FLAT/EXP instuctions are |
| // issued. |
| void |
| Inst_SOPC__S_SETVSKIP::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SOPC__S_SET_GPR_IDX_ON::Inst_SOPC__S_SET_GPR_IDX_ON(InFmt_SOPC *iFmt) |
| : Inst_SOPC(iFmt, "s_set_gpr_idx_on") |
| { |
| } // Inst_SOPC__S_SET_GPR_IDX_ON |
| |
| Inst_SOPC__S_SET_GPR_IDX_ON::~Inst_SOPC__S_SET_GPR_IDX_ON() |
| { |
| } // ~Inst_SOPC__S_SET_GPR_IDX_ON |
| |
| // MODE.gpr_idx_en = 1; |
| // M0[7:0] = S0.u[7:0]; |
| // M0[15:12] = SIMM4 (direct contents of S1 field); |
| // Remaining bits of M0 are unmodified. |
| // Enable GPR indexing mode. Vector operations after this will perform |
| // relative GPR addressing based on the contents of M0. |
| // The raw contents of the S1 field are read and used to set the enable |
| // bits. S1[0] = VSRC0_REL, S1[1] = VSRC1_REL, S1[2] = VSRC2_REL and |
| // S1[3] = VDST_REL. |
| void |
| Inst_SOPC__S_SET_GPR_IDX_ON::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SOPC__S_CMP_EQ_U64::Inst_SOPC__S_CMP_EQ_U64(InFmt_SOPC *iFmt) |
| : Inst_SOPC(iFmt, "s_cmp_eq_u64") |
| { |
| setFlag(ALU); |
| } // Inst_SOPC__S_CMP_EQ_U64 |
| |
| Inst_SOPC__S_CMP_EQ_U64::~Inst_SOPC__S_CMP_EQ_U64() |
| { |
| } // ~Inst_SOPC__S_CMP_EQ_U64 |
| |
| // SCC = (S0.i64 == S1.i64). |
| void |
| Inst_SOPC__S_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandI64 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| scc = (src0.rawData() == src1.rawData()) ? 1 : 0; |
| |
| scc.write(); |
| } |
| |
| Inst_SOPC__S_CMP_LG_U64::Inst_SOPC__S_CMP_LG_U64(InFmt_SOPC *iFmt) |
| : Inst_SOPC(iFmt, "s_cmp_lg_u64") |
| { |
| setFlag(ALU); |
| } // Inst_SOPC__S_CMP_LG_U64 |
| |
| Inst_SOPC__S_CMP_LG_U64::~Inst_SOPC__S_CMP_LG_U64() |
| { |
| } // ~Inst_SOPC__S_CMP_LG_U64 |
| |
| // SCC = (S0.i64 != S1.i64). |
| void |
| Inst_SOPC__S_CMP_LG_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0); |
| ConstScalarOperandI64 src1(gpuDynInst, instData.SSRC1); |
| ScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| src0.read(); |
| src1.read(); |
| |
| scc = (src0.rawData() != src1.rawData()) ? 1 : 0; |
| |
| scc.write(); |
| } |
| |
| Inst_SOPP__S_NOP::Inst_SOPP__S_NOP(InFmt_SOPP *iFmt) |
| : Inst_SOPP(iFmt, "s_nop") |
| { |
| setFlag(Nop); |
| } // Inst_SOPP__S_NOP |
| |
| Inst_SOPP__S_NOP::~Inst_SOPP__S_NOP() |
| { |
| } // ~Inst_SOPP__S_NOP |
| |
| // Do nothing. |
| void |
| Inst_SOPP__S_NOP::execute(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_SOPP__S_ENDPGM::Inst_SOPP__S_ENDPGM(InFmt_SOPP *iFmt) |
| : Inst_SOPP(iFmt, "s_endpgm") |
| { |
| setFlag(EndOfKernel); |
| } // Inst_SOPP__S_ENDPGM |
| |
| Inst_SOPP__S_ENDPGM::~Inst_SOPP__S_ENDPGM() |
| { |
| } // ~Inst_SOPP__S_ENDPGM |
| |
| // End of program; terminate wavefront. |
| void |
| Inst_SOPP__S_ENDPGM::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ComputeUnit *cu = gpuDynInst->computeUnit(); |
| |
| // delete extra instructions fetched for completed work-items |
| wf->instructionBuffer.erase(wf->instructionBuffer.begin() + 1, |
| wf->instructionBuffer.end()); |
| |
| if (wf->pendingFetch) { |
| wf->dropFetch = true; |
| } |
| |
| wf->computeUnit->fetchStage.fetchUnit(wf->simdId) |
| .flushBuf(wf->wfSlotId); |
| wf->setStatus(Wavefront::S_STOPPED); |
| |
| int refCount = wf->computeUnit->getLds() |
| .decreaseRefCounter(wf->dispatchId, wf->wgId); |
| |
| /** |
| * The parent WF of this instruction is exiting, therefore |
| * it should not participate in this barrier any longer. This |
| * prevents possible deadlock issues if WFs exit early. |
| */ |
| int bar_id = WFBarrier::InvalidID; |
| if (wf->hasBarrier()) { |
| assert(wf->getStatus() != Wavefront::S_BARRIER); |
| bar_id = wf->barrierId(); |
| assert(bar_id != WFBarrier::InvalidID); |
| wf->releaseBarrier(); |
| cu->decMaxBarrierCnt(bar_id); |
| DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - Exiting the " |
| "program and decrementing max barrier count for " |
| "barrier Id%d. New max count: %d.\n", cu->cu_id, |
| wf->simdId, wf->wfSlotId, wf->wfDynId, bar_id, |
| cu->maxBarrierCnt(bar_id)); |
| } |
| |
| DPRINTF(GPUExec, "CU%d: decrease ref ctr WG[%d] to [%d]\n", |
| wf->computeUnit->cu_id, wf->wgId, refCount); |
| |
| wf->computeUnit->registerManager->freeRegisters(wf); |
| wf->computeUnit->stats.completedWfs++; |
| wf->computeUnit->activeWaves--; |
| |
| panic_if(wf->computeUnit->activeWaves < 0, "CU[%d] Active waves less " |
| "than zero\n", wf->computeUnit->cu_id); |
| |
| DPRINTF(GPUExec, "Doing return for CU%d: WF[%d][%d][%d]\n", |
| wf->computeUnit->cu_id, wf->simdId, wf->wfSlotId, wf->wfDynId); |
| |
| for (int i = 0; i < wf->vecReads.size(); i++) { |
| if (wf->rawDist.find(i) != wf->rawDist.end()) { |
| wf->stats.readsPerWrite.sample(wf->vecReads.at(i)); |
| } |
| } |
| wf->vecReads.clear(); |
| wf->rawDist.clear(); |
| wf->lastInstExec = 0; |
| |
| if (!refCount) { |
| /** |
| * If all WFs have finished, and hence the WG has finished, |
| * then we can free up the barrier belonging to the parent |
| * WG, but only if we actually used a barrier (i.e., more |
| * than one WF in the WG). |
| */ |
| if (bar_id != WFBarrier::InvalidID) { |
| DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - All waves are " |
| "now complete. Releasing barrier Id%d.\n", cu->cu_id, |
| wf->simdId, wf->wfSlotId, wf->wfDynId, |
| wf->barrierId()); |
| cu->releaseBarrier(bar_id); |
| } |
| |
| /** |
| * Last wavefront of the workgroup has executed return. If the |
| * workgroup is not the final one in the kernel, then simply |
| * retire it; however, if it is the final one (i.e., indicating |
| * the kernel end) then release operation is needed. |
| */ |
| |
| // check whether the workgroup is indicating the kernel end (i.e., |
| // the last workgroup in the kernel). |
| bool kernelEnd = |
| wf->computeUnit->shader->dispatcher().isReachingKernelEnd(wf); |
| // further check whether 'release @ kernel end' is needed |
| bool relNeeded = |
| wf->computeUnit->shader->impl_kern_end_rel; |
| |
| // if not a kernel end or no release needed, retire the workgroup |
| // directly |
| if (!kernelEnd || !relNeeded) { |
| wf->computeUnit->shader->dispatcher().notifyWgCompl(wf); |
| wf->setStatus(Wavefront::S_STOPPED); |
| wf->computeUnit->stats.completedWGs++; |
| |
| return; |
| } |
| |
| /** |
| * If a kernel end and release needed, inject a memory sync and |
| * retire the workgroup after receving all acks. |
| */ |
| setFlag(MemSync); |
| setFlag(GlobalSegment); |
| // Notify Memory System of Kernel Completion |
| wf->setStatus(Wavefront::S_RETURNING); |
| gpuDynInst->simdId = wf->simdId; |
| gpuDynInst->wfSlotId = wf->wfSlotId; |
| gpuDynInst->wfDynId = wf->wfDynId; |
| |
| DPRINTF(GPUExec, "inject global memory fence for CU%d: " |
| "WF[%d][%d][%d]\n", wf->computeUnit->cu_id, |
| wf->simdId, wf->wfSlotId, wf->wfDynId); |
| |
| // call shader to prepare the flush operations |
| wf->computeUnit->shader->prepareFlush(gpuDynInst); |
| |
| wf->computeUnit->stats.completedWGs++; |
| } else { |
| wf->computeUnit->shader->dispatcher().scheduleDispatch(); |
| } |
| } |
| |
| |
| Inst_SOPP__S_BRANCH::Inst_SOPP__S_BRANCH(InFmt_SOPP *iFmt) |
| : Inst_SOPP(iFmt, "s_branch") |
| { |
| setFlag(Branch); |
| } // Inst_SOPP__S_BRANCH |
| |
| Inst_SOPP__S_BRANCH::~Inst_SOPP__S_BRANCH() |
| { |
| } // ~Inst_SOPP__S_BRANCH |
| |
| // PC = PC + signext(SIMM16 * 4) + 4 (short jump). |
| void |
| Inst_SOPP__S_BRANCH::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| Addr pc = wf->pc(); |
| ScalarRegI16 simm16 = instData.SIMM16; |
| |
| pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL; |
| |
| wf->pc(pc); |
| } |
| |
| Inst_SOPP__S_WAKEUP::Inst_SOPP__S_WAKEUP(InFmt_SOPP *iFmt) |
| : Inst_SOPP(iFmt, "s_wakeup") |
| { |
| } // Inst_SOPP__S_WAKEUP |
| |
| Inst_SOPP__S_WAKEUP::~Inst_SOPP__S_WAKEUP() |
| { |
| } // ~Inst_SOPP__S_WAKEUP |
| |
| // Allow a wave to wakeup all the other waves in its workgroup to force |
| // them to wake up immediately from an S_SLEEP instruction. The wakeup is |
| // ignored if the waves are not sleeping. |
| void |
| Inst_SOPP__S_WAKEUP::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SOPP__S_CBRANCH_SCC0::Inst_SOPP__S_CBRANCH_SCC0(InFmt_SOPP *iFmt) |
| : Inst_SOPP(iFmt, "s_cbranch_scc0") |
| { |
| setFlag(Branch); |
| } // Inst_SOPP__S_CBRANCH_SCC0 |
| |
| Inst_SOPP__S_CBRANCH_SCC0::~Inst_SOPP__S_CBRANCH_SCC0() |
| { |
| } // ~Inst_SOPP__S_CBRANCH_SCC0 |
| |
| // if (SCC == 0) then PC = PC + signext(SIMM16 * 4) + 4; |
| // else NOP. |
| void |
| Inst_SOPP__S_CBRANCH_SCC0::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| Addr pc = wf->pc(); |
| ScalarRegI16 simm16 = instData.SIMM16; |
| ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| scc.read(); |
| |
| if (!scc.rawData()) { |
| pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL; |
| } |
| |
| wf->pc(pc); |
| } |
| |
| Inst_SOPP__S_CBRANCH_SCC1::Inst_SOPP__S_CBRANCH_SCC1(InFmt_SOPP *iFmt) |
| : Inst_SOPP(iFmt, "s_cbranch_scc1") |
| { |
| setFlag(Branch); |
| } // Inst_SOPP__S_CBRANCH_SCC1 |
| |
| Inst_SOPP__S_CBRANCH_SCC1::~Inst_SOPP__S_CBRANCH_SCC1() |
| { |
| } // ~Inst_SOPP__S_CBRANCH_SCC1 |
| |
| // if (SCC == 1) then PC = PC + signext(SIMM16 * 4) + 4; |
| // else NOP. |
| void |
| Inst_SOPP__S_CBRANCH_SCC1::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| Addr pc = wf->pc(); |
| ScalarRegI16 simm16 = instData.SIMM16; |
| ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); |
| |
| scc.read(); |
| |
| if (scc.rawData()) { |
| pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL; |
| } |
| |
| wf->pc(pc); |
| } |
| |
| Inst_SOPP__S_CBRANCH_VCCZ::Inst_SOPP__S_CBRANCH_VCCZ(InFmt_SOPP *iFmt) |
| : Inst_SOPP(iFmt, "s_cbranch_vccz") |
| { |
| setFlag(Branch); |
| setFlag(ReadsVCC); |
| } // Inst_SOPP__S_CBRANCH_VCCZ |
| |
| Inst_SOPP__S_CBRANCH_VCCZ::~Inst_SOPP__S_CBRANCH_VCCZ() |
| { |
| } // ~Inst_SOPP__S_CBRANCH_VCCZ |
| |
| // if (VCC == 0) then PC = PC + signext(SIMM16 * 4) + 4; |
| // else NOP. |
| void |
| Inst_SOPP__S_CBRANCH_VCCZ::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| Addr pc = wf->pc(); |
| ScalarRegI16 simm16 = instData.SIMM16; |
| |
| vcc.read(); |
| |
| if (!vcc.rawData()) { |
| pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL; |
| } |
| |
| wf->pc(pc); |
| } |
| |
| Inst_SOPP__S_CBRANCH_VCCNZ::Inst_SOPP__S_CBRANCH_VCCNZ(InFmt_SOPP *iFmt) |
| : Inst_SOPP(iFmt, "s_cbranch_vccnz") |
| { |
| setFlag(Branch); |
| setFlag(ReadsVCC); |
| } // Inst_SOPP__S_CBRANCH_VCCNZ |
| |
| Inst_SOPP__S_CBRANCH_VCCNZ::~Inst_SOPP__S_CBRANCH_VCCNZ() |
| { |
| } // ~Inst_SOPP__S_CBRANCH_VCCNZ |
| |
| // if (VCC != 0) then PC = PC + signext(SIMM16 * 4) + 4; |
| // else NOP. |
| void |
| Inst_SOPP__S_CBRANCH_VCCNZ::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| vcc.read(); |
| |
| if (vcc.rawData()) { |
| Addr pc = wf->pc(); |
| ScalarRegI16 simm16 = instData.SIMM16; |
| pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL; |
| wf->pc(pc); |
| } |
| } |
| |
| Inst_SOPP__S_CBRANCH_EXECZ::Inst_SOPP__S_CBRANCH_EXECZ(InFmt_SOPP *iFmt) |
| : Inst_SOPP(iFmt, "s_cbranch_execz") |
| { |
| setFlag(Branch); |
| } // Inst_SOPP__S_CBRANCH_EXECZ |
| |
| Inst_SOPP__S_CBRANCH_EXECZ::~Inst_SOPP__S_CBRANCH_EXECZ() |
| { |
| } // ~Inst_SOPP__S_CBRANCH_EXECZ |
| |
| // if (EXEC == 0) then PC = PC + signext(SIMM16 * 4) + 4; |
| // else NOP. |
| void |
| Inst_SOPP__S_CBRANCH_EXECZ::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| |
| if (wf->execMask().none()) { |
| Addr pc = wf->pc(); |
| ScalarRegI16 simm16 = instData.SIMM16; |
| pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL; |
| wf->pc(pc); |
| } |
| } |
| |
| Inst_SOPP__S_CBRANCH_EXECNZ::Inst_SOPP__S_CBRANCH_EXECNZ(InFmt_SOPP *iFmt) |
| : Inst_SOPP(iFmt, "s_cbranch_execnz") |
| { |
| setFlag(Branch); |
| } // Inst_SOPP__S_CBRANCH_EXECNZ |
| |
| Inst_SOPP__S_CBRANCH_EXECNZ::~Inst_SOPP__S_CBRANCH_EXECNZ() |
| { |
| } // ~Inst_SOPP__S_CBRANCH_EXECNZ |
| |
| // if (EXEC != 0) then PC = PC + signext(SIMM16 * 4) + 4; |
| // else NOP. |
| void |
| Inst_SOPP__S_CBRANCH_EXECNZ::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| |
| if (wf->execMask().any()) { |
| Addr pc = wf->pc(); |
| ScalarRegI16 simm16 = instData.SIMM16; |
| pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL; |
| wf->pc(pc); |
| } |
| } |
| |
| Inst_SOPP__S_BARRIER::Inst_SOPP__S_BARRIER(InFmt_SOPP *iFmt) |
| : Inst_SOPP(iFmt, "s_barrier") |
| { |
| setFlag(MemBarrier); |
| } // Inst_SOPP__S_BARRIER |
| |
| Inst_SOPP__S_BARRIER::~Inst_SOPP__S_BARRIER() |
| { |
| } // ~Inst_SOPP__S_BARRIER |
| |
| /** |
| * Synchronize waves within a workgroup. If not all waves of the workgroup |
| * have been created yet, wait for entire group before proceeding. If some |
| * waves in the wokgroup have already terminated, this waits on only the |
| * surviving waves. |
| */ |
| void |
| Inst_SOPP__S_BARRIER::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ComputeUnit *cu = gpuDynInst->computeUnit(); |
| |
| if (wf->hasBarrier()) { |
| int bar_id = wf->barrierId(); |
| cu->incNumAtBarrier(bar_id); |
| DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - Stalling at " |
| "barrier Id%d. %d waves now at barrier, %d waves " |
| "remain.\n", cu->cu_id, wf->simdId, wf->wfSlotId, |
| wf->wfDynId, bar_id, cu->numAtBarrier(bar_id), |
| cu->numYetToReachBarrier(bar_id)); |
| } |
| } // execute |
| // --- Inst_SOPP__S_SETKILL class methods --- |
| |
| Inst_SOPP__S_SETKILL::Inst_SOPP__S_SETKILL(InFmt_SOPP *iFmt) |
| : Inst_SOPP(iFmt, "s_setkill") |
| { |
| } // Inst_SOPP__S_SETKILL |
| |
| Inst_SOPP__S_SETKILL::~Inst_SOPP__S_SETKILL() |
| { |
| } // ~Inst_SOPP__S_SETKILL |
| |
| void |
| Inst_SOPP__S_SETKILL::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SOPP__S_WAITCNT::Inst_SOPP__S_WAITCNT(InFmt_SOPP *iFmt) |
| : Inst_SOPP(iFmt, "s_waitcnt") |
| { |
| setFlag(ALU); |
| setFlag(Waitcnt); |
| } // Inst_SOPP__S_WAITCNT |
| |
| Inst_SOPP__S_WAITCNT::~Inst_SOPP__S_WAITCNT() |
| { |
| } // ~Inst_SOPP__S_WAITCNT |
| |
| // Wait for the counts of outstanding lds, vector-memory and |
| // export/vmem-write-data to be at or below the specified levels. |
| // SIMM16[3:0] = vmcount (vector memory operations), |
| // SIMM16[6:4] = export/mem-write-data count, |
| // SIMM16[12:8] = LGKM_cnt (scalar-mem/GDS/LDS count). |
| void |
| Inst_SOPP__S_WAITCNT::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegI32 vm_cnt = 0; |
| ScalarRegI32 exp_cnt = 0; |
| ScalarRegI32 lgkm_cnt = 0; |
| vm_cnt = bits<ScalarRegI16>(instData.SIMM16, 3, 0); |
| exp_cnt = bits<ScalarRegI16>(instData.SIMM16, 6, 4); |
| lgkm_cnt = bits<ScalarRegI16>(instData.SIMM16, 12, 8); |
| gpuDynInst->wavefront()->setWaitCnts(vm_cnt, exp_cnt, lgkm_cnt); |
| } |
| |
| Inst_SOPP__S_SETHALT::Inst_SOPP__S_SETHALT(InFmt_SOPP *iFmt) |
| : Inst_SOPP(iFmt, "s_sethalt") |
| { |
| } // Inst_SOPP__S_SETHALT |
| |
| Inst_SOPP__S_SETHALT::~Inst_SOPP__S_SETHALT() |
| { |
| } // ~Inst_SOPP__S_SETHALT |
| |
| void |
| Inst_SOPP__S_SETHALT::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SOPP__S_SLEEP::Inst_SOPP__S_SLEEP(InFmt_SOPP *iFmt) |
| : Inst_SOPP(iFmt, "s_sleep") |
| { |
| setFlag(ALU); |
| setFlag(Sleep); |
| } // Inst_SOPP__S_SLEEP |
| |
| Inst_SOPP__S_SLEEP::~Inst_SOPP__S_SLEEP() |
| { |
| } // ~Inst_SOPP__S_SLEEP |
| |
| // Cause a wave to sleep for (64 * SIMM16[2:0] + 1..64) clocks. |
| void |
| Inst_SOPP__S_SLEEP::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16; |
| gpuDynInst->wavefront()->setStatus(Wavefront::S_STALLED_SLEEP); |
| // sleep duration is specified in multiples of 64 cycles |
| gpuDynInst->wavefront()->setSleepTime(64 * simm16); |
| } // execute |
| // --- Inst_SOPP__S_SETPRIO class methods --- |
| |
| Inst_SOPP__S_SETPRIO::Inst_SOPP__S_SETPRIO(InFmt_SOPP *iFmt) |
| : Inst_SOPP(iFmt, "s_setprio") |
| { |
| } // Inst_SOPP__S_SETPRIO |
| |
| Inst_SOPP__S_SETPRIO::~Inst_SOPP__S_SETPRIO() |
| { |
| } // ~Inst_SOPP__S_SETPRIO |
| |
| // User settable wave priority is set to SIMM16[1:0]. 0 = lowest, |
| // 3 = highest. |
| void |
| Inst_SOPP__S_SETPRIO::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SOPP__S_SENDMSG::Inst_SOPP__S_SENDMSG(InFmt_SOPP *iFmt) |
| : Inst_SOPP(iFmt, "s_sendmsg") |
| { |
| } // Inst_SOPP__S_SENDMSG |
| |
| Inst_SOPP__S_SENDMSG::~Inst_SOPP__S_SENDMSG() |
| { |
| } // ~Inst_SOPP__S_SENDMSG |
| |
| void |
| Inst_SOPP__S_SENDMSG::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SOPP__S_SENDMSGHALT::Inst_SOPP__S_SENDMSGHALT(InFmt_SOPP *iFmt) |
| : Inst_SOPP(iFmt, "s_sendmsghalt") |
| { |
| } // Inst_SOPP__S_SENDMSGHALT |
| |
| Inst_SOPP__S_SENDMSGHALT::~Inst_SOPP__S_SENDMSGHALT() |
| { |
| } // ~Inst_SOPP__S_SENDMSGHALT |
| |
| void |
| Inst_SOPP__S_SENDMSGHALT::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SOPP__S_TRAP::Inst_SOPP__S_TRAP(InFmt_SOPP *iFmt) |
| : Inst_SOPP(iFmt, "s_trap") |
| { |
| } // Inst_SOPP__S_TRAP |
| |
| Inst_SOPP__S_TRAP::~Inst_SOPP__S_TRAP() |
| { |
| } // ~Inst_SOPP__S_TRAP |
| |
| // Enter the trap handler. |
| void |
| Inst_SOPP__S_TRAP::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SOPP__S_ICACHE_INV::Inst_SOPP__S_ICACHE_INV(InFmt_SOPP *iFmt) |
| : Inst_SOPP(iFmt, "s_icache_inv") |
| { |
| } // Inst_SOPP__S_ICACHE_INV |
| |
| Inst_SOPP__S_ICACHE_INV::~Inst_SOPP__S_ICACHE_INV() |
| { |
| } // ~Inst_SOPP__S_ICACHE_INV |
| |
| // Invalidate entire L1 instruction cache. |
| void |
| Inst_SOPP__S_ICACHE_INV::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SOPP__S_INCPERFLEVEL::Inst_SOPP__S_INCPERFLEVEL(InFmt_SOPP *iFmt) |
| : Inst_SOPP(iFmt, "s_incperflevel") |
| { |
| } // Inst_SOPP__S_INCPERFLEVEL |
| |
| Inst_SOPP__S_INCPERFLEVEL::~Inst_SOPP__S_INCPERFLEVEL() |
| { |
| } // ~Inst_SOPP__S_INCPERFLEVEL |
| |
| void |
| Inst_SOPP__S_INCPERFLEVEL::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SOPP__S_DECPERFLEVEL::Inst_SOPP__S_DECPERFLEVEL(InFmt_SOPP *iFmt) |
| : Inst_SOPP(iFmt, "s_decperflevel") |
| { |
| } // Inst_SOPP__S_DECPERFLEVEL |
| |
| Inst_SOPP__S_DECPERFLEVEL::~Inst_SOPP__S_DECPERFLEVEL() |
| { |
| } // ~Inst_SOPP__S_DECPERFLEVEL |
| |
| void |
| Inst_SOPP__S_DECPERFLEVEL::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SOPP__S_TTRACEDATA::Inst_SOPP__S_TTRACEDATA(InFmt_SOPP *iFmt) |
| : Inst_SOPP(iFmt, "s_ttracedata") |
| { |
| } // Inst_SOPP__S_TTRACEDATA |
| |
| Inst_SOPP__S_TTRACEDATA::~Inst_SOPP__S_TTRACEDATA() |
| { |
| } // ~Inst_SOPP__S_TTRACEDATA |
| |
| void |
| Inst_SOPP__S_TTRACEDATA::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SOPP__S_CBRANCH_CDBGSYS::Inst_SOPP__S_CBRANCH_CDBGSYS( |
| InFmt_SOPP *iFmt) |
| : Inst_SOPP(iFmt, "s_cbranch_cdbgsys") |
| { |
| setFlag(Branch); |
| } // Inst_SOPP__S_CBRANCH_CDBGSYS |
| |
| Inst_SOPP__S_CBRANCH_CDBGSYS::~Inst_SOPP__S_CBRANCH_CDBGSYS() |
| { |
| } // ~Inst_SOPP__S_CBRANCH_CDBGSYS |
| |
| void |
| Inst_SOPP__S_CBRANCH_CDBGSYS::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SOPP__S_CBRANCH_CDBGUSER::Inst_SOPP__S_CBRANCH_CDBGUSER( |
| InFmt_SOPP *iFmt) |
| : Inst_SOPP(iFmt, "s_cbranch_cdbguser") |
| { |
| setFlag(Branch); |
| } // Inst_SOPP__S_CBRANCH_CDBGUSER |
| |
| Inst_SOPP__S_CBRANCH_CDBGUSER::~Inst_SOPP__S_CBRANCH_CDBGUSER() |
| { |
| } // ~Inst_SOPP__S_CBRANCH_CDBGUSER |
| |
| void |
| Inst_SOPP__S_CBRANCH_CDBGUSER::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER( |
| InFmt_SOPP *iFmt) |
| : Inst_SOPP(iFmt, "s_cbranch_cdbgsys_or_user") |
| { |
| setFlag(Branch); |
| } // Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER |
| |
| Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER:: |
| ~Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER() |
| { |
| } // ~Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER |
| |
| void |
| Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER:: |
| Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER(InFmt_SOPP *iFmt) |
| : Inst_SOPP(iFmt, "s_cbranch_cdbgsys_and_user") |
| { |
| setFlag(Branch); |
| } // Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER |
| |
| Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER:: |
| ~Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER() |
| { |
| } // ~Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER |
| |
| void |
| Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SOPP__S_ENDPGM_SAVED::Inst_SOPP__S_ENDPGM_SAVED(InFmt_SOPP *iFmt) |
| : Inst_SOPP(iFmt, "s_endpgm_saved") |
| { |
| } // Inst_SOPP__S_ENDPGM_SAVED |
| |
| Inst_SOPP__S_ENDPGM_SAVED::~Inst_SOPP__S_ENDPGM_SAVED() |
| { |
| } // ~Inst_SOPP__S_ENDPGM_SAVED |
| |
| // End of program. |
| void |
| Inst_SOPP__S_ENDPGM_SAVED::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SOPP__S_SET_GPR_IDX_OFF::Inst_SOPP__S_SET_GPR_IDX_OFF( |
| InFmt_SOPP *iFmt) |
| : Inst_SOPP(iFmt, "s_set_gpr_idx_off") |
| { |
| } // Inst_SOPP__S_SET_GPR_IDX_OFF |
| |
| Inst_SOPP__S_SET_GPR_IDX_OFF::~Inst_SOPP__S_SET_GPR_IDX_OFF() |
| { |
| } // ~Inst_SOPP__S_SET_GPR_IDX_OFF |
| |
| // MODE.gpr_idx_en = 0. |
| // Clear GPR indexing mode. Vector operations after this will not perform |
| // relative GPR addressing regardless of the contents of M0. |
| void |
| Inst_SOPP__S_SET_GPR_IDX_OFF::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SOPP__S_SET_GPR_IDX_MODE::Inst_SOPP__S_SET_GPR_IDX_MODE( |
| InFmt_SOPP *iFmt) |
| : Inst_SOPP(iFmt, "s_set_gpr_idx_mode") |
| { |
| } // Inst_SOPP__S_SET_GPR_IDX_MODE |
| |
| Inst_SOPP__S_SET_GPR_IDX_MODE::~Inst_SOPP__S_SET_GPR_IDX_MODE() |
| { |
| } // ~Inst_SOPP__S_SET_GPR_IDX_MODE |
| |
| // M0[15:12] = SIMM4. |
| // Modify the mode used for vector GPR indexing. |
| // The raw contents of the source field are read and used to set the enable |
| // bits. SIMM4[0] = VSRC0_REL, SIMM4[1] = VSRC1_REL, SIMM4[2] = VSRC2_REL |
| // and SIMM4[3] = VDST_REL. |
| void |
| Inst_SOPP__S_SET_GPR_IDX_MODE::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SMEM__S_LOAD_DWORD::Inst_SMEM__S_LOAD_DWORD(InFmt_SMEM *iFmt) |
| : Inst_SMEM(iFmt, "s_load_dword") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| } // Inst_SMEM__S_LOAD_DWORD |
| |
| Inst_SMEM__S_LOAD_DWORD::~Inst_SMEM__S_LOAD_DWORD() |
| { |
| } // ~Inst_SMEM__S_LOAD_DWORD |
| |
| /** |
| * Read 1 dword from scalar data cache. If the offset is specified as an |
| * sgpr, the sgpr contains an unsigned byte offset (the 2 LSBs are |
| * ignored). If the offset is specified as an immediate 20-bit constant, |
| * the constant is an unsigned byte offset. |
| */ |
| void |
| Inst_SMEM__S_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| ScalarRegU32 offset(0); |
| ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); |
| |
| addr.read(); |
| |
| if (instData.IMM) { |
| offset = extData.OFFSET; |
| } else { |
| ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); |
| off_sgpr.read(); |
| offset = off_sgpr.rawData(); |
| } |
| |
| calcAddr(gpuDynInst, addr, offset); |
| |
| gpuDynInst->computeUnit()->scalarMemoryPipe |
| .getGMReqFIFO().push(gpuDynInst); |
| |
| wf->scalarRdGmReqsInPipe--; |
| wf->scalarOutstandingReqsRdGm++; |
| gpuDynInst->wavefront()->outstandingReqs++; |
| gpuDynInst->wavefront()->validateRequestCounters(); |
| } |
| |
| void |
| Inst_SMEM__S_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| initMemRead<1>(gpuDynInst); |
| } // initiateAcc |
| |
| void |
| Inst_SMEM__S_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDATA); |
| sdst.write(); |
| } // completeAcc |
| |
| Inst_SMEM__S_LOAD_DWORDX2::Inst_SMEM__S_LOAD_DWORDX2(InFmt_SMEM *iFmt) |
| : Inst_SMEM(iFmt, "s_load_dwordx2") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| } // Inst_SMEM__S_LOAD_DWORDX2 |
| |
| Inst_SMEM__S_LOAD_DWORDX2::~Inst_SMEM__S_LOAD_DWORDX2() |
| { |
| } // ~Inst_SMEM__S_LOAD_DWORDX2 |
| |
| /** |
| * Read 2 dwords from scalar data cache. See s_load_dword for details on |
| * the offset input. |
| */ |
| void |
| Inst_SMEM__S_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| ScalarRegU32 offset(0); |
| ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); |
| |
| addr.read(); |
| |
| if (instData.IMM) { |
| offset = extData.OFFSET; |
| } else { |
| ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); |
| off_sgpr.read(); |
| offset = off_sgpr.rawData(); |
| } |
| |
| calcAddr(gpuDynInst, addr, offset); |
| |
| gpuDynInst->computeUnit()->scalarMemoryPipe. |
| getGMReqFIFO().push(gpuDynInst); |
| |
| wf->scalarRdGmReqsInPipe--; |
| wf->scalarOutstandingReqsRdGm++; |
| gpuDynInst->wavefront()->outstandingReqs++; |
| gpuDynInst->wavefront()->validateRequestCounters(); |
| } |
| |
| void |
| Inst_SMEM__S_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| initMemRead<2>(gpuDynInst); |
| } // initiateAcc |
| |
| void |
| Inst_SMEM__S_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDATA); |
| sdst.write(); |
| } // completeAcc |
| |
| Inst_SMEM__S_LOAD_DWORDX4::Inst_SMEM__S_LOAD_DWORDX4(InFmt_SMEM *iFmt) |
| : Inst_SMEM(iFmt, "s_load_dwordx4") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| } // Inst_SMEM__S_LOAD_DWORDX4 |
| |
| Inst_SMEM__S_LOAD_DWORDX4::~Inst_SMEM__S_LOAD_DWORDX4() |
| { |
| } // ~Inst_SMEM__S_LOAD_DWORDX4 |
| |
| // Read 4 dwords from scalar data cache. See S_LOAD_DWORD for details on |
| // the offset input. |
| void |
| Inst_SMEM__S_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| ScalarRegU32 offset(0); |
| ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); |
| |
| addr.read(); |
| |
| if (instData.IMM) { |
| offset = extData.OFFSET; |
| } else { |
| ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); |
| off_sgpr.read(); |
| offset = off_sgpr.rawData(); |
| } |
| |
| calcAddr(gpuDynInst, addr, offset); |
| |
| gpuDynInst->computeUnit()->scalarMemoryPipe. |
| getGMReqFIFO().push(gpuDynInst); |
| |
| wf->scalarRdGmReqsInPipe--; |
| wf->scalarOutstandingReqsRdGm++; |
| gpuDynInst->wavefront()->outstandingReqs++; |
| gpuDynInst->wavefront()->validateRequestCounters(); |
| } |
| |
| void |
| Inst_SMEM__S_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| initMemRead<4>(gpuDynInst); |
| } // initiateAcc |
| |
| void |
| Inst_SMEM__S_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarOperandU128 sdst(gpuDynInst, instData.SDATA); |
| sdst.write(); |
| } // completeAcc |
| |
| Inst_SMEM__S_LOAD_DWORDX8::Inst_SMEM__S_LOAD_DWORDX8(InFmt_SMEM *iFmt) |
| : Inst_SMEM(iFmt, "s_load_dwordx8") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| } // Inst_SMEM__S_LOAD_DWORDX8 |
| |
| Inst_SMEM__S_LOAD_DWORDX8::~Inst_SMEM__S_LOAD_DWORDX8() |
| { |
| } // ~Inst_SMEM__S_LOAD_DWORDX8 |
| |
| // Read 8 dwords from scalar data cache. See S_LOAD_DWORD for details on |
| // the offset input. |
| void |
| Inst_SMEM__S_LOAD_DWORDX8::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| ScalarRegU32 offset(0); |
| ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); |
| |
| addr.read(); |
| |
| if (instData.IMM) { |
| offset = extData.OFFSET; |
| } else { |
| ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); |
| off_sgpr.read(); |
| offset = off_sgpr.rawData(); |
| } |
| |
| calcAddr(gpuDynInst, addr, offset); |
| |
| gpuDynInst->computeUnit()->scalarMemoryPipe. |
| getGMReqFIFO().push(gpuDynInst); |
| |
| wf->scalarRdGmReqsInPipe--; |
| wf->scalarOutstandingReqsRdGm++; |
| gpuDynInst->wavefront()->outstandingReqs++; |
| gpuDynInst->wavefront()->validateRequestCounters(); |
| } |
| |
| void |
| Inst_SMEM__S_LOAD_DWORDX8::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| initMemRead<8>(gpuDynInst); |
| } // initiateAcc |
| |
| void |
| Inst_SMEM__S_LOAD_DWORDX8::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarOperandU256 sdst(gpuDynInst, instData.SDATA); |
| sdst.write(); |
| } // completeAcc |
| |
| Inst_SMEM__S_LOAD_DWORDX16::Inst_SMEM__S_LOAD_DWORDX16(InFmt_SMEM *iFmt) |
| : Inst_SMEM(iFmt, "s_load_dwordx16") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| } // Inst_SMEM__S_LOAD_DWORDX16 |
| |
| Inst_SMEM__S_LOAD_DWORDX16::~Inst_SMEM__S_LOAD_DWORDX16() |
| { |
| } // ~Inst_SMEM__S_LOAD_DWORDX16 |
| |
| // Read 16 dwords from scalar data cache. See S_LOAD_DWORD for details on |
| // the offset input. |
| void |
| Inst_SMEM__S_LOAD_DWORDX16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| ScalarRegU32 offset(0); |
| ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); |
| |
| addr.read(); |
| |
| if (instData.IMM) { |
| offset = extData.OFFSET; |
| } else { |
| ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); |
| off_sgpr.read(); |
| offset = off_sgpr.rawData(); |
| } |
| |
| calcAddr(gpuDynInst, addr, offset); |
| |
| gpuDynInst->computeUnit()->scalarMemoryPipe. |
| getGMReqFIFO().push(gpuDynInst); |
| |
| wf->scalarRdGmReqsInPipe--; |
| wf->scalarOutstandingReqsRdGm++; |
| gpuDynInst->wavefront()->outstandingReqs++; |
| gpuDynInst->wavefront()->validateRequestCounters(); |
| } |
| |
| void |
| Inst_SMEM__S_LOAD_DWORDX16::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| initMemRead<16>(gpuDynInst); |
| } // initiateAcc |
| |
| void |
| Inst_SMEM__S_LOAD_DWORDX16::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| ScalarOperandU512 sdst(gpuDynInst, instData.SDATA); |
| sdst.write(); |
| } // completeAcc |
| |
| Inst_SMEM__S_BUFFER_LOAD_DWORD::Inst_SMEM__S_BUFFER_LOAD_DWORD( |
| InFmt_SMEM *iFmt) |
| : Inst_SMEM(iFmt, "s_buffer_load_dword") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| } // Inst_SMEM__S_BUFFER_LOAD_DWORD |
| |
| Inst_SMEM__S_BUFFER_LOAD_DWORD::~Inst_SMEM__S_BUFFER_LOAD_DWORD() |
| { |
| } // ~Inst_SMEM__S_BUFFER_LOAD_DWORD |
| |
| // Read 1 dword from scalar data cache. See S_LOAD_DWORD for details on the |
| // offset input. |
| void |
| Inst_SMEM__S_BUFFER_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| ScalarRegU32 offset(0); |
| ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE); |
| |
| rsrcDesc.read(); |
| |
| if (instData.IMM) { |
| offset = extData.OFFSET; |
| } else { |
| ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); |
| off_sgpr.read(); |
| offset = off_sgpr.rawData(); |
| } |
| |
| calcAddr(gpuDynInst, rsrcDesc, offset); |
| |
| gpuDynInst->computeUnit()->scalarMemoryPipe |
| .getGMReqFIFO().push(gpuDynInst); |
| |
| wf->scalarRdGmReqsInPipe--; |
| wf->scalarOutstandingReqsRdGm++; |
| gpuDynInst->wavefront()->outstandingReqs++; |
| gpuDynInst->wavefront()->validateRequestCounters(); |
| } // execute |
| |
| void |
| Inst_SMEM__S_BUFFER_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| initMemRead<1>(gpuDynInst); |
| } // initiateAcc |
| |
| void |
| Inst_SMEM__S_BUFFER_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| // 1 request, size 32 |
| ScalarOperandU32 sdst(gpuDynInst, instData.SDATA); |
| sdst.write(); |
| } // completeAcc |
| |
| Inst_SMEM__S_BUFFER_LOAD_DWORDX2::Inst_SMEM__S_BUFFER_LOAD_DWORDX2( |
| InFmt_SMEM *iFmt) |
| : Inst_SMEM(iFmt, "s_buffer_load_dwordx2") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| } // Inst_SMEM__S_BUFFER_LOAD_DWORDX2 |
| |
| Inst_SMEM__S_BUFFER_LOAD_DWORDX2::~Inst_SMEM__S_BUFFER_LOAD_DWORDX2() |
| { |
| } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX2 |
| |
| // Read 2 dwords from scalar data cache. See S_LOAD_DWORD for details on |
| // the offset input. |
| void |
| Inst_SMEM__S_BUFFER_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| ScalarRegU32 offset(0); |
| ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE); |
| |
| rsrcDesc.read(); |
| |
| if (instData.IMM) { |
| offset = extData.OFFSET; |
| } else { |
| ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); |
| off_sgpr.read(); |
| offset = off_sgpr.rawData(); |
| } |
| |
| calcAddr(gpuDynInst, rsrcDesc, offset); |
| |
| gpuDynInst->computeUnit()->scalarMemoryPipe |
| .getGMReqFIFO().push(gpuDynInst); |
| |
| wf->scalarRdGmReqsInPipe--; |
| wf->scalarOutstandingReqsRdGm++; |
| gpuDynInst->wavefront()->outstandingReqs++; |
| gpuDynInst->wavefront()->validateRequestCounters(); |
| } // execute |
| |
| void |
| Inst_SMEM__S_BUFFER_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| initMemRead<2>(gpuDynInst); |
| } // initiateAcc |
| |
| void |
| Inst_SMEM__S_BUFFER_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| // use U64 because 2 requests, each size 32 |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDATA); |
| sdst.write(); |
| } // completeAcc |
| |
| Inst_SMEM__S_BUFFER_LOAD_DWORDX4::Inst_SMEM__S_BUFFER_LOAD_DWORDX4( |
| InFmt_SMEM *iFmt) |
| : Inst_SMEM(iFmt, "s_buffer_load_dwordx4") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| } // Inst_SMEM__S_BUFFER_LOAD_DWORDX4 |
| |
| Inst_SMEM__S_BUFFER_LOAD_DWORDX4::~Inst_SMEM__S_BUFFER_LOAD_DWORDX4() |
| { |
| } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX4 |
| |
| // Read 4 dwords from scalar data cache. See S_LOAD_DWORD for details on |
| // the offset input. |
| void |
| Inst_SMEM__S_BUFFER_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| ScalarRegU32 offset(0); |
| ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE); |
| |
| rsrcDesc.read(); |
| |
| if (instData.IMM) { |
| offset = extData.OFFSET; |
| } else { |
| ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); |
| off_sgpr.read(); |
| offset = off_sgpr.rawData(); |
| } |
| |
| calcAddr(gpuDynInst, rsrcDesc, offset); |
| |
| gpuDynInst->computeUnit()->scalarMemoryPipe |
| .getGMReqFIFO().push(gpuDynInst); |
| |
| wf->scalarRdGmReqsInPipe--; |
| wf->scalarOutstandingReqsRdGm++; |
| gpuDynInst->wavefront()->outstandingReqs++; |
| gpuDynInst->wavefront()->validateRequestCounters(); |
| } // execute |
| |
| void |
| Inst_SMEM__S_BUFFER_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| initMemRead<4>(gpuDynInst); |
| } // initiateAcc |
| |
| void |
| Inst_SMEM__S_BUFFER_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| // 4 requests, each size 32 |
| ScalarOperandU128 sdst(gpuDynInst, instData.SDATA); |
| sdst.write(); |
| } // completeAcc |
| |
| Inst_SMEM__S_BUFFER_LOAD_DWORDX8::Inst_SMEM__S_BUFFER_LOAD_DWORDX8( |
| InFmt_SMEM *iFmt) |
| : Inst_SMEM(iFmt, "s_buffer_load_dwordx8") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| } // Inst_SMEM__S_BUFFER_LOAD_DWORDX8 |
| |
| Inst_SMEM__S_BUFFER_LOAD_DWORDX8::~Inst_SMEM__S_BUFFER_LOAD_DWORDX8() |
| { |
| } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX8 |
| |
| // Read 8 dwords from scalar data cache. See S_LOAD_DWORD for details on |
| // the offset input. |
| void |
| Inst_SMEM__S_BUFFER_LOAD_DWORDX8::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| ScalarRegU32 offset(0); |
| ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE); |
| |
| rsrcDesc.read(); |
| |
| if (instData.IMM) { |
| offset = extData.OFFSET; |
| } else { |
| ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); |
| off_sgpr.read(); |
| offset = off_sgpr.rawData(); |
| } |
| |
| calcAddr(gpuDynInst, rsrcDesc, offset); |
| |
| gpuDynInst->computeUnit()->scalarMemoryPipe |
| .getGMReqFIFO().push(gpuDynInst); |
| |
| wf->scalarRdGmReqsInPipe--; |
| wf->scalarOutstandingReqsRdGm++; |
| gpuDynInst->wavefront()->outstandingReqs++; |
| gpuDynInst->wavefront()->validateRequestCounters(); |
| } // execute |
| |
| void |
| Inst_SMEM__S_BUFFER_LOAD_DWORDX8::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| initMemRead<8>(gpuDynInst); |
| } // initiateAcc |
| |
| void |
| Inst_SMEM__S_BUFFER_LOAD_DWORDX8::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| // 8 requests, each size 32 |
| ScalarOperandU256 sdst(gpuDynInst, instData.SDATA); |
| sdst.write(); |
| } // completeAcc |
| |
| Inst_SMEM__S_BUFFER_LOAD_DWORDX16::Inst_SMEM__S_BUFFER_LOAD_DWORDX16( |
| InFmt_SMEM *iFmt) |
| : Inst_SMEM(iFmt, "s_buffer_load_dwordx16") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| } // Inst_SMEM__S_BUFFER_LOAD_DWORDX16 |
| |
| Inst_SMEM__S_BUFFER_LOAD_DWORDX16::~Inst_SMEM__S_BUFFER_LOAD_DWORDX16() |
| { |
| } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX16 |
| |
| // Read 16 dwords from scalar data cache. See S_LOAD_DWORD for details on |
| // the offset input. |
| void |
| Inst_SMEM__S_BUFFER_LOAD_DWORDX16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| ScalarRegU32 offset(0); |
| ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE); |
| |
| rsrcDesc.read(); |
| |
| if (instData.IMM) { |
| offset = extData.OFFSET; |
| } else { |
| ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); |
| off_sgpr.read(); |
| offset = off_sgpr.rawData(); |
| } |
| |
| calcAddr(gpuDynInst, rsrcDesc, offset); |
| |
| gpuDynInst->computeUnit()->scalarMemoryPipe |
| .getGMReqFIFO().push(gpuDynInst); |
| |
| wf->scalarRdGmReqsInPipe--; |
| wf->scalarOutstandingReqsRdGm++; |
| gpuDynInst->wavefront()->outstandingReqs++; |
| gpuDynInst->wavefront()->validateRequestCounters(); |
| } // execute |
| |
| void |
| Inst_SMEM__S_BUFFER_LOAD_DWORDX16::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| initMemRead<16>(gpuDynInst); |
| } // initiateAcc |
| |
| void |
| Inst_SMEM__S_BUFFER_LOAD_DWORDX16::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| // 16 requests, each size 32 |
| ScalarOperandU512 sdst(gpuDynInst, instData.SDATA); |
| sdst.write(); |
| } // completeAcc |
| |
| Inst_SMEM__S_STORE_DWORD::Inst_SMEM__S_STORE_DWORD(InFmt_SMEM *iFmt) |
| : Inst_SMEM(iFmt, "s_store_dword") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| } // Inst_SMEM__S_STORE_DWORD |
| |
| Inst_SMEM__S_STORE_DWORD::~Inst_SMEM__S_STORE_DWORD() |
| { |
| } // ~Inst_SMEM__S_STORE_DWORD |
| |
| // Write 1 dword to scalar data cache. |
| // If the offset is specified as an SGPR, the SGPR contains an unsigned |
| // BYTE offset (the 2 LSBs are ignored). |
| // If the offset is specified as an immediate 20-bit constant, the |
| // constant is an unsigned BYTE offset. |
| void |
| Inst_SMEM__S_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| ScalarRegU32 offset(0); |
| ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); |
| |
| addr.read(); |
| |
| if (instData.IMM) { |
| offset = extData.OFFSET; |
| } else { |
| ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); |
| off_sgpr.read(); |
| offset = off_sgpr.rawData(); |
| } |
| |
| calcAddr(gpuDynInst, addr, offset); |
| |
| gpuDynInst->computeUnit()->scalarMemoryPipe. |
| getGMReqFIFO().push(gpuDynInst); |
| |
| wf->scalarWrGmReqsInPipe--; |
| wf->scalarOutstandingReqsWrGm++; |
| gpuDynInst->wavefront()->outstandingReqs++; |
| gpuDynInst->wavefront()->validateRequestCounters(); |
| } |
| |
| void |
| Inst_SMEM__S_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 sdata(gpuDynInst, instData.SDATA); |
| sdata.read(); |
| std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(), |
| sizeof(ScalarRegU32)); |
| initMemWrite<1>(gpuDynInst); |
| } // initiateAcc |
| |
| void |
| Inst_SMEM__S_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // completeAcc |
| |
| Inst_SMEM__S_STORE_DWORDX2::Inst_SMEM__S_STORE_DWORDX2(InFmt_SMEM *iFmt) |
| : Inst_SMEM(iFmt, "s_store_dwordx2") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| } // Inst_SMEM__S_STORE_DWORDX2 |
| |
| Inst_SMEM__S_STORE_DWORDX2::~Inst_SMEM__S_STORE_DWORDX2() |
| { |
| } // ~Inst_SMEM__S_STORE_DWORDX2 |
| |
| // Write 2 dwords to scalar data cache. See S_STORE_DWORD for details on |
| // the offset input. |
| void |
| Inst_SMEM__S_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| ScalarRegU32 offset(0); |
| ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); |
| |
| addr.read(); |
| |
| if (instData.IMM) { |
| offset = extData.OFFSET; |
| } else { |
| ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); |
| off_sgpr.read(); |
| offset = off_sgpr.rawData(); |
| } |
| |
| calcAddr(gpuDynInst, addr, offset); |
| |
| gpuDynInst->computeUnit()->scalarMemoryPipe. |
| getGMReqFIFO().push(gpuDynInst); |
| |
| wf->scalarWrGmReqsInPipe--; |
| wf->scalarOutstandingReqsWrGm++; |
| gpuDynInst->wavefront()->outstandingReqs++; |
| gpuDynInst->wavefront()->validateRequestCounters(); |
| } |
| |
| void |
| Inst_SMEM__S_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU64 sdata(gpuDynInst, instData.SDATA); |
| sdata.read(); |
| std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(), |
| sizeof(ScalarRegU64)); |
| initMemWrite<2>(gpuDynInst); |
| } // initiateAcc |
| |
| void |
| Inst_SMEM__S_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // completeAcc |
| |
| Inst_SMEM__S_STORE_DWORDX4::Inst_SMEM__S_STORE_DWORDX4(InFmt_SMEM *iFmt) |
| : Inst_SMEM(iFmt, "s_store_dwordx4") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| } // Inst_SMEM__S_STORE_DWORDX4 |
| |
| Inst_SMEM__S_STORE_DWORDX4::~Inst_SMEM__S_STORE_DWORDX4() |
| { |
| } // ~Inst_SMEM__S_STORE_DWORDX4 |
| |
| // Write 4 dwords to scalar data cache. See S_STORE_DWORD for details on |
| // the offset input. |
| void |
| Inst_SMEM__S_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| ScalarRegU32 offset(0); |
| ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); |
| |
| addr.read(); |
| |
| if (instData.IMM) { |
| offset = extData.OFFSET; |
| } else { |
| ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); |
| off_sgpr.read(); |
| offset = off_sgpr.rawData(); |
| } |
| |
| calcAddr(gpuDynInst, addr, offset); |
| |
| gpuDynInst->computeUnit()->scalarMemoryPipe. |
| getGMReqFIFO().push(gpuDynInst); |
| |
| wf->scalarWrGmReqsInPipe--; |
| wf->scalarOutstandingReqsWrGm++; |
| gpuDynInst->wavefront()->outstandingReqs++; |
| gpuDynInst->wavefront()->validateRequestCounters(); |
| } |
| |
| void |
| Inst_SMEM__S_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU128 sdata(gpuDynInst, instData.SDATA); |
| sdata.read(); |
| std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(), |
| 4 * sizeof(ScalarRegU32)); |
| initMemWrite<4>(gpuDynInst); |
| } // initiateAcc |
| |
| void |
| Inst_SMEM__S_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // completeAcc |
| |
| Inst_SMEM__S_BUFFER_STORE_DWORD::Inst_SMEM__S_BUFFER_STORE_DWORD( |
| InFmt_SMEM *iFmt) |
| : Inst_SMEM(iFmt, "s_buffer_store_dword") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| } // Inst_SMEM__S_BUFFER_STORE_DWORD |
| |
| Inst_SMEM__S_BUFFER_STORE_DWORD::~Inst_SMEM__S_BUFFER_STORE_DWORD() |
| { |
| } // ~Inst_SMEM__S_BUFFER_STORE_DWORD |
| |
| // Write 1 dword to scalar data cache. See S_STORE_DWORD for details on the |
| // offset input. |
| void |
| Inst_SMEM__S_BUFFER_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_SMEM__S_BUFFER_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_SMEM__S_BUFFER_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // completeAcc |
| |
| Inst_SMEM__S_BUFFER_STORE_DWORDX2::Inst_SMEM__S_BUFFER_STORE_DWORDX2( |
| InFmt_SMEM *iFmt) |
| : Inst_SMEM(iFmt, "s_buffer_store_dwordx2") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| } // Inst_SMEM__S_BUFFER_STORE_DWORDX2 |
| |
| Inst_SMEM__S_BUFFER_STORE_DWORDX2::~Inst_SMEM__S_BUFFER_STORE_DWORDX2() |
| { |
| } // ~Inst_SMEM__S_BUFFER_STORE_DWORDX2 |
| |
| // Write 2 dwords to scalar data cache. See S_STORE_DWORD for details on |
| // the offset input. |
| void |
| Inst_SMEM__S_BUFFER_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_SMEM__S_BUFFER_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_SMEM__S_BUFFER_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // completeAcc |
| |
| Inst_SMEM__S_BUFFER_STORE_DWORDX4::Inst_SMEM__S_BUFFER_STORE_DWORDX4( |
| InFmt_SMEM *iFmt) |
| : Inst_SMEM(iFmt, "s_buffer_store_dwordx4") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| } // Inst_SMEM__S_BUFFER_STORE_DWORDX4 |
| |
| Inst_SMEM__S_BUFFER_STORE_DWORDX4::~Inst_SMEM__S_BUFFER_STORE_DWORDX4() |
| { |
| } // ~Inst_SMEM__S_BUFFER_STORE_DWORDX4 |
| |
| // Write 4 dwords to scalar data cache. See S_STORE_DWORD for details on |
| // the offset input. |
| void |
| Inst_SMEM__S_BUFFER_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_SMEM__S_BUFFER_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_SMEM__S_BUFFER_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // completeAcc |
| |
| Inst_SMEM__S_DCACHE_INV::Inst_SMEM__S_DCACHE_INV(InFmt_SMEM *iFmt) |
| : Inst_SMEM(iFmt, "s_dcache_inv") |
| { |
| } // Inst_SMEM__S_DCACHE_INV |
| |
| Inst_SMEM__S_DCACHE_INV::~Inst_SMEM__S_DCACHE_INV() |
| { |
| } // ~Inst_SMEM__S_DCACHE_INV |
| |
| // Invalidate the scalar data cache. |
| void |
| Inst_SMEM__S_DCACHE_INV::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SMEM__S_DCACHE_WB::Inst_SMEM__S_DCACHE_WB(InFmt_SMEM *iFmt) |
| : Inst_SMEM(iFmt, "s_dcache_wb") |
| { |
| } // Inst_SMEM__S_DCACHE_WB |
| |
| Inst_SMEM__S_DCACHE_WB::~Inst_SMEM__S_DCACHE_WB() |
| { |
| } // ~Inst_SMEM__S_DCACHE_WB |
| |
| // Write back dirty data in the scalar data cache. |
| void |
| Inst_SMEM__S_DCACHE_WB::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SMEM__S_DCACHE_INV_VOL::Inst_SMEM__S_DCACHE_INV_VOL(InFmt_SMEM *iFmt) |
| : Inst_SMEM(iFmt, "s_dcache_inv_vol") |
| { |
| } // Inst_SMEM__S_DCACHE_INV_VOL |
| |
| Inst_SMEM__S_DCACHE_INV_VOL::~Inst_SMEM__S_DCACHE_INV_VOL() |
| { |
| } // ~Inst_SMEM__S_DCACHE_INV_VOL |
| |
| // Invalidate the scalar data cache volatile lines. |
| void |
| Inst_SMEM__S_DCACHE_INV_VOL::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SMEM__S_DCACHE_WB_VOL::Inst_SMEM__S_DCACHE_WB_VOL(InFmt_SMEM *iFmt) |
| : Inst_SMEM(iFmt, "s_dcache_wb_vol") |
| { |
| } // Inst_SMEM__S_DCACHE_WB_VOL |
| |
| Inst_SMEM__S_DCACHE_WB_VOL::~Inst_SMEM__S_DCACHE_WB_VOL() |
| { |
| } // ~Inst_SMEM__S_DCACHE_WB_VOL |
| |
| // Write back dirty data in the scalar data cache volatile lines. |
| void |
| Inst_SMEM__S_DCACHE_WB_VOL::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SMEM__S_MEMTIME::Inst_SMEM__S_MEMTIME(InFmt_SMEM *iFmt) |
| : Inst_SMEM(iFmt, "s_memtime") |
| { |
| } // Inst_SMEM__S_MEMTIME |
| |
| Inst_SMEM__S_MEMTIME::~Inst_SMEM__S_MEMTIME() |
| { |
| } // ~Inst_SMEM__S_MEMTIME |
| |
| // Return current 64-bit timestamp. |
| void |
| Inst_SMEM__S_MEMTIME::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SMEM__S_MEMREALTIME::Inst_SMEM__S_MEMREALTIME(InFmt_SMEM *iFmt) |
| : Inst_SMEM(iFmt, "s_memrealtime") |
| { |
| } // Inst_SMEM__S_MEMREALTIME |
| |
| Inst_SMEM__S_MEMREALTIME::~Inst_SMEM__S_MEMREALTIME() |
| { |
| } // ~Inst_SMEM__S_MEMREALTIME |
| |
| // Return current 64-bit RTC. |
| void |
| Inst_SMEM__S_MEMREALTIME::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SMEM__S_ATC_PROBE::Inst_SMEM__S_ATC_PROBE(InFmt_SMEM *iFmt) |
| : Inst_SMEM(iFmt, "s_atc_probe") |
| { |
| } // Inst_SMEM__S_ATC_PROBE |
| |
| Inst_SMEM__S_ATC_PROBE::~Inst_SMEM__S_ATC_PROBE() |
| { |
| } // ~Inst_SMEM__S_ATC_PROBE |
| |
| void |
| Inst_SMEM__S_ATC_PROBE::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_SMEM__S_ATC_PROBE_BUFFER::Inst_SMEM__S_ATC_PROBE_BUFFER( |
| InFmt_SMEM *iFmt) |
| : Inst_SMEM(iFmt, "s_atc_probe_buffer") |
| { |
| } // Inst_SMEM__S_ATC_PROBE_BUFFER |
| |
| Inst_SMEM__S_ATC_PROBE_BUFFER::~Inst_SMEM__S_ATC_PROBE_BUFFER() |
| { |
| } // ~Inst_SMEM__S_ATC_PROBE_BUFFER |
| |
| void |
| Inst_SMEM__S_ATC_PROBE_BUFFER::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP2__V_CNDMASK_B32::Inst_VOP2__V_CNDMASK_B32(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_cndmask_b32") |
| { |
| setFlag(ALU); |
| setFlag(ReadsVCC); |
| } // Inst_VOP2__V_CNDMASK_B32 |
| |
| Inst_VOP2__V_CNDMASK_B32::~Inst_VOP2__V_CNDMASK_B32() |
| { |
| } // ~Inst_VOP2__V_CNDMASK_B32 |
| |
| // D.u = (VCC[i] ? S1.u : S0.u) (i = threadID in wave); VOP3: specify VCC |
| // as a scalar GPR in S2. |
| void |
| Inst_VOP2__V_CNDMASK_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| vcc.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] |
| = bits(vcc.rawData(), lane) ? src1[lane] : src0[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_ADD_F32::Inst_VOP2__V_ADD_F32(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_add_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP2__V_ADD_F32 |
| |
| Inst_VOP2__V_ADD_F32::~Inst_VOP2__V_ADD_F32() |
| { |
| } // ~Inst_VOP2__V_ADD_F32 |
| |
| // D.f = S0.f + S1.f. |
| void |
| Inst_VOP2__V_ADD_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| VecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| if (isDPPInst()) { |
| VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); |
| src0_dpp.read(); |
| |
| DPRINTF(GCN3, "Handling V_ADD_F32 SRC DPP. SRC0: register v[%d], " |
| "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, " |
| "SRC1_ABS: %d, SRC1_NEG: %d, BOUND_CTRL: %d, " |
| "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0, |
| extData.iFmt_VOP_DPP.DPP_CTRL, |
| extData.iFmt_VOP_DPP.SRC0_ABS, |
| extData.iFmt_VOP_DPP.SRC0_NEG, |
| extData.iFmt_VOP_DPP.SRC1_ABS, |
| extData.iFmt_VOP_DPP.SRC1_NEG, |
| extData.iFmt_VOP_DPP.BOUND_CTRL, |
| extData.iFmt_VOP_DPP.BANK_MASK, |
| extData.iFmt_VOP_DPP.ROW_MASK); |
| |
| processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src0_dpp[lane] + src1[lane]; |
| } |
| } |
| } else { |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src0[lane] + src1[lane]; |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_SUB_F32::Inst_VOP2__V_SUB_F32(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_sub_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP2__V_SUB_F32 |
| |
| Inst_VOP2__V_SUB_F32::~Inst_VOP2__V_SUB_F32() |
| { |
| } // ~Inst_VOP2__V_SUB_F32 |
| |
| // D.f = S0.f - S1.f. |
| void |
| Inst_VOP2__V_SUB_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src0[lane] - src1[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_SUBREV_F32::Inst_VOP2__V_SUBREV_F32(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_subrev_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP2__V_SUBREV_F32 |
| |
| Inst_VOP2__V_SUBREV_F32::~Inst_VOP2__V_SUBREV_F32() |
| { |
| } // ~Inst_VOP2__V_SUBREV_F32 |
| |
| // D.f = S1.f - S0.f. |
| void |
| Inst_VOP2__V_SUBREV_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src1[lane] - src0[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_MUL_LEGACY_F32::Inst_VOP2__V_MUL_LEGACY_F32(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_mul_legacy_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP2__V_MUL_LEGACY_F32 |
| |
| Inst_VOP2__V_MUL_LEGACY_F32::~Inst_VOP2__V_MUL_LEGACY_F32() |
| { |
| } // ~Inst_VOP2__V_MUL_LEGACY_F32 |
| |
| // D.f = S0.f * S1.f |
| void |
| Inst_VOP2__V_MUL_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src0[lane] * src1[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_MUL_F32::Inst_VOP2__V_MUL_F32(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_mul_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP2__V_MUL_F32 |
| |
| Inst_VOP2__V_MUL_F32::~Inst_VOP2__V_MUL_F32() |
| { |
| } // ~Inst_VOP2__V_MUL_F32 |
| |
| // D.f = S0.f * S1.f. |
| void |
| Inst_VOP2__V_MUL_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| if (std::isnan(src0[lane]) || |
| std::isnan(src1[lane])) { |
| vdst[lane] = NAN; |
| } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || |
| std::fpclassify(src0[lane]) == FP_ZERO) && |
| !std::signbit(src0[lane])) { |
| if (std::isinf(src1[lane])) { |
| vdst[lane] = NAN; |
| } else if (!std::signbit(src1[lane])) { |
| vdst[lane] = +0.0; |
| } else { |
| vdst[lane] = -0.0; |
| } |
| } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || |
| std::fpclassify(src0[lane]) == FP_ZERO) && |
| std::signbit(src0[lane])) { |
| if (std::isinf(src1[lane])) { |
| vdst[lane] = NAN; |
| } else if (std::signbit(src1[lane])) { |
| vdst[lane] = +0.0; |
| } else { |
| vdst[lane] = -0.0; |
| } |
| } else if (std::isinf(src0[lane]) && |
| !std::signbit(src0[lane])) { |
| if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || |
| std::fpclassify(src1[lane]) == FP_ZERO) { |
| vdst[lane] = NAN; |
| } else if (!std::signbit(src1[lane])) { |
| vdst[lane] = +INFINITY; |
| } else { |
| vdst[lane] = -INFINITY; |
| } |
| } else if (std::isinf(src0[lane]) && |
| std::signbit(src0[lane])) { |
| if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || |
| std::fpclassify(src1[lane]) == FP_ZERO) { |
| vdst[lane] = NAN; |
| } else if (std::signbit(src1[lane])) { |
| vdst[lane] = +INFINITY; |
| } else { |
| vdst[lane] = -INFINITY; |
| } |
| } else { |
| vdst[lane] = src0[lane] * src1[lane]; |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_MUL_I32_I24::Inst_VOP2__V_MUL_I32_I24(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_mul_i32_i24") |
| { |
| setFlag(ALU); |
| } // Inst_VOP2__V_MUL_I32_I24 |
| |
| Inst_VOP2__V_MUL_I32_I24::~Inst_VOP2__V_MUL_I32_I24() |
| { |
| } // ~Inst_VOP2__V_MUL_I32_I24 |
| |
| // D.i = S0.i[23:0] * S1.i[23:0]. |
| void |
| Inst_VOP2__V_MUL_I32_I24::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); |
| VecOperandI32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = szext<24>(src0[lane]) * szext<24>(src1[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_MUL_HI_I32_I24::Inst_VOP2__V_MUL_HI_I32_I24(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_mul_hi_i32_i24") |
| { |
| setFlag(ALU); |
| } // Inst_VOP2__V_MUL_HI_I32_I24 |
| |
| Inst_VOP2__V_MUL_HI_I32_I24::~Inst_VOP2__V_MUL_HI_I32_I24() |
| { |
| } // ~Inst_VOP2__V_MUL_HI_I32_I24 |
| |
| // D.i = (S0.i[23:0] * S1.i[23:0]) >> 32. |
| void |
| Inst_VOP2__V_MUL_HI_I32_I24::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); |
| VecOperandI32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| VecElemI64 tmp_src0 = (VecElemI64)szext<24>(src0[lane]); |
| VecElemI64 tmp_src1 = (VecElemI64)szext<24>(src1[lane]); |
| |
| vdst[lane] = (VecElemI32)((tmp_src0 * tmp_src1) >> 32); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_MUL_U32_U24::Inst_VOP2__V_MUL_U32_U24(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_mul_u32_u24") |
| { |
| setFlag(ALU); |
| } // Inst_VOP2__V_MUL_U32_U24 |
| |
| Inst_VOP2__V_MUL_U32_U24::~Inst_VOP2__V_MUL_U32_U24() |
| { |
| } // ~Inst_VOP2__V_MUL_U32_U24 |
| |
| // D.u = S0.u[23:0] * S1.u[23:0]. |
| void |
| Inst_VOP2__V_MUL_U32_U24::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); |
| VecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| if (isSDWAInst()) { |
| VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); |
| // use copies of original src0, src1, and dest during selecting |
| VecOperandU32 origSrc0_sdwa(gpuDynInst, |
| extData.iFmt_VOP_SDWA.SRC0); |
| VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1); |
| VecOperandU32 origVdst(gpuDynInst, instData.VDST); |
| |
| src0_sdwa.read(); |
| origSrc0_sdwa.read(); |
| origSrc1.read(); |
| |
| DPRINTF(GCN3, "Handling V_MUL_U32_U24 SRC SDWA. SRC0: register " |
| "v[%d], DST_SEL: %d, DST_UNUSED: %d, CLAMP: %d, SRC0_SEL: " |
| "%d, SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: " |
| "%d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n", |
| extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL, |
| extData.iFmt_VOP_SDWA.DST_UNUSED, |
| extData.iFmt_VOP_SDWA.CLAMP, |
| extData.iFmt_VOP_SDWA.SRC0_SEL, |
| extData.iFmt_VOP_SDWA.SRC0_SEXT, |
| extData.iFmt_VOP_SDWA.SRC0_NEG, |
| extData.iFmt_VOP_SDWA.SRC0_ABS, |
| extData.iFmt_VOP_SDWA.SRC1_SEL, |
| extData.iFmt_VOP_SDWA.SRC1_SEXT, |
| extData.iFmt_VOP_SDWA.SRC1_NEG, |
| extData.iFmt_VOP_SDWA.SRC1_ABS); |
| |
| processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa, |
| src1, origSrc1); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = bits(src0_sdwa[lane], 23, 0) * |
| bits(src1[lane], 23, 0); |
| origVdst[lane] = vdst[lane]; // keep copy consistent |
| } |
| } |
| |
| processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst); |
| } else { |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = bits(src0[lane], 23, 0) * |
| bits(src1[lane], 23, 0); |
| } |
| } |
| } |
| |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_MUL_HI_U32_U24::Inst_VOP2__V_MUL_HI_U32_U24(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_mul_hi_u32_u24") |
| { |
| setFlag(ALU); |
| } // Inst_VOP2__V_MUL_HI_U32_U24 |
| |
| Inst_VOP2__V_MUL_HI_U32_U24::~Inst_VOP2__V_MUL_HI_U32_U24() |
| { |
| } // ~Inst_VOP2__V_MUL_HI_U32_U24 |
| |
| // D.i = (S0.u[23:0] * S1.u[23:0]) >> 32. |
| void |
| Inst_VOP2__V_MUL_HI_U32_U24::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0); |
| VecElemU64 tmp_src1 = (VecElemU64)bits(src1[lane], 23, 0); |
| vdst[lane] = (VecElemU32)((tmp_src0 * tmp_src1) >> 32); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_MIN_F32::Inst_VOP2__V_MIN_F32(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_min_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP2__V_MIN_F32 |
| |
| Inst_VOP2__V_MIN_F32::~Inst_VOP2__V_MIN_F32() |
| { |
| } // ~Inst_VOP2__V_MIN_F32 |
| |
| // D.f = (S0.f < S1.f ? S0.f : S1.f). |
| void |
| Inst_VOP2__V_MIN_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::fmin(src0[lane], src1[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_MAX_F32::Inst_VOP2__V_MAX_F32(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_max_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP2__V_MAX_F32 |
| |
| Inst_VOP2__V_MAX_F32::~Inst_VOP2__V_MAX_F32() |
| { |
| } // ~Inst_VOP2__V_MAX_F32 |
| |
| // D.f = (S0.f >= S1.f ? S0.f : S1.f). |
| void |
| Inst_VOP2__V_MAX_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::fmax(src0[lane], src1[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_MIN_I32::Inst_VOP2__V_MIN_I32(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_min_i32") |
| { |
| setFlag(ALU); |
| } // Inst_VOP2__V_MIN_I32 |
| |
| Inst_VOP2__V_MIN_I32::~Inst_VOP2__V_MIN_I32() |
| { |
| } // ~Inst_VOP2__V_MIN_I32 |
| |
| // D.i = min(S0.i, S1.i). |
| void |
| Inst_VOP2__V_MIN_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); |
| VecOperandI32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::min(src0[lane], src1[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_MAX_I32::Inst_VOP2__V_MAX_I32(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_max_i32") |
| { |
| setFlag(ALU); |
| } // Inst_VOP2__V_MAX_I32 |
| |
| Inst_VOP2__V_MAX_I32::~Inst_VOP2__V_MAX_I32() |
| { |
| } // ~Inst_VOP2__V_MAX_I32 |
| |
| // D.i = max(S0.i, S1.i). |
| void |
| Inst_VOP2__V_MAX_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); |
| VecOperandI32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::max(src0[lane], src1[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_MIN_U32::Inst_VOP2__V_MIN_U32(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_min_u32") |
| { |
| setFlag(ALU); |
| } // Inst_VOP2__V_MIN_U32 |
| |
| Inst_VOP2__V_MIN_U32::~Inst_VOP2__V_MIN_U32() |
| { |
| } // ~Inst_VOP2__V_MIN_U32 |
| |
| // D.u = min(S0.u, S1.u). |
| void |
| Inst_VOP2__V_MIN_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::min(src0[lane], src1[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_MAX_U32::Inst_VOP2__V_MAX_U32(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_max_u32") |
| { |
| setFlag(ALU); |
| } // Inst_VOP2__V_MAX_U32 |
| |
| Inst_VOP2__V_MAX_U32::~Inst_VOP2__V_MAX_U32() |
| { |
| } // ~Inst_VOP2__V_MAX_U32 |
| |
| // D.u = max(S0.u, S1.u). |
| void |
| Inst_VOP2__V_MAX_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::max(src0[lane], src1[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_LSHRREV_B32::Inst_VOP2__V_LSHRREV_B32(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_lshrrev_b32") |
| { |
| setFlag(ALU); |
| } // Inst_VOP2__V_LSHRREV_B32 |
| |
| Inst_VOP2__V_LSHRREV_B32::~Inst_VOP2__V_LSHRREV_B32() |
| { |
| } // ~Inst_VOP2__V_LSHRREV_B32 |
| |
| // D.u = S1.u >> S0.u[4:0]. |
| // The vacated bits are set to zero. |
| void |
| Inst_VOP2__V_LSHRREV_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_ASHRREV_I32::Inst_VOP2__V_ASHRREV_I32(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_ashrrev_i32") |
| { |
| setFlag(ALU); |
| } // Inst_VOP2__V_ASHRREV_I32 |
| |
| Inst_VOP2__V_ASHRREV_I32::~Inst_VOP2__V_ASHRREV_I32() |
| { |
| } // ~Inst_VOP2__V_ASHRREV_I32 |
| |
| // D.i = signext(S1.i) >> S0.i[4:0]. |
| // The vacated bits are set to the sign bit of the input value. |
| void |
| Inst_VOP2__V_ASHRREV_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); |
| VecOperandI32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_LSHLREV_B32::Inst_VOP2__V_LSHLREV_B32(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_lshlrev_b32") |
| { |
| setFlag(ALU); |
| } // Inst_VOP2__V_LSHLREV_B32 |
| |
| Inst_VOP2__V_LSHLREV_B32::~Inst_VOP2__V_LSHLREV_B32() |
| { |
| } // ~Inst_VOP2__V_LSHLREV_B32 |
| |
| // D.u = S1.u << S0.u[4:0]. |
| void |
| Inst_VOP2__V_LSHLREV_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); |
| VecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| if (isSDWAInst()) { |
| VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); |
| // use copies of original src0, src1, and vdst during selecting |
| VecOperandU32 origSrc0_sdwa(gpuDynInst, |
| extData.iFmt_VOP_SDWA.SRC0); |
| VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1); |
| VecOperandU32 origVdst(gpuDynInst, instData.VDST); |
| |
| src0_sdwa.read(); |
| origSrc0_sdwa.read(); |
| origSrc1.read(); |
| |
| DPRINTF(GCN3, "Handling V_LSHLREV_B32 SRC SDWA. SRC0: register " |
| "v[%d], DST_SEL: %d, DST_UNUSED: %d, CLAMP: %d, SRC0_SEL: " |
| "%d, SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: " |
| "%d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n", |
| extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL, |
| extData.iFmt_VOP_SDWA.DST_UNUSED, |
| extData.iFmt_VOP_SDWA.CLAMP, |
| extData.iFmt_VOP_SDWA.SRC0_SEL, |
| extData.iFmt_VOP_SDWA.SRC0_SEXT, |
| extData.iFmt_VOP_SDWA.SRC0_NEG, |
| extData.iFmt_VOP_SDWA.SRC0_ABS, |
| extData.iFmt_VOP_SDWA.SRC1_SEL, |
| extData.iFmt_VOP_SDWA.SRC1_SEXT, |
| extData.iFmt_VOP_SDWA.SRC1_NEG, |
| extData.iFmt_VOP_SDWA.SRC1_ABS); |
| |
| processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa, |
| src1, origSrc1); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src1[lane] << bits(src0_sdwa[lane], 4, 0); |
| origVdst[lane] = vdst[lane]; // keep copy consistent |
| } |
| } |
| |
| processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst); |
| } else { |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src1[lane] << bits(src0[lane], 4, 0); |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_AND_B32::Inst_VOP2__V_AND_B32(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_and_b32") |
| { |
| setFlag(ALU); |
| } // Inst_VOP2__V_AND_B32 |
| |
| Inst_VOP2__V_AND_B32::~Inst_VOP2__V_AND_B32() |
| { |
| } // ~Inst_VOP2__V_AND_B32 |
| |
| // D.u = S0.u & S1.u. |
| // Input and output modifiers not supported. |
| void |
| Inst_VOP2__V_AND_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src0[lane] & src1[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_OR_B32::Inst_VOP2__V_OR_B32(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_or_b32") |
| { |
| setFlag(ALU); |
| } // Inst_VOP2__V_OR_B32 |
| |
| Inst_VOP2__V_OR_B32::~Inst_VOP2__V_OR_B32() |
| { |
| } // ~Inst_VOP2__V_OR_B32 |
| |
| // D.u = S0.u | S1.u. |
| // Input and output modifiers not supported. |
| void |
| Inst_VOP2__V_OR_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); |
| VecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| if (isSDWAInst()) { |
| VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); |
| // use copies of original src0, src1, and dest during selecting |
| VecOperandU32 origSrc0_sdwa(gpuDynInst, |
| extData.iFmt_VOP_SDWA.SRC0); |
| VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1); |
| VecOperandU32 origVdst(gpuDynInst, instData.VDST); |
| |
| src0_sdwa.read(); |
| origSrc0_sdwa.read(); |
| origSrc1.read(); |
| |
| DPRINTF(GCN3, "Handling V_OR_B32 SRC SDWA. SRC0: register v[%d], " |
| "DST_SEL: %d, DST_UNUSED: %d, CLAMP: %d, SRC0_SEL: %d, " |
| "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, " |
| "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n", |
| extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL, |
| extData.iFmt_VOP_SDWA.DST_UNUSED, |
| extData.iFmt_VOP_SDWA.CLAMP, |
| extData.iFmt_VOP_SDWA.SRC0_SEL, |
| extData.iFmt_VOP_SDWA.SRC0_SEXT, |
| extData.iFmt_VOP_SDWA.SRC0_NEG, |
| extData.iFmt_VOP_SDWA.SRC0_ABS, |
| extData.iFmt_VOP_SDWA.SRC1_SEL, |
| extData.iFmt_VOP_SDWA.SRC1_SEXT, |
| extData.iFmt_VOP_SDWA.SRC1_NEG, |
| extData.iFmt_VOP_SDWA.SRC1_ABS); |
| |
| processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa, |
| src1, origSrc1); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src0_sdwa[lane] | src1[lane]; |
| origVdst[lane] = vdst[lane]; // keep copy consistent |
| } |
| } |
| |
| processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst); |
| } else { |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src0[lane] | src1[lane]; |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_XOR_B32::Inst_VOP2__V_XOR_B32(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_xor_b32") |
| { |
| setFlag(ALU); |
| } // Inst_VOP2__V_XOR_B32 |
| |
| Inst_VOP2__V_XOR_B32::~Inst_VOP2__V_XOR_B32() |
| { |
| } // ~Inst_VOP2__V_XOR_B32 |
| |
| // D.u = S0.u ^ S1.u. |
| // Input and output modifiers not supported. |
| void |
| Inst_VOP2__V_XOR_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src0[lane] ^ src1[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_MAC_F32::Inst_VOP2__V_MAC_F32(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_mac_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| setFlag(MAC); |
| } // Inst_VOP2__V_MAC_F32 |
| |
| Inst_VOP2__V_MAC_F32::~Inst_VOP2__V_MAC_F32() |
| { |
| } // ~Inst_VOP2__V_MAC_F32 |
| |
| // D.f = S0.f * S1.f + D.f. |
| void |
| Inst_VOP2__V_MAC_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| VecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| vdst.read(); |
| |
| if (isDPPInst()) { |
| VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); |
| src0_dpp.read(); |
| |
| DPRINTF(GCN3, "Handling V_MAC_F32 SRC DPP. SRC0: register v[%d], " |
| "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, " |
| "SRC1_ABS: %d, SRC1_NEG: %d, BOUND_CTRL: %d, " |
| "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0, |
| extData.iFmt_VOP_DPP.DPP_CTRL, |
| extData.iFmt_VOP_DPP.SRC0_ABS, |
| extData.iFmt_VOP_DPP.SRC0_NEG, |
| extData.iFmt_VOP_DPP.SRC1_ABS, |
| extData.iFmt_VOP_DPP.SRC1_NEG, |
| extData.iFmt_VOP_DPP.BOUND_CTRL, |
| extData.iFmt_VOP_DPP.BANK_MASK, |
| extData.iFmt_VOP_DPP.ROW_MASK); |
| |
| processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::fma(src0_dpp[lane], src1[lane], |
| vdst[lane]); |
| } |
| } |
| } else { |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]); |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_MADMK_F32::Inst_VOP2__V_MADMK_F32(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_madmk_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| setFlag(MAD); |
| } // Inst_VOP2__V_MADMK_F32 |
| |
| Inst_VOP2__V_MADMK_F32::~Inst_VOP2__V_MADMK_F32() |
| { |
| } // ~Inst_VOP2__V_MADMK_F32 |
| |
| // D.f = S0.f * K + S1.f; K is a 32-bit inline constant. |
| // This opcode cannot use the input/output modifiers. |
| void |
| Inst_VOP2__V_MADMK_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| VecElemF32 k = extData.imm_f32; |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::fma(src0[lane], k, src1[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_MADAK_F32::Inst_VOP2__V_MADAK_F32(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_madak_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| setFlag(MAD); |
| } // Inst_VOP2__V_MADAK_F32 |
| |
| Inst_VOP2__V_MADAK_F32::~Inst_VOP2__V_MADAK_F32() |
| { |
| } // ~Inst_VOP2__V_MADAK_F32 |
| |
| // D.f = S0.f * S1.f + K; K is a 32-bit inline constant. |
| // This opcode cannot use input/output modifiers. |
| void |
| Inst_VOP2__V_MADAK_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| VecElemF32 k = extData.imm_f32; |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::fma(src0[lane], src1[lane], k); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_ADD_U32::Inst_VOP2__V_ADD_U32(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_add_u32") |
| { |
| setFlag(ALU); |
| setFlag(WritesVCC); |
| } // Inst_VOP2__V_ADD_U32 |
| |
| Inst_VOP2__V_ADD_U32::~Inst_VOP2__V_ADD_U32() |
| { |
| } // ~Inst_VOP2__V_ADD_U32 |
| |
| // D.u = S0.u + S1.u; |
| // VCC[threadId] = (S0.u + S1.u >= 0x100000000ULL ? 1 : 0) is an UNSIGNED |
| // overflow or carry-out. |
| // In VOP3 the VCC destination may be an arbitrary SGPR-pair. |
| void |
| Inst_VOP2__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); |
| VecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| if (isSDWAInst()) { |
| VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); |
| // use copies of original src0, src1, and dest during selecting |
| VecOperandU32 origSrc0_sdwa(gpuDynInst, |
| extData.iFmt_VOP_SDWA.SRC0); |
| VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1); |
| VecOperandU32 origVdst(gpuDynInst, instData.VDST); |
| |
| src0_sdwa.read(); |
| origSrc0_sdwa.read(); |
| origSrc1.read(); |
| |
| DPRINTF(GCN3, "Handling V_ADD_U32 SRC SDWA. SRC0: register v[%d], " |
| "DST_SEL: %d, DST_UNUSED: %d, CLAMP: %d, SRC0_SEL: %d, " |
| "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, " |
| "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n", |
| extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL, |
| extData.iFmt_VOP_SDWA.DST_UNUSED, |
| extData.iFmt_VOP_SDWA.CLAMP, |
| extData.iFmt_VOP_SDWA.SRC0_SEL, |
| extData.iFmt_VOP_SDWA.SRC0_SEXT, |
| extData.iFmt_VOP_SDWA.SRC0_NEG, |
| extData.iFmt_VOP_SDWA.SRC0_ABS, |
| extData.iFmt_VOP_SDWA.SRC1_SEL, |
| extData.iFmt_VOP_SDWA.SRC1_SEXT, |
| extData.iFmt_VOP_SDWA.SRC1_NEG, |
| extData.iFmt_VOP_SDWA.SRC1_ABS); |
| |
| processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa, |
| src1, origSrc1); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src0_sdwa[lane] + src1[lane]; |
| origVdst[lane] = vdst[lane]; // keep copy consistent |
| vcc.setBit(lane, ((VecElemU64)src0_sdwa[lane] |
| + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0); |
| } |
| } |
| |
| processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst); |
| } else { |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src0[lane] + src1[lane]; |
| vcc.setBit(lane, ((VecElemU64)src0[lane] |
| + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0); |
| } |
| } |
| } |
| |
| vcc.write(); |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_SUB_U32::Inst_VOP2__V_SUB_U32(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_sub_u32") |
| { |
| setFlag(ALU); |
| setFlag(WritesVCC); |
| } // Inst_VOP2__V_SUB_U32 |
| |
| Inst_VOP2__V_SUB_U32::~Inst_VOP2__V_SUB_U32() |
| { |
| } // ~Inst_VOP2__V_SUB_U32 |
| |
| // D.u = S0.u - S1.u; |
| // VCC[threadId] = (S1.u > S0.u ? 1 : 0) is an UNSIGNED overflow or |
| // carry-out. |
| // In VOP3 the VCC destination may be an arbitrary SGPR-pair. |
| void |
| Inst_VOP2__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src0[lane] - src1[lane]; |
| vcc.setBit(lane, src1[lane] > src0[lane] ? 1 : 0); |
| } |
| } |
| |
| vdst.write(); |
| vcc.write(); |
| } |
| |
| Inst_VOP2__V_SUBREV_U32::Inst_VOP2__V_SUBREV_U32(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_subrev_u32") |
| { |
| setFlag(ALU); |
| setFlag(WritesVCC); |
| } // Inst_VOP2__V_SUBREV_U32 |
| |
| Inst_VOP2__V_SUBREV_U32::~Inst_VOP2__V_SUBREV_U32() |
| { |
| } // ~Inst_VOP2__V_SUBREV_U32 |
| |
| // D.u = S1.u - S0.u; |
| // VCC[threadId] = (S0.u > S1.u ? 1 : 0) is an UNSIGNED overflow or |
| // carry-out. |
| // In VOP3 the VCC destination may be an arbitrary SGPR-pair. |
| void |
| Inst_VOP2__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src1[lane] - src0[lane]; |
| vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vdst.write(); |
| vcc.write(); |
| } |
| |
| Inst_VOP2__V_ADDC_U32::Inst_VOP2__V_ADDC_U32(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_addc_u32") |
| { |
| setFlag(ALU); |
| setFlag(WritesVCC); |
| setFlag(ReadsVCC); |
| } // Inst_VOP2__V_ADDC_U32 |
| |
| Inst_VOP2__V_ADDC_U32::~Inst_VOP2__V_ADDC_U32() |
| { |
| } // ~Inst_VOP2__V_ADDC_U32 |
| |
| // D.u = S0.u + S1.u + VCC[threadId]; |
| // VCC[threadId] = (S0.u + S1.u + VCC[threadId] >= 0x100000000ULL ? 1 : 0) |
| // is an UNSIGNED overflow. |
| // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC |
| // source comes from the SGPR-pair at S2.u. |
| void |
| Inst_VOP2__V_ADDC_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| vcc.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src0[lane] + src1[lane] |
| + bits(vcc.rawData(), lane); |
| vcc.setBit(lane, ((VecElemU64)src0[lane] |
| + (VecElemU64)src1[lane] |
| + (VecElemU64)bits(vcc.rawData(), lane, lane)) |
| >= 0x100000000 ? 1 : 0); |
| } |
| } |
| |
| vdst.write(); |
| vcc.write(); |
| } |
| |
| Inst_VOP2__V_SUBB_U32::Inst_VOP2__V_SUBB_U32(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_subb_u32") |
| { |
| setFlag(ALU); |
| setFlag(WritesVCC); |
| setFlag(ReadsVCC); |
| } // Inst_VOP2__V_SUBB_U32 |
| |
| Inst_VOP2__V_SUBB_U32::~Inst_VOP2__V_SUBB_U32() |
| { |
| } // ~Inst_VOP2__V_SUBB_U32 |
| |
| // D.u = S0.u - S1.u - VCC[threadId]; |
| // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED |
| // overflow. |
| // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC |
| // source comes from the SGPR-pair at S2.u. |
| void |
| Inst_VOP2__V_SUBB_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| vcc.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] |
| = src0[lane] - src1[lane] - bits(vcc.rawData(), lane); |
| vcc.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane)) |
| > src0[lane] ? 1 : 0); |
| } |
| } |
| |
| vdst.write(); |
| vcc.write(); |
| } |
| |
| Inst_VOP2__V_SUBBREV_U32::Inst_VOP2__V_SUBBREV_U32(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_subbrev_u32") |
| { |
| setFlag(ALU); |
| setFlag(WritesVCC); |
| setFlag(ReadsVCC); |
| } // Inst_VOP2__V_SUBBREV_U32 |
| |
| Inst_VOP2__V_SUBBREV_U32::~Inst_VOP2__V_SUBBREV_U32() |
| { |
| } // ~Inst_VOP2__V_SUBBREV_U32 |
| |
| // D.u = S1.u - S0.u - VCC[threadId]; |
| // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED |
| // overflow. |
| // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC |
| // source comes from the SGPR-pair at S2.u. |
| void |
| Inst_VOP2__V_SUBBREV_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| vcc.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] |
| = src1[lane] - src0[lane] - bits(vcc.rawData(), lane); |
| vcc.setBit(lane, (src0[lane] + bits(vcc.rawData(), lane)) |
| > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vdst.write(); |
| vcc.write(); |
| } |
| |
| Inst_VOP2__V_ADD_F16::Inst_VOP2__V_ADD_F16(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_add_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP2__V_ADD_F16 |
| |
| Inst_VOP2__V_ADD_F16::~Inst_VOP2__V_ADD_F16() |
| { |
| } // ~Inst_VOP2__V_ADD_F16 |
| |
| // D.f16 = S0.f16 + S1.f16. |
| void |
| Inst_VOP2__V_ADD_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP2__V_SUB_F16::Inst_VOP2__V_SUB_F16(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_sub_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP2__V_SUB_F16 |
| |
| Inst_VOP2__V_SUB_F16::~Inst_VOP2__V_SUB_F16() |
| { |
| } // ~Inst_VOP2__V_SUB_F16 |
| |
| // D.f16 = S0.f16 - S1.f16. |
| void |
| Inst_VOP2__V_SUB_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP2__V_SUBREV_F16::Inst_VOP2__V_SUBREV_F16(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_subrev_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP2__V_SUBREV_F16 |
| |
| Inst_VOP2__V_SUBREV_F16::~Inst_VOP2__V_SUBREV_F16() |
| { |
| } // ~Inst_VOP2__V_SUBREV_F16 |
| |
| // D.f16 = S1.f16 - S0.f16. |
| void |
| Inst_VOP2__V_SUBREV_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP2__V_MUL_F16::Inst_VOP2__V_MUL_F16(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_mul_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP2__V_MUL_F16 |
| |
| Inst_VOP2__V_MUL_F16::~Inst_VOP2__V_MUL_F16() |
| { |
| } // ~Inst_VOP2__V_MUL_F16 |
| |
| // D.f16 = S0.f16 * S1.f16. |
| void |
| Inst_VOP2__V_MUL_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP2__V_MAC_F16::Inst_VOP2__V_MAC_F16(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_mac_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| setFlag(MAC); |
| } // Inst_VOP2__V_MAC_F16 |
| |
| Inst_VOP2__V_MAC_F16::~Inst_VOP2__V_MAC_F16() |
| { |
| } // ~Inst_VOP2__V_MAC_F16 |
| |
| // D.f16 = S0.f16 * S1.f16 + D.f16. |
| void |
| Inst_VOP2__V_MAC_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP2__V_MADMK_F16::Inst_VOP2__V_MADMK_F16(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_madmk_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| setFlag(MAD); |
| } // Inst_VOP2__V_MADMK_F16 |
| |
| Inst_VOP2__V_MADMK_F16::~Inst_VOP2__V_MADMK_F16() |
| { |
| } // ~Inst_VOP2__V_MADMK_F16 |
| |
| // D.f16 = S0.f16 * K.f16 + S1.f16; K is a 16-bit inline constant stored |
| // in the following literal DWORD. |
| // This opcode cannot use the VOP3 encoding and cannot use input/output |
| // modifiers. |
| void |
| Inst_VOP2__V_MADMK_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP2__V_MADAK_F16::Inst_VOP2__V_MADAK_F16(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_madak_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| setFlag(MAD); |
| } // Inst_VOP2__V_MADAK_F16 |
| |
| Inst_VOP2__V_MADAK_F16::~Inst_VOP2__V_MADAK_F16() |
| { |
| } // ~Inst_VOP2__V_MADAK_F16 |
| |
| // D.f16 = S0.f16 * S1.f16 + K.f16; K is a 16-bit inline constant stored |
| // in the following literal DWORD. |
| // This opcode cannot use the VOP3 encoding and cannot use input/output |
| // modifiers. |
| void |
| Inst_VOP2__V_MADAK_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP2__V_ADD_U16::Inst_VOP2__V_ADD_U16(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_add_u16") |
| { |
| setFlag(ALU); |
| } // Inst_VOP2__V_ADD_U16 |
| |
| Inst_VOP2__V_ADD_U16::~Inst_VOP2__V_ADD_U16() |
| { |
| } // ~Inst_VOP2__V_ADD_U16 |
| |
| // D.u16 = S0.u16 + S1.u16. |
| void |
| Inst_VOP2__V_ADD_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); |
| VecOperandU16 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src0[lane] + src1[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_SUB_U16::Inst_VOP2__V_SUB_U16(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_sub_u16") |
| { |
| setFlag(ALU); |
| } // Inst_VOP2__V_SUB_U16 |
| |
| Inst_VOP2__V_SUB_U16::~Inst_VOP2__V_SUB_U16() |
| { |
| } // ~Inst_VOP2__V_SUB_U16 |
| |
| // D.u16 = S0.u16 - S1.u16. |
| void |
| Inst_VOP2__V_SUB_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); |
| VecOperandU16 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src0[lane] - src1[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_SUBREV_U16::Inst_VOP2__V_SUBREV_U16(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_subrev_u16") |
| { |
| setFlag(ALU); |
| } // Inst_VOP2__V_SUBREV_U16 |
| |
| Inst_VOP2__V_SUBREV_U16::~Inst_VOP2__V_SUBREV_U16() |
| { |
| } // ~Inst_VOP2__V_SUBREV_U16 |
| |
| // D.u16 = S1.u16 - S0.u16. |
| void |
| Inst_VOP2__V_SUBREV_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); |
| VecOperandU16 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src1[lane] - src0[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_MUL_LO_U16::Inst_VOP2__V_MUL_LO_U16(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_mul_lo_u16") |
| { |
| setFlag(ALU); |
| } // Inst_VOP2__V_MUL_LO_U16 |
| |
| Inst_VOP2__V_MUL_LO_U16::~Inst_VOP2__V_MUL_LO_U16() |
| { |
| } // ~Inst_VOP2__V_MUL_LO_U16 |
| |
| // D.u16 = S0.u16 * S1.u16. |
| void |
| Inst_VOP2__V_MUL_LO_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); |
| VecOperandU16 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src0[lane] * src1[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_LSHLREV_B16::Inst_VOP2__V_LSHLREV_B16(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_lshlrev_b16") |
| { |
| setFlag(ALU); |
| } // Inst_VOP2__V_LSHLREV_B16 |
| |
| Inst_VOP2__V_LSHLREV_B16::~Inst_VOP2__V_LSHLREV_B16() |
| { |
| } // ~Inst_VOP2__V_LSHLREV_B16 |
| |
| // D.u[15:0] = S1.u[15:0] << S0.u[3:0]. |
| void |
| Inst_VOP2__V_LSHLREV_B16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); |
| VecOperandU16 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src1[lane] << bits(src0[lane], 3, 0); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_LSHRREV_B16::Inst_VOP2__V_LSHRREV_B16(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_lshrrev_b16") |
| { |
| setFlag(ALU); |
| } // Inst_VOP2__V_LSHRREV_B16 |
| |
| Inst_VOP2__V_LSHRREV_B16::~Inst_VOP2__V_LSHRREV_B16() |
| { |
| } // ~Inst_VOP2__V_LSHRREV_B16 |
| |
| // D.u[15:0] = S1.u[15:0] >> S0.u[3:0]. |
| // The vacated bits are set to zero. |
| void |
| Inst_VOP2__V_LSHRREV_B16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); |
| VecOperandU16 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src1[lane] >> src0[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_ASHRREV_I16::Inst_VOP2__V_ASHRREV_I16(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_ashrrev_i16") |
| { |
| setFlag(ALU); |
| } // Inst_VOP2__V_ASHRREV_I16 |
| |
| Inst_VOP2__V_ASHRREV_I16::~Inst_VOP2__V_ASHRREV_I16() |
| { |
| } // ~Inst_VOP2__V_ASHRREV_I16 |
| |
| // D.i[15:0] = signext(S1.i[15:0]) >> S0.i[3:0]. |
| // The vacated bits are set to the sign bit of the input value. |
| void |
| Inst_VOP2__V_ASHRREV_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); |
| VecOperandI16 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src1[lane] >> src0[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_MAX_F16::Inst_VOP2__V_MAX_F16(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_max_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP2__V_MAX_F16 |
| |
| Inst_VOP2__V_MAX_F16::~Inst_VOP2__V_MAX_F16() |
| { |
| } // ~Inst_VOP2__V_MAX_F16 |
| |
| // D.f16 = max(S0.f16, S1.f16). |
| void |
| Inst_VOP2__V_MAX_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP2__V_MIN_F16::Inst_VOP2__V_MIN_F16(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_min_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP2__V_MIN_F16 |
| |
| Inst_VOP2__V_MIN_F16::~Inst_VOP2__V_MIN_F16() |
| { |
| } // ~Inst_VOP2__V_MIN_F16 |
| |
| // D.f16 = min(S0.f16, S1.f16). |
| void |
| Inst_VOP2__V_MIN_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP2__V_MAX_U16::Inst_VOP2__V_MAX_U16(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_max_u16") |
| { |
| setFlag(ALU); |
| } // Inst_VOP2__V_MAX_U16 |
| |
| Inst_VOP2__V_MAX_U16::~Inst_VOP2__V_MAX_U16() |
| { |
| } // ~Inst_VOP2__V_MAX_U16 |
| |
| // D.u[15:0] = max(S0.u[15:0], S1.u[15:0]). |
| void |
| Inst_VOP2__V_MAX_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); |
| VecOperandU16 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::max(src0[lane], src1[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_MAX_I16::Inst_VOP2__V_MAX_I16(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_max_i16") |
| { |
| setFlag(ALU); |
| } // Inst_VOP2__V_MAX_I16 |
| |
| Inst_VOP2__V_MAX_I16::~Inst_VOP2__V_MAX_I16() |
| { |
| } // ~Inst_VOP2__V_MAX_I16 |
| |
| // D.i[15:0] = max(S0.i[15:0], S1.i[15:0]). |
| void |
| Inst_VOP2__V_MAX_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); |
| VecOperandI16 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::max(src0[lane], src1[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_MIN_U16::Inst_VOP2__V_MIN_U16(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_min_u16") |
| { |
| setFlag(ALU); |
| } // Inst_VOP2__V_MIN_U16 |
| |
| Inst_VOP2__V_MIN_U16::~Inst_VOP2__V_MIN_U16() |
| { |
| } // ~Inst_VOP2__V_MIN_U16 |
| |
| // D.u[15:0] = min(S0.u[15:0], S1.u[15:0]). |
| void |
| Inst_VOP2__V_MIN_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); |
| VecOperandU16 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::min(src0[lane], src1[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_MIN_I16::Inst_VOP2__V_MIN_I16(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_min_i16") |
| { |
| setFlag(ALU); |
| } // Inst_VOP2__V_MIN_I16 |
| |
| Inst_VOP2__V_MIN_I16::~Inst_VOP2__V_MIN_I16() |
| { |
| } // ~Inst_VOP2__V_MIN_I16 |
| |
| // D.i[15:0] = min(S0.i[15:0], S1.i[15:0]). |
| void |
| Inst_VOP2__V_MIN_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); |
| VecOperandI16 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::min(src0[lane], src1[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP2__V_LDEXP_F16::Inst_VOP2__V_LDEXP_F16(InFmt_VOP2 *iFmt) |
| : Inst_VOP2(iFmt, "v_ldexp_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP2__V_LDEXP_F16 |
| |
| Inst_VOP2__V_LDEXP_F16::~Inst_VOP2__V_LDEXP_F16() |
| { |
| } // ~Inst_VOP2__V_LDEXP_F16 |
| |
| // D.f16 = S0.f16 * (2 ** S1.i16). |
| void |
| Inst_VOP2__V_LDEXP_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP1__V_NOP::Inst_VOP1__V_NOP(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_nop") |
| { |
| setFlag(Nop); |
| setFlag(ALU); |
| } // Inst_VOP1__V_NOP |
| |
| Inst_VOP1__V_NOP::~Inst_VOP1__V_NOP() |
| { |
| } // ~Inst_VOP1__V_NOP |
| |
| // Do nothing. |
| void |
| Inst_VOP1__V_NOP::execute(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_VOP1__V_MOV_B32::Inst_VOP1__V_MOV_B32(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_mov_b32") |
| { |
| setFlag(ALU); |
| } // Inst_VOP1__V_MOV_B32 |
| |
| Inst_VOP1__V_MOV_B32::~Inst_VOP1__V_MOV_B32() |
| { |
| } // ~Inst_VOP1__V_MOV_B32 |
| |
| // D.u = S0.u. |
| // Input and output modifiers not supported; this is an untyped operation. |
| void |
| Inst_VOP1__V_MOV_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src(gpuDynInst, instData.SRC0); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (isDPPInst()) { |
| VecOperandU32 src_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); |
| src_dpp.read(); |
| |
| DPRINTF(GCN3, "Handling V_MOV_B32 SRC DPP. SRC0: register v[%d], " |
| "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, " |
| "SRC1_ABS: %d, SRC1_NEG: %d, BOUND_CTRL: %d, " |
| "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0, |
| extData.iFmt_VOP_DPP.DPP_CTRL, |
| extData.iFmt_VOP_DPP.SRC0_ABS, |
| extData.iFmt_VOP_DPP.SRC0_NEG, |
| extData.iFmt_VOP_DPP.SRC1_ABS, |
| extData.iFmt_VOP_DPP.SRC1_NEG, |
| extData.iFmt_VOP_DPP.BOUND_CTRL, |
| extData.iFmt_VOP_DPP.BANK_MASK, |
| extData.iFmt_VOP_DPP.ROW_MASK); |
| |
| // NOTE: For VOP1, there is no SRC1, so make sure we're not trying |
| // to negate it or take the absolute value of it |
| assert(!extData.iFmt_VOP_DPP.SRC1_ABS); |
| assert(!extData.iFmt_VOP_DPP.SRC1_NEG); |
| processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src_dpp); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src_dpp[lane]; |
| } |
| } |
| } else { |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src[lane]; |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_READFIRSTLANE_B32::Inst_VOP1__V_READFIRSTLANE_B32( |
| InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_readfirstlane_b32") |
| { |
| setFlag(ALU); |
| } // Inst_VOP1__V_READFIRSTLANE_B32 |
| |
| Inst_VOP1__V_READFIRSTLANE_B32::~Inst_VOP1__V_READFIRSTLANE_B32() |
| { |
| } // ~Inst_VOP1__V_READFIRSTLANE_B32 |
| |
| // Copy one VGPR value to one SGPR. D = SGPR destination, S0 = source data |
| // (VGPR# or M0 for lds direct access), Lane# = FindFirst1fromLSB(exec) |
| // (Lane# = 0 if exec is zero). Ignores exec mask for the access. |
| // Input and output modifiers not supported; this is an untyped operation. |
| void |
| Inst_VOP1__V_READFIRSTLANE_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarRegI32 src_lane(0); |
| ScalarRegU64 exec_mask = wf->execMask().to_ullong(); |
| ConstVecOperandU32 src(gpuDynInst, instData.SRC0); |
| ScalarOperandU32 sdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (exec_mask) { |
| src_lane = findLsbSet(exec_mask); |
| } |
| |
| sdst = src[src_lane]; |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP1__V_CVT_I32_F64::Inst_VOP1__V_CVT_I32_F64(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_cvt_i32_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP1__V_CVT_I32_F64 |
| |
| Inst_VOP1__V_CVT_I32_F64::~Inst_VOP1__V_CVT_I32_F64() |
| { |
| } // ~Inst_VOP1__V_CVT_I32_F64 |
| |
| // D.i = (int)S0.d. |
| // Out-of-range floating point values (including infinity) saturate. NaN |
| // is converted to 0. |
| void |
| Inst_VOP1__V_CVT_I32_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src(gpuDynInst, instData.SRC0); |
| VecOperandI32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| int exp; |
| std::frexp(src[lane],&exp); |
| if (std::isnan(src[lane])) { |
| vdst[lane] = 0; |
| } else if (std::isinf(src[lane]) || exp > 30) { |
| if (std::signbit(src[lane])) { |
| vdst[lane] = INT_MIN; |
| } else { |
| vdst[lane] = INT_MAX; |
| } |
| } else { |
| vdst[lane] = (VecElemI32)src[lane]; |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_CVT_F64_I32::Inst_VOP1__V_CVT_F64_I32(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_cvt_f64_i32") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP1__V_CVT_F64_I32 |
| |
| Inst_VOP1__V_CVT_F64_I32::~Inst_VOP1__V_CVT_F64_I32() |
| { |
| } // ~Inst_VOP1__V_CVT_F64_I32 |
| |
| // D.d = (double)S0.i. |
| void |
| Inst_VOP1__V_CVT_F64_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src(gpuDynInst, instData.SRC0); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = (VecElemF64)src[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_CVT_F32_I32::Inst_VOP1__V_CVT_F32_I32(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_cvt_f32_i32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP1__V_CVT_F32_I32 |
| |
| Inst_VOP1__V_CVT_F32_I32::~Inst_VOP1__V_CVT_F32_I32() |
| { |
| } // ~Inst_VOP1__V_CVT_F32_I32 |
| |
| // D.f = (float)S0.i. |
| void |
| Inst_VOP1__V_CVT_F32_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src(gpuDynInst, instData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = (VecElemF32)src[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_CVT_F32_U32::Inst_VOP1__V_CVT_F32_U32(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_cvt_f32_u32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP1__V_CVT_F32_U32 |
| |
| Inst_VOP1__V_CVT_F32_U32::~Inst_VOP1__V_CVT_F32_U32() |
| { |
| } // ~Inst_VOP1__V_CVT_F32_U32 |
| |
| // D.f = (float)S0.u. |
| void |
| Inst_VOP1__V_CVT_F32_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src(gpuDynInst, instData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = (VecElemF32)src[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_CVT_U32_F32::Inst_VOP1__V_CVT_U32_F32(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_cvt_u32_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP1__V_CVT_U32_F32 |
| |
| Inst_VOP1__V_CVT_U32_F32::~Inst_VOP1__V_CVT_U32_F32() |
| { |
| } // ~Inst_VOP1__V_CVT_U32_F32 |
| |
| // D.u = (unsigned)S0.f. |
| // Out-of-range floating point values (including infinity) saturate. NaN |
| // is converted to 0. |
| void |
| Inst_VOP1__V_CVT_U32_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, instData.SRC0); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| int exp; |
| std::frexp(src[lane],&exp); |
| if (std::isnan(src[lane])) { |
| vdst[lane] = 0; |
| } else if (std::isinf(src[lane])) { |
| if (std::signbit(src[lane])) { |
| vdst[lane] = 0; |
| } else { |
| vdst[lane] = UINT_MAX; |
| } |
| } else if (exp > 31) { |
| vdst[lane] = UINT_MAX; |
| } else { |
| vdst[lane] = (VecElemU32)src[lane]; |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_CVT_I32_F32::Inst_VOP1__V_CVT_I32_F32(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_cvt_i32_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP1__V_CVT_I32_F32 |
| |
| Inst_VOP1__V_CVT_I32_F32::~Inst_VOP1__V_CVT_I32_F32() |
| { |
| } // ~Inst_VOP1__V_CVT_I32_F32 |
| |
| // D.i = (int)S0.f. |
| // Out-of-range floating point values (including infinity) saturate. NaN |
| // is converted to 0. |
| void |
| Inst_VOP1__V_CVT_I32_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, instData.SRC0); |
| VecOperandI32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| int exp; |
| std::frexp(src[lane],&exp); |
| if (std::isnan(src[lane])) { |
| vdst[lane] = 0; |
| } else if (std::isinf(src[lane]) || exp > 30) { |
| if (std::signbit(src[lane])) { |
| vdst[lane] = INT_MIN; |
| } else { |
| vdst[lane] = INT_MAX; |
| } |
| } else { |
| vdst[lane] = (VecElemI32)src[lane]; |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_MOV_FED_B32::Inst_VOP1__V_MOV_FED_B32(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_mov_fed_b32") |
| { |
| setFlag(ALU); |
| } // Inst_VOP1__V_MOV_FED_B32 |
| |
| Inst_VOP1__V_MOV_FED_B32::~Inst_VOP1__V_MOV_FED_B32() |
| { |
| } // ~Inst_VOP1__V_MOV_FED_B32 |
| |
| // D.u = S0.u; |
| // Input and output modifiers not supported; this is an untyped operation. |
| void |
| Inst_VOP1__V_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP1__V_CVT_F16_F32::Inst_VOP1__V_CVT_F16_F32(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_cvt_f16_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP1__V_CVT_F16_F32 |
| |
| Inst_VOP1__V_CVT_F16_F32::~Inst_VOP1__V_CVT_F16_F32() |
| { |
| } // ~Inst_VOP1__V_CVT_F16_F32 |
| |
| // D.f16 = flt32_to_flt16(S0.f). |
| void |
| Inst_VOP1__V_CVT_F16_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP1__V_CVT_F32_F16::Inst_VOP1__V_CVT_F32_F16(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_cvt_f32_f16") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP1__V_CVT_F32_F16 |
| |
| Inst_VOP1__V_CVT_F32_F16::~Inst_VOP1__V_CVT_F32_F16() |
| { |
| } // ~Inst_VOP1__V_CVT_F32_F16 |
| |
| // D.f = flt16_to_flt32(S0.f16). |
| void |
| Inst_VOP1__V_CVT_F32_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP1__V_CVT_RPI_I32_F32::Inst_VOP1__V_CVT_RPI_I32_F32( |
| InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_cvt_rpi_i32_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP1__V_CVT_RPI_I32_F32 |
| |
| Inst_VOP1__V_CVT_RPI_I32_F32::~Inst_VOP1__V_CVT_RPI_I32_F32() |
| { |
| } // ~Inst_VOP1__V_CVT_RPI_I32_F32 |
| |
| // D.i = (int)floor(S0.f + 0.5). |
| void |
| Inst_VOP1__V_CVT_RPI_I32_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, instData.SRC0); |
| VecOperandI32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_CVT_FLR_I32_F32::Inst_VOP1__V_CVT_FLR_I32_F32( |
| InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_cvt_flr_i32_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP1__V_CVT_FLR_I32_F32 |
| |
| Inst_VOP1__V_CVT_FLR_I32_F32::~Inst_VOP1__V_CVT_FLR_I32_F32() |
| { |
| } // ~Inst_VOP1__V_CVT_FLR_I32_F32 |
| |
| // D.i = (int)floor(S0.f). |
| void |
| Inst_VOP1__V_CVT_FLR_I32_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, instData.SRC0); |
| VecOperandI32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = (VecElemI32)std::floor(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_CVT_OFF_F32_I4::Inst_VOP1__V_CVT_OFF_F32_I4(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_cvt_off_f32_i4") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP1__V_CVT_OFF_F32_I4 |
| |
| Inst_VOP1__V_CVT_OFF_F32_I4::~Inst_VOP1__V_CVT_OFF_F32_I4() |
| { |
| } // ~Inst_VOP1__V_CVT_OFF_F32_I4 |
| |
| // 4-bit signed int to 32-bit float. |
| void |
| Inst_VOP1__V_CVT_OFF_F32_I4::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP1__V_CVT_F32_F64::Inst_VOP1__V_CVT_F32_F64(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_cvt_f32_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP1__V_CVT_F32_F64 |
| |
| Inst_VOP1__V_CVT_F32_F64::~Inst_VOP1__V_CVT_F32_F64() |
| { |
| } // ~Inst_VOP1__V_CVT_F32_F64 |
| |
| // D.f = (float)S0.d. |
| void |
| Inst_VOP1__V_CVT_F32_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src(gpuDynInst, instData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = (VecElemF32)src[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_CVT_F64_F32::Inst_VOP1__V_CVT_F64_F32(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_cvt_f64_f32") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP1__V_CVT_F64_F32 |
| |
| Inst_VOP1__V_CVT_F64_F32::~Inst_VOP1__V_CVT_F64_F32() |
| { |
| } // ~Inst_VOP1__V_CVT_F64_F32 |
| |
| // D.d = (double)S0.f. |
| void |
| Inst_VOP1__V_CVT_F64_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, instData.SRC0); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = (VecElemF64)src[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_CVT_F32_UBYTE0::Inst_VOP1__V_CVT_F32_UBYTE0(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_cvt_f32_ubyte0") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP1__V_CVT_F32_UBYTE0 |
| |
| Inst_VOP1__V_CVT_F32_UBYTE0::~Inst_VOP1__V_CVT_F32_UBYTE0() |
| { |
| } // ~Inst_VOP1__V_CVT_F32_UBYTE0 |
| |
| // D.f = (float)(S0.u[7:0]). |
| void |
| Inst_VOP1__V_CVT_F32_UBYTE0::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src(gpuDynInst, instData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = (VecElemF32)(bits(src[lane], 7, 0)); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_CVT_F32_UBYTE1::Inst_VOP1__V_CVT_F32_UBYTE1(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_cvt_f32_ubyte1") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP1__V_CVT_F32_UBYTE1 |
| |
| Inst_VOP1__V_CVT_F32_UBYTE1::~Inst_VOP1__V_CVT_F32_UBYTE1() |
| { |
| } // ~Inst_VOP1__V_CVT_F32_UBYTE1 |
| |
| // D.f = (float)(S0.u[15:8]). |
| void |
| Inst_VOP1__V_CVT_F32_UBYTE1::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src(gpuDynInst, instData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = (VecElemF32)(bits(src[lane], 15, 8)); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_CVT_F32_UBYTE2::Inst_VOP1__V_CVT_F32_UBYTE2(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_cvt_f32_ubyte2") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP1__V_CVT_F32_UBYTE2 |
| |
| Inst_VOP1__V_CVT_F32_UBYTE2::~Inst_VOP1__V_CVT_F32_UBYTE2() |
| { |
| } // ~Inst_VOP1__V_CVT_F32_UBYTE2 |
| |
| // D.f = (float)(S0.u[23:16]). |
| void |
| Inst_VOP1__V_CVT_F32_UBYTE2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src(gpuDynInst, instData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = (VecElemF32)(bits(src[lane], 23, 16)); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_CVT_F32_UBYTE3::Inst_VOP1__V_CVT_F32_UBYTE3(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_cvt_f32_ubyte3") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP1__V_CVT_F32_UBYTE3 |
| |
| Inst_VOP1__V_CVT_F32_UBYTE3::~Inst_VOP1__V_CVT_F32_UBYTE3() |
| { |
| } // ~Inst_VOP1__V_CVT_F32_UBYTE3 |
| |
| // D.f = (float)(S0.u[31:24]). |
| void |
| Inst_VOP1__V_CVT_F32_UBYTE3::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src(gpuDynInst, instData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = (VecElemF32)(bits(src[lane], 31, 24)); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_CVT_U32_F64::Inst_VOP1__V_CVT_U32_F64(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_cvt_u32_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP1__V_CVT_U32_F64 |
| |
| Inst_VOP1__V_CVT_U32_F64::~Inst_VOP1__V_CVT_U32_F64() |
| { |
| } // ~Inst_VOP1__V_CVT_U32_F64 |
| |
| // D.u = (unsigned)S0.d. |
| // Out-of-range floating point values (including infinity) saturate. NaN |
| // is converted to 0. |
| void |
| Inst_VOP1__V_CVT_U32_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src(gpuDynInst, instData.SRC0); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| int exp; |
| std::frexp(src[lane],&exp); |
| if (std::isnan(src[lane])) { |
| vdst[lane] = 0; |
| } else if (std::isinf(src[lane])) { |
| if (std::signbit(src[lane])) { |
| vdst[lane] = 0; |
| } else { |
| vdst[lane] = UINT_MAX; |
| } |
| } else if (exp > 31) { |
| vdst[lane] = UINT_MAX; |
| } else { |
| vdst[lane] = (VecElemU32)src[lane]; |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_CVT_F64_U32::Inst_VOP1__V_CVT_F64_U32(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_cvt_f64_u32") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP1__V_CVT_F64_U32 |
| |
| Inst_VOP1__V_CVT_F64_U32::~Inst_VOP1__V_CVT_F64_U32() |
| { |
| } // ~Inst_VOP1__V_CVT_F64_U32 |
| |
| // D.d = (double)S0.u. |
| void |
| Inst_VOP1__V_CVT_F64_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src(gpuDynInst, instData.SRC0); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = (VecElemF64)src[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_TRUNC_F64::Inst_VOP1__V_TRUNC_F64(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_trunc_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP1__V_TRUNC_F64 |
| |
| Inst_VOP1__V_TRUNC_F64::~Inst_VOP1__V_TRUNC_F64() |
| { |
| } // ~Inst_VOP1__V_TRUNC_F64 |
| |
| // D.d = trunc(S0.d), return integer part of S0.d. |
| void |
| Inst_VOP1__V_TRUNC_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src(gpuDynInst, instData.SRC0); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::trunc(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_CEIL_F64::Inst_VOP1__V_CEIL_F64(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_ceil_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP1__V_CEIL_F64 |
| |
| Inst_VOP1__V_CEIL_F64::~Inst_VOP1__V_CEIL_F64() |
| { |
| } // ~Inst_VOP1__V_CEIL_F64 |
| |
| // D.d = ceil(S0.d); |
| void |
| Inst_VOP1__V_CEIL_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src(gpuDynInst, instData.SRC0); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::ceil(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_RNDNE_F64::Inst_VOP1__V_RNDNE_F64(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_rndne_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP1__V_RNDNE_F64 |
| |
| Inst_VOP1__V_RNDNE_F64::~Inst_VOP1__V_RNDNE_F64() |
| { |
| } // ~Inst_VOP1__V_RNDNE_F64 |
| |
| // D.d = round_nearest_even(S0.d). |
| void |
| Inst_VOP1__V_RNDNE_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src(gpuDynInst, instData.SRC0); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = roundNearestEven(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_FLOOR_F64::Inst_VOP1__V_FLOOR_F64(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_floor_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP1__V_FLOOR_F64 |
| |
| Inst_VOP1__V_FLOOR_F64::~Inst_VOP1__V_FLOOR_F64() |
| { |
| } // ~Inst_VOP1__V_FLOOR_F64 |
| |
| // D.d = floor(S0.d); |
| void |
| Inst_VOP1__V_FLOOR_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src(gpuDynInst, instData.SRC0); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::floor(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_FRACT_F32::Inst_VOP1__V_FRACT_F32(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_fract_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP1__V_FRACT_F32 |
| |
| Inst_VOP1__V_FRACT_F32::~Inst_VOP1__V_FRACT_F32() |
| { |
| } // ~Inst_VOP1__V_FRACT_F32 |
| |
| // D.f = modf(S0.f). |
| void |
| Inst_VOP1__V_FRACT_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, instData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| VecElemF32 int_part(0.0); |
| vdst[lane] = std::modf(src[lane], &int_part); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_TRUNC_F32::Inst_VOP1__V_TRUNC_F32(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_trunc_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP1__V_TRUNC_F32 |
| |
| Inst_VOP1__V_TRUNC_F32::~Inst_VOP1__V_TRUNC_F32() |
| { |
| } // ~Inst_VOP1__V_TRUNC_F32 |
| |
| // D.f = trunc(S0.f), return integer part of S0.f. |
| void |
| Inst_VOP1__V_TRUNC_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, instData.SRC0); |
| VecOperandF32 vdst (gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::trunc(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_CEIL_F32::Inst_VOP1__V_CEIL_F32(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_ceil_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP1__V_CEIL_F32 |
| |
| Inst_VOP1__V_CEIL_F32::~Inst_VOP1__V_CEIL_F32() |
| { |
| } // ~Inst_VOP1__V_CEIL_F32 |
| |
| // D.f = ceil(S0.f); |
| void |
| Inst_VOP1__V_CEIL_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, instData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::ceil(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_RNDNE_F32::Inst_VOP1__V_RNDNE_F32(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_rndne_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP1__V_RNDNE_F32 |
| |
| Inst_VOP1__V_RNDNE_F32::~Inst_VOP1__V_RNDNE_F32() |
| { |
| } // ~Inst_VOP1__V_RNDNE_F32 |
| |
| // D.f = round_nearest_even(S0.f). |
| void |
| Inst_VOP1__V_RNDNE_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, instData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = roundNearestEven(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_FLOOR_F32::Inst_VOP1__V_FLOOR_F32(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_floor_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP1__V_FLOOR_F32 |
| |
| Inst_VOP1__V_FLOOR_F32::~Inst_VOP1__V_FLOOR_F32() |
| { |
| } // ~Inst_VOP1__V_FLOOR_F32 |
| |
| // D.f = floor(S0.f); |
| void |
| Inst_VOP1__V_FLOOR_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, instData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::floor(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_EXP_F32::Inst_VOP1__V_EXP_F32(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_exp_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP1__V_EXP_F32 |
| |
| Inst_VOP1__V_EXP_F32::~Inst_VOP1__V_EXP_F32() |
| { |
| } // ~Inst_VOP1__V_EXP_F32 |
| |
| // D.f = pow(2.0, S0.f). |
| void |
| Inst_VOP1__V_EXP_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, instData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::pow(2.0, src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_LOG_F32::Inst_VOP1__V_LOG_F32(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_log_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP1__V_LOG_F32 |
| |
| Inst_VOP1__V_LOG_F32::~Inst_VOP1__V_LOG_F32() |
| { |
| } // ~Inst_VOP1__V_LOG_F32 |
| |
| // D.f = log2(S0.f). |
| void |
| Inst_VOP1__V_LOG_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, instData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::log2(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_RCP_F32::Inst_VOP1__V_RCP_F32(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_rcp_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP1__V_RCP_F32 |
| |
| Inst_VOP1__V_RCP_F32::~Inst_VOP1__V_RCP_F32() |
| { |
| } // ~Inst_VOP1__V_RCP_F32 |
| |
| // D.f = 1.0 / S0.f. |
| void |
| Inst_VOP1__V_RCP_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, instData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = 1.0 / src[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_RCP_IFLAG_F32::Inst_VOP1__V_RCP_IFLAG_F32(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_rcp_iflag_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP1__V_RCP_IFLAG_F32 |
| |
| Inst_VOP1__V_RCP_IFLAG_F32::~Inst_VOP1__V_RCP_IFLAG_F32() |
| { |
| } // ~Inst_VOP1__V_RCP_IFLAG_F32 |
| |
| // D.f = 1.0 / S0.f. |
| void |
| Inst_VOP1__V_RCP_IFLAG_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, instData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = 1.0 / src[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_RSQ_F32::Inst_VOP1__V_RSQ_F32(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_rsq_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP1__V_RSQ_F32 |
| |
| Inst_VOP1__V_RSQ_F32::~Inst_VOP1__V_RSQ_F32() |
| { |
| } // ~Inst_VOP1__V_RSQ_F32 |
| |
| // D.f = 1.0 / sqrt(S0.f). |
| void |
| Inst_VOP1__V_RSQ_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, instData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = 1.0 / std::sqrt(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_RCP_F64::Inst_VOP1__V_RCP_F64(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_rcp_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP1__V_RCP_F64 |
| |
| Inst_VOP1__V_RCP_F64::~Inst_VOP1__V_RCP_F64() |
| { |
| } // ~Inst_VOP1__V_RCP_F64 |
| |
| // D.d = 1.0 / S0.d. |
| void |
| Inst_VOP1__V_RCP_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src(gpuDynInst, instData.SRC0); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| if (std::fpclassify(src[lane]) == FP_ZERO) { |
| vdst[lane] = +INFINITY; |
| } else if (std::isnan(src[lane])) { |
| vdst[lane] = NAN; |
| } else if (std::isinf(src[lane])) { |
| if (std::signbit(src[lane])) { |
| vdst[lane] = -0.0; |
| } else { |
| vdst[lane] = 0.0; |
| } |
| } else { |
| vdst[lane] = 1.0 / src[lane]; |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_RSQ_F64::Inst_VOP1__V_RSQ_F64(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_rsq_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP1__V_RSQ_F64 |
| |
| Inst_VOP1__V_RSQ_F64::~Inst_VOP1__V_RSQ_F64() |
| { |
| } // ~Inst_VOP1__V_RSQ_F64 |
| |
| // D.d = 1.0 / sqrt(S0.d). |
| void |
| Inst_VOP1__V_RSQ_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src(gpuDynInst, instData.SRC0); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| if (std::fpclassify(src[lane]) == FP_ZERO) { |
| vdst[lane] = +INFINITY; |
| } else if (std::isnan(src[lane])) { |
| vdst[lane] = NAN; |
| } else if (std::isinf(src[lane]) |
| && !std::signbit(src[lane])) { |
| vdst[lane] = 0.0; |
| } else if (std::signbit(src[lane])) { |
| vdst[lane] = NAN; |
| } else { |
| vdst[lane] = 1.0 / std::sqrt(src[lane]); |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_SQRT_F32::Inst_VOP1__V_SQRT_F32(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_sqrt_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP1__V_SQRT_F32 |
| |
| Inst_VOP1__V_SQRT_F32::~Inst_VOP1__V_SQRT_F32() |
| { |
| } // ~Inst_VOP1__V_SQRT_F32 |
| |
| // D.f = sqrt(S0.f). |
| void |
| Inst_VOP1__V_SQRT_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, instData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::sqrt(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_SQRT_F64::Inst_VOP1__V_SQRT_F64(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_sqrt_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP1__V_SQRT_F64 |
| |
| Inst_VOP1__V_SQRT_F64::~Inst_VOP1__V_SQRT_F64() |
| { |
| } // ~Inst_VOP1__V_SQRT_F64 |
| |
| // D.d = sqrt(S0.d). |
| void |
| Inst_VOP1__V_SQRT_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src(gpuDynInst, instData.SRC0); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::sqrt(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_SIN_F32::Inst_VOP1__V_SIN_F32(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_sin_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP1__V_SIN_F32 |
| |
| Inst_VOP1__V_SIN_F32::~Inst_VOP1__V_SIN_F32() |
| { |
| } // ~Inst_VOP1__V_SIN_F32 |
| |
| // D.f = sin(S0.f * 2 * PI). |
| void |
| Inst_VOP1__V_SIN_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, instData.SRC0); |
| ConstScalarOperandF32 pi(gpuDynInst, REG_PI); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| pi.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| if (src[lane] < -256.0 || src[lane] > 256.0) { |
| vdst[lane] = 0.0; |
| } else { |
| vdst[lane] = std::sin(src[lane] * 2.0 * pi.rawData()); |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_COS_F32::Inst_VOP1__V_COS_F32(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_cos_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP1__V_COS_F32 |
| |
| Inst_VOP1__V_COS_F32::~Inst_VOP1__V_COS_F32() |
| { |
| } // ~Inst_VOP1__V_COS_F32 |
| |
| // D.f = cos(S0.f * 2 * PI). |
| void |
| Inst_VOP1__V_COS_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, instData.SRC0); |
| ConstScalarOperandF32 pi(gpuDynInst, REG_PI); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| pi.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| if (src[lane] < -256.0 || src[lane] > 256.0) { |
| vdst[lane] = 0.0; |
| } else { |
| vdst[lane] = std::cos(src[lane] * 2.0 * pi.rawData()); |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_NOT_B32::Inst_VOP1__V_NOT_B32(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_not_b32") |
| { |
| setFlag(ALU); |
| } // Inst_VOP1__V_NOT_B32 |
| |
| Inst_VOP1__V_NOT_B32::~Inst_VOP1__V_NOT_B32() |
| { |
| } // ~Inst_VOP1__V_NOT_B32 |
| |
| // D.u = ~S0.u. |
| // Input and output modifiers not supported. |
| void |
| Inst_VOP1__V_NOT_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src(gpuDynInst, instData.SRC0); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = ~src[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_BFREV_B32::Inst_VOP1__V_BFREV_B32(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_bfrev_b32") |
| { |
| setFlag(ALU); |
| } // Inst_VOP1__V_BFREV_B32 |
| |
| Inst_VOP1__V_BFREV_B32::~Inst_VOP1__V_BFREV_B32() |
| { |
| } // ~Inst_VOP1__V_BFREV_B32 |
| |
| // D.u[31:0] = S0.u[0:31], bitfield reverse. |
| // Input and output modifiers not supported. |
| void |
| Inst_VOP1__V_BFREV_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src(gpuDynInst, instData.SRC0); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = reverseBits(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_FFBH_U32::Inst_VOP1__V_FFBH_U32(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_ffbh_u32") |
| { |
| setFlag(ALU); |
| } // Inst_VOP1__V_FFBH_U32 |
| |
| Inst_VOP1__V_FFBH_U32::~Inst_VOP1__V_FFBH_U32() |
| { |
| } // ~Inst_VOP1__V_FFBH_U32 |
| |
| // D.u = position of first 1 in S0.u from MSB; |
| // D.u = 0xffffffff if S0.u == 0. |
| void |
| Inst_VOP1__V_FFBH_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src(gpuDynInst, instData.SRC0); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = findFirstOneMsb(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_FFBL_B32::Inst_VOP1__V_FFBL_B32(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_ffbl_b32") |
| { |
| setFlag(ALU); |
| } // Inst_VOP1__V_FFBL_B32 |
| |
| Inst_VOP1__V_FFBL_B32::~Inst_VOP1__V_FFBL_B32() |
| { |
| } // ~Inst_VOP1__V_FFBL_B32 |
| |
| // D.u = position of first 1 in S0.u from LSB; |
| // D.u = 0xffffffff if S0.u == 0. |
| void |
| Inst_VOP1__V_FFBL_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src(gpuDynInst, instData.SRC0); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = findFirstOne(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_FFBH_I32::Inst_VOP1__V_FFBH_I32(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_ffbh_i32") |
| { |
| setFlag(ALU); |
| } // Inst_VOP1__V_FFBH_I32 |
| |
| Inst_VOP1__V_FFBH_I32::~Inst_VOP1__V_FFBH_I32() |
| { |
| } // ~Inst_VOP1__V_FFBH_I32 |
| |
| // D.u = position of first bit different from sign bit in S0.i from MSB; |
| // D.u = 0xffffffff if S0.i == 0 or S0.i == 0xffffffff. |
| void |
| Inst_VOP1__V_FFBH_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src(gpuDynInst, instData.SRC0); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = firstOppositeSignBit(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_FREXP_EXP_I32_F64::Inst_VOP1__V_FREXP_EXP_I32_F64( |
| InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_frexp_exp_i32_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP1__V_FREXP_EXP_I32_F64 |
| |
| Inst_VOP1__V_FREXP_EXP_I32_F64::~Inst_VOP1__V_FREXP_EXP_I32_F64() |
| { |
| } // ~Inst_VOP1__V_FREXP_EXP_I32_F64 |
| |
| void |
| Inst_VOP1__V_FREXP_EXP_I32_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src(gpuDynInst, instData.SRC0); |
| VecOperandI32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| if (std::isinf(src[lane]) || std::isnan(src[lane])) { |
| vdst[lane] = 0; |
| } else { |
| VecElemI32 exp = 0; |
| std::frexp(src[lane], &exp); |
| vdst[lane] = exp; |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_FREXP_MANT_F64::Inst_VOP1__V_FREXP_MANT_F64(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_frexp_mant_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP1__V_FREXP_MANT_F64 |
| |
| Inst_VOP1__V_FREXP_MANT_F64::~Inst_VOP1__V_FREXP_MANT_F64() |
| { |
| } // ~Inst_VOP1__V_FREXP_MANT_F64 |
| |
| void |
| Inst_VOP1__V_FREXP_MANT_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src(gpuDynInst, instData.SRC0); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| if (std::isinf(src[lane]) || std::isnan(src[lane])) { |
| vdst[lane] = src[lane]; |
| } else { |
| VecElemI32 exp(0); |
| vdst[lane] = std::frexp(src[lane], &exp); |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_FRACT_F64::Inst_VOP1__V_FRACT_F64(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_fract_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP1__V_FRACT_F64 |
| |
| Inst_VOP1__V_FRACT_F64::~Inst_VOP1__V_FRACT_F64() |
| { |
| } // ~Inst_VOP1__V_FRACT_F64 |
| |
| void |
| Inst_VOP1__V_FRACT_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src(gpuDynInst, instData.SRC0); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| VecElemF64 int_part(0.0); |
| vdst[lane] = std::modf(src[lane], &int_part); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_FREXP_EXP_I32_F32::Inst_VOP1__V_FREXP_EXP_I32_F32( |
| InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_frexp_exp_i32_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP1__V_FREXP_EXP_I32_F32 |
| |
| Inst_VOP1__V_FREXP_EXP_I32_F32::~Inst_VOP1__V_FREXP_EXP_I32_F32() |
| { |
| } // ~Inst_VOP1__V_FREXP_EXP_I32_F32 |
| |
| // frexp(S0.f, Exponent(S0.f)) |
| // if (S0.f == INF || S0.f == NAN) then D.i = 0; |
| // else D.i = Exponent(S0.f); |
| void |
| Inst_VOP1__V_FREXP_EXP_I32_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, instData.SRC0); |
| VecOperandI32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| if (std::isinf(src[lane]) || std::isnan(src[lane])) { |
| vdst[lane] = 0; |
| } else { |
| VecElemI32 exp(0); |
| std::frexp(src[lane], &exp); |
| vdst[lane] = exp; |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_FREXP_MANT_F32::Inst_VOP1__V_FREXP_MANT_F32(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_frexp_mant_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP1__V_FREXP_MANT_F32 |
| |
| Inst_VOP1__V_FREXP_MANT_F32::~Inst_VOP1__V_FREXP_MANT_F32() |
| { |
| } // ~Inst_VOP1__V_FREXP_MANT_F32 |
| |
| // if (S0.f == INF || S0.f == NAN) then D.f = S0.f; |
| // else D.f = frexp(S0.f, Exponent(S0.f)). |
| void |
| Inst_VOP1__V_FREXP_MANT_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, instData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| if (std::isinf(src[lane]) || std::isnan(src[lane])) { |
| vdst[lane] = src[lane]; |
| } else { |
| VecElemI32 exp(0); |
| vdst[lane] = std::frexp(src[lane], &exp); |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_CLREXCP::Inst_VOP1__V_CLREXCP(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_clrexcp") |
| { |
| setFlag(ALU); |
| } // Inst_VOP1__V_CLREXCP |
| |
| Inst_VOP1__V_CLREXCP::~Inst_VOP1__V_CLREXCP() |
| { |
| } // ~Inst_VOP1__V_CLREXCP |
| |
| void |
| Inst_VOP1__V_CLREXCP::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP1__V_CVT_F16_U16::Inst_VOP1__V_CVT_F16_U16(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_cvt_f16_u16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP1__V_CVT_F16_U16 |
| |
| Inst_VOP1__V_CVT_F16_U16::~Inst_VOP1__V_CVT_F16_U16() |
| { |
| } // ~Inst_VOP1__V_CVT_F16_U16 |
| |
| // D.f16 = uint16_to_flt16(S.u16). |
| void |
| Inst_VOP1__V_CVT_F16_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP1__V_CVT_F16_I16::Inst_VOP1__V_CVT_F16_I16(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_cvt_f16_i16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP1__V_CVT_F16_I16 |
| |
| Inst_VOP1__V_CVT_F16_I16::~Inst_VOP1__V_CVT_F16_I16() |
| { |
| } // ~Inst_VOP1__V_CVT_F16_I16 |
| |
| // D.f16 = int16_to_flt16(S.i16). |
| void |
| Inst_VOP1__V_CVT_F16_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP1__V_CVT_U16_F16::Inst_VOP1__V_CVT_U16_F16(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_cvt_u16_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP1__V_CVT_U16_F16 |
| |
| Inst_VOP1__V_CVT_U16_F16::~Inst_VOP1__V_CVT_U16_F16() |
| { |
| } // ~Inst_VOP1__V_CVT_U16_F16 |
| |
| // D.u16 = flt16_to_uint16(S.f16). |
| void |
| Inst_VOP1__V_CVT_U16_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP1__V_CVT_I16_F16::Inst_VOP1__V_CVT_I16_F16(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_cvt_i16_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP1__V_CVT_I16_F16 |
| |
| Inst_VOP1__V_CVT_I16_F16::~Inst_VOP1__V_CVT_I16_F16() |
| { |
| } // ~Inst_VOP1__V_CVT_I16_F16 |
| |
| // D.i16 = flt16_to_int16(S.f16). |
| void |
| Inst_VOP1__V_CVT_I16_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP1__V_RCP_F16::Inst_VOP1__V_RCP_F16(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_rcp_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP1__V_RCP_F16 |
| |
| Inst_VOP1__V_RCP_F16::~Inst_VOP1__V_RCP_F16() |
| { |
| } // ~Inst_VOP1__V_RCP_F16 |
| |
| // if (S0.f16 == 1.0f) |
| // D.f16 = 1.0f; |
| // else |
| // D.f16 = 1 / S0.f16; |
| void |
| Inst_VOP1__V_RCP_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP1__V_SQRT_F16::Inst_VOP1__V_SQRT_F16(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_sqrt_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP1__V_SQRT_F16 |
| |
| Inst_VOP1__V_SQRT_F16::~Inst_VOP1__V_SQRT_F16() |
| { |
| } // ~Inst_VOP1__V_SQRT_F16 |
| |
| // if (S0.f16 == 1.0f) |
| // D.f16 = 1.0f; |
| // else |
| // D.f16 = sqrt(S0.f16); |
| void |
| Inst_VOP1__V_SQRT_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP1__V_RSQ_F16::Inst_VOP1__V_RSQ_F16(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_rsq_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP1__V_RSQ_F16 |
| |
| Inst_VOP1__V_RSQ_F16::~Inst_VOP1__V_RSQ_F16() |
| { |
| } // ~Inst_VOP1__V_RSQ_F16 |
| |
| // if (S0.f16 == 1.0f) |
| // D.f16 = 1.0f; |
| // else |
| // D.f16 = 1 / sqrt(S0.f16); |
| void |
| Inst_VOP1__V_RSQ_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP1__V_LOG_F16::Inst_VOP1__V_LOG_F16(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_log_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP1__V_LOG_F16 |
| |
| Inst_VOP1__V_LOG_F16::~Inst_VOP1__V_LOG_F16() |
| { |
| } // ~Inst_VOP1__V_LOG_F16 |
| |
| // if (S0.f16 == 1.0f) |
| // D.f16 = 0.0f; |
| // else |
| // D.f16 = log2(S0.f16); |
| void |
| Inst_VOP1__V_LOG_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP1__V_EXP_F16::Inst_VOP1__V_EXP_F16(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_exp_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP1__V_EXP_F16 |
| |
| Inst_VOP1__V_EXP_F16::~Inst_VOP1__V_EXP_F16() |
| { |
| } // ~Inst_VOP1__V_EXP_F16 |
| |
| // if (S0.f16 == 0.0f) |
| // D.f16 = 1.0f; |
| // else |
| // D.f16 = pow(2.0, S0.f16). |
| void |
| Inst_VOP1__V_EXP_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP1__V_FREXP_MANT_F16::Inst_VOP1__V_FREXP_MANT_F16(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_frexp_mant_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP1__V_FREXP_MANT_F16 |
| |
| Inst_VOP1__V_FREXP_MANT_F16::~Inst_VOP1__V_FREXP_MANT_F16() |
| { |
| } // ~Inst_VOP1__V_FREXP_MANT_F16 |
| |
| // if (S0.f16 == +-INF || S0.f16 == NAN) |
| // D.f16 = S0.f16; |
| // else |
| // D.f16 = mantissa(S0.f16). |
| void |
| Inst_VOP1__V_FREXP_MANT_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP1__V_FREXP_EXP_I16_F16::Inst_VOP1__V_FREXP_EXP_I16_F16( |
| InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_frexp_exp_i16_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP1__V_FREXP_EXP_I16_F16 |
| |
| Inst_VOP1__V_FREXP_EXP_I16_F16::~Inst_VOP1__V_FREXP_EXP_I16_F16() |
| { |
| } // ~Inst_VOP1__V_FREXP_EXP_I16_F16 |
| |
| // frexp(S0.f16, Exponent(S0.f16)) |
| // if (S0.f16 == +-INF || S0.f16 == NAN) |
| // D.i16 = 0; |
| // else |
| // D.i16 = Exponent(S0.f16); |
| void |
| Inst_VOP1__V_FREXP_EXP_I16_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP1__V_FLOOR_F16::Inst_VOP1__V_FLOOR_F16(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_floor_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP1__V_FLOOR_F16 |
| |
| Inst_VOP1__V_FLOOR_F16::~Inst_VOP1__V_FLOOR_F16() |
| { |
| } // ~Inst_VOP1__V_FLOOR_F16 |
| |
| // D.f16 = floor(S0.f16); |
| void |
| Inst_VOP1__V_FLOOR_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP1__V_CEIL_F16::Inst_VOP1__V_CEIL_F16(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_ceil_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP1__V_CEIL_F16 |
| |
| Inst_VOP1__V_CEIL_F16::~Inst_VOP1__V_CEIL_F16() |
| { |
| } // ~Inst_VOP1__V_CEIL_F16 |
| |
| // D.f16 = ceil(S0.f16); |
| void |
| Inst_VOP1__V_CEIL_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP1__V_TRUNC_F16::Inst_VOP1__V_TRUNC_F16(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_trunc_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP1__V_TRUNC_F16 |
| |
| Inst_VOP1__V_TRUNC_F16::~Inst_VOP1__V_TRUNC_F16() |
| { |
| } // ~Inst_VOP1__V_TRUNC_F16 |
| |
| // D.f16 = trunc(S0.f16). |
| void |
| Inst_VOP1__V_TRUNC_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP1__V_RNDNE_F16::Inst_VOP1__V_RNDNE_F16(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_rndne_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP1__V_RNDNE_F16 |
| |
| Inst_VOP1__V_RNDNE_F16::~Inst_VOP1__V_RNDNE_F16() |
| { |
| } // ~Inst_VOP1__V_RNDNE_F16 |
| |
| // D.f16 = roundNearestEven(S0.f16); |
| void |
| Inst_VOP1__V_RNDNE_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP1__V_FRACT_F16::Inst_VOP1__V_FRACT_F16(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_fract_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP1__V_FRACT_F16 |
| |
| Inst_VOP1__V_FRACT_F16::~Inst_VOP1__V_FRACT_F16() |
| { |
| } // ~Inst_VOP1__V_FRACT_F16 |
| |
| // D.f16 = S0.f16 + -floor(S0.f16). |
| void |
| Inst_VOP1__V_FRACT_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP1__V_SIN_F16::Inst_VOP1__V_SIN_F16(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_sin_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP1__V_SIN_F16 |
| |
| Inst_VOP1__V_SIN_F16::~Inst_VOP1__V_SIN_F16() |
| { |
| } // ~Inst_VOP1__V_SIN_F16 |
| |
| // D.f16 = sin(S0.f16 * 2 * PI). |
| void |
| Inst_VOP1__V_SIN_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP1__V_COS_F16::Inst_VOP1__V_COS_F16(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_cos_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP1__V_COS_F16 |
| |
| Inst_VOP1__V_COS_F16::~Inst_VOP1__V_COS_F16() |
| { |
| } // ~Inst_VOP1__V_COS_F16 |
| |
| // D.f16 = cos(S0.f16 * 2 * PI). |
| void |
| Inst_VOP1__V_COS_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP1__V_EXP_LEGACY_F32::Inst_VOP1__V_EXP_LEGACY_F32(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_exp_legacy_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP1__V_EXP_LEGACY_F32 |
| |
| Inst_VOP1__V_EXP_LEGACY_F32::~Inst_VOP1__V_EXP_LEGACY_F32() |
| { |
| } // ~Inst_VOP1__V_EXP_LEGACY_F32 |
| |
| // D.f = pow(2.0, S0.f) |
| void |
| Inst_VOP1__V_EXP_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, instData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::pow(2.0, src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP1__V_LOG_LEGACY_F32::Inst_VOP1__V_LOG_LEGACY_F32(InFmt_VOP1 *iFmt) |
| : Inst_VOP1(iFmt, "v_log_legacy_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP1__V_LOG_LEGACY_F32 |
| |
| Inst_VOP1__V_LOG_LEGACY_F32::~Inst_VOP1__V_LOG_LEGACY_F32() |
| { |
| } // ~Inst_VOP1__V_LOG_LEGACY_F32 |
| |
| // D.f = log2(S0.f). |
| void |
| Inst_VOP1__V_LOG_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, instData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::log2(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOPC__V_CMP_CLASS_F32::Inst_VOPC__V_CMP_CLASS_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_class_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMP_CLASS_F32 |
| |
| Inst_VOPC__V_CMP_CLASS_F32::~Inst_VOPC__V_CMP_CLASS_F32() |
| { |
| } // ~Inst_VOPC__V_CMP_CLASS_F32 |
| |
| // VCC = IEEE numeric class function specified in S1.u, performed on S0.f |
| // The function reports true if the floating point value is any of the |
| // numeric types selected in S1.u according to the following list: |
| // S1.u[0] -- value is a signaling NaN. |
| // S1.u[1] -- value is a quiet NaN. |
| // S1.u[2] -- value is negative infinity. |
| // S1.u[3] -- value is a negative normal value. |
| // S1.u[4] -- value is a negative denormal value. |
| // S1.u[5] -- value is negative zero. |
| // S1.u[6] -- value is positive zero. |
| // S1.u[7] -- value is a positive denormal value. |
| // S1.u[8] -- value is a positive normal value. |
| // S1.u[9] -- value is positive infinity. |
| void |
| Inst_VOPC__V_CMP_CLASS_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| if (bits(src1[lane], 0) || bits(src1[lane], 1)) { |
| // is NaN |
| if (std::isnan(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 2)) { |
| // is -infinity |
| if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 3)) { |
| // is -normal |
| if (std::isnormal(src0[lane]) |
| && std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 4)) { |
| // is -denormal |
| if (std::fpclassify(src0[lane]) == FP_SUBNORMAL |
| && std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 5)) { |
| // is -zero |
| if (std::fpclassify(src0[lane]) == FP_ZERO |
| && std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 6)) { |
| // is +zero |
| if (std::fpclassify(src0[lane]) == FP_ZERO |
| && !std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 7)) { |
| // is +denormal |
| if (std::fpclassify(src0[lane]) == FP_SUBNORMAL |
| && !std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 8)) { |
| // is +normal |
| if (std::isnormal(src0[lane]) |
| && !std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 9)) { |
| // is +infinity |
| if (std::isinf(src0[lane]) && !std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_CLASS_F32::Inst_VOPC__V_CMPX_CLASS_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_class_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMPX_CLASS_F32 |
| |
| Inst_VOPC__V_CMPX_CLASS_F32::~Inst_VOPC__V_CMPX_CLASS_F32() |
| { |
| } // ~Inst_VOPC__V_CMPX_CLASS_F32 |
| |
| // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on |
| // S0.f The function reports true if the floating point value is any of |
| // the numeric types selected in S1.u according to the following list: |
| // S1.u[0] -- value is a signaling NaN. |
| // S1.u[1] -- value is a quiet NaN. |
| // S1.u[2] -- value is negative infinity. |
| // S1.u[3] -- value is a negative normal value. |
| // S1.u[4] -- value is a negative denormal value. |
| // S1.u[5] -- value is negative zero. |
| // S1.u[6] -- value is positive zero. |
| // S1.u[7] -- value is a positive denormal value. |
| // S1.u[8] -- value is a positive normal value. |
| // S1.u[9] -- value is positive infinity. |
| void |
| Inst_VOPC__V_CMPX_CLASS_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| if (bits(src1[lane], 0) || bits(src1[lane], 1)) { |
| // is NaN |
| if (std::isnan(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 2)) { |
| // is -infinity |
| if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 3)) { |
| // is -normal |
| if (std::isnormal(src0[lane]) |
| && std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 4)) { |
| // is -denormal |
| if (std::fpclassify(src0[lane]) == FP_SUBNORMAL |
| && std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 5)) { |
| // is -zero |
| if (std::fpclassify(src0[lane]) == FP_ZERO |
| && std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 6)) { |
| // is +zero |
| if (std::fpclassify(src0[lane]) == FP_ZERO |
| && !std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 7)) { |
| // is +denormal |
| if (std::fpclassify(src0[lane]) == FP_SUBNORMAL |
| && !std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 8)) { |
| // is +normal |
| if (std::isnormal(src0[lane]) |
| && !std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 9)) { |
| // is +infinity |
| if (std::isinf(src0[lane]) && !std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| } |
| } |
| |
| vcc.write(); |
| wf->execMask() = vcc.rawData(); |
| } |
| |
| Inst_VOPC__V_CMP_CLASS_F64::Inst_VOPC__V_CMP_CLASS_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_class_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMP_CLASS_F64 |
| |
| Inst_VOPC__V_CMP_CLASS_F64::~Inst_VOPC__V_CMP_CLASS_F64() |
| { |
| } // ~Inst_VOPC__V_CMP_CLASS_F64 |
| |
| // VCC = IEEE numeric class function specified in S1.u, performed on S0.d |
| // The function reports true if the floating point value is any of the |
| // numeric types selected in S1.u according to the following list: |
| // S1.u[0] -- value is a signaling NaN. |
| // S1.u[1] -- value is a quiet NaN. |
| // S1.u[2] -- value is negative infinity. |
| // S1.u[3] -- value is a negative normal value. |
| // S1.u[4] -- value is a negative denormal value. |
| // S1.u[5] -- value is negative zero. |
| // S1.u[6] -- value is positive zero. |
| // S1.u[7] -- value is a positive denormal value. |
| // S1.u[8] -- value is a positive normal value. |
| // S1.u[9] -- value is positive infinity. |
| void |
| Inst_VOPC__V_CMP_CLASS_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| if (bits(src1[lane], 0) || bits(src1[lane], 1)) { |
| // is NaN |
| if (std::isnan(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 2)) { |
| // is -infinity |
| if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 3)) { |
| // is -normal |
| if (std::isnormal(src0[lane]) |
| && std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 4)) { |
| // is -denormal |
| if (std::fpclassify(src0[lane]) == FP_SUBNORMAL |
| && std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 5)) { |
| // is -zero |
| if (std::fpclassify(src0[lane]) == FP_ZERO |
| && std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 6)) { |
| // is +zero |
| if (std::fpclassify(src0[lane]) == FP_ZERO |
| && !std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 7)) { |
| // is +denormal |
| if (std::fpclassify(src0[lane]) == FP_SUBNORMAL |
| && !std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 8)) { |
| // is +normal |
| if (std::isnormal(src0[lane]) |
| && !std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 9)) { |
| // is +infinity |
| if (std::isinf(src0[lane]) |
| && !std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_CLASS_F64::Inst_VOPC__V_CMPX_CLASS_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_class_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMPX_CLASS_F64 |
| |
| Inst_VOPC__V_CMPX_CLASS_F64::~Inst_VOPC__V_CMPX_CLASS_F64() |
| { |
| } // ~Inst_VOPC__V_CMPX_CLASS_F64 |
| |
| // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on |
| // S0.d The function reports true if the floating point value is any of |
| // the numeric types selected in S1.u according to the following list: |
| // S1.u[0] -- value is a signaling NaN. |
| // S1.u[1] -- value is a quiet NaN. |
| // S1.u[2] -- value is negative infinity. |
| // S1.u[3] -- value is a negative normal value. |
| // S1.u[4] -- value is a negative denormal value. |
| // S1.u[5] -- value is negative zero. |
| // S1.u[6] -- value is positive zero. |
| // S1.u[7] -- value is a positive denormal value. |
| // S1.u[8] -- value is a positive normal value. |
| // S1.u[9] -- value is positive infinity. |
| void |
| Inst_VOPC__V_CMPX_CLASS_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| if (bits(src1[lane], 0) || bits(src1[lane], 1)) { |
| // is NaN |
| if (std::isnan(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 2)) { |
| // is -infinity |
| if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 3)) { |
| // is -normal |
| if (std::isnormal(src0[lane]) |
| && std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 4)) { |
| // is -denormal |
| if (std::fpclassify(src0[lane]) == FP_SUBNORMAL |
| && std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 5)) { |
| // is -zero |
| if (std::fpclassify(src0[lane]) == FP_ZERO |
| && std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 6)) { |
| // is +zero |
| if (std::fpclassify(src0[lane]) == FP_ZERO |
| && !std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 7)) { |
| // is +denormal |
| if (std::fpclassify(src0[lane]) == FP_SUBNORMAL |
| && !std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 8)) { |
| // is +normal |
| if (std::isnormal(src0[lane]) |
| && !std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 9)) { |
| // is +infinity |
| if (std::isinf(src0[lane]) |
| && !std::signbit(src0[lane])) { |
| vcc.setBit(lane, 1); |
| continue; |
| } |
| } |
| } |
| } |
| |
| vcc.write(); |
| wf->execMask() = vcc.rawData(); |
| } |
| |
| Inst_VOPC__V_CMP_CLASS_F16::Inst_VOPC__V_CMP_CLASS_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_class_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMP_CLASS_F16 |
| |
| Inst_VOPC__V_CMP_CLASS_F16::~Inst_VOPC__V_CMP_CLASS_F16() |
| { |
| } // ~Inst_VOPC__V_CMP_CLASS_F16 |
| |
| // VCC = IEEE numeric class function specified in S1.u, performed on S0.f16 |
| // The function reports true if the floating point value is any of the |
| // numeric types selected in S1.u according to the following list: |
| // S1.u[0] -- value is a signaling NaN. |
| // S1.u[1] -- value is a quiet NaN. |
| // S1.u[2] -- value is negative infinity. |
| // S1.u[3] -- value is a negative normal value. |
| // S1.u[4] -- value is a negative denormal value. |
| // S1.u[5] -- value is negative zero. |
| // S1.u[6] -- value is positive zero. |
| // S1.u[7] -- value is a positive denormal value. |
| // S1.u[8] -- value is a positive normal value. |
| // S1.u[9] -- value is positive infinity. |
| void |
| Inst_VOPC__V_CMP_CLASS_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMPX_CLASS_F16::Inst_VOPC__V_CMPX_CLASS_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_class_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMPX_CLASS_F16 |
| |
| Inst_VOPC__V_CMPX_CLASS_F16::~Inst_VOPC__V_CMPX_CLASS_F16() |
| { |
| } // ~Inst_VOPC__V_CMPX_CLASS_F16 |
| |
| // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on |
| // S0.f16 |
| // The function reports true if the floating point value is any of the |
| // numeric types selected in S1.u according to the following list: |
| // S1.u[0] -- value is a signaling NaN. |
| // S1.u[1] -- value is a quiet NaN. |
| // S1.u[2] -- value is negative infinity. |
| // S1.u[3] -- value is a negative normal value. |
| // S1.u[4] -- value is a negative denormal value. |
| // S1.u[5] -- value is negative zero. |
| // S1.u[6] -- value is positive zero. |
| // S1.u[7] -- value is a positive denormal value. |
| // S1.u[8] -- value is a positive normal value. |
| // S1.u[9] -- value is positive infinity. |
| void |
| Inst_VOPC__V_CMPX_CLASS_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMP_F_F16::Inst_VOPC__V_CMP_F_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_f_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMP_F_F16 |
| |
| Inst_VOPC__V_CMP_F_F16::~Inst_VOPC__V_CMP_F_F16() |
| { |
| } // ~Inst_VOPC__V_CMP_F_F16 |
| |
| // D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_F_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMP_LT_F16::Inst_VOPC__V_CMP_LT_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_lt_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMP_LT_F16 |
| |
| Inst_VOPC__V_CMP_LT_F16::~Inst_VOPC__V_CMP_LT_F16() |
| { |
| } // ~Inst_VOPC__V_CMP_LT_F16 |
| |
| // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_LT_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMP_EQ_F16::Inst_VOPC__V_CMP_EQ_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_eq_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMP_EQ_F16 |
| |
| Inst_VOPC__V_CMP_EQ_F16::~Inst_VOPC__V_CMP_EQ_F16() |
| { |
| } // ~Inst_VOPC__V_CMP_EQ_F16 |
| |
| // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_EQ_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMP_LE_F16::Inst_VOPC__V_CMP_LE_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_le_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMP_LE_F16 |
| |
| Inst_VOPC__V_CMP_LE_F16::~Inst_VOPC__V_CMP_LE_F16() |
| { |
| } // ~Inst_VOPC__V_CMP_LE_F16 |
| |
| // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_LE_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMP_GT_F16::Inst_VOPC__V_CMP_GT_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_gt_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMP_GT_F16 |
| |
| Inst_VOPC__V_CMP_GT_F16::~Inst_VOPC__V_CMP_GT_F16() |
| { |
| } // ~Inst_VOPC__V_CMP_GT_F16 |
| |
| // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_GT_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMP_LG_F16::Inst_VOPC__V_CMP_LG_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_lg_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMP_LG_F16 |
| |
| Inst_VOPC__V_CMP_LG_F16::~Inst_VOPC__V_CMP_LG_F16() |
| { |
| } // ~Inst_VOPC__V_CMP_LG_F16 |
| |
| // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_LG_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMP_GE_F16::Inst_VOPC__V_CMP_GE_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_ge_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMP_GE_F16 |
| |
| Inst_VOPC__V_CMP_GE_F16::~Inst_VOPC__V_CMP_GE_F16() |
| { |
| } // ~Inst_VOPC__V_CMP_GE_F16 |
| |
| // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_GE_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMP_O_F16::Inst_VOPC__V_CMP_O_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_o_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMP_O_F16 |
| |
| Inst_VOPC__V_CMP_O_F16::~Inst_VOPC__V_CMP_O_F16() |
| { |
| } // ~Inst_VOPC__V_CMP_O_F16 |
| |
| // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_O_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMP_U_F16::Inst_VOPC__V_CMP_U_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_u_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMP_U_F16 |
| |
| Inst_VOPC__V_CMP_U_F16::~Inst_VOPC__V_CMP_U_F16() |
| { |
| } // ~Inst_VOPC__V_CMP_U_F16 |
| |
| // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_U_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMP_NGE_F16::Inst_VOPC__V_CMP_NGE_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_nge_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMP_NGE_F16 |
| |
| Inst_VOPC__V_CMP_NGE_F16::~Inst_VOPC__V_CMP_NGE_F16() |
| { |
| } // ~Inst_VOPC__V_CMP_NGE_F16 |
| |
| // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_NGE_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMP_NLG_F16::Inst_VOPC__V_CMP_NLG_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_nlg_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMP_NLG_F16 |
| |
| Inst_VOPC__V_CMP_NLG_F16::~Inst_VOPC__V_CMP_NLG_F16() |
| { |
| } // ~Inst_VOPC__V_CMP_NLG_F16 |
| |
| // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_NLG_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMP_NGT_F16::Inst_VOPC__V_CMP_NGT_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_ngt_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMP_NGT_F16 |
| |
| Inst_VOPC__V_CMP_NGT_F16::~Inst_VOPC__V_CMP_NGT_F16() |
| { |
| } // ~Inst_VOPC__V_CMP_NGT_F16 |
| |
| // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_NGT_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMP_NLE_F16::Inst_VOPC__V_CMP_NLE_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_nle_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMP_NLE_F16 |
| |
| Inst_VOPC__V_CMP_NLE_F16::~Inst_VOPC__V_CMP_NLE_F16() |
| { |
| } // ~Inst_VOPC__V_CMP_NLE_F16 |
| |
| // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_NLE_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMP_NEQ_F16::Inst_VOPC__V_CMP_NEQ_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_neq_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMP_NEQ_F16 |
| |
| Inst_VOPC__V_CMP_NEQ_F16::~Inst_VOPC__V_CMP_NEQ_F16() |
| { |
| } // ~Inst_VOPC__V_CMP_NEQ_F16 |
| |
| // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_NEQ_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMP_NLT_F16::Inst_VOPC__V_CMP_NLT_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_nlt_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMP_NLT_F16 |
| |
| Inst_VOPC__V_CMP_NLT_F16::~Inst_VOPC__V_CMP_NLT_F16() |
| { |
| } // ~Inst_VOPC__V_CMP_NLT_F16 |
| |
| // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_NLT_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMP_TRU_F16::Inst_VOPC__V_CMP_TRU_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_tru_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMP_TRU_F16 |
| |
| Inst_VOPC__V_CMP_TRU_F16::~Inst_VOPC__V_CMP_TRU_F16() |
| { |
| } // ~Inst_VOPC__V_CMP_TRU_F16 |
| |
| // D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_TRU_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMPX_F_F16::Inst_VOPC__V_CMPX_F_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_f_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMPX_F_F16 |
| |
| Inst_VOPC__V_CMPX_F_F16::~Inst_VOPC__V_CMPX_F_F16() |
| { |
| } // ~Inst_VOPC__V_CMPX_F_F16 |
| |
| // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_F_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMPX_LT_F16::Inst_VOPC__V_CMPX_LT_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_lt_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMPX_LT_F16 |
| |
| Inst_VOPC__V_CMPX_LT_F16::~Inst_VOPC__V_CMPX_LT_F16() |
| { |
| } // ~Inst_VOPC__V_CMPX_LT_F16 |
| |
| // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_LT_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMPX_EQ_F16::Inst_VOPC__V_CMPX_EQ_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_eq_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMPX_EQ_F16 |
| |
| Inst_VOPC__V_CMPX_EQ_F16::~Inst_VOPC__V_CMPX_EQ_F16() |
| { |
| } // ~Inst_VOPC__V_CMPX_EQ_F16 |
| |
| // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_EQ_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMPX_LE_F16::Inst_VOPC__V_CMPX_LE_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_le_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMPX_LE_F16 |
| |
| Inst_VOPC__V_CMPX_LE_F16::~Inst_VOPC__V_CMPX_LE_F16() |
| { |
| } // ~Inst_VOPC__V_CMPX_LE_F16 |
| |
| // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_LE_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMPX_GT_F16::Inst_VOPC__V_CMPX_GT_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_gt_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMPX_GT_F16 |
| |
| Inst_VOPC__V_CMPX_GT_F16::~Inst_VOPC__V_CMPX_GT_F16() |
| { |
| } // ~Inst_VOPC__V_CMPX_GT_F16 |
| |
| // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_GT_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMPX_LG_F16::Inst_VOPC__V_CMPX_LG_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_lg_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMPX_LG_F16 |
| |
| Inst_VOPC__V_CMPX_LG_F16::~Inst_VOPC__V_CMPX_LG_F16() |
| { |
| } // ~Inst_VOPC__V_CMPX_LG_F16 |
| |
| // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_LG_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMPX_GE_F16::Inst_VOPC__V_CMPX_GE_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_ge_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMPX_GE_F16 |
| |
| Inst_VOPC__V_CMPX_GE_F16::~Inst_VOPC__V_CMPX_GE_F16() |
| { |
| } // ~Inst_VOPC__V_CMPX_GE_F16 |
| |
| // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_GE_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMPX_O_F16::Inst_VOPC__V_CMPX_O_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_o_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMPX_O_F16 |
| |
| Inst_VOPC__V_CMPX_O_F16::~Inst_VOPC__V_CMPX_O_F16() |
| { |
| } // ~Inst_VOPC__V_CMPX_O_F16 |
| |
| // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC |
| // encoding. |
| void |
| Inst_VOPC__V_CMPX_O_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMPX_U_F16::Inst_VOPC__V_CMPX_U_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_u_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMPX_U_F16 |
| |
| Inst_VOPC__V_CMPX_U_F16::~Inst_VOPC__V_CMPX_U_F16() |
| { |
| } // ~Inst_VOPC__V_CMPX_U_F16 |
| |
| // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC |
| // encoding. |
| void |
| Inst_VOPC__V_CMPX_U_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMPX_NGE_F16::Inst_VOPC__V_CMPX_NGE_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_nge_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMPX_NGE_F16 |
| |
| Inst_VOPC__V_CMPX_NGE_F16::~Inst_VOPC__V_CMPX_NGE_F16() |
| { |
| } // ~Inst_VOPC__V_CMPX_NGE_F16 |
| |
| // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_NGE_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMPX_NLG_F16::Inst_VOPC__V_CMPX_NLG_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_nlg_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMPX_NLG_F16 |
| |
| Inst_VOPC__V_CMPX_NLG_F16::~Inst_VOPC__V_CMPX_NLG_F16() |
| { |
| } // ~Inst_VOPC__V_CMPX_NLG_F16 |
| |
| // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_NLG_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMPX_NGT_F16::Inst_VOPC__V_CMPX_NGT_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_ngt_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMPX_NGT_F16 |
| |
| Inst_VOPC__V_CMPX_NGT_F16::~Inst_VOPC__V_CMPX_NGT_F16() |
| { |
| } // ~Inst_VOPC__V_CMPX_NGT_F16 |
| |
| // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_NGT_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMPX_NLE_F16::Inst_VOPC__V_CMPX_NLE_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_nle_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMPX_NLE_F16 |
| |
| Inst_VOPC__V_CMPX_NLE_F16::~Inst_VOPC__V_CMPX_NLE_F16() |
| { |
| } // ~Inst_VOPC__V_CMPX_NLE_F16 |
| |
| // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_NLE_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMPX_NEQ_F16::Inst_VOPC__V_CMPX_NEQ_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_neq_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMPX_NEQ_F16 |
| |
| Inst_VOPC__V_CMPX_NEQ_F16::~Inst_VOPC__V_CMPX_NEQ_F16() |
| { |
| } // ~Inst_VOPC__V_CMPX_NEQ_F16 |
| |
| // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_NEQ_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMPX_NLT_F16::Inst_VOPC__V_CMPX_NLT_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_nlt_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMPX_NLT_F16 |
| |
| Inst_VOPC__V_CMPX_NLT_F16::~Inst_VOPC__V_CMPX_NLT_F16() |
| { |
| } // ~Inst_VOPC__V_CMPX_NLT_F16 |
| |
| // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_NLT_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMPX_TRU_F16::Inst_VOPC__V_CMPX_TRU_F16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_tru_f16") |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOPC__V_CMPX_TRU_F16 |
| |
| Inst_VOPC__V_CMPX_TRU_F16::~Inst_VOPC__V_CMPX_TRU_F16() |
| { |
| } // ~Inst_VOPC__V_CMPX_TRU_F16 |
| |
| // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_TRU_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOPC__V_CMP_F_F32::Inst_VOPC__V_CMP_F_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_f_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMP_F_F32 |
| |
| Inst_VOPC__V_CMP_F_F32::~Inst_VOPC__V_CMP_F_F32() |
| { |
| } // ~Inst_VOPC__V_CMP_F_F32 |
| |
| // D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_F_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_LT_F32::Inst_VOPC__V_CMP_LT_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_lt_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMP_LT_F32 |
| |
| Inst_VOPC__V_CMP_LT_F32::~Inst_VOPC__V_CMP_LT_F32() |
| { |
| } // ~Inst_VOPC__V_CMP_LT_F32 |
| |
| // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_LT_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_EQ_F32::Inst_VOPC__V_CMP_EQ_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_eq_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMP_EQ_F32 |
| |
| Inst_VOPC__V_CMP_EQ_F32::~Inst_VOPC__V_CMP_EQ_F32() |
| { |
| } // ~Inst_VOPC__V_CMP_EQ_F32 |
| |
| // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_EQ_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_LE_F32::Inst_VOPC__V_CMP_LE_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_le_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMP_LE_F32 |
| |
| Inst_VOPC__V_CMP_LE_F32::~Inst_VOPC__V_CMP_LE_F32() |
| { |
| } // ~Inst_VOPC__V_CMP_LE_F32 |
| |
| // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_LE_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_GT_F32::Inst_VOPC__V_CMP_GT_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_gt_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMP_GT_F32 |
| |
| Inst_VOPC__V_CMP_GT_F32::~Inst_VOPC__V_CMP_GT_F32() |
| { |
| } // ~Inst_VOPC__V_CMP_GT_F32 |
| |
| // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_GT_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_LG_F32::Inst_VOPC__V_CMP_LG_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_lg_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMP_LG_F32 |
| |
| Inst_VOPC__V_CMP_LG_F32::~Inst_VOPC__V_CMP_LG_F32() |
| { |
| } // ~Inst_VOPC__V_CMP_LG_F32 |
| |
| // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_LG_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, (src0[lane] < src1[lane] |
| || src0[lane] > src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_GE_F32::Inst_VOPC__V_CMP_GE_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_ge_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMP_GE_F32 |
| |
| Inst_VOPC__V_CMP_GE_F32::~Inst_VOPC__V_CMP_GE_F32() |
| { |
| } // ~Inst_VOPC__V_CMP_GE_F32 |
| |
| // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_GE_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_O_F32::Inst_VOPC__V_CMP_O_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_o_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMP_O_F32 |
| |
| Inst_VOPC__V_CMP_O_F32::~Inst_VOPC__V_CMP_O_F32() |
| { |
| } // ~Inst_VOPC__V_CMP_O_F32 |
| |
| // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_O_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, (!std::isnan(src0[lane]) |
| && !std::isnan(src1[lane])) ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_U_F32::Inst_VOPC__V_CMP_U_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_u_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMP_U_F32 |
| |
| Inst_VOPC__V_CMP_U_F32::~Inst_VOPC__V_CMP_U_F32() |
| { |
| } // ~Inst_VOPC__V_CMP_U_F32 |
| |
| // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_U_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, (std::isnan(src0[lane]) |
| || std::isnan(src1[lane])) ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_NGE_F32::Inst_VOPC__V_CMP_NGE_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_nge_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMP_NGE_F32 |
| |
| Inst_VOPC__V_CMP_NGE_F32::~Inst_VOPC__V_CMP_NGE_F32() |
| { |
| } // ~Inst_VOPC__V_CMP_NGE_F32 |
| |
| // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_NGE_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_NLG_F32::Inst_VOPC__V_CMP_NLG_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_nlg_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMP_NLG_F32 |
| |
| Inst_VOPC__V_CMP_NLG_F32::~Inst_VOPC__V_CMP_NLG_F32() |
| { |
| } // ~Inst_VOPC__V_CMP_NLG_F32 |
| |
| // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_NLG_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, !(src0[lane] < src1[lane] |
| || src0[lane] > src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_NGT_F32::Inst_VOPC__V_CMP_NGT_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_ngt_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMP_NGT_F32 |
| |
| Inst_VOPC__V_CMP_NGT_F32::~Inst_VOPC__V_CMP_NGT_F32() |
| { |
| } // ~Inst_VOPC__V_CMP_NGT_F32 |
| |
| // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_NGT_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_NLE_F32::Inst_VOPC__V_CMP_NLE_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_nle_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMP_NLE_F32 |
| |
| Inst_VOPC__V_CMP_NLE_F32::~Inst_VOPC__V_CMP_NLE_F32() |
| { |
| } // ~Inst_VOPC__V_CMP_NLE_F32 |
| |
| // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_NLE_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_NEQ_F32::Inst_VOPC__V_CMP_NEQ_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_neq_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMP_NEQ_F32 |
| |
| Inst_VOPC__V_CMP_NEQ_F32::~Inst_VOPC__V_CMP_NEQ_F32() |
| { |
| } // ~Inst_VOPC__V_CMP_NEQ_F32 |
| |
| // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_NEQ_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_NLT_F32::Inst_VOPC__V_CMP_NLT_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_nlt_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMP_NLT_F32 |
| |
| Inst_VOPC__V_CMP_NLT_F32::~Inst_VOPC__V_CMP_NLT_F32() |
| { |
| } // ~Inst_VOPC__V_CMP_NLT_F32 |
| |
| // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_NLT_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_TRU_F32::Inst_VOPC__V_CMP_TRU_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_tru_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMP_TRU_F32 |
| |
| Inst_VOPC__V_CMP_TRU_F32::~Inst_VOPC__V_CMP_TRU_F32() |
| { |
| } // ~Inst_VOPC__V_CMP_TRU_F32 |
| |
| // D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_TRU_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 1); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_F_F32::Inst_VOPC__V_CMPX_F_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_f_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMPX_F_F32 |
| |
| Inst_VOPC__V_CMPX_F_F32::~Inst_VOPC__V_CMPX_F_F32() |
| { |
| } // ~Inst_VOPC__V_CMPX_F_F32 |
| |
| // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_F_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 0); |
| } |
| } |
| |
| vcc.write(); |
| wf->execMask() = vcc.rawData(); |
| } |
| |
| Inst_VOPC__V_CMPX_LT_F32::Inst_VOPC__V_CMPX_LT_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_lt_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMPX_LT_F32 |
| |
| Inst_VOPC__V_CMPX_LT_F32::~Inst_VOPC__V_CMPX_LT_F32() |
| { |
| } // ~Inst_VOPC__V_CMPX_LT_F32 |
| |
| // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_LT_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| wf->execMask() = vcc.rawData(); |
| } |
| |
| Inst_VOPC__V_CMPX_EQ_F32::Inst_VOPC__V_CMPX_EQ_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_eq_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMPX_EQ_F32 |
| |
| Inst_VOPC__V_CMPX_EQ_F32::~Inst_VOPC__V_CMPX_EQ_F32() |
| { |
| } // ~Inst_VOPC__V_CMPX_EQ_F32 |
| |
| // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_EQ_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| wf->execMask() = vcc.rawData(); |
| } |
| |
| Inst_VOPC__V_CMPX_LE_F32::Inst_VOPC__V_CMPX_LE_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_le_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMPX_LE_F32 |
| |
| Inst_VOPC__V_CMPX_LE_F32::~Inst_VOPC__V_CMPX_LE_F32() |
| { |
| } // ~Inst_VOPC__V_CMPX_LE_F32 |
| |
| // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_LE_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| wf->execMask() = vcc.rawData(); |
| } |
| |
| Inst_VOPC__V_CMPX_GT_F32::Inst_VOPC__V_CMPX_GT_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_gt_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMPX_GT_F32 |
| |
| Inst_VOPC__V_CMPX_GT_F32::~Inst_VOPC__V_CMPX_GT_F32() |
| { |
| } // ~Inst_VOPC__V_CMPX_GT_F32 |
| |
| // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_GT_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| wf->execMask() = vcc.rawData(); |
| } |
| |
| Inst_VOPC__V_CMPX_LG_F32::Inst_VOPC__V_CMPX_LG_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_lg_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMPX_LG_F32 |
| |
| Inst_VOPC__V_CMPX_LG_F32::~Inst_VOPC__V_CMPX_LG_F32() |
| { |
| } // ~Inst_VOPC__V_CMPX_LG_F32 |
| |
| // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_LG_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, (src0[lane] < src1[lane] |
| || src0[lane] > src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| wf->execMask() = vcc.rawData(); |
| } |
| |
| Inst_VOPC__V_CMPX_GE_F32::Inst_VOPC__V_CMPX_GE_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_ge_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMPX_GE_F32 |
| |
| Inst_VOPC__V_CMPX_GE_F32::~Inst_VOPC__V_CMPX_GE_F32() |
| { |
| } // ~Inst_VOPC__V_CMPX_GE_F32 |
| |
| // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_GE_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| wf->execMask() = vcc.rawData(); |
| } |
| |
| Inst_VOPC__V_CMPX_O_F32::Inst_VOPC__V_CMPX_O_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_o_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMPX_O_F32 |
| |
| Inst_VOPC__V_CMPX_O_F32::~Inst_VOPC__V_CMPX_O_F32() |
| { |
| } // ~Inst_VOPC__V_CMPX_O_F32 |
| |
| // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC |
| // encoding. |
| void |
| Inst_VOPC__V_CMPX_O_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, (!std::isnan(src0[lane]) |
| && !std::isnan(src1[lane])) ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| wf->execMask() = vcc.rawData(); |
| } |
| |
| Inst_VOPC__V_CMPX_U_F32::Inst_VOPC__V_CMPX_U_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_u_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMPX_U_F32 |
| |
| Inst_VOPC__V_CMPX_U_F32::~Inst_VOPC__V_CMPX_U_F32() |
| { |
| } // ~Inst_VOPC__V_CMPX_U_F32 |
| |
| // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC |
| // encoding. |
| void |
| Inst_VOPC__V_CMPX_U_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, (std::isnan(src0[lane]) |
| || std::isnan(src1[lane])) ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| wf->execMask() = vcc.rawData(); |
| } |
| |
| Inst_VOPC__V_CMPX_NGE_F32::Inst_VOPC__V_CMPX_NGE_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_nge_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMPX_NGE_F32 |
| |
| Inst_VOPC__V_CMPX_NGE_F32::~Inst_VOPC__V_CMPX_NGE_F32() |
| { |
| } // ~Inst_VOPC__V_CMPX_NGE_F32 |
| |
| // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_NGE_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| wf->execMask() = vcc.rawData(); |
| } |
| |
| Inst_VOPC__V_CMPX_NLG_F32::Inst_VOPC__V_CMPX_NLG_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_nlg_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMPX_NLG_F32 |
| |
| Inst_VOPC__V_CMPX_NLG_F32::~Inst_VOPC__V_CMPX_NLG_F32() |
| { |
| } // ~Inst_VOPC__V_CMPX_NLG_F32 |
| |
| // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_NLG_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, !(src0[lane] < src1[lane] |
| || src0[lane] > src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| wf->execMask() = vcc.rawData(); |
| } |
| |
| Inst_VOPC__V_CMPX_NGT_F32::Inst_VOPC__V_CMPX_NGT_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_ngt_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMPX_NGT_F32 |
| |
| Inst_VOPC__V_CMPX_NGT_F32::~Inst_VOPC__V_CMPX_NGT_F32() |
| { |
| } // ~Inst_VOPC__V_CMPX_NGT_F32 |
| |
| // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_NGT_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| wf->execMask() = vcc.rawData(); |
| } |
| |
| Inst_VOPC__V_CMPX_NLE_F32::Inst_VOPC__V_CMPX_NLE_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_nle_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMPX_NLE_F32 |
| |
| Inst_VOPC__V_CMPX_NLE_F32::~Inst_VOPC__V_CMPX_NLE_F32() |
| { |
| } // ~Inst_VOPC__V_CMPX_NLE_F32 |
| |
| // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_NLE_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| wf->execMask() = vcc.rawData(); |
| } |
| |
| Inst_VOPC__V_CMPX_NEQ_F32::Inst_VOPC__V_CMPX_NEQ_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_neq_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMPX_NEQ_F32 |
| |
| Inst_VOPC__V_CMPX_NEQ_F32::~Inst_VOPC__V_CMPX_NEQ_F32() |
| { |
| } // ~Inst_VOPC__V_CMPX_NEQ_F32 |
| |
| // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_NEQ_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, !(src0[lane] == src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_NLT_F32::Inst_VOPC__V_CMPX_NLT_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_nlt_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMPX_NLT_F32 |
| |
| Inst_VOPC__V_CMPX_NLT_F32::~Inst_VOPC__V_CMPX_NLT_F32() |
| { |
| } // ~Inst_VOPC__V_CMPX_NLT_F32 |
| |
| // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_NLT_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| wf->execMask() = vcc.rawData(); |
| } |
| |
| Inst_VOPC__V_CMPX_TRU_F32::Inst_VOPC__V_CMPX_TRU_F32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_tru_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOPC__V_CMPX_TRU_F32 |
| |
| Inst_VOPC__V_CMPX_TRU_F32::~Inst_VOPC__V_CMPX_TRU_F32() |
| { |
| } // ~Inst_VOPC__V_CMPX_TRU_F32 |
| |
| // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_TRU_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 1); |
| } |
| } |
| |
| vcc.write(); |
| wf->execMask() = vcc.rawData(); |
| } |
| |
| Inst_VOPC__V_CMP_F_F64::Inst_VOPC__V_CMP_F_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_f_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMP_F_F64 |
| |
| Inst_VOPC__V_CMP_F_F64::~Inst_VOPC__V_CMP_F_F64() |
| { |
| } // ~Inst_VOPC__V_CMP_F_F64 |
| |
| // D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_F_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_LT_F64::Inst_VOPC__V_CMP_LT_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_lt_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMP_LT_F64 |
| |
| Inst_VOPC__V_CMP_LT_F64::~Inst_VOPC__V_CMP_LT_F64() |
| { |
| } // ~Inst_VOPC__V_CMP_LT_F64 |
| |
| // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_LT_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_EQ_F64::Inst_VOPC__V_CMP_EQ_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_eq_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMP_EQ_F64 |
| |
| Inst_VOPC__V_CMP_EQ_F64::~Inst_VOPC__V_CMP_EQ_F64() |
| { |
| } // ~Inst_VOPC__V_CMP_EQ_F64 |
| |
| // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_EQ_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_LE_F64::Inst_VOPC__V_CMP_LE_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_le_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMP_LE_F64 |
| |
| Inst_VOPC__V_CMP_LE_F64::~Inst_VOPC__V_CMP_LE_F64() |
| { |
| } // ~Inst_VOPC__V_CMP_LE_F64 |
| |
| // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_LE_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_GT_F64::Inst_VOPC__V_CMP_GT_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_gt_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMP_GT_F64 |
| |
| Inst_VOPC__V_CMP_GT_F64::~Inst_VOPC__V_CMP_GT_F64() |
| { |
| } // ~Inst_VOPC__V_CMP_GT_F64 |
| |
| // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_GT_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_LG_F64::Inst_VOPC__V_CMP_LG_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_lg_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMP_LG_F64 |
| |
| Inst_VOPC__V_CMP_LG_F64::~Inst_VOPC__V_CMP_LG_F64() |
| { |
| } // ~Inst_VOPC__V_CMP_LG_F64 |
| |
| // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_LG_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, (src0[lane] < src1[lane] |
| || src0[lane] > src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_GE_F64::Inst_VOPC__V_CMP_GE_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_ge_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMP_GE_F64 |
| |
| Inst_VOPC__V_CMP_GE_F64::~Inst_VOPC__V_CMP_GE_F64() |
| { |
| } // ~Inst_VOPC__V_CMP_GE_F64 |
| |
| // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_GE_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_O_F64::Inst_VOPC__V_CMP_O_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_o_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMP_O_F64 |
| |
| Inst_VOPC__V_CMP_O_F64::~Inst_VOPC__V_CMP_O_F64() |
| { |
| } // ~Inst_VOPC__V_CMP_O_F64 |
| |
| // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_O_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, (!std::isnan(src0[lane]) |
| && !std::isnan(src1[lane])) ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_U_F64::Inst_VOPC__V_CMP_U_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_u_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMP_U_F64 |
| |
| Inst_VOPC__V_CMP_U_F64::~Inst_VOPC__V_CMP_U_F64() |
| { |
| } // ~Inst_VOPC__V_CMP_U_F64 |
| |
| // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_U_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, (std::isnan(src0[lane]) |
| || std::isnan(src1[lane])) ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_NGE_F64::Inst_VOPC__V_CMP_NGE_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_nge_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMP_NGE_F64 |
| |
| Inst_VOPC__V_CMP_NGE_F64::~Inst_VOPC__V_CMP_NGE_F64() |
| { |
| } // ~Inst_VOPC__V_CMP_NGE_F64 |
| |
| // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_NGE_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_NLG_F64::Inst_VOPC__V_CMP_NLG_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_nlg_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMP_NLG_F64 |
| |
| Inst_VOPC__V_CMP_NLG_F64::~Inst_VOPC__V_CMP_NLG_F64() |
| { |
| } // ~Inst_VOPC__V_CMP_NLG_F64 |
| |
| // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_NLG_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, !(src0[lane] < src1[lane] |
| || src0[lane] > src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_NGT_F64::Inst_VOPC__V_CMP_NGT_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_ngt_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMP_NGT_F64 |
| |
| Inst_VOPC__V_CMP_NGT_F64::~Inst_VOPC__V_CMP_NGT_F64() |
| { |
| } // ~Inst_VOPC__V_CMP_NGT_F64 |
| |
| // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_NGT_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_NLE_F64::Inst_VOPC__V_CMP_NLE_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_nle_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMP_NLE_F64 |
| |
| Inst_VOPC__V_CMP_NLE_F64::~Inst_VOPC__V_CMP_NLE_F64() |
| { |
| } // ~Inst_VOPC__V_CMP_NLE_F64 |
| |
| // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_NLE_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_NEQ_F64::Inst_VOPC__V_CMP_NEQ_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_neq_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMP_NEQ_F64 |
| |
| Inst_VOPC__V_CMP_NEQ_F64::~Inst_VOPC__V_CMP_NEQ_F64() |
| { |
| } // ~Inst_VOPC__V_CMP_NEQ_F64 |
| |
| // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_NEQ_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_NLT_F64::Inst_VOPC__V_CMP_NLT_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_nlt_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMP_NLT_F64 |
| |
| Inst_VOPC__V_CMP_NLT_F64::~Inst_VOPC__V_CMP_NLT_F64() |
| { |
| } // ~Inst_VOPC__V_CMP_NLT_F64 |
| |
| // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_NLT_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_TRU_F64::Inst_VOPC__V_CMP_TRU_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_tru_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMP_TRU_F64 |
| |
| Inst_VOPC__V_CMP_TRU_F64::~Inst_VOPC__V_CMP_TRU_F64() |
| { |
| } // ~Inst_VOPC__V_CMP_TRU_F64 |
| |
| // D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_TRU_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 1); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_F_F64::Inst_VOPC__V_CMPX_F_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_f_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMPX_F_F64 |
| |
| Inst_VOPC__V_CMPX_F_F64::~Inst_VOPC__V_CMPX_F_F64() |
| { |
| } // ~Inst_VOPC__V_CMPX_F_F64 |
| |
| // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_F_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 0); |
| } |
| } |
| |
| vcc.write(); |
| wf->execMask() = vcc.rawData(); |
| } |
| |
| Inst_VOPC__V_CMPX_LT_F64::Inst_VOPC__V_CMPX_LT_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_lt_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMPX_LT_F64 |
| |
| Inst_VOPC__V_CMPX_LT_F64::~Inst_VOPC__V_CMPX_LT_F64() |
| { |
| } // ~Inst_VOPC__V_CMPX_LT_F64 |
| |
| // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_LT_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| wf->execMask() = vcc.rawData(); |
| } |
| |
| Inst_VOPC__V_CMPX_EQ_F64::Inst_VOPC__V_CMPX_EQ_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_eq_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMPX_EQ_F64 |
| |
| Inst_VOPC__V_CMPX_EQ_F64::~Inst_VOPC__V_CMPX_EQ_F64() |
| { |
| } // ~Inst_VOPC__V_CMPX_EQ_F64 |
| |
| // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_EQ_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| wf->execMask() = vcc.rawData(); |
| } |
| |
| Inst_VOPC__V_CMPX_LE_F64::Inst_VOPC__V_CMPX_LE_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_le_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMPX_LE_F64 |
| |
| Inst_VOPC__V_CMPX_LE_F64::~Inst_VOPC__V_CMPX_LE_F64() |
| { |
| } // ~Inst_VOPC__V_CMPX_LE_F64 |
| |
| // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_LE_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_GT_F64::Inst_VOPC__V_CMPX_GT_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_gt_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMPX_GT_F64 |
| |
| Inst_VOPC__V_CMPX_GT_F64::~Inst_VOPC__V_CMPX_GT_F64() |
| { |
| } // ~Inst_VOPC__V_CMPX_GT_F64 |
| |
| // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_GT_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_LG_F64::Inst_VOPC__V_CMPX_LG_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_lg_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMPX_LG_F64 |
| |
| Inst_VOPC__V_CMPX_LG_F64::~Inst_VOPC__V_CMPX_LG_F64() |
| { |
| } // ~Inst_VOPC__V_CMPX_LG_F64 |
| |
| // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_LG_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, (src0[lane] < src1[lane] |
| || src0[lane] > src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_GE_F64::Inst_VOPC__V_CMPX_GE_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_ge_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMPX_GE_F64 |
| |
| Inst_VOPC__V_CMPX_GE_F64::~Inst_VOPC__V_CMPX_GE_F64() |
| { |
| } // ~Inst_VOPC__V_CMPX_GE_F64 |
| |
| // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_GE_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_O_F64::Inst_VOPC__V_CMPX_O_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_o_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMPX_O_F64 |
| |
| Inst_VOPC__V_CMPX_O_F64::~Inst_VOPC__V_CMPX_O_F64() |
| { |
| } // ~Inst_VOPC__V_CMPX_O_F64 |
| |
| // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC |
| // encoding. |
| void |
| Inst_VOPC__V_CMPX_O_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, (!std::isnan(src0[lane]) |
| && !std::isnan(src1[lane])) ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_U_F64::Inst_VOPC__V_CMPX_U_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_u_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMPX_U_F64 |
| |
| Inst_VOPC__V_CMPX_U_F64::~Inst_VOPC__V_CMPX_U_F64() |
| { |
| } // ~Inst_VOPC__V_CMPX_U_F64 |
| |
| // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC |
| // encoding. |
| void |
| Inst_VOPC__V_CMPX_U_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, (std::isnan(src0[lane]) |
| || std::isnan(src1[lane])) ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_NGE_F64::Inst_VOPC__V_CMPX_NGE_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_nge_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMPX_NGE_F64 |
| |
| Inst_VOPC__V_CMPX_NGE_F64::~Inst_VOPC__V_CMPX_NGE_F64() |
| { |
| } // ~Inst_VOPC__V_CMPX_NGE_F64 |
| |
| // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_NGE_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_NLG_F64::Inst_VOPC__V_CMPX_NLG_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_nlg_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMPX_NLG_F64 |
| |
| Inst_VOPC__V_CMPX_NLG_F64::~Inst_VOPC__V_CMPX_NLG_F64() |
| { |
| } // ~Inst_VOPC__V_CMPX_NLG_F64 |
| |
| // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_NLG_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, !(src0[lane] < src1[lane] |
| || src0[lane] > src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_NGT_F64::Inst_VOPC__V_CMPX_NGT_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_ngt_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMPX_NGT_F64 |
| |
| Inst_VOPC__V_CMPX_NGT_F64::~Inst_VOPC__V_CMPX_NGT_F64() |
| { |
| } // ~Inst_VOPC__V_CMPX_NGT_F64 |
| |
| // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_NGT_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_NLE_F64::Inst_VOPC__V_CMPX_NLE_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_nle_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMPX_NLE_F64 |
| |
| Inst_VOPC__V_CMPX_NLE_F64::~Inst_VOPC__V_CMPX_NLE_F64() |
| { |
| } // ~Inst_VOPC__V_CMPX_NLE_F64 |
| |
| // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_NLE_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_NEQ_F64::Inst_VOPC__V_CMPX_NEQ_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_neq_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMPX_NEQ_F64 |
| |
| Inst_VOPC__V_CMPX_NEQ_F64::~Inst_VOPC__V_CMPX_NEQ_F64() |
| { |
| } // ~Inst_VOPC__V_CMPX_NEQ_F64 |
| |
| // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_NEQ_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_NLT_F64::Inst_VOPC__V_CMPX_NLT_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_nlt_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMPX_NLT_F64 |
| |
| Inst_VOPC__V_CMPX_NLT_F64::~Inst_VOPC__V_CMPX_NLT_F64() |
| { |
| } // ~Inst_VOPC__V_CMPX_NLT_F64 |
| |
| // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_NLT_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_TRU_F64::Inst_VOPC__V_CMPX_TRU_F64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_tru_f64") |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOPC__V_CMPX_TRU_F64 |
| |
| Inst_VOPC__V_CMPX_TRU_F64::~Inst_VOPC__V_CMPX_TRU_F64() |
| { |
| } // ~Inst_VOPC__V_CMPX_TRU_F64 |
| |
| // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_TRU_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 1); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_F_I16::Inst_VOPC__V_CMP_F_I16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_f_i16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_F_I16 |
| |
| Inst_VOPC__V_CMP_F_I16::~Inst_VOPC__V_CMP_F_I16() |
| { |
| } // ~Inst_VOPC__V_CMP_F_I16 |
| |
| // D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_F_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_LT_I16::Inst_VOPC__V_CMP_LT_I16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_lt_i16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_LT_I16 |
| |
| Inst_VOPC__V_CMP_LT_I16::~Inst_VOPC__V_CMP_LT_I16() |
| { |
| } // ~Inst_VOPC__V_CMP_LT_I16 |
| |
| // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_LT_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_EQ_I16::Inst_VOPC__V_CMP_EQ_I16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_eq_i16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_EQ_I16 |
| |
| Inst_VOPC__V_CMP_EQ_I16::~Inst_VOPC__V_CMP_EQ_I16() |
| { |
| } // ~Inst_VOPC__V_CMP_EQ_I16 |
| |
| // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_EQ_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_LE_I16::Inst_VOPC__V_CMP_LE_I16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_le_i16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_LE_I16 |
| |
| Inst_VOPC__V_CMP_LE_I16::~Inst_VOPC__V_CMP_LE_I16() |
| { |
| } // ~Inst_VOPC__V_CMP_LE_I16 |
| |
| // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_LE_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_GT_I16::Inst_VOPC__V_CMP_GT_I16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_gt_i16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_GT_I16 |
| |
| Inst_VOPC__V_CMP_GT_I16::~Inst_VOPC__V_CMP_GT_I16() |
| { |
| } // ~Inst_VOPC__V_CMP_GT_I16 |
| |
| // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_GT_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_NE_I16::Inst_VOPC__V_CMP_NE_I16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_ne_i16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_NE_I16 |
| |
| Inst_VOPC__V_CMP_NE_I16::~Inst_VOPC__V_CMP_NE_I16() |
| { |
| } // ~Inst_VOPC__V_CMP_NE_I16 |
| |
| // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_NE_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_GE_I16::Inst_VOPC__V_CMP_GE_I16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_ge_i16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_GE_I16 |
| |
| Inst_VOPC__V_CMP_GE_I16::~Inst_VOPC__V_CMP_GE_I16() |
| { |
| } // ~Inst_VOPC__V_CMP_GE_I16 |
| |
| // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_GE_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_T_I16::Inst_VOPC__V_CMP_T_I16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_t_i16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_T_I16 |
| |
| Inst_VOPC__V_CMP_T_I16::~Inst_VOPC__V_CMP_T_I16() |
| { |
| } // ~Inst_VOPC__V_CMP_T_I16 |
| |
| // D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_T_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 1); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_F_U16::Inst_VOPC__V_CMP_F_U16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_f_u16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_F_U16 |
| |
| Inst_VOPC__V_CMP_F_U16::~Inst_VOPC__V_CMP_F_U16() |
| { |
| } // ~Inst_VOPC__V_CMP_F_U16 |
| |
| // D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_F_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_LT_U16::Inst_VOPC__V_CMP_LT_U16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_lt_u16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_LT_U16 |
| |
| Inst_VOPC__V_CMP_LT_U16::~Inst_VOPC__V_CMP_LT_U16() |
| { |
| } // ~Inst_VOPC__V_CMP_LT_U16 |
| |
| // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_LT_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_EQ_U16::Inst_VOPC__V_CMP_EQ_U16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_eq_u16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_EQ_U16 |
| |
| Inst_VOPC__V_CMP_EQ_U16::~Inst_VOPC__V_CMP_EQ_U16() |
| { |
| } // ~Inst_VOPC__V_CMP_EQ_U16 |
| |
| // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_EQ_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_LE_U16::Inst_VOPC__V_CMP_LE_U16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_le_u16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_LE_U16 |
| |
| Inst_VOPC__V_CMP_LE_U16::~Inst_VOPC__V_CMP_LE_U16() |
| { |
| } // ~Inst_VOPC__V_CMP_LE_U16 |
| |
| // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_LE_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_GT_U16::Inst_VOPC__V_CMP_GT_U16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_gt_u16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_GT_U16 |
| |
| Inst_VOPC__V_CMP_GT_U16::~Inst_VOPC__V_CMP_GT_U16() |
| { |
| } // ~Inst_VOPC__V_CMP_GT_U16 |
| |
| // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_GT_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_NE_U16::Inst_VOPC__V_CMP_NE_U16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_ne_u16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_NE_U16 |
| |
| Inst_VOPC__V_CMP_NE_U16::~Inst_VOPC__V_CMP_NE_U16() |
| { |
| } // ~Inst_VOPC__V_CMP_NE_U16 |
| |
| // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_NE_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_GE_U16::Inst_VOPC__V_CMP_GE_U16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_ge_u16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_GE_U16 |
| |
| Inst_VOPC__V_CMP_GE_U16::~Inst_VOPC__V_CMP_GE_U16() |
| { |
| } // ~Inst_VOPC__V_CMP_GE_U16 |
| |
| // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_GE_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_T_U16::Inst_VOPC__V_CMP_T_U16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_t_u16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_T_U16 |
| |
| Inst_VOPC__V_CMP_T_U16::~Inst_VOPC__V_CMP_T_U16() |
| { |
| } // ~Inst_VOPC__V_CMP_T_U16 |
| |
| // D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_T_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 1); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_F_I16::Inst_VOPC__V_CMPX_F_I16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_f_i16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_F_I16 |
| |
| Inst_VOPC__V_CMPX_F_I16::~Inst_VOPC__V_CMPX_F_I16() |
| { |
| } // ~Inst_VOPC__V_CMPX_F_I16 |
| |
| // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_F_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_LT_I16::Inst_VOPC__V_CMPX_LT_I16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_lt_i16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_LT_I16 |
| |
| Inst_VOPC__V_CMPX_LT_I16::~Inst_VOPC__V_CMPX_LT_I16() |
| { |
| } // ~Inst_VOPC__V_CMPX_LT_I16 |
| |
| // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_LT_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_EQ_I16::Inst_VOPC__V_CMPX_EQ_I16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_eq_i16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_EQ_I16 |
| |
| Inst_VOPC__V_CMPX_EQ_I16::~Inst_VOPC__V_CMPX_EQ_I16() |
| { |
| } // ~Inst_VOPC__V_CMPX_EQ_I16 |
| |
| // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_EQ_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_LE_I16::Inst_VOPC__V_CMPX_LE_I16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_le_i16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_LE_I16 |
| |
| Inst_VOPC__V_CMPX_LE_I16::~Inst_VOPC__V_CMPX_LE_I16() |
| { |
| } // ~Inst_VOPC__V_CMPX_LE_I16 |
| |
| // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_LE_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_GT_I16::Inst_VOPC__V_CMPX_GT_I16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_gt_i16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_GT_I16 |
| |
| Inst_VOPC__V_CMPX_GT_I16::~Inst_VOPC__V_CMPX_GT_I16() |
| { |
| } // ~Inst_VOPC__V_CMPX_GT_I16 |
| |
| // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_GT_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_NE_I16::Inst_VOPC__V_CMPX_NE_I16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_ne_i16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_NE_I16 |
| |
| Inst_VOPC__V_CMPX_NE_I16::~Inst_VOPC__V_CMPX_NE_I16() |
| { |
| } // ~Inst_VOPC__V_CMPX_NE_I16 |
| |
| // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_NE_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_GE_I16::Inst_VOPC__V_CMPX_GE_I16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_ge_i16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_GE_I16 |
| |
| Inst_VOPC__V_CMPX_GE_I16::~Inst_VOPC__V_CMPX_GE_I16() |
| { |
| } // ~Inst_VOPC__V_CMPX_GE_I16 |
| |
| // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_GE_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_T_I16::Inst_VOPC__V_CMPX_T_I16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_t_i16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_T_I16 |
| |
| Inst_VOPC__V_CMPX_T_I16::~Inst_VOPC__V_CMPX_T_I16() |
| { |
| } // ~Inst_VOPC__V_CMPX_T_I16 |
| |
| // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_T_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 1); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_F_U16::Inst_VOPC__V_CMPX_F_U16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_f_u16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_F_U16 |
| |
| Inst_VOPC__V_CMPX_F_U16::~Inst_VOPC__V_CMPX_F_U16() |
| { |
| } // ~Inst_VOPC__V_CMPX_F_U16 |
| |
| // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_F_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_LT_U16::Inst_VOPC__V_CMPX_LT_U16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_lt_u16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_LT_U16 |
| |
| Inst_VOPC__V_CMPX_LT_U16::~Inst_VOPC__V_CMPX_LT_U16() |
| { |
| } // ~Inst_VOPC__V_CMPX_LT_U16 |
| |
| // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_LT_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_EQ_U16::Inst_VOPC__V_CMPX_EQ_U16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_eq_u16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_EQ_U16 |
| |
| Inst_VOPC__V_CMPX_EQ_U16::~Inst_VOPC__V_CMPX_EQ_U16() |
| { |
| } // ~Inst_VOPC__V_CMPX_EQ_U16 |
| |
| // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_EQ_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_LE_U16::Inst_VOPC__V_CMPX_LE_U16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_le_u16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_LE_U16 |
| |
| Inst_VOPC__V_CMPX_LE_U16::~Inst_VOPC__V_CMPX_LE_U16() |
| { |
| } // ~Inst_VOPC__V_CMPX_LE_U16 |
| |
| // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_LE_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_GT_U16::Inst_VOPC__V_CMPX_GT_U16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_gt_u16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_GT_U16 |
| |
| Inst_VOPC__V_CMPX_GT_U16::~Inst_VOPC__V_CMPX_GT_U16() |
| { |
| } // ~Inst_VOPC__V_CMPX_GT_U16 |
| |
| // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_GT_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_NE_U16::Inst_VOPC__V_CMPX_NE_U16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_ne_u16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_NE_U16 |
| |
| Inst_VOPC__V_CMPX_NE_U16::~Inst_VOPC__V_CMPX_NE_U16() |
| { |
| } // ~Inst_VOPC__V_CMPX_NE_U16 |
| |
| // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_NE_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_GE_U16::Inst_VOPC__V_CMPX_GE_U16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_ge_u16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_GE_U16 |
| |
| Inst_VOPC__V_CMPX_GE_U16::~Inst_VOPC__V_CMPX_GE_U16() |
| { |
| } // ~Inst_VOPC__V_CMPX_GE_U16 |
| |
| // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_GE_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_T_U16::Inst_VOPC__V_CMPX_T_U16(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_t_u16") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_T_U16 |
| |
| Inst_VOPC__V_CMPX_T_U16::~Inst_VOPC__V_CMPX_T_U16() |
| { |
| } // ~Inst_VOPC__V_CMPX_T_U16 |
| |
| // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_T_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 1); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_F_I32::Inst_VOPC__V_CMP_F_I32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_f_i32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_F_I32 |
| |
| Inst_VOPC__V_CMP_F_I32::~Inst_VOPC__V_CMP_F_I32() |
| { |
| } // ~Inst_VOPC__V_CMP_F_I32 |
| |
| // D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_F_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_LT_I32::Inst_VOPC__V_CMP_LT_I32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_lt_i32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_LT_I32 |
| |
| Inst_VOPC__V_CMP_LT_I32::~Inst_VOPC__V_CMP_LT_I32() |
| { |
| } // ~Inst_VOPC__V_CMP_LT_I32 |
| |
| // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_EQ_I32::Inst_VOPC__V_CMP_EQ_I32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_eq_i32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_EQ_I32 |
| |
| Inst_VOPC__V_CMP_EQ_I32::~Inst_VOPC__V_CMP_EQ_I32() |
| { |
| } // ~Inst_VOPC__V_CMP_EQ_I32 |
| |
| // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_LE_I32::Inst_VOPC__V_CMP_LE_I32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_le_i32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_LE_I32 |
| |
| Inst_VOPC__V_CMP_LE_I32::~Inst_VOPC__V_CMP_LE_I32() |
| { |
| } // ~Inst_VOPC__V_CMP_LE_I32 |
| |
| // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_GT_I32::Inst_VOPC__V_CMP_GT_I32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_gt_i32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_GT_I32 |
| |
| Inst_VOPC__V_CMP_GT_I32::~Inst_VOPC__V_CMP_GT_I32() |
| { |
| } // ~Inst_VOPC__V_CMP_GT_I32 |
| |
| // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_NE_I32::Inst_VOPC__V_CMP_NE_I32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_ne_i32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_NE_I32 |
| |
| Inst_VOPC__V_CMP_NE_I32::~Inst_VOPC__V_CMP_NE_I32() |
| { |
| } // ~Inst_VOPC__V_CMP_NE_I32 |
| |
| // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_NE_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_GE_I32::Inst_VOPC__V_CMP_GE_I32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_ge_i32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_GE_I32 |
| |
| Inst_VOPC__V_CMP_GE_I32::~Inst_VOPC__V_CMP_GE_I32() |
| { |
| } // ~Inst_VOPC__V_CMP_GE_I32 |
| |
| // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_T_I32::Inst_VOPC__V_CMP_T_I32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_t_i32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_T_I32 |
| |
| Inst_VOPC__V_CMP_T_I32::~Inst_VOPC__V_CMP_T_I32() |
| { |
| } // ~Inst_VOPC__V_CMP_T_I32 |
| |
| // D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_T_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 1); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_F_U32::Inst_VOPC__V_CMP_F_U32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_f_u32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_F_U32 |
| |
| Inst_VOPC__V_CMP_F_U32::~Inst_VOPC__V_CMP_F_U32() |
| { |
| } // ~Inst_VOPC__V_CMP_F_U32 |
| |
| // D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_F_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_LT_U32::Inst_VOPC__V_CMP_LT_U32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_lt_u32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_LT_U32 |
| |
| Inst_VOPC__V_CMP_LT_U32::~Inst_VOPC__V_CMP_LT_U32() |
| { |
| } // ~Inst_VOPC__V_CMP_LT_U32 |
| |
| // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_EQ_U32::Inst_VOPC__V_CMP_EQ_U32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_eq_u32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_EQ_U32 |
| |
| Inst_VOPC__V_CMP_EQ_U32::~Inst_VOPC__V_CMP_EQ_U32() |
| { |
| } // ~Inst_VOPC__V_CMP_EQ_U32 |
| |
| // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_LE_U32::Inst_VOPC__V_CMP_LE_U32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_le_u32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_LE_U32 |
| |
| Inst_VOPC__V_CMP_LE_U32::~Inst_VOPC__V_CMP_LE_U32() |
| { |
| } // ~Inst_VOPC__V_CMP_LE_U32 |
| |
| // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_GT_U32::Inst_VOPC__V_CMP_GT_U32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_gt_u32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_GT_U32 |
| |
| Inst_VOPC__V_CMP_GT_U32::~Inst_VOPC__V_CMP_GT_U32() |
| { |
| } // ~Inst_VOPC__V_CMP_GT_U32 |
| |
| // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_NE_U32::Inst_VOPC__V_CMP_NE_U32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_ne_u32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_NE_U32 |
| |
| Inst_VOPC__V_CMP_NE_U32::~Inst_VOPC__V_CMP_NE_U32() |
| { |
| } // ~Inst_VOPC__V_CMP_NE_U32 |
| |
| // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_NE_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_GE_U32::Inst_VOPC__V_CMP_GE_U32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_ge_u32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_GE_U32 |
| |
| Inst_VOPC__V_CMP_GE_U32::~Inst_VOPC__V_CMP_GE_U32() |
| { |
| } // ~Inst_VOPC__V_CMP_GE_U32 |
| |
| // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_T_U32::Inst_VOPC__V_CMP_T_U32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_t_u32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_T_U32 |
| |
| Inst_VOPC__V_CMP_T_U32::~Inst_VOPC__V_CMP_T_U32() |
| { |
| } // ~Inst_VOPC__V_CMP_T_U32 |
| |
| // D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_T_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 1); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_F_I32::Inst_VOPC__V_CMPX_F_I32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_f_i32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_F_I32 |
| |
| Inst_VOPC__V_CMPX_F_I32::~Inst_VOPC__V_CMPX_F_I32() |
| { |
| } // ~Inst_VOPC__V_CMPX_F_I32 |
| |
| // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_F_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_LT_I32::Inst_VOPC__V_CMPX_LT_I32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_lt_i32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_LT_I32 |
| |
| Inst_VOPC__V_CMPX_LT_I32::~Inst_VOPC__V_CMPX_LT_I32() |
| { |
| } // ~Inst_VOPC__V_CMPX_LT_I32 |
| |
| // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_LT_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_EQ_I32::Inst_VOPC__V_CMPX_EQ_I32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_eq_i32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_EQ_I32 |
| |
| Inst_VOPC__V_CMPX_EQ_I32::~Inst_VOPC__V_CMPX_EQ_I32() |
| { |
| } // ~Inst_VOPC__V_CMPX_EQ_I32 |
| |
| // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_EQ_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_LE_I32::Inst_VOPC__V_CMPX_LE_I32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_le_i32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_LE_I32 |
| |
| Inst_VOPC__V_CMPX_LE_I32::~Inst_VOPC__V_CMPX_LE_I32() |
| { |
| } // ~Inst_VOPC__V_CMPX_LE_I32 |
| |
| // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_LE_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_GT_I32::Inst_VOPC__V_CMPX_GT_I32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_gt_i32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_GT_I32 |
| |
| Inst_VOPC__V_CMPX_GT_I32::~Inst_VOPC__V_CMPX_GT_I32() |
| { |
| } // ~Inst_VOPC__V_CMPX_GT_I32 |
| |
| // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_GT_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_NE_I32::Inst_VOPC__V_CMPX_NE_I32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_ne_i32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_NE_I32 |
| |
| Inst_VOPC__V_CMPX_NE_I32::~Inst_VOPC__V_CMPX_NE_I32() |
| { |
| } // ~Inst_VOPC__V_CMPX_NE_I32 |
| |
| // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_NE_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_GE_I32::Inst_VOPC__V_CMPX_GE_I32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_ge_i32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_GE_I32 |
| |
| Inst_VOPC__V_CMPX_GE_I32::~Inst_VOPC__V_CMPX_GE_I32() |
| { |
| } // ~Inst_VOPC__V_CMPX_GE_I32 |
| |
| // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_GE_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_T_I32::Inst_VOPC__V_CMPX_T_I32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_t_i32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_T_I32 |
| |
| Inst_VOPC__V_CMPX_T_I32::~Inst_VOPC__V_CMPX_T_I32() |
| { |
| } // ~Inst_VOPC__V_CMPX_T_I32 |
| |
| // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_T_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 1); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_F_U32::Inst_VOPC__V_CMPX_F_U32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_f_u32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_F_U32 |
| |
| Inst_VOPC__V_CMPX_F_U32::~Inst_VOPC__V_CMPX_F_U32() |
| { |
| } // ~Inst_VOPC__V_CMPX_F_U32 |
| |
| // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_F_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_LT_U32::Inst_VOPC__V_CMPX_LT_U32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_lt_u32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_LT_U32 |
| |
| Inst_VOPC__V_CMPX_LT_U32::~Inst_VOPC__V_CMPX_LT_U32() |
| { |
| } // ~Inst_VOPC__V_CMPX_LT_U32 |
| |
| // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_LT_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_EQ_U32::Inst_VOPC__V_CMPX_EQ_U32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_eq_u32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_EQ_U32 |
| |
| Inst_VOPC__V_CMPX_EQ_U32::~Inst_VOPC__V_CMPX_EQ_U32() |
| { |
| } // ~Inst_VOPC__V_CMPX_EQ_U32 |
| |
| // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_EQ_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_LE_U32::Inst_VOPC__V_CMPX_LE_U32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_le_u32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_LE_U32 |
| |
| Inst_VOPC__V_CMPX_LE_U32::~Inst_VOPC__V_CMPX_LE_U32() |
| { |
| } // ~Inst_VOPC__V_CMPX_LE_U32 |
| |
| // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_LE_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_GT_U32::Inst_VOPC__V_CMPX_GT_U32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_gt_u32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_GT_U32 |
| |
| Inst_VOPC__V_CMPX_GT_U32::~Inst_VOPC__V_CMPX_GT_U32() |
| { |
| } // ~Inst_VOPC__V_CMPX_GT_U32 |
| |
| // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_GT_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_NE_U32::Inst_VOPC__V_CMPX_NE_U32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_ne_u32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_NE_U32 |
| |
| Inst_VOPC__V_CMPX_NE_U32::~Inst_VOPC__V_CMPX_NE_U32() |
| { |
| } // ~Inst_VOPC__V_CMPX_NE_U32 |
| |
| // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_NE_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_GE_U32::Inst_VOPC__V_CMPX_GE_U32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_ge_u32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_GE_U32 |
| |
| Inst_VOPC__V_CMPX_GE_U32::~Inst_VOPC__V_CMPX_GE_U32() |
| { |
| } // ~Inst_VOPC__V_CMPX_GE_U32 |
| |
| // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_GE_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_T_U32::Inst_VOPC__V_CMPX_T_U32(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_t_u32") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_T_U32 |
| |
| Inst_VOPC__V_CMPX_T_U32::~Inst_VOPC__V_CMPX_T_U32() |
| { |
| } // ~Inst_VOPC__V_CMPX_T_U32 |
| |
| // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_T_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 1); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_F_I64::Inst_VOPC__V_CMP_F_I64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_f_i64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_F_I64 |
| |
| Inst_VOPC__V_CMP_F_I64::~Inst_VOPC__V_CMP_F_I64() |
| { |
| } // ~Inst_VOPC__V_CMP_F_I64 |
| |
| // D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_F_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_LT_I64::Inst_VOPC__V_CMP_LT_I64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_lt_i64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_LT_I64 |
| |
| Inst_VOPC__V_CMP_LT_I64::~Inst_VOPC__V_CMP_LT_I64() |
| { |
| } // ~Inst_VOPC__V_CMP_LT_I64 |
| |
| // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_LT_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_EQ_I64::Inst_VOPC__V_CMP_EQ_I64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_eq_i64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_EQ_I64 |
| |
| Inst_VOPC__V_CMP_EQ_I64::~Inst_VOPC__V_CMP_EQ_I64() |
| { |
| } // ~Inst_VOPC__V_CMP_EQ_I64 |
| |
| // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_EQ_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_LE_I64::Inst_VOPC__V_CMP_LE_I64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_le_i64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_LE_I64 |
| |
| Inst_VOPC__V_CMP_LE_I64::~Inst_VOPC__V_CMP_LE_I64() |
| { |
| } // ~Inst_VOPC__V_CMP_LE_I64 |
| |
| // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_LE_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_GT_I64::Inst_VOPC__V_CMP_GT_I64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_gt_i64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_GT_I64 |
| |
| Inst_VOPC__V_CMP_GT_I64::~Inst_VOPC__V_CMP_GT_I64() |
| { |
| } // ~Inst_VOPC__V_CMP_GT_I64 |
| |
| // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_GT_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_NE_I64::Inst_VOPC__V_CMP_NE_I64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_ne_i64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_NE_I64 |
| |
| Inst_VOPC__V_CMP_NE_I64::~Inst_VOPC__V_CMP_NE_I64() |
| { |
| } // ~Inst_VOPC__V_CMP_NE_I64 |
| |
| // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_NE_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_GE_I64::Inst_VOPC__V_CMP_GE_I64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_ge_i64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_GE_I64 |
| |
| Inst_VOPC__V_CMP_GE_I64::~Inst_VOPC__V_CMP_GE_I64() |
| { |
| } // ~Inst_VOPC__V_CMP_GE_I64 |
| |
| // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_GE_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_T_I64::Inst_VOPC__V_CMP_T_I64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_t_i64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_T_I64 |
| |
| Inst_VOPC__V_CMP_T_I64::~Inst_VOPC__V_CMP_T_I64() |
| { |
| } // ~Inst_VOPC__V_CMP_T_I64 |
| |
| // D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_T_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 1); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_F_U64::Inst_VOPC__V_CMP_F_U64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_f_u64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_F_U64 |
| |
| Inst_VOPC__V_CMP_F_U64::~Inst_VOPC__V_CMP_F_U64() |
| { |
| } // ~Inst_VOPC__V_CMP_F_U64 |
| |
| // D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_F_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_LT_U64::Inst_VOPC__V_CMP_LT_U64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_lt_u64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_LT_U64 |
| |
| Inst_VOPC__V_CMP_LT_U64::~Inst_VOPC__V_CMP_LT_U64() |
| { |
| } // ~Inst_VOPC__V_CMP_LT_U64 |
| |
| // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_LT_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_EQ_U64::Inst_VOPC__V_CMP_EQ_U64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_eq_u64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_EQ_U64 |
| |
| Inst_VOPC__V_CMP_EQ_U64::~Inst_VOPC__V_CMP_EQ_U64() |
| { |
| } // ~Inst_VOPC__V_CMP_EQ_U64 |
| |
| // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_LE_U64::Inst_VOPC__V_CMP_LE_U64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_le_u64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_LE_U64 |
| |
| Inst_VOPC__V_CMP_LE_U64::~Inst_VOPC__V_CMP_LE_U64() |
| { |
| } // ~Inst_VOPC__V_CMP_LE_U64 |
| |
| // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_LE_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_GT_U64::Inst_VOPC__V_CMP_GT_U64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_gt_u64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_GT_U64 |
| |
| Inst_VOPC__V_CMP_GT_U64::~Inst_VOPC__V_CMP_GT_U64() |
| { |
| } // ~Inst_VOPC__V_CMP_GT_U64 |
| |
| // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_GT_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_NE_U64::Inst_VOPC__V_CMP_NE_U64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_ne_u64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_NE_U64 |
| |
| Inst_VOPC__V_CMP_NE_U64::~Inst_VOPC__V_CMP_NE_U64() |
| { |
| } // ~Inst_VOPC__V_CMP_NE_U64 |
| |
| // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_NE_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_GE_U64::Inst_VOPC__V_CMP_GE_U64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_ge_u64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_GE_U64 |
| |
| Inst_VOPC__V_CMP_GE_U64::~Inst_VOPC__V_CMP_GE_U64() |
| { |
| } // ~Inst_VOPC__V_CMP_GE_U64 |
| |
| // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_GE_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMP_T_U64::Inst_VOPC__V_CMP_T_U64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmp_t_u64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMP_T_U64 |
| |
| Inst_VOPC__V_CMP_T_U64::~Inst_VOPC__V_CMP_T_U64() |
| { |
| } // ~Inst_VOPC__V_CMP_T_U64 |
| |
| // D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMP_T_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 1); |
| } |
| } |
| |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_F_I64::Inst_VOPC__V_CMPX_F_I64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_f_i64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_F_I64 |
| |
| Inst_VOPC__V_CMPX_F_I64::~Inst_VOPC__V_CMPX_F_I64() |
| { |
| } // ~Inst_VOPC__V_CMPX_F_I64 |
| |
| // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_F_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_LT_I64::Inst_VOPC__V_CMPX_LT_I64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_lt_i64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_LT_I64 |
| |
| Inst_VOPC__V_CMPX_LT_I64::~Inst_VOPC__V_CMPX_LT_I64() |
| { |
| } // ~Inst_VOPC__V_CMPX_LT_I64 |
| |
| // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_LT_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_EQ_I64::Inst_VOPC__V_CMPX_EQ_I64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_eq_i64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_EQ_I64 |
| |
| Inst_VOPC__V_CMPX_EQ_I64::~Inst_VOPC__V_CMPX_EQ_I64() |
| { |
| } // ~Inst_VOPC__V_CMPX_EQ_I64 |
| |
| // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_EQ_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_LE_I64::Inst_VOPC__V_CMPX_LE_I64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_le_i64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_LE_I64 |
| |
| Inst_VOPC__V_CMPX_LE_I64::~Inst_VOPC__V_CMPX_LE_I64() |
| { |
| } // ~Inst_VOPC__V_CMPX_LE_I64 |
| |
| // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_LE_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_GT_I64::Inst_VOPC__V_CMPX_GT_I64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_gt_i64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_GT_I64 |
| |
| Inst_VOPC__V_CMPX_GT_I64::~Inst_VOPC__V_CMPX_GT_I64() |
| { |
| } // ~Inst_VOPC__V_CMPX_GT_I64 |
| |
| // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_GT_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_NE_I64::Inst_VOPC__V_CMPX_NE_I64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_ne_i64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_NE_I64 |
| |
| Inst_VOPC__V_CMPX_NE_I64::~Inst_VOPC__V_CMPX_NE_I64() |
| { |
| } // ~Inst_VOPC__V_CMPX_NE_I64 |
| |
| // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_NE_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_GE_I64::Inst_VOPC__V_CMPX_GE_I64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_ge_i64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_GE_I64 |
| |
| Inst_VOPC__V_CMPX_GE_I64::~Inst_VOPC__V_CMPX_GE_I64() |
| { |
| } // ~Inst_VOPC__V_CMPX_GE_I64 |
| |
| // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_GE_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_T_I64::Inst_VOPC__V_CMPX_T_I64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_t_i64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_T_I64 |
| |
| Inst_VOPC__V_CMPX_T_I64::~Inst_VOPC__V_CMPX_T_I64() |
| { |
| } // ~Inst_VOPC__V_CMPX_T_I64 |
| |
| // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_T_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 1); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_F_U64::Inst_VOPC__V_CMPX_F_U64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_f_u64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_F_U64 |
| |
| Inst_VOPC__V_CMPX_F_U64::~Inst_VOPC__V_CMPX_F_U64() |
| { |
| } // ~Inst_VOPC__V_CMPX_F_U64 |
| |
| // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_F_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_LT_U64::Inst_VOPC__V_CMPX_LT_U64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_lt_u64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_LT_U64 |
| |
| Inst_VOPC__V_CMPX_LT_U64::~Inst_VOPC__V_CMPX_LT_U64() |
| { |
| } // ~Inst_VOPC__V_CMPX_LT_U64 |
| |
| // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_LT_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_EQ_U64::Inst_VOPC__V_CMPX_EQ_U64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_eq_u64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_EQ_U64 |
| |
| Inst_VOPC__V_CMPX_EQ_U64::~Inst_VOPC__V_CMPX_EQ_U64() |
| { |
| } // ~Inst_VOPC__V_CMPX_EQ_U64 |
| |
| // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_EQ_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_LE_U64::Inst_VOPC__V_CMPX_LE_U64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_le_u64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_LE_U64 |
| |
| Inst_VOPC__V_CMPX_LE_U64::~Inst_VOPC__V_CMPX_LE_U64() |
| { |
| } // ~Inst_VOPC__V_CMPX_LE_U64 |
| |
| // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_LE_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_GT_U64::Inst_VOPC__V_CMPX_GT_U64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_gt_u64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_GT_U64 |
| |
| Inst_VOPC__V_CMPX_GT_U64::~Inst_VOPC__V_CMPX_GT_U64() |
| { |
| } // ~Inst_VOPC__V_CMPX_GT_U64 |
| |
| // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_GT_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_NE_U64::Inst_VOPC__V_CMPX_NE_U64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_ne_u64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_NE_U64 |
| |
| Inst_VOPC__V_CMPX_NE_U64::~Inst_VOPC__V_CMPX_NE_U64() |
| { |
| } // ~Inst_VOPC__V_CMPX_NE_U64 |
| |
| // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_NE_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_GE_U64::Inst_VOPC__V_CMPX_GE_U64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_ge_u64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_GE_U64 |
| |
| Inst_VOPC__V_CMPX_GE_U64::~Inst_VOPC__V_CMPX_GE_U64() |
| { |
| } // ~Inst_VOPC__V_CMPX_GE_U64 |
| |
| // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_GE_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); |
| ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VOPC__V_CMPX_T_U64::Inst_VOPC__V_CMPX_T_U64(InFmt_VOPC *iFmt) |
| : Inst_VOPC(iFmt, "v_cmpx_t_u64") |
| { |
| setFlag(ALU); |
| } // Inst_VOPC__V_CMPX_T_U64 |
| |
| Inst_VOPC__V_CMPX_T_U64::~Inst_VOPC__V_CMPX_T_U64() |
| { |
| } // ~Inst_VOPC__V_CMPX_T_U64 |
| |
| // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOPC__V_CMPX_T_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, 1); |
| } |
| } |
| |
| wf->execMask() = vcc.rawData(); |
| vcc.write(); |
| } |
| |
| Inst_VINTRP__V_INTERP_P1_F32::Inst_VINTRP__V_INTERP_P1_F32( |
| InFmt_VINTRP *iFmt) |
| : Inst_VINTRP(iFmt, "v_interp_p1_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VINTRP__V_INTERP_P1_F32 |
| |
| Inst_VINTRP__V_INTERP_P1_F32::~Inst_VINTRP__V_INTERP_P1_F32() |
| { |
| } // ~Inst_VINTRP__V_INTERP_P1_F32 |
| |
| // D.f = P10 * S.f + P0; parameter interpolation |
| void |
| Inst_VINTRP__V_INTERP_P1_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VINTRP__V_INTERP_P2_F32::Inst_VINTRP__V_INTERP_P2_F32( |
| InFmt_VINTRP *iFmt) |
| : Inst_VINTRP(iFmt, "v_interp_p2_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VINTRP__V_INTERP_P2_F32 |
| |
| Inst_VINTRP__V_INTERP_P2_F32::~Inst_VINTRP__V_INTERP_P2_F32() |
| { |
| } // ~Inst_VINTRP__V_INTERP_P2_F32 |
| |
| // D.f = P20 * S.f + D.f; parameter interpolation |
| void |
| Inst_VINTRP__V_INTERP_P2_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VINTRP__V_INTERP_MOV_F32::Inst_VINTRP__V_INTERP_MOV_F32( |
| InFmt_VINTRP *iFmt) |
| : Inst_VINTRP(iFmt, "v_interp_mov_f32") |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VINTRP__V_INTERP_MOV_F32 |
| |
| Inst_VINTRP__V_INTERP_MOV_F32::~Inst_VINTRP__V_INTERP_MOV_F32() |
| { |
| } // ~Inst_VINTRP__V_INTERP_MOV_F32 |
| |
| void |
| Inst_VINTRP__V_INTERP_MOV_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMP_CLASS_F32::Inst_VOP3__V_CMP_CLASS_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_class_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMP_CLASS_F32 |
| |
| Inst_VOP3__V_CMP_CLASS_F32::~Inst_VOP3__V_CMP_CLASS_F32() |
| { |
| } // ~Inst_VOP3__V_CMP_CLASS_F32 |
| |
| // VCC = IEEE numeric class function specified in S1.u, performed on S0.f |
| // The function reports true if the floating point value is any of the |
| // numeric types selected in S1.u according to the following list: |
| // S1.u[0] -- value is a signaling NaN. |
| // S1.u[1] -- value is a quiet NaN. |
| // S1.u[2] -- value is negative infinity. |
| // S1.u[3] -- value is a negative normal value. |
| // S1.u[4] -- value is a negative denormal value. |
| // S1.u[5] -- value is negative zero. |
| // S1.u[6] -- value is positive zero. |
| // S1.u[7] -- value is a positive denormal value. |
| // S1.u[8] -- value is a positive normal value. |
| // S1.u[9] -- value is positive infinity. |
| void |
| Inst_VOP3__V_CMP_CLASS_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| if (bits(src1[lane], 0) || bits(src1[lane], 1)) { |
| // is NaN |
| if (std::isnan(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 2)) { |
| // is -infinity |
| if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 3)) { |
| // is -normal |
| if (std::isnormal(src0[lane]) |
| && std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 4)) { |
| // is -denormal |
| if (std::fpclassify(src0[lane]) == FP_SUBNORMAL |
| && std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 5)) { |
| // is -zero |
| if (std::fpclassify(src0[lane]) == FP_ZERO |
| && std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 6)) { |
| // is +zero |
| if (std::fpclassify(src0[lane]) == FP_ZERO |
| && !std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 7)) { |
| // is +denormal |
| if (std::fpclassify(src0[lane]) == FP_SUBNORMAL |
| && !std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 8)) { |
| // is +normal |
| if (std::isnormal(src0[lane]) |
| && !std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 9)) { |
| // is +infinity |
| if (std::isinf(src0[lane]) |
| && !std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_CLASS_F32::Inst_VOP3__V_CMPX_CLASS_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_class_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMPX_CLASS_F32 |
| |
| Inst_VOP3__V_CMPX_CLASS_F32::~Inst_VOP3__V_CMPX_CLASS_F32() |
| { |
| } // ~Inst_VOP3__V_CMPX_CLASS_F32 |
| |
| // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on |
| // S0.f |
| // The function reports true if the floating point value is any of the |
| // numeric types selected in S1.u according to the following list: |
| // S1.u[0] -- value is a signaling NaN. |
| // S1.u[1] -- value is a quiet NaN. |
| // S1.u[2] -- value is negative infinity. |
| // S1.u[3] -- value is a negative normal value. |
| // S1.u[4] -- value is a negative denormal value. |
| // S1.u[5] -- value is negative zero. |
| // S1.u[6] -- value is positive zero. |
| // S1.u[7] -- value is a positive denormal value. |
| // S1.u[8] -- value is a positive normal value. |
| // S1.u[9] -- value is positive infinity. |
| void |
| Inst_VOP3__V_CMPX_CLASS_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| if (bits(src1[lane], 0) || bits(src1[lane], 1)) { |
| // is NaN |
| if (std::isnan(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 2)) { |
| // is -infinity |
| if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 3)) { |
| // is -normal |
| if (std::isnormal(src0[lane]) |
| && std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 4)) { |
| // is -denormal |
| if (std::fpclassify(src0[lane]) == FP_SUBNORMAL |
| && std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 5)) { |
| // is -zero |
| if (std::fpclassify(src0[lane]) == FP_ZERO |
| && std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 6)) { |
| // is +zero |
| if (std::fpclassify(src0[lane]) == FP_ZERO |
| && !std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 7)) { |
| // is +denormal |
| if (std::fpclassify(src0[lane]) == FP_SUBNORMAL |
| && !std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 8)) { |
| // is +normal |
| if (std::isnormal(src0[lane]) |
| && !std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 9)) { |
| // is +infinity |
| if (std::isinf(src0[lane]) |
| && !std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_CLASS_F64::Inst_VOP3__V_CMP_CLASS_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_class_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMP_CLASS_F64 |
| |
| Inst_VOP3__V_CMP_CLASS_F64::~Inst_VOP3__V_CMP_CLASS_F64() |
| { |
| } // ~Inst_VOP3__V_CMP_CLASS_F64 |
| |
| // VCC = IEEE numeric class function specified in S1.u, performed on S0.d |
| // The function reports true if the floating point value is any of the |
| // numeric types selected in S1.u according to the following list: |
| // S1.u[0] -- value is a signaling NaN. |
| // S1.u[1] -- value is a quiet NaN. |
| // S1.u[2] -- value is negative infinity. |
| // S1.u[3] -- value is a negative normal value. |
| // S1.u[4] -- value is a negative denormal value. |
| // S1.u[5] -- value is negative zero. |
| // S1.u[6] -- value is positive zero. |
| // S1.u[7] -- value is a positive denormal value. |
| // S1.u[8] -- value is a positive normal value. |
| // S1.u[9] -- value is positive infinity. |
| void |
| Inst_VOP3__V_CMP_CLASS_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| if (bits(src1[lane], 0) || bits(src1[lane], 1)) { |
| // is NaN |
| if (std::isnan(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 2)) { |
| // is -infinity |
| if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 3)) { |
| // is -normal |
| if (std::isnormal(src0[lane]) |
| && std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 4)) { |
| // is -denormal |
| if (std::fpclassify(src0[lane]) == FP_SUBNORMAL |
| && std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 5)) { |
| // is -zero |
| if (std::fpclassify(src0[lane]) == FP_ZERO |
| && std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 6)) { |
| // is +zero |
| if (std::fpclassify(src0[lane]) == FP_ZERO |
| && !std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 7)) { |
| // is +denormal |
| if (std::fpclassify(src0[lane]) == FP_SUBNORMAL |
| && !std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 8)) { |
| // is +normal |
| if (std::isnormal(src0[lane]) |
| && !std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 9)) { |
| // is +infinity |
| if (std::isinf(src0[lane]) |
| && !std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_CLASS_F64::Inst_VOP3__V_CMPX_CLASS_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_class_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMPX_CLASS_F64 |
| |
| Inst_VOP3__V_CMPX_CLASS_F64::~Inst_VOP3__V_CMPX_CLASS_F64() |
| { |
| } // ~Inst_VOP3__V_CMPX_CLASS_F64 |
| |
| // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on |
| // S0.d |
| // The function reports true if the floating point value is any of the |
| // numeric types selected in S1.u according to the following list: |
| // S1.u[0] -- value is a signaling NaN. |
| // S1.u[1] -- value is a quiet NaN. |
| // S1.u[2] -- value is negative infinity. |
| // S1.u[3] -- value is a negative normal value. |
| // S1.u[4] -- value is a negative denormal value. |
| // S1.u[5] -- value is negative zero. |
| // S1.u[6] -- value is positive zero. |
| // S1.u[7] -- value is a positive denormal value. |
| // S1.u[8] -- value is a positive normal value. |
| // S1.u[9] -- value is positive infinity. |
| void |
| Inst_VOP3__V_CMPX_CLASS_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| if (bits(src1[lane], 0) || bits(src1[lane], 1)) { |
| // is NaN |
| if (std::isnan(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 2)) { |
| // is -infinity |
| if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 3)) { |
| // is -normal |
| if (std::isnormal(src0[lane]) |
| && std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 4)) { |
| // is -denormal |
| if (std::fpclassify(src0[lane]) == FP_SUBNORMAL |
| && std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 5)) { |
| // is -zero |
| if (std::fpclassify(src0[lane]) == FP_ZERO |
| && std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 6)) { |
| // is +zero |
| if (std::fpclassify(src0[lane]) == FP_ZERO |
| && !std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 7)) { |
| // is +denormal |
| if (std::fpclassify(src0[lane]) == FP_SUBNORMAL |
| && !std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 8)) { |
| // is +normal |
| if (std::isnormal(src0[lane]) |
| && !std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| if (bits(src1[lane], 9)) { |
| // is +infinity |
| if (std::isinf(src0[lane]) |
| && !std::signbit(src0[lane])) { |
| sdst.setBit(lane, 1); |
| continue; |
| } |
| } |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_CLASS_F16::Inst_VOP3__V_CMP_CLASS_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_class_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMP_CLASS_F16 |
| |
| Inst_VOP3__V_CMP_CLASS_F16::~Inst_VOP3__V_CMP_CLASS_F16() |
| { |
| } // ~Inst_VOP3__V_CMP_CLASS_F16 |
| |
| // VCC = IEEE numeric class function specified in S1.u, performed on S0.f16 |
| // The function reports true if the floating point value is any of the |
| // numeric types selected in S1.u according to the following list: |
| // S1.u[0] -- value is a signaling NaN. |
| // S1.u[1] -- value is a quiet NaN. |
| // S1.u[2] -- value is negative infinity. |
| // S1.u[3] -- value is a negative normal value. |
| // S1.u[4] -- value is a negative denormal value. |
| // S1.u[5] -- value is negative zero. |
| // S1.u[6] -- value is positive zero. |
| // S1.u[7] -- value is a positive denormal value. |
| // S1.u[8] -- value is a positive normal value. |
| // S1.u[9] -- value is positive infinity. |
| void |
| Inst_VOP3__V_CMP_CLASS_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMPX_CLASS_F16::Inst_VOP3__V_CMPX_CLASS_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_class_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMPX_CLASS_F16 |
| |
| Inst_VOP3__V_CMPX_CLASS_F16::~Inst_VOP3__V_CMPX_CLASS_F16() |
| { |
| } // ~Inst_VOP3__V_CMPX_CLASS_F16 |
| |
| // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on |
| // S0.f16 |
| // The function reports true if the floating point value is any of the |
| // numeric types selected in S1.u according to the following list: |
| // S1.u[0] -- value is a signaling NaN. |
| // S1.u[1] -- value is a quiet NaN. |
| // S1.u[2] -- value is negative infinity. |
| // S1.u[3] -- value is a negative normal value. |
| // S1.u[4] -- value is a negative denormal value. |
| // S1.u[5] -- value is negative zero. |
| // S1.u[6] -- value is positive zero. |
| // S1.u[7] -- value is a positive denormal value. |
| // S1.u[8] -- value is a positive normal value. |
| // S1.u[9] -- value is positive infinity. |
| void |
| Inst_VOP3__V_CMPX_CLASS_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMP_F_F16::Inst_VOP3__V_CMP_F_F16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_f_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMP_F_F16 |
| |
| Inst_VOP3__V_CMP_F_F16::~Inst_VOP3__V_CMP_F_F16() |
| { |
| } // ~Inst_VOP3__V_CMP_F_F16 |
| |
| // D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_F_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMP_LT_F16::Inst_VOP3__V_CMP_LT_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_lt_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMP_LT_F16 |
| |
| Inst_VOP3__V_CMP_LT_F16::~Inst_VOP3__V_CMP_LT_F16() |
| { |
| } // ~Inst_VOP3__V_CMP_LT_F16 |
| |
| // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_LT_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMP_EQ_F16::Inst_VOP3__V_CMP_EQ_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_eq_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMP_EQ_F16 |
| |
| Inst_VOP3__V_CMP_EQ_F16::~Inst_VOP3__V_CMP_EQ_F16() |
| { |
| } // ~Inst_VOP3__V_CMP_EQ_F16 |
| |
| // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_EQ_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMP_LE_F16::Inst_VOP3__V_CMP_LE_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_le_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMP_LE_F16 |
| |
| Inst_VOP3__V_CMP_LE_F16::~Inst_VOP3__V_CMP_LE_F16() |
| { |
| } // ~Inst_VOP3__V_CMP_LE_F16 |
| |
| // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_LE_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMP_GT_F16::Inst_VOP3__V_CMP_GT_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_gt_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMP_GT_F16 |
| |
| Inst_VOP3__V_CMP_GT_F16::~Inst_VOP3__V_CMP_GT_F16() |
| { |
| } // ~Inst_VOP3__V_CMP_GT_F16 |
| |
| // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_GT_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMP_LG_F16::Inst_VOP3__V_CMP_LG_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_lg_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMP_LG_F16 |
| |
| Inst_VOP3__V_CMP_LG_F16::~Inst_VOP3__V_CMP_LG_F16() |
| { |
| } // ~Inst_VOP3__V_CMP_LG_F16 |
| |
| // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_LG_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMP_GE_F16::Inst_VOP3__V_CMP_GE_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_ge_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMP_GE_F16 |
| |
| Inst_VOP3__V_CMP_GE_F16::~Inst_VOP3__V_CMP_GE_F16() |
| { |
| } // ~Inst_VOP3__V_CMP_GE_F16 |
| |
| // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_GE_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMP_O_F16::Inst_VOP3__V_CMP_O_F16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_o_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMP_O_F16 |
| |
| Inst_VOP3__V_CMP_O_F16::~Inst_VOP3__V_CMP_O_F16() |
| { |
| } // ~Inst_VOP3__V_CMP_O_F16 |
| |
| // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_O_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMP_U_F16::Inst_VOP3__V_CMP_U_F16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_u_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMP_U_F16 |
| |
| Inst_VOP3__V_CMP_U_F16::~Inst_VOP3__V_CMP_U_F16() |
| { |
| } // ~Inst_VOP3__V_CMP_U_F16 |
| |
| // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_U_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMP_NGE_F16::Inst_VOP3__V_CMP_NGE_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_nge_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMP_NGE_F16 |
| |
| Inst_VOP3__V_CMP_NGE_F16::~Inst_VOP3__V_CMP_NGE_F16() |
| { |
| } // ~Inst_VOP3__V_CMP_NGE_F16 |
| |
| // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_NGE_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMP_NLG_F16::Inst_VOP3__V_CMP_NLG_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_nlg_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMP_NLG_F16 |
| |
| Inst_VOP3__V_CMP_NLG_F16::~Inst_VOP3__V_CMP_NLG_F16() |
| { |
| } // ~Inst_VOP3__V_CMP_NLG_F16 |
| |
| // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_NLG_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMP_NGT_F16::Inst_VOP3__V_CMP_NGT_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_ngt_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMP_NGT_F16 |
| |
| Inst_VOP3__V_CMP_NGT_F16::~Inst_VOP3__V_CMP_NGT_F16() |
| { |
| } // ~Inst_VOP3__V_CMP_NGT_F16 |
| |
| // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_NGT_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMP_NLE_F16::Inst_VOP3__V_CMP_NLE_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_nle_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMP_NLE_F16 |
| |
| Inst_VOP3__V_CMP_NLE_F16::~Inst_VOP3__V_CMP_NLE_F16() |
| { |
| } // ~Inst_VOP3__V_CMP_NLE_F16 |
| |
| // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_NLE_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMP_NEQ_F16::Inst_VOP3__V_CMP_NEQ_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_neq_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMP_NEQ_F16 |
| |
| Inst_VOP3__V_CMP_NEQ_F16::~Inst_VOP3__V_CMP_NEQ_F16() |
| { |
| } // ~Inst_VOP3__V_CMP_NEQ_F16 |
| |
| // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_NEQ_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMP_NLT_F16::Inst_VOP3__V_CMP_NLT_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_nlt_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMP_NLT_F16 |
| |
| Inst_VOP3__V_CMP_NLT_F16::~Inst_VOP3__V_CMP_NLT_F16() |
| { |
| } // ~Inst_VOP3__V_CMP_NLT_F16 |
| |
| // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_NLT_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMP_TRU_F16::Inst_VOP3__V_CMP_TRU_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_tru_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMP_TRU_F16 |
| |
| Inst_VOP3__V_CMP_TRU_F16::~Inst_VOP3__V_CMP_TRU_F16() |
| { |
| } // ~Inst_VOP3__V_CMP_TRU_F16 |
| |
| // D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_TRU_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 1); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_F_F16::Inst_VOP3__V_CMPX_F_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_f_f16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_F_F16 |
| |
| Inst_VOP3__V_CMPX_F_F16::~Inst_VOP3__V_CMPX_F_F16() |
| { |
| } // ~Inst_VOP3__V_CMPX_F_F16 |
| |
| // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_F_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_LT_F16::Inst_VOP3__V_CMPX_LT_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_lt_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMPX_LT_F16 |
| |
| Inst_VOP3__V_CMPX_LT_F16::~Inst_VOP3__V_CMPX_LT_F16() |
| { |
| } // ~Inst_VOP3__V_CMPX_LT_F16 |
| |
| // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_LT_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMPX_EQ_F16::Inst_VOP3__V_CMPX_EQ_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_eq_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMPX_EQ_F16 |
| |
| Inst_VOP3__V_CMPX_EQ_F16::~Inst_VOP3__V_CMPX_EQ_F16() |
| { |
| } // ~Inst_VOP3__V_CMPX_EQ_F16 |
| |
| // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_EQ_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMPX_LE_F16::Inst_VOP3__V_CMPX_LE_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_le_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMPX_LE_F16 |
| |
| Inst_VOP3__V_CMPX_LE_F16::~Inst_VOP3__V_CMPX_LE_F16() |
| { |
| } // ~Inst_VOP3__V_CMPX_LE_F16 |
| |
| // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_LE_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMPX_GT_F16::Inst_VOP3__V_CMPX_GT_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_gt_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMPX_GT_F16 |
| |
| Inst_VOP3__V_CMPX_GT_F16::~Inst_VOP3__V_CMPX_GT_F16() |
| { |
| } // ~Inst_VOP3__V_CMPX_GT_F16 |
| |
| // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_GT_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMPX_LG_F16::Inst_VOP3__V_CMPX_LG_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_lg_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMPX_LG_F16 |
| |
| Inst_VOP3__V_CMPX_LG_F16::~Inst_VOP3__V_CMPX_LG_F16() |
| { |
| } // ~Inst_VOP3__V_CMPX_LG_F16 |
| |
| // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_LG_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMPX_GE_F16::Inst_VOP3__V_CMPX_GE_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_ge_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMPX_GE_F16 |
| |
| Inst_VOP3__V_CMPX_GE_F16::~Inst_VOP3__V_CMPX_GE_F16() |
| { |
| } // ~Inst_VOP3__V_CMPX_GE_F16 |
| |
| // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_GE_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMPX_O_F16::Inst_VOP3__V_CMPX_O_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_o_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMPX_O_F16 |
| |
| Inst_VOP3__V_CMPX_O_F16::~Inst_VOP3__V_CMPX_O_F16() |
| { |
| } // ~Inst_VOP3__V_CMPX_O_F16 |
| |
| // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC |
| // encoding. |
| void |
| Inst_VOP3__V_CMPX_O_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMPX_U_F16::Inst_VOP3__V_CMPX_U_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_u_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMPX_U_F16 |
| |
| Inst_VOP3__V_CMPX_U_F16::~Inst_VOP3__V_CMPX_U_F16() |
| { |
| } // ~Inst_VOP3__V_CMPX_U_F16 |
| |
| // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC |
| // encoding. |
| void |
| Inst_VOP3__V_CMPX_U_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMPX_NGE_F16::Inst_VOP3__V_CMPX_NGE_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_nge_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMPX_NGE_F16 |
| |
| Inst_VOP3__V_CMPX_NGE_F16::~Inst_VOP3__V_CMPX_NGE_F16() |
| { |
| } // ~Inst_VOP3__V_CMPX_NGE_F16 |
| |
| // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_NGE_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMPX_NLG_F16::Inst_VOP3__V_CMPX_NLG_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_nlg_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMPX_NLG_F16 |
| |
| Inst_VOP3__V_CMPX_NLG_F16::~Inst_VOP3__V_CMPX_NLG_F16() |
| { |
| } // ~Inst_VOP3__V_CMPX_NLG_F16 |
| |
| // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_NLG_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMPX_NGT_F16::Inst_VOP3__V_CMPX_NGT_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_ngt_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMPX_NGT_F16 |
| |
| Inst_VOP3__V_CMPX_NGT_F16::~Inst_VOP3__V_CMPX_NGT_F16() |
| { |
| } // ~Inst_VOP3__V_CMPX_NGT_F16 |
| |
| // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_NGT_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMPX_NLE_F16::Inst_VOP3__V_CMPX_NLE_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_nle_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMPX_NLE_F16 |
| |
| Inst_VOP3__V_CMPX_NLE_F16::~Inst_VOP3__V_CMPX_NLE_F16() |
| { |
| } // ~Inst_VOP3__V_CMPX_NLE_F16 |
| |
| // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_NLE_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMPX_NEQ_F16::Inst_VOP3__V_CMPX_NEQ_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_neq_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMPX_NEQ_F16 |
| |
| Inst_VOP3__V_CMPX_NEQ_F16::~Inst_VOP3__V_CMPX_NEQ_F16() |
| { |
| } // ~Inst_VOP3__V_CMPX_NEQ_F16 |
| |
| // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_NEQ_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMPX_NLT_F16::Inst_VOP3__V_CMPX_NLT_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_nlt_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMPX_NLT_F16 |
| |
| Inst_VOP3__V_CMPX_NLT_F16::~Inst_VOP3__V_CMPX_NLT_F16() |
| { |
| } // ~Inst_VOP3__V_CMPX_NLT_F16 |
| |
| // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_NLT_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CMPX_TRU_F16::Inst_VOP3__V_CMPX_TRU_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_tru_f16", true) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CMPX_TRU_F16 |
| |
| Inst_VOP3__V_CMPX_TRU_F16::~Inst_VOP3__V_CMPX_TRU_F16() |
| { |
| } // ~Inst_VOP3__V_CMPX_TRU_F16 |
| |
| // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_TRU_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 1); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_F_F32::Inst_VOP3__V_CMP_F_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_f_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMP_F_F32 |
| |
| Inst_VOP3__V_CMP_F_F32::~Inst_VOP3__V_CMP_F_F32() |
| { |
| } // ~Inst_VOP3__V_CMP_F_F32 |
| |
| // D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_F_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_LT_F32::Inst_VOP3__V_CMP_LT_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_lt_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMP_LT_F32 |
| |
| Inst_VOP3__V_CMP_LT_F32::~Inst_VOP3__V_CMP_LT_F32() |
| { |
| } // ~Inst_VOP3__V_CMP_LT_F32 |
| |
| // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_LT_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_EQ_F32::Inst_VOP3__V_CMP_EQ_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_eq_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMP_EQ_F32 |
| |
| Inst_VOP3__V_CMP_EQ_F32::~Inst_VOP3__V_CMP_EQ_F32() |
| { |
| } // ~Inst_VOP3__V_CMP_EQ_F32 |
| |
| // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_EQ_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_LE_F32::Inst_VOP3__V_CMP_LE_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_le_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMP_LE_F32 |
| |
| Inst_VOP3__V_CMP_LE_F32::~Inst_VOP3__V_CMP_LE_F32() |
| { |
| } // ~Inst_VOP3__V_CMP_LE_F32 |
| |
| // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_LE_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_GT_F32::Inst_VOP3__V_CMP_GT_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_gt_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMP_GT_F32 |
| |
| Inst_VOP3__V_CMP_GT_F32::~Inst_VOP3__V_CMP_GT_F32() |
| { |
| } // ~Inst_VOP3__V_CMP_GT_F32 |
| |
| // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_GT_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_LG_F32::Inst_VOP3__V_CMP_LG_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_lg_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMP_LG_F32 |
| |
| Inst_VOP3__V_CMP_LG_F32::~Inst_VOP3__V_CMP_LG_F32() |
| { |
| } // ~Inst_VOP3__V_CMP_LG_F32 |
| |
| // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_LG_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_GE_F32::Inst_VOP3__V_CMP_GE_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_ge_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMP_GE_F32 |
| |
| Inst_VOP3__V_CMP_GE_F32::~Inst_VOP3__V_CMP_GE_F32() |
| { |
| } // ~Inst_VOP3__V_CMP_GE_F32 |
| |
| // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_GE_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_O_F32::Inst_VOP3__V_CMP_O_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_o_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMP_O_F32 |
| |
| Inst_VOP3__V_CMP_O_F32::~Inst_VOP3__V_CMP_O_F32() |
| { |
| } // ~Inst_VOP3__V_CMP_O_F32 |
| |
| // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_O_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, (!std::isnan(src0[lane]) |
| && !std::isnan(src1[lane])) ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_U_F32::Inst_VOP3__V_CMP_U_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_u_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMP_U_F32 |
| |
| Inst_VOP3__V_CMP_U_F32::~Inst_VOP3__V_CMP_U_F32() |
| { |
| } // ~Inst_VOP3__V_CMP_U_F32 |
| |
| // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_U_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, (std::isnan(src0[lane]) |
| || std::isnan(src1[lane])) ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_NGE_F32::Inst_VOP3__V_CMP_NGE_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_nge_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMP_NGE_F32 |
| |
| Inst_VOP3__V_CMP_NGE_F32::~Inst_VOP3__V_CMP_NGE_F32() |
| { |
| } // ~Inst_VOP3__V_CMP_NGE_F32 |
| |
| // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_NGE_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_NLG_F32::Inst_VOP3__V_CMP_NLG_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_nlg_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMP_NLG_F32 |
| |
| Inst_VOP3__V_CMP_NLG_F32::~Inst_VOP3__V_CMP_NLG_F32() |
| { |
| } // ~Inst_VOP3__V_CMP_NLG_F32 |
| |
| // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_NLG_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, !(src0[lane] < src1[lane] |
| || src0[lane] > src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_NGT_F32::Inst_VOP3__V_CMP_NGT_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_ngt_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMP_NGT_F32 |
| |
| Inst_VOP3__V_CMP_NGT_F32::~Inst_VOP3__V_CMP_NGT_F32() |
| { |
| } // ~Inst_VOP3__V_CMP_NGT_F32 |
| |
| // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_NGT_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_NLE_F32::Inst_VOP3__V_CMP_NLE_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_nle_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMP_NLE_F32 |
| |
| Inst_VOP3__V_CMP_NLE_F32::~Inst_VOP3__V_CMP_NLE_F32() |
| { |
| } // ~Inst_VOP3__V_CMP_NLE_F32 |
| |
| // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_NLE_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_NEQ_F32::Inst_VOP3__V_CMP_NEQ_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_neq_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMP_NEQ_F32 |
| |
| Inst_VOP3__V_CMP_NEQ_F32::~Inst_VOP3__V_CMP_NEQ_F32() |
| { |
| } // ~Inst_VOP3__V_CMP_NEQ_F32 |
| |
| // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_NEQ_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_NLT_F32::Inst_VOP3__V_CMP_NLT_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_nlt_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMP_NLT_F32 |
| |
| Inst_VOP3__V_CMP_NLT_F32::~Inst_VOP3__V_CMP_NLT_F32() |
| { |
| } // ~Inst_VOP3__V_CMP_NLT_F32 |
| |
| // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_NLT_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_TRU_F32::Inst_VOP3__V_CMP_TRU_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_tru_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMP_TRU_F32 |
| |
| Inst_VOP3__V_CMP_TRU_F32::~Inst_VOP3__V_CMP_TRU_F32() |
| { |
| } // ~Inst_VOP3__V_CMP_TRU_F32 |
| |
| // D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_TRU_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 1); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_F_F32::Inst_VOP3__V_CMPX_F_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_f_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMPX_F_F32 |
| |
| Inst_VOP3__V_CMPX_F_F32::~Inst_VOP3__V_CMPX_F_F32() |
| { |
| } // ~Inst_VOP3__V_CMPX_F_F32 |
| |
| // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_F_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_LT_F32::Inst_VOP3__V_CMPX_LT_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_lt_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMPX_LT_F32 |
| |
| Inst_VOP3__V_CMPX_LT_F32::~Inst_VOP3__V_CMPX_LT_F32() |
| { |
| } // ~Inst_VOP3__V_CMPX_LT_F32 |
| |
| // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_LT_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_EQ_F32::Inst_VOP3__V_CMPX_EQ_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_eq_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMPX_EQ_F32 |
| |
| Inst_VOP3__V_CMPX_EQ_F32::~Inst_VOP3__V_CMPX_EQ_F32() |
| { |
| } // ~Inst_VOP3__V_CMPX_EQ_F32 |
| |
| // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_EQ_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_LE_F32::Inst_VOP3__V_CMPX_LE_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_le_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMPX_LE_F32 |
| |
| Inst_VOP3__V_CMPX_LE_F32::~Inst_VOP3__V_CMPX_LE_F32() |
| { |
| } // ~Inst_VOP3__V_CMPX_LE_F32 |
| |
| // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_LE_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_GT_F32::Inst_VOP3__V_CMPX_GT_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_gt_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMPX_GT_F32 |
| |
| Inst_VOP3__V_CMPX_GT_F32::~Inst_VOP3__V_CMPX_GT_F32() |
| { |
| } // ~Inst_VOP3__V_CMPX_GT_F32 |
| |
| // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_GT_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_LG_F32::Inst_VOP3__V_CMPX_LG_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_lg_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMPX_LG_F32 |
| |
| Inst_VOP3__V_CMPX_LG_F32::~Inst_VOP3__V_CMPX_LG_F32() |
| { |
| } // ~Inst_VOP3__V_CMPX_LG_F32 |
| |
| // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_LG_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, (src0[lane] < src1[lane] |
| || src0[lane] > src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_GE_F32::Inst_VOP3__V_CMPX_GE_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_ge_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMPX_GE_F32 |
| |
| Inst_VOP3__V_CMPX_GE_F32::~Inst_VOP3__V_CMPX_GE_F32() |
| { |
| } // ~Inst_VOP3__V_CMPX_GE_F32 |
| |
| // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_GE_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_O_F32::Inst_VOP3__V_CMPX_O_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_o_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMPX_O_F32 |
| |
| Inst_VOP3__V_CMPX_O_F32::~Inst_VOP3__V_CMPX_O_F32() |
| { |
| } // ~Inst_VOP3__V_CMPX_O_F32 |
| |
| // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC |
| // encoding. |
| void |
| Inst_VOP3__V_CMPX_O_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, (!std::isnan(src0[lane]) |
| && !std::isnan(src1[lane])) ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_U_F32::Inst_VOP3__V_CMPX_U_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_u_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMPX_U_F32 |
| |
| Inst_VOP3__V_CMPX_U_F32::~Inst_VOP3__V_CMPX_U_F32() |
| { |
| } // ~Inst_VOP3__V_CMPX_U_F32 |
| |
| // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC |
| // encoding. |
| void |
| Inst_VOP3__V_CMPX_U_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, (std::isnan(src0[lane]) |
| || std::isnan(src1[lane])) ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_NGE_F32::Inst_VOP3__V_CMPX_NGE_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_nge_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMPX_NGE_F32 |
| |
| Inst_VOP3__V_CMPX_NGE_F32::~Inst_VOP3__V_CMPX_NGE_F32() |
| { |
| } // ~Inst_VOP3__V_CMPX_NGE_F32 |
| |
| // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_NGE_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_NLG_F32::Inst_VOP3__V_CMPX_NLG_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_nlg_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMPX_NLG_F32 |
| |
| Inst_VOP3__V_CMPX_NLG_F32::~Inst_VOP3__V_CMPX_NLG_F32() |
| { |
| } // ~Inst_VOP3__V_CMPX_NLG_F32 |
| |
| // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_NLG_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, !(src0[lane] < src1[lane] |
| || src0[lane] > src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_NGT_F32::Inst_VOP3__V_CMPX_NGT_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_ngt_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMPX_NGT_F32 |
| |
| Inst_VOP3__V_CMPX_NGT_F32::~Inst_VOP3__V_CMPX_NGT_F32() |
| { |
| } // ~Inst_VOP3__V_CMPX_NGT_F32 |
| |
| // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_NGT_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_NLE_F32::Inst_VOP3__V_CMPX_NLE_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_nle_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMPX_NLE_F32 |
| |
| Inst_VOP3__V_CMPX_NLE_F32::~Inst_VOP3__V_CMPX_NLE_F32() |
| { |
| } // ~Inst_VOP3__V_CMPX_NLE_F32 |
| |
| // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_NLE_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_NEQ_F32::Inst_VOP3__V_CMPX_NEQ_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_neq_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMPX_NEQ_F32 |
| |
| Inst_VOP3__V_CMPX_NEQ_F32::~Inst_VOP3__V_CMPX_NEQ_F32() |
| { |
| } // ~Inst_VOP3__V_CMPX_NEQ_F32 |
| |
| // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_NEQ_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_NLT_F32::Inst_VOP3__V_CMPX_NLT_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_nlt_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMPX_NLT_F32 |
| |
| Inst_VOP3__V_CMPX_NLT_F32::~Inst_VOP3__V_CMPX_NLT_F32() |
| { |
| } // ~Inst_VOP3__V_CMPX_NLT_F32 |
| |
| // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_NLT_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_TRU_F32::Inst_VOP3__V_CMPX_TRU_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_tru_f32", true) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CMPX_TRU_F32 |
| |
| Inst_VOP3__V_CMPX_TRU_F32::~Inst_VOP3__V_CMPX_TRU_F32() |
| { |
| } // ~Inst_VOP3__V_CMPX_TRU_F32 |
| |
| // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_TRU_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 1); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_F_F64::Inst_VOP3__V_CMP_F_F64(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_f_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMP_F_F64 |
| |
| Inst_VOP3__V_CMP_F_F64::~Inst_VOP3__V_CMP_F_F64() |
| { |
| } // ~Inst_VOP3__V_CMP_F_F64 |
| |
| // D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_F_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_LT_F64::Inst_VOP3__V_CMP_LT_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_lt_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMP_LT_F64 |
| |
| Inst_VOP3__V_CMP_LT_F64::~Inst_VOP3__V_CMP_LT_F64() |
| { |
| } // ~Inst_VOP3__V_CMP_LT_F64 |
| |
| // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_LT_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_EQ_F64::Inst_VOP3__V_CMP_EQ_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_eq_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMP_EQ_F64 |
| |
| Inst_VOP3__V_CMP_EQ_F64::~Inst_VOP3__V_CMP_EQ_F64() |
| { |
| } // ~Inst_VOP3__V_CMP_EQ_F64 |
| |
| // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_EQ_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_LE_F64::Inst_VOP3__V_CMP_LE_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_le_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMP_LE_F64 |
| |
| Inst_VOP3__V_CMP_LE_F64::~Inst_VOP3__V_CMP_LE_F64() |
| { |
| } // ~Inst_VOP3__V_CMP_LE_F64 |
| |
| // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_LE_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_GT_F64::Inst_VOP3__V_CMP_GT_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_gt_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMP_GT_F64 |
| |
| Inst_VOP3__V_CMP_GT_F64::~Inst_VOP3__V_CMP_GT_F64() |
| { |
| } // ~Inst_VOP3__V_CMP_GT_F64 |
| |
| // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_GT_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_LG_F64::Inst_VOP3__V_CMP_LG_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_lg_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMP_LG_F64 |
| |
| Inst_VOP3__V_CMP_LG_F64::~Inst_VOP3__V_CMP_LG_F64() |
| { |
| } // ~Inst_VOP3__V_CMP_LG_F64 |
| |
| // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_LG_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, (src0[lane] < src1[lane] |
| || src0[lane] > src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_GE_F64::Inst_VOP3__V_CMP_GE_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_ge_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMP_GE_F64 |
| |
| Inst_VOP3__V_CMP_GE_F64::~Inst_VOP3__V_CMP_GE_F64() |
| { |
| } // ~Inst_VOP3__V_CMP_GE_F64 |
| |
| // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_GE_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_O_F64::Inst_VOP3__V_CMP_O_F64(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_o_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMP_O_F64 |
| |
| Inst_VOP3__V_CMP_O_F64::~Inst_VOP3__V_CMP_O_F64() |
| { |
| } // ~Inst_VOP3__V_CMP_O_F64 |
| |
| // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_O_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, (!std::isnan(src0[lane]) |
| && !std::isnan(src1[lane])) ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_U_F64::Inst_VOP3__V_CMP_U_F64(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_u_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMP_U_F64 |
| |
| Inst_VOP3__V_CMP_U_F64::~Inst_VOP3__V_CMP_U_F64() |
| { |
| } // ~Inst_VOP3__V_CMP_U_F64 |
| |
| // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_U_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, (std::isnan(src0[lane]) |
| || std::isnan(src1[lane])) ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_NGE_F64::Inst_VOP3__V_CMP_NGE_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_nge_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMP_NGE_F64 |
| |
| Inst_VOP3__V_CMP_NGE_F64::~Inst_VOP3__V_CMP_NGE_F64() |
| { |
| } // ~Inst_VOP3__V_CMP_NGE_F64 |
| |
| // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_NGE_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_NLG_F64::Inst_VOP3__V_CMP_NLG_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_nlg_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMP_NLG_F64 |
| |
| Inst_VOP3__V_CMP_NLG_F64::~Inst_VOP3__V_CMP_NLG_F64() |
| { |
| } // ~Inst_VOP3__V_CMP_NLG_F64 |
| |
| // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_NLG_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, !(src0[lane] < src1[lane] |
| || src0[lane] > src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_NGT_F64::Inst_VOP3__V_CMP_NGT_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_ngt_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMP_NGT_F64 |
| |
| Inst_VOP3__V_CMP_NGT_F64::~Inst_VOP3__V_CMP_NGT_F64() |
| { |
| } // ~Inst_VOP3__V_CMP_NGT_F64 |
| |
| // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_NGT_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_NLE_F64::Inst_VOP3__V_CMP_NLE_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_nle_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMP_NLE_F64 |
| |
| Inst_VOP3__V_CMP_NLE_F64::~Inst_VOP3__V_CMP_NLE_F64() |
| { |
| } // ~Inst_VOP3__V_CMP_NLE_F64 |
| |
| // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_NLE_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_NEQ_F64::Inst_VOP3__V_CMP_NEQ_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_neq_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMP_NEQ_F64 |
| |
| Inst_VOP3__V_CMP_NEQ_F64::~Inst_VOP3__V_CMP_NEQ_F64() |
| { |
| } // ~Inst_VOP3__V_CMP_NEQ_F64 |
| |
| // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_NEQ_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_NLT_F64::Inst_VOP3__V_CMP_NLT_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_nlt_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMP_NLT_F64 |
| |
| Inst_VOP3__V_CMP_NLT_F64::~Inst_VOP3__V_CMP_NLT_F64() |
| { |
| } // ~Inst_VOP3__V_CMP_NLT_F64 |
| |
| // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_NLT_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_TRU_F64::Inst_VOP3__V_CMP_TRU_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_tru_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMP_TRU_F64 |
| |
| Inst_VOP3__V_CMP_TRU_F64::~Inst_VOP3__V_CMP_TRU_F64() |
| { |
| } // ~Inst_VOP3__V_CMP_TRU_F64 |
| |
| // D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_TRU_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 1); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_F_F64::Inst_VOP3__V_CMPX_F_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_f_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMPX_F_F64 |
| |
| Inst_VOP3__V_CMPX_F_F64::~Inst_VOP3__V_CMPX_F_F64() |
| { |
| } // ~Inst_VOP3__V_CMPX_F_F64 |
| |
| // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_F_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_LT_F64::Inst_VOP3__V_CMPX_LT_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_lt_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMPX_LT_F64 |
| |
| Inst_VOP3__V_CMPX_LT_F64::~Inst_VOP3__V_CMPX_LT_F64() |
| { |
| } // ~Inst_VOP3__V_CMPX_LT_F64 |
| |
| // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_LT_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_EQ_F64::Inst_VOP3__V_CMPX_EQ_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_eq_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMPX_EQ_F64 |
| |
| Inst_VOP3__V_CMPX_EQ_F64::~Inst_VOP3__V_CMPX_EQ_F64() |
| { |
| } // ~Inst_VOP3__V_CMPX_EQ_F64 |
| |
| // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_EQ_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_LE_F64::Inst_VOP3__V_CMPX_LE_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_le_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMPX_LE_F64 |
| |
| Inst_VOP3__V_CMPX_LE_F64::~Inst_VOP3__V_CMPX_LE_F64() |
| { |
| } // ~Inst_VOP3__V_CMPX_LE_F64 |
| |
| // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_LE_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_GT_F64::Inst_VOP3__V_CMPX_GT_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_gt_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMPX_GT_F64 |
| |
| Inst_VOP3__V_CMPX_GT_F64::~Inst_VOP3__V_CMPX_GT_F64() |
| { |
| } // ~Inst_VOP3__V_CMPX_GT_F64 |
| |
| // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_GT_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_LG_F64::Inst_VOP3__V_CMPX_LG_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_lg_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMPX_LG_F64 |
| |
| Inst_VOP3__V_CMPX_LG_F64::~Inst_VOP3__V_CMPX_LG_F64() |
| { |
| } // ~Inst_VOP3__V_CMPX_LG_F64 |
| |
| // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_LG_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, (src0[lane] < src1[lane] |
| || src0[lane] > src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_GE_F64::Inst_VOP3__V_CMPX_GE_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_ge_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMPX_GE_F64 |
| |
| Inst_VOP3__V_CMPX_GE_F64::~Inst_VOP3__V_CMPX_GE_F64() |
| { |
| } // ~Inst_VOP3__V_CMPX_GE_F64 |
| |
| // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_GE_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_O_F64::Inst_VOP3__V_CMPX_O_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_o_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMPX_O_F64 |
| |
| Inst_VOP3__V_CMPX_O_F64::~Inst_VOP3__V_CMPX_O_F64() |
| { |
| } // ~Inst_VOP3__V_CMPX_O_F64 |
| |
| // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC |
| // encoding. |
| void |
| Inst_VOP3__V_CMPX_O_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, (!std::isnan(src0[lane]) |
| && !std::isnan(src1[lane])) ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_U_F64::Inst_VOP3__V_CMPX_U_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_u_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMPX_U_F64 |
| |
| Inst_VOP3__V_CMPX_U_F64::~Inst_VOP3__V_CMPX_U_F64() |
| { |
| } // ~Inst_VOP3__V_CMPX_U_F64 |
| |
| // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC |
| // encoding. |
| void |
| Inst_VOP3__V_CMPX_U_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, (std::isnan(src0[lane]) |
| || std::isnan(src1[lane])) ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_NGE_F64::Inst_VOP3__V_CMPX_NGE_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_nge_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMPX_NGE_F64 |
| |
| Inst_VOP3__V_CMPX_NGE_F64::~Inst_VOP3__V_CMPX_NGE_F64() |
| { |
| } // ~Inst_VOP3__V_CMPX_NGE_F64 |
| |
| // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_NGE_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_NLG_F64::Inst_VOP3__V_CMPX_NLG_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_nlg_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMPX_NLG_F64 |
| |
| Inst_VOP3__V_CMPX_NLG_F64::~Inst_VOP3__V_CMPX_NLG_F64() |
| { |
| } // ~Inst_VOP3__V_CMPX_NLG_F64 |
| |
| // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_NLG_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, !(src0[lane] < src1[lane] |
| || src0[lane] > src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_NGT_F64::Inst_VOP3__V_CMPX_NGT_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_ngt_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMPX_NGT_F64 |
| |
| Inst_VOP3__V_CMPX_NGT_F64::~Inst_VOP3__V_CMPX_NGT_F64() |
| { |
| } // ~Inst_VOP3__V_CMPX_NGT_F64 |
| |
| // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_NGT_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_NLE_F64::Inst_VOP3__V_CMPX_NLE_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_nle_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMPX_NLE_F64 |
| |
| Inst_VOP3__V_CMPX_NLE_F64::~Inst_VOP3__V_CMPX_NLE_F64() |
| { |
| } // ~Inst_VOP3__V_CMPX_NLE_F64 |
| |
| // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_NLE_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_NEQ_F64::Inst_VOP3__V_CMPX_NEQ_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_neq_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMPX_NEQ_F64 |
| |
| Inst_VOP3__V_CMPX_NEQ_F64::~Inst_VOP3__V_CMPX_NEQ_F64() |
| { |
| } // ~Inst_VOP3__V_CMPX_NEQ_F64 |
| |
| // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_NEQ_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_NLT_F64::Inst_VOP3__V_CMPX_NLT_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_nlt_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMPX_NLT_F64 |
| |
| Inst_VOP3__V_CMPX_NLT_F64::~Inst_VOP3__V_CMPX_NLT_F64() |
| { |
| } // ~Inst_VOP3__V_CMPX_NLT_F64 |
| |
| // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_NLT_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_TRU_F64::Inst_VOP3__V_CMPX_TRU_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_tru_f64", true) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CMPX_TRU_F64 |
| |
| Inst_VOP3__V_CMPX_TRU_F64::~Inst_VOP3__V_CMPX_TRU_F64() |
| { |
| } // ~Inst_VOP3__V_CMPX_TRU_F64 |
| |
| // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_TRU_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 1); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_F_I16::Inst_VOP3__V_CMP_F_I16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_f_i16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_F_I16 |
| |
| Inst_VOP3__V_CMP_F_I16::~Inst_VOP3__V_CMP_F_I16() |
| { |
| } // ~Inst_VOP3__V_CMP_F_I16 |
| |
| // D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_F_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_LT_I16::Inst_VOP3__V_CMP_LT_I16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_lt_i16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_LT_I16 |
| |
| Inst_VOP3__V_CMP_LT_I16::~Inst_VOP3__V_CMP_LT_I16() |
| { |
| } // ~Inst_VOP3__V_CMP_LT_I16 |
| |
| // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_LT_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_EQ_I16::Inst_VOP3__V_CMP_EQ_I16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_eq_i16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_EQ_I16 |
| |
| Inst_VOP3__V_CMP_EQ_I16::~Inst_VOP3__V_CMP_EQ_I16() |
| { |
| } // ~Inst_VOP3__V_CMP_EQ_I16 |
| |
| // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_EQ_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_LE_I16::Inst_VOP3__V_CMP_LE_I16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_le_i16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_LE_I16 |
| |
| Inst_VOP3__V_CMP_LE_I16::~Inst_VOP3__V_CMP_LE_I16() |
| { |
| } // ~Inst_VOP3__V_CMP_LE_I16 |
| |
| // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_LE_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_GT_I16::Inst_VOP3__V_CMP_GT_I16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_gt_i16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_GT_I16 |
| |
| Inst_VOP3__V_CMP_GT_I16::~Inst_VOP3__V_CMP_GT_I16() |
| { |
| } // ~Inst_VOP3__V_CMP_GT_I16 |
| |
| // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_GT_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_NE_I16::Inst_VOP3__V_CMP_NE_I16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_ne_i16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_NE_I16 |
| |
| Inst_VOP3__V_CMP_NE_I16::~Inst_VOP3__V_CMP_NE_I16() |
| { |
| } // ~Inst_VOP3__V_CMP_NE_I16 |
| |
| // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_NE_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_GE_I16::Inst_VOP3__V_CMP_GE_I16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_ge_i16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_GE_I16 |
| |
| Inst_VOP3__V_CMP_GE_I16::~Inst_VOP3__V_CMP_GE_I16() |
| { |
| } // ~Inst_VOP3__V_CMP_GE_I16 |
| |
| // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_GE_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_T_I16::Inst_VOP3__V_CMP_T_I16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_t_i16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_T_I16 |
| |
| Inst_VOP3__V_CMP_T_I16::~Inst_VOP3__V_CMP_T_I16() |
| { |
| } // ~Inst_VOP3__V_CMP_T_I16 |
| |
| // D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_T_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 1); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_F_U16::Inst_VOP3__V_CMP_F_U16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_f_u16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_F_U16 |
| |
| Inst_VOP3__V_CMP_F_U16::~Inst_VOP3__V_CMP_F_U16() |
| { |
| } // ~Inst_VOP3__V_CMP_F_U16 |
| |
| // D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_F_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_LT_U16::Inst_VOP3__V_CMP_LT_U16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_lt_u16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_LT_U16 |
| |
| Inst_VOP3__V_CMP_LT_U16::~Inst_VOP3__V_CMP_LT_U16() |
| { |
| } // ~Inst_VOP3__V_CMP_LT_U16 |
| |
| // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_LT_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_EQ_U16::Inst_VOP3__V_CMP_EQ_U16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_eq_u16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_EQ_U16 |
| |
| Inst_VOP3__V_CMP_EQ_U16::~Inst_VOP3__V_CMP_EQ_U16() |
| { |
| } // ~Inst_VOP3__V_CMP_EQ_U16 |
| |
| // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_EQ_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_LE_U16::Inst_VOP3__V_CMP_LE_U16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_le_u16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_LE_U16 |
| |
| Inst_VOP3__V_CMP_LE_U16::~Inst_VOP3__V_CMP_LE_U16() |
| { |
| } // ~Inst_VOP3__V_CMP_LE_U16 |
| |
| // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_LE_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_GT_U16::Inst_VOP3__V_CMP_GT_U16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_gt_u16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_GT_U16 |
| |
| Inst_VOP3__V_CMP_GT_U16::~Inst_VOP3__V_CMP_GT_U16() |
| { |
| } // ~Inst_VOP3__V_CMP_GT_U16 |
| |
| // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_GT_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_NE_U16::Inst_VOP3__V_CMP_NE_U16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_ne_u16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_NE_U16 |
| |
| Inst_VOP3__V_CMP_NE_U16::~Inst_VOP3__V_CMP_NE_U16() |
| { |
| } // ~Inst_VOP3__V_CMP_NE_U16 |
| |
| // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_NE_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_GE_U16::Inst_VOP3__V_CMP_GE_U16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_ge_u16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_GE_U16 |
| |
| Inst_VOP3__V_CMP_GE_U16::~Inst_VOP3__V_CMP_GE_U16() |
| { |
| } // ~Inst_VOP3__V_CMP_GE_U16 |
| |
| // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_GE_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_T_U16::Inst_VOP3__V_CMP_T_U16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_t_u16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_T_U16 |
| |
| Inst_VOP3__V_CMP_T_U16::~Inst_VOP3__V_CMP_T_U16() |
| { |
| } // ~Inst_VOP3__V_CMP_T_U16 |
| |
| // D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_T_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 1); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_F_I16::Inst_VOP3__V_CMPX_F_I16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_f_i16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_F_I16 |
| |
| Inst_VOP3__V_CMPX_F_I16::~Inst_VOP3__V_CMPX_F_I16() |
| { |
| } // ~Inst_VOP3__V_CMPX_F_I16 |
| |
| // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_F_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_LT_I16::Inst_VOP3__V_CMPX_LT_I16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_lt_i16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_LT_I16 |
| |
| Inst_VOP3__V_CMPX_LT_I16::~Inst_VOP3__V_CMPX_LT_I16() |
| { |
| } // ~Inst_VOP3__V_CMPX_LT_I16 |
| |
| // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_LT_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_EQ_I16::Inst_VOP3__V_CMPX_EQ_I16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_eq_i16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_EQ_I16 |
| |
| Inst_VOP3__V_CMPX_EQ_I16::~Inst_VOP3__V_CMPX_EQ_I16() |
| { |
| } // ~Inst_VOP3__V_CMPX_EQ_I16 |
| |
| // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_EQ_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_LE_I16::Inst_VOP3__V_CMPX_LE_I16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_le_i16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_LE_I16 |
| |
| Inst_VOP3__V_CMPX_LE_I16::~Inst_VOP3__V_CMPX_LE_I16() |
| { |
| } // ~Inst_VOP3__V_CMPX_LE_I16 |
| |
| // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_LE_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_GT_I16::Inst_VOP3__V_CMPX_GT_I16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_gt_i16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_GT_I16 |
| |
| Inst_VOP3__V_CMPX_GT_I16::~Inst_VOP3__V_CMPX_GT_I16() |
| { |
| } // ~Inst_VOP3__V_CMPX_GT_I16 |
| |
| // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_GT_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_NE_I16::Inst_VOP3__V_CMPX_NE_I16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_ne_i16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_NE_I16 |
| |
| Inst_VOP3__V_CMPX_NE_I16::~Inst_VOP3__V_CMPX_NE_I16() |
| { |
| } // ~Inst_VOP3__V_CMPX_NE_I16 |
| |
| // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_NE_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_GE_I16::Inst_VOP3__V_CMPX_GE_I16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_ge_i16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_GE_I16 |
| |
| Inst_VOP3__V_CMPX_GE_I16::~Inst_VOP3__V_CMPX_GE_I16() |
| { |
| } // ~Inst_VOP3__V_CMPX_GE_I16 |
| |
| // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_GE_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_T_I16::Inst_VOP3__V_CMPX_T_I16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_t_i16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_T_I16 |
| |
| Inst_VOP3__V_CMPX_T_I16::~Inst_VOP3__V_CMPX_T_I16() |
| { |
| } // ~Inst_VOP3__V_CMPX_T_I16 |
| |
| // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_T_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 1); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_F_U16::Inst_VOP3__V_CMPX_F_U16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_f_u16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_F_U16 |
| |
| Inst_VOP3__V_CMPX_F_U16::~Inst_VOP3__V_CMPX_F_U16() |
| { |
| } // ~Inst_VOP3__V_CMPX_F_U16 |
| |
| // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_F_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_LT_U16::Inst_VOP3__V_CMPX_LT_U16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_lt_u16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_LT_U16 |
| |
| Inst_VOP3__V_CMPX_LT_U16::~Inst_VOP3__V_CMPX_LT_U16() |
| { |
| } // ~Inst_VOP3__V_CMPX_LT_U16 |
| |
| // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_LT_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_EQ_U16::Inst_VOP3__V_CMPX_EQ_U16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_eq_u16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_EQ_U16 |
| |
| Inst_VOP3__V_CMPX_EQ_U16::~Inst_VOP3__V_CMPX_EQ_U16() |
| { |
| } // ~Inst_VOP3__V_CMPX_EQ_U16 |
| |
| // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_EQ_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_LE_U16::Inst_VOP3__V_CMPX_LE_U16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_le_u16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_LE_U16 |
| |
| Inst_VOP3__V_CMPX_LE_U16::~Inst_VOP3__V_CMPX_LE_U16() |
| { |
| } // ~Inst_VOP3__V_CMPX_LE_U16 |
| |
| // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_LE_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_GT_U16::Inst_VOP3__V_CMPX_GT_U16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_gt_u16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_GT_U16 |
| |
| Inst_VOP3__V_CMPX_GT_U16::~Inst_VOP3__V_CMPX_GT_U16() |
| { |
| } // ~Inst_VOP3__V_CMPX_GT_U16 |
| |
| // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_GT_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_NE_U16::Inst_VOP3__V_CMPX_NE_U16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_ne_u16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_NE_U16 |
| |
| Inst_VOP3__V_CMPX_NE_U16::~Inst_VOP3__V_CMPX_NE_U16() |
| { |
| } // ~Inst_VOP3__V_CMPX_NE_U16 |
| |
| // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_NE_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_GE_U16::Inst_VOP3__V_CMPX_GE_U16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_ge_u16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_GE_U16 |
| |
| Inst_VOP3__V_CMPX_GE_U16::~Inst_VOP3__V_CMPX_GE_U16() |
| { |
| } // ~Inst_VOP3__V_CMPX_GE_U16 |
| |
| // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_GE_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_T_U16::Inst_VOP3__V_CMPX_T_U16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_t_u16", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_T_U16 |
| |
| Inst_VOP3__V_CMPX_T_U16::~Inst_VOP3__V_CMPX_T_U16() |
| { |
| } // ~Inst_VOP3__V_CMPX_T_U16 |
| |
| // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_T_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 1); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_F_I32::Inst_VOP3__V_CMP_F_I32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_f_i32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_F_I32 |
| |
| Inst_VOP3__V_CMP_F_I32::~Inst_VOP3__V_CMP_F_I32() |
| { |
| } // ~Inst_VOP3__V_CMP_F_I32 |
| |
| // D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_F_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_LT_I32::Inst_VOP3__V_CMP_LT_I32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_lt_i32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_LT_I32 |
| |
| Inst_VOP3__V_CMP_LT_I32::~Inst_VOP3__V_CMP_LT_I32() |
| { |
| } // ~Inst_VOP3__V_CMP_LT_I32 |
| |
| // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_EQ_I32::Inst_VOP3__V_CMP_EQ_I32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_eq_i32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_EQ_I32 |
| |
| Inst_VOP3__V_CMP_EQ_I32::~Inst_VOP3__V_CMP_EQ_I32() |
| { |
| } // ~Inst_VOP3__V_CMP_EQ_I32 |
| |
| // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_LE_I32::Inst_VOP3__V_CMP_LE_I32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_le_i32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_LE_I32 |
| |
| Inst_VOP3__V_CMP_LE_I32::~Inst_VOP3__V_CMP_LE_I32() |
| { |
| } // ~Inst_VOP3__V_CMP_LE_I32 |
| |
| // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_GT_I32::Inst_VOP3__V_CMP_GT_I32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_gt_i32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_GT_I32 |
| |
| Inst_VOP3__V_CMP_GT_I32::~Inst_VOP3__V_CMP_GT_I32() |
| { |
| } // ~Inst_VOP3__V_CMP_GT_I32 |
| |
| // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_NE_I32::Inst_VOP3__V_CMP_NE_I32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_ne_i32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_NE_I32 |
| |
| Inst_VOP3__V_CMP_NE_I32::~Inst_VOP3__V_CMP_NE_I32() |
| { |
| } // ~Inst_VOP3__V_CMP_NE_I32 |
| |
| // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_NE_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_GE_I32::Inst_VOP3__V_CMP_GE_I32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_ge_i32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_GE_I32 |
| |
| Inst_VOP3__V_CMP_GE_I32::~Inst_VOP3__V_CMP_GE_I32() |
| { |
| } // ~Inst_VOP3__V_CMP_GE_I32 |
| |
| // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_T_I32::Inst_VOP3__V_CMP_T_I32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_t_i32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_T_I32 |
| |
| Inst_VOP3__V_CMP_T_I32::~Inst_VOP3__V_CMP_T_I32() |
| { |
| } // ~Inst_VOP3__V_CMP_T_I32 |
| |
| // D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_T_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 1); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_F_U32::Inst_VOP3__V_CMP_F_U32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_f_u32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_F_U32 |
| |
| Inst_VOP3__V_CMP_F_U32::~Inst_VOP3__V_CMP_F_U32() |
| { |
| } // ~Inst_VOP3__V_CMP_F_U32 |
| |
| // D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_F_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_LT_U32::Inst_VOP3__V_CMP_LT_U32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_lt_u32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_LT_U32 |
| |
| Inst_VOP3__V_CMP_LT_U32::~Inst_VOP3__V_CMP_LT_U32() |
| { |
| } // ~Inst_VOP3__V_CMP_LT_U32 |
| |
| // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_EQ_U32::Inst_VOP3__V_CMP_EQ_U32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_eq_u32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_EQ_U32 |
| |
| Inst_VOP3__V_CMP_EQ_U32::~Inst_VOP3__V_CMP_EQ_U32() |
| { |
| } // ~Inst_VOP3__V_CMP_EQ_U32 |
| |
| // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_LE_U32::Inst_VOP3__V_CMP_LE_U32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_le_u32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_LE_U32 |
| |
| Inst_VOP3__V_CMP_LE_U32::~Inst_VOP3__V_CMP_LE_U32() |
| { |
| } // ~Inst_VOP3__V_CMP_LE_U32 |
| |
| // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_GT_U32::Inst_VOP3__V_CMP_GT_U32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_gt_u32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_GT_U32 |
| |
| Inst_VOP3__V_CMP_GT_U32::~Inst_VOP3__V_CMP_GT_U32() |
| { |
| } // ~Inst_VOP3__V_CMP_GT_U32 |
| |
| // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_NE_U32::Inst_VOP3__V_CMP_NE_U32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_ne_u32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_NE_U32 |
| |
| Inst_VOP3__V_CMP_NE_U32::~Inst_VOP3__V_CMP_NE_U32() |
| { |
| } // ~Inst_VOP3__V_CMP_NE_U32 |
| |
| // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_NE_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_GE_U32::Inst_VOP3__V_CMP_GE_U32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_ge_u32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_GE_U32 |
| |
| Inst_VOP3__V_CMP_GE_U32::~Inst_VOP3__V_CMP_GE_U32() |
| { |
| } // ~Inst_VOP3__V_CMP_GE_U32 |
| |
| // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_T_U32::Inst_VOP3__V_CMP_T_U32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_t_u32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_T_U32 |
| |
| Inst_VOP3__V_CMP_T_U32::~Inst_VOP3__V_CMP_T_U32() |
| { |
| } // ~Inst_VOP3__V_CMP_T_U32 |
| |
| // D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_T_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 1); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_F_I32::Inst_VOP3__V_CMPX_F_I32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_f_i32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_F_I32 |
| |
| Inst_VOP3__V_CMPX_F_I32::~Inst_VOP3__V_CMPX_F_I32() |
| { |
| } // ~Inst_VOP3__V_CMPX_F_I32 |
| |
| // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_F_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_LT_I32::Inst_VOP3__V_CMPX_LT_I32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_lt_i32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_LT_I32 |
| |
| Inst_VOP3__V_CMPX_LT_I32::~Inst_VOP3__V_CMPX_LT_I32() |
| { |
| } // ~Inst_VOP3__V_CMPX_LT_I32 |
| |
| // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_LT_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_EQ_I32::Inst_VOP3__V_CMPX_EQ_I32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_eq_i32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_EQ_I32 |
| |
| Inst_VOP3__V_CMPX_EQ_I32::~Inst_VOP3__V_CMPX_EQ_I32() |
| { |
| } // ~Inst_VOP3__V_CMPX_EQ_I32 |
| |
| // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_EQ_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_LE_I32::Inst_VOP3__V_CMPX_LE_I32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_le_i32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_LE_I32 |
| |
| Inst_VOP3__V_CMPX_LE_I32::~Inst_VOP3__V_CMPX_LE_I32() |
| { |
| } // ~Inst_VOP3__V_CMPX_LE_I32 |
| |
| // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_LE_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_GT_I32::Inst_VOP3__V_CMPX_GT_I32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_gt_i32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_GT_I32 |
| |
| Inst_VOP3__V_CMPX_GT_I32::~Inst_VOP3__V_CMPX_GT_I32() |
| { |
| } // ~Inst_VOP3__V_CMPX_GT_I32 |
| |
| // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_GT_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_NE_I32::Inst_VOP3__V_CMPX_NE_I32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_ne_i32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_NE_I32 |
| |
| Inst_VOP3__V_CMPX_NE_I32::~Inst_VOP3__V_CMPX_NE_I32() |
| { |
| } // ~Inst_VOP3__V_CMPX_NE_I32 |
| |
| // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_NE_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_GE_I32::Inst_VOP3__V_CMPX_GE_I32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_ge_i32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_GE_I32 |
| |
| Inst_VOP3__V_CMPX_GE_I32::~Inst_VOP3__V_CMPX_GE_I32() |
| { |
| } // ~Inst_VOP3__V_CMPX_GE_I32 |
| |
| // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_GE_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_T_I32::Inst_VOP3__V_CMPX_T_I32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_t_i32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_T_I32 |
| |
| Inst_VOP3__V_CMPX_T_I32::~Inst_VOP3__V_CMPX_T_I32() |
| { |
| } // ~Inst_VOP3__V_CMPX_T_I32 |
| |
| // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_T_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 1); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_F_U32::Inst_VOP3__V_CMPX_F_U32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_f_u32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_F_U32 |
| |
| Inst_VOP3__V_CMPX_F_U32::~Inst_VOP3__V_CMPX_F_U32() |
| { |
| } // ~Inst_VOP3__V_CMPX_F_U32 |
| |
| // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_F_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_LT_U32::Inst_VOP3__V_CMPX_LT_U32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_lt_u32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_LT_U32 |
| |
| Inst_VOP3__V_CMPX_LT_U32::~Inst_VOP3__V_CMPX_LT_U32() |
| { |
| } // ~Inst_VOP3__V_CMPX_LT_U32 |
| |
| // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_LT_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_EQ_U32::Inst_VOP3__V_CMPX_EQ_U32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_eq_u32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_EQ_U32 |
| |
| Inst_VOP3__V_CMPX_EQ_U32::~Inst_VOP3__V_CMPX_EQ_U32() |
| { |
| } // ~Inst_VOP3__V_CMPX_EQ_U32 |
| |
| // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_EQ_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_LE_U32::Inst_VOP3__V_CMPX_LE_U32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_le_u32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_LE_U32 |
| |
| Inst_VOP3__V_CMPX_LE_U32::~Inst_VOP3__V_CMPX_LE_U32() |
| { |
| } // ~Inst_VOP3__V_CMPX_LE_U32 |
| |
| // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_LE_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_GT_U32::Inst_VOP3__V_CMPX_GT_U32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_gt_u32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_GT_U32 |
| |
| Inst_VOP3__V_CMPX_GT_U32::~Inst_VOP3__V_CMPX_GT_U32() |
| { |
| } // ~Inst_VOP3__V_CMPX_GT_U32 |
| |
| // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_GT_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_NE_U32::Inst_VOP3__V_CMPX_NE_U32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_ne_u32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_NE_U32 |
| |
| Inst_VOP3__V_CMPX_NE_U32::~Inst_VOP3__V_CMPX_NE_U32() |
| { |
| } // ~Inst_VOP3__V_CMPX_NE_U32 |
| |
| // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_NE_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_GE_U32::Inst_VOP3__V_CMPX_GE_U32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_ge_u32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_GE_U32 |
| |
| Inst_VOP3__V_CMPX_GE_U32::~Inst_VOP3__V_CMPX_GE_U32() |
| { |
| } // ~Inst_VOP3__V_CMPX_GE_U32 |
| |
| // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_GE_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_T_U32::Inst_VOP3__V_CMPX_T_U32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_t_u32", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_T_U32 |
| |
| Inst_VOP3__V_CMPX_T_U32::~Inst_VOP3__V_CMPX_T_U32() |
| { |
| } // ~Inst_VOP3__V_CMPX_T_U32 |
| |
| // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_T_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 1); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_F_I64::Inst_VOP3__V_CMP_F_I64(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_f_i64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_F_I64 |
| |
| Inst_VOP3__V_CMP_F_I64::~Inst_VOP3__V_CMP_F_I64() |
| { |
| } // ~Inst_VOP3__V_CMP_F_I64 |
| |
| // D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_F_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_LT_I64::Inst_VOP3__V_CMP_LT_I64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_lt_i64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_LT_I64 |
| |
| Inst_VOP3__V_CMP_LT_I64::~Inst_VOP3__V_CMP_LT_I64() |
| { |
| } // ~Inst_VOP3__V_CMP_LT_I64 |
| |
| // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_LT_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_EQ_I64::Inst_VOP3__V_CMP_EQ_I64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_eq_i64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_EQ_I64 |
| |
| Inst_VOP3__V_CMP_EQ_I64::~Inst_VOP3__V_CMP_EQ_I64() |
| { |
| } // ~Inst_VOP3__V_CMP_EQ_I64 |
| |
| // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_EQ_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_LE_I64::Inst_VOP3__V_CMP_LE_I64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_le_i64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_LE_I64 |
| |
| Inst_VOP3__V_CMP_LE_I64::~Inst_VOP3__V_CMP_LE_I64() |
| { |
| } // ~Inst_VOP3__V_CMP_LE_I64 |
| |
| // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_LE_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_GT_I64::Inst_VOP3__V_CMP_GT_I64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_gt_i64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_GT_I64 |
| |
| Inst_VOP3__V_CMP_GT_I64::~Inst_VOP3__V_CMP_GT_I64() |
| { |
| } // ~Inst_VOP3__V_CMP_GT_I64 |
| |
| // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_GT_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_NE_I64::Inst_VOP3__V_CMP_NE_I64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_ne_i64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_NE_I64 |
| |
| Inst_VOP3__V_CMP_NE_I64::~Inst_VOP3__V_CMP_NE_I64() |
| { |
| } // ~Inst_VOP3__V_CMP_NE_I64 |
| |
| // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_NE_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_GE_I64::Inst_VOP3__V_CMP_GE_I64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_ge_i64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_GE_I64 |
| |
| Inst_VOP3__V_CMP_GE_I64::~Inst_VOP3__V_CMP_GE_I64() |
| { |
| } // ~Inst_VOP3__V_CMP_GE_I64 |
| |
| // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_GE_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_T_I64::Inst_VOP3__V_CMP_T_I64(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_t_i64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_T_I64 |
| |
| Inst_VOP3__V_CMP_T_I64::~Inst_VOP3__V_CMP_T_I64() |
| { |
| } // ~Inst_VOP3__V_CMP_T_I64 |
| |
| // D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_T_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 1); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_F_U64::Inst_VOP3__V_CMP_F_U64(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_f_u64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_F_U64 |
| |
| Inst_VOP3__V_CMP_F_U64::~Inst_VOP3__V_CMP_F_U64() |
| { |
| } // ~Inst_VOP3__V_CMP_F_U64 |
| |
| // D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_F_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_LT_U64::Inst_VOP3__V_CMP_LT_U64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_lt_u64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_LT_U64 |
| |
| Inst_VOP3__V_CMP_LT_U64::~Inst_VOP3__V_CMP_LT_U64() |
| { |
| } // ~Inst_VOP3__V_CMP_LT_U64 |
| |
| // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_LT_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_EQ_U64::Inst_VOP3__V_CMP_EQ_U64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_eq_u64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_EQ_U64 |
| |
| Inst_VOP3__V_CMP_EQ_U64::~Inst_VOP3__V_CMP_EQ_U64() |
| { |
| } // ~Inst_VOP3__V_CMP_EQ_U64 |
| |
| // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_LE_U64::Inst_VOP3__V_CMP_LE_U64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_le_u64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_LE_U64 |
| |
| Inst_VOP3__V_CMP_LE_U64::~Inst_VOP3__V_CMP_LE_U64() |
| { |
| } // ~Inst_VOP3__V_CMP_LE_U64 |
| |
| // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_LE_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_GT_U64::Inst_VOP3__V_CMP_GT_U64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_gt_u64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_GT_U64 |
| |
| Inst_VOP3__V_CMP_GT_U64::~Inst_VOP3__V_CMP_GT_U64() |
| { |
| } // ~Inst_VOP3__V_CMP_GT_U64 |
| |
| // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_GT_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_NE_U64::Inst_VOP3__V_CMP_NE_U64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_ne_u64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_NE_U64 |
| |
| Inst_VOP3__V_CMP_NE_U64::~Inst_VOP3__V_CMP_NE_U64() |
| { |
| } // ~Inst_VOP3__V_CMP_NE_U64 |
| |
| // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_NE_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_GE_U64::Inst_VOP3__V_CMP_GE_U64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_ge_u64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_GE_U64 |
| |
| Inst_VOP3__V_CMP_GE_U64::~Inst_VOP3__V_CMP_GE_U64() |
| { |
| } // ~Inst_VOP3__V_CMP_GE_U64 |
| |
| // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_GE_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMP_T_U64::Inst_VOP3__V_CMP_T_U64(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmp_t_u64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMP_T_U64 |
| |
| Inst_VOP3__V_CMP_T_U64::~Inst_VOP3__V_CMP_T_U64() |
| { |
| } // ~Inst_VOP3__V_CMP_T_U64 |
| |
| // D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMP_T_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 1); |
| } |
| } |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_F_I64::Inst_VOP3__V_CMPX_F_I64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_f_i64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_F_I64 |
| |
| Inst_VOP3__V_CMPX_F_I64::~Inst_VOP3__V_CMPX_F_I64() |
| { |
| } // ~Inst_VOP3__V_CMPX_F_I64 |
| |
| // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_F_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_LT_I64::Inst_VOP3__V_CMPX_LT_I64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_lt_i64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_LT_I64 |
| |
| Inst_VOP3__V_CMPX_LT_I64::~Inst_VOP3__V_CMPX_LT_I64() |
| { |
| } // ~Inst_VOP3__V_CMPX_LT_I64 |
| |
| // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_LT_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_EQ_I64::Inst_VOP3__V_CMPX_EQ_I64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_eq_i64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_EQ_I64 |
| |
| Inst_VOP3__V_CMPX_EQ_I64::~Inst_VOP3__V_CMPX_EQ_I64() |
| { |
| } // ~Inst_VOP3__V_CMPX_EQ_I64 |
| |
| // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_EQ_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_LE_I64::Inst_VOP3__V_CMPX_LE_I64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_le_i64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_LE_I64 |
| |
| Inst_VOP3__V_CMPX_LE_I64::~Inst_VOP3__V_CMPX_LE_I64() |
| { |
| } // ~Inst_VOP3__V_CMPX_LE_I64 |
| |
| // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_LE_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_GT_I64::Inst_VOP3__V_CMPX_GT_I64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_gt_i64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_GT_I64 |
| |
| Inst_VOP3__V_CMPX_GT_I64::~Inst_VOP3__V_CMPX_GT_I64() |
| { |
| } // ~Inst_VOP3__V_CMPX_GT_I64 |
| |
| // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_GT_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_NE_I64::Inst_VOP3__V_CMPX_NE_I64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_ne_i64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_NE_I64 |
| |
| Inst_VOP3__V_CMPX_NE_I64::~Inst_VOP3__V_CMPX_NE_I64() |
| { |
| } // ~Inst_VOP3__V_CMPX_NE_I64 |
| |
| // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_NE_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_GE_I64::Inst_VOP3__V_CMPX_GE_I64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_ge_i64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_GE_I64 |
| |
| Inst_VOP3__V_CMPX_GE_I64::~Inst_VOP3__V_CMPX_GE_I64() |
| { |
| } // ~Inst_VOP3__V_CMPX_GE_I64 |
| |
| // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_GE_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_T_I64::Inst_VOP3__V_CMPX_T_I64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_t_i64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_T_I64 |
| |
| Inst_VOP3__V_CMPX_T_I64::~Inst_VOP3__V_CMPX_T_I64() |
| { |
| } // ~Inst_VOP3__V_CMPX_T_I64 |
| |
| // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_T_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 1); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_F_U64::Inst_VOP3__V_CMPX_F_U64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_f_u64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_F_U64 |
| |
| Inst_VOP3__V_CMPX_F_U64::~Inst_VOP3__V_CMPX_F_U64() |
| { |
| } // ~Inst_VOP3__V_CMPX_F_U64 |
| |
| // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_F_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_LT_U64::Inst_VOP3__V_CMPX_LT_U64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_lt_u64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_LT_U64 |
| |
| Inst_VOP3__V_CMPX_LT_U64::~Inst_VOP3__V_CMPX_LT_U64() |
| { |
| } // ~Inst_VOP3__V_CMPX_LT_U64 |
| |
| // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_LT_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_EQ_U64::Inst_VOP3__V_CMPX_EQ_U64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_eq_u64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_EQ_U64 |
| |
| Inst_VOP3__V_CMPX_EQ_U64::~Inst_VOP3__V_CMPX_EQ_U64() |
| { |
| } // ~Inst_VOP3__V_CMPX_EQ_U64 |
| |
| // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_EQ_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_LE_U64::Inst_VOP3__V_CMPX_LE_U64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_le_u64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_LE_U64 |
| |
| Inst_VOP3__V_CMPX_LE_U64::~Inst_VOP3__V_CMPX_LE_U64() |
| { |
| } // ~Inst_VOP3__V_CMPX_LE_U64 |
| |
| // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_LE_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_GT_U64::Inst_VOP3__V_CMPX_GT_U64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_gt_u64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_GT_U64 |
| |
| Inst_VOP3__V_CMPX_GT_U64::~Inst_VOP3__V_CMPX_GT_U64() |
| { |
| } // ~Inst_VOP3__V_CMPX_GT_U64 |
| |
| // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_GT_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_NE_U64::Inst_VOP3__V_CMPX_NE_U64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_ne_u64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_NE_U64 |
| |
| Inst_VOP3__V_CMPX_NE_U64::~Inst_VOP3__V_CMPX_NE_U64() |
| { |
| } // ~Inst_VOP3__V_CMPX_NE_U64 |
| |
| // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_NE_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_GE_U64::Inst_VOP3__V_CMPX_GE_U64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_ge_u64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_GE_U64 |
| |
| Inst_VOP3__V_CMPX_GE_U64::~Inst_VOP3__V_CMPX_GE_U64() |
| { |
| } // ~Inst_VOP3__V_CMPX_GE_U64 |
| |
| // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_GE_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CMPX_T_U64::Inst_VOP3__V_CMPX_T_U64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cmpx_t_u64", true) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CMPX_T_U64 |
| |
| Inst_VOP3__V_CMPX_T_U64::~Inst_VOP3__V_CMPX_T_U64() |
| { |
| } // ~Inst_VOP3__V_CMPX_T_U64 |
| |
| // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. |
| void |
| Inst_VOP3__V_CMPX_T_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ScalarOperandU64 sdst(gpuDynInst, instData.VDST); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| sdst.setBit(lane, 1); |
| } |
| } |
| |
| wf->execMask() = sdst.rawData(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_CNDMASK_B32::Inst_VOP3__V_CNDMASK_B32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cndmask_b32", false) |
| { |
| setFlag(ALU); |
| setFlag(ReadsVCC); |
| } // Inst_VOP3__V_CNDMASK_B32 |
| |
| Inst_VOP3__V_CNDMASK_B32::~Inst_VOP3__V_CNDMASK_B32() |
| { |
| } // ~Inst_VOP3__V_CNDMASK_B32 |
| |
| // D.u = (VCC[i] ? S1.u : S0.u) (i = threadID in wave); VOP3: specify VCC |
| // as a scalar GPR in S2. |
| void |
| Inst_VOP3__V_CNDMASK_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| vcc.read(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = bits(vcc.rawData(), lane) |
| ? src1[lane] : src0[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_ADD_F32::Inst_VOP3__V_ADD_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_add_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_ADD_F32 |
| |
| Inst_VOP3__V_ADD_F32::~Inst_VOP3__V_ADD_F32() |
| { |
| } // ~Inst_VOP3__V_ADD_F32 |
| |
| // D.f = S0.f + S1.f. |
| void |
| Inst_VOP3__V_ADD_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src0[lane] + src1[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_SUB_F32::Inst_VOP3__V_SUB_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_sub_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_SUB_F32 |
| |
| Inst_VOP3__V_SUB_F32::~Inst_VOP3__V_SUB_F32() |
| { |
| } // ~Inst_VOP3__V_SUB_F32 |
| |
| // D.f = S0.f - S1.f. |
| void |
| Inst_VOP3__V_SUB_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src0[lane] - src1[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_SUBREV_F32::Inst_VOP3__V_SUBREV_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_subrev_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_SUBREV_F32 |
| |
| Inst_VOP3__V_SUBREV_F32::~Inst_VOP3__V_SUBREV_F32() |
| { |
| } // ~Inst_VOP3__V_SUBREV_F32 |
| |
| // D.f = S1.f - S0.f. |
| void |
| Inst_VOP3__V_SUBREV_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src1[lane] - src0[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MUL_LEGACY_F32::Inst_VOP3__V_MUL_LEGACY_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_mul_legacy_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_MUL_LEGACY_F32 |
| |
| Inst_VOP3__V_MUL_LEGACY_F32::~Inst_VOP3__V_MUL_LEGACY_F32() |
| { |
| } // ~Inst_VOP3__V_MUL_LEGACY_F32 |
| |
| // D.f = S0.f * S1.f |
| void |
| Inst_VOP3__V_MUL_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| if (std::isnan(src0[lane]) || |
| std::isnan(src1[lane])) { |
| vdst[lane] = NAN; |
| } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || |
| std::fpclassify(src0[lane]) == FP_ZERO) && |
| !std::signbit(src0[lane])) { |
| if (std::isinf(src1[lane])) { |
| vdst[lane] = NAN; |
| } else if (!std::signbit(src1[lane])) { |
| vdst[lane] = +0.0; |
| } else { |
| vdst[lane] = -0.0; |
| } |
| } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || |
| std::fpclassify(src0[lane]) == FP_ZERO) && |
| std::signbit(src0[lane])) { |
| if (std::isinf(src1[lane])) { |
| vdst[lane] = NAN; |
| } else if (std::signbit(src1[lane])) { |
| vdst[lane] = +0.0; |
| } else { |
| vdst[lane] = -0.0; |
| } |
| } else if (std::isinf(src0[lane]) && |
| !std::signbit(src0[lane])) { |
| if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || |
| std::fpclassify(src1[lane]) == FP_ZERO) { |
| vdst[lane] = NAN; |
| } else if (!std::signbit(src1[lane])) { |
| vdst[lane] = +INFINITY; |
| } else { |
| vdst[lane] = -INFINITY; |
| } |
| } else if (std::isinf(src0[lane]) && |
| std::signbit(src0[lane])) { |
| if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || |
| std::fpclassify(src1[lane]) == FP_ZERO) { |
| vdst[lane] = NAN; |
| } else if (std::signbit(src1[lane])) { |
| vdst[lane] = +INFINITY; |
| } else { |
| vdst[lane] = -INFINITY; |
| } |
| } else { |
| vdst[lane] = src0[lane] * src1[lane]; |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MUL_F32::Inst_VOP3__V_MUL_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_mul_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_MUL_F32 |
| |
| Inst_VOP3__V_MUL_F32::~Inst_VOP3__V_MUL_F32() |
| { |
| } // ~Inst_VOP3__V_MUL_F32 |
| |
| // D.f = S0.f * S1.f. |
| void |
| Inst_VOP3__V_MUL_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| if (std::isnan(src0[lane]) || |
| std::isnan(src1[lane])) { |
| vdst[lane] = NAN; |
| } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || |
| std::fpclassify(src0[lane]) == FP_ZERO) && |
| !std::signbit(src0[lane])) { |
| if (std::isinf(src1[lane])) { |
| vdst[lane] = NAN; |
| } else if (!std::signbit(src1[lane])) { |
| vdst[lane] = +0.0; |
| } else { |
| vdst[lane] = -0.0; |
| } |
| } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || |
| std::fpclassify(src0[lane]) == FP_ZERO) && |
| std::signbit(src0[lane])) { |
| if (std::isinf(src1[lane])) { |
| vdst[lane] = NAN; |
| } else if (std::signbit(src1[lane])) { |
| vdst[lane] = +0.0; |
| } else { |
| vdst[lane] = -0.0; |
| } |
| } else if (std::isinf(src0[lane]) && |
| !std::signbit(src0[lane])) { |
| if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || |
| std::fpclassify(src1[lane]) == FP_ZERO) { |
| vdst[lane] = NAN; |
| } else if (!std::signbit(src1[lane])) { |
| vdst[lane] = +INFINITY; |
| } else { |
| vdst[lane] = -INFINITY; |
| } |
| } else if (std::isinf(src0[lane]) && |
| std::signbit(src0[lane])) { |
| if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || |
| std::fpclassify(src1[lane]) == FP_ZERO) { |
| vdst[lane] = NAN; |
| } else if (std::signbit(src1[lane])) { |
| vdst[lane] = +INFINITY; |
| } else { |
| vdst[lane] = -INFINITY; |
| } |
| } else { |
| vdst[lane] = src0[lane] * src1[lane]; |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MUL_I32_I24::Inst_VOP3__V_MUL_I32_I24(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_mul_i32_i24", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_MUL_I32_I24 |
| |
| Inst_VOP3__V_MUL_I32_I24::~Inst_VOP3__V_MUL_I32_I24() |
| { |
| } // ~Inst_VOP3__V_MUL_I32_I24 |
| |
| // D.i = S0.i[23:0] * S1.i[23:0]. |
| void |
| Inst_VOP3__V_MUL_I32_I24::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); |
| VecOperandI32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = szext<24>(src0[lane]) * szext<24>(src1[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MUL_HI_I32_I24::Inst_VOP3__V_MUL_HI_I32_I24(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_mul_hi_i32_i24", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_MUL_HI_I32_I24 |
| |
| Inst_VOP3__V_MUL_HI_I32_I24::~Inst_VOP3__V_MUL_HI_I32_I24() |
| { |
| } // ~Inst_VOP3__V_MUL_HI_I32_I24 |
| |
| // D.i = (S0.i[23:0] * S1.i[23:0]) >> 32. |
| void |
| Inst_VOP3__V_MUL_HI_I32_I24::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); |
| VecOperandI32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| VecElemI64 tmp_src0 = (VecElemI64)szext<24>(src0[lane]); |
| VecElemI64 tmp_src1 = (VecElemI64)szext<24>(src1[lane]); |
| |
| vdst[lane] = (VecElemI32)((tmp_src0 * tmp_src1) >> 32); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MUL_U32_U24::Inst_VOP3__V_MUL_U32_U24(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_mul_u32_u24", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_MUL_U32_U24 |
| |
| Inst_VOP3__V_MUL_U32_U24::~Inst_VOP3__V_MUL_U32_U24() |
| { |
| } // ~Inst_VOP3__V_MUL_U32_U24 |
| |
| // D.u = S0.u[23:0] * S1.u[23:0]. |
| void |
| Inst_VOP3__V_MUL_U32_U24::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = bits(src0[lane], 23, 0) * bits(src1[lane], 23, 0); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MUL_HI_U32_U24::Inst_VOP3__V_MUL_HI_U32_U24(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_mul_hi_u32_u24", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_MUL_HI_U32_U24 |
| |
| Inst_VOP3__V_MUL_HI_U32_U24::~Inst_VOP3__V_MUL_HI_U32_U24() |
| { |
| } // ~Inst_VOP3__V_MUL_HI_U32_U24 |
| |
| // D.i = (S0.u[23:0] * S1.u[23:0]) >> 32. |
| void |
| Inst_VOP3__V_MUL_HI_U32_U24::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0); |
| VecElemU64 tmp_src1 = (VecElemU64)bits(src1[lane], 23, 0); |
| vdst[lane] = (VecElemU32)((tmp_src0 * tmp_src1) >> 32); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MIN_F32::Inst_VOP3__V_MIN_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_min_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_MIN_F32 |
| |
| Inst_VOP3__V_MIN_F32::~Inst_VOP3__V_MIN_F32() |
| { |
| } // ~Inst_VOP3__V_MIN_F32 |
| |
| // D.f = (S0.f < S1.f ? S0.f : S1.f). |
| void |
| Inst_VOP3__V_MIN_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::fmin(src0[lane], src1[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MAX_F32::Inst_VOP3__V_MAX_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_max_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_MAX_F32 |
| |
| Inst_VOP3__V_MAX_F32::~Inst_VOP3__V_MAX_F32() |
| { |
| } // ~Inst_VOP3__V_MAX_F32 |
| |
| // D.f = (S0.f >= S1.f ? S0.f : S1.f). |
| void |
| Inst_VOP3__V_MAX_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::fmax(src0[lane], src1[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MIN_I32::Inst_VOP3__V_MIN_I32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_min_i32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_MIN_I32 |
| |
| Inst_VOP3__V_MIN_I32::~Inst_VOP3__V_MIN_I32() |
| { |
| } // ~Inst_VOP3__V_MIN_I32 |
| |
| // D.i = min(S0.i, S1.i). |
| void |
| Inst_VOP3__V_MIN_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); |
| VecOperandI32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::min(src0[lane], src1[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MAX_I32::Inst_VOP3__V_MAX_I32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_max_i32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_MAX_I32 |
| |
| Inst_VOP3__V_MAX_I32::~Inst_VOP3__V_MAX_I32() |
| { |
| } // ~Inst_VOP3__V_MAX_I32 |
| |
| // D.i = max(S0.i, S1.i). |
| void |
| Inst_VOP3__V_MAX_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); |
| VecOperandI32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::max(src0[lane], src1[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MIN_U32::Inst_VOP3__V_MIN_U32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_min_u32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_MIN_U32 |
| |
| Inst_VOP3__V_MIN_U32::~Inst_VOP3__V_MIN_U32() |
| { |
| } // ~Inst_VOP3__V_MIN_U32 |
| |
| // D.u = min(S0.u, S1.u). |
| void |
| Inst_VOP3__V_MIN_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::min(src0[lane], src1[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MAX_U32::Inst_VOP3__V_MAX_U32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_max_u32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_MAX_U32 |
| |
| Inst_VOP3__V_MAX_U32::~Inst_VOP3__V_MAX_U32() |
| { |
| } // ~Inst_VOP3__V_MAX_U32 |
| |
| // D.u = max(S0.u, S1.u). |
| void |
| Inst_VOP3__V_MAX_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::max(src0[lane], src1[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_LSHRREV_B32::Inst_VOP3__V_LSHRREV_B32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_lshrrev_b32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_LSHRREV_B32 |
| |
| Inst_VOP3__V_LSHRREV_B32::~Inst_VOP3__V_LSHRREV_B32() |
| { |
| } // ~Inst_VOP3__V_LSHRREV_B32 |
| |
| // D.u = S1.u >> S0.u[4:0]. |
| // The vacated bits are set to zero. |
| void |
| Inst_VOP3__V_LSHRREV_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_ASHRREV_I32::Inst_VOP3__V_ASHRREV_I32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_ashrrev_i32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_ASHRREV_I32 |
| |
| Inst_VOP3__V_ASHRREV_I32::~Inst_VOP3__V_ASHRREV_I32() |
| { |
| } // ~Inst_VOP3__V_ASHRREV_I32 |
| |
| // D.i = signext(S1.i) >> S0.i[4:0]. |
| // The vacated bits are set to the sign bit of the input value. |
| void |
| Inst_VOP3__V_ASHRREV_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); |
| VecOperandI32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_LSHLREV_B32::Inst_VOP3__V_LSHLREV_B32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_lshlrev_b32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_LSHLREV_B32 |
| |
| Inst_VOP3__V_LSHLREV_B32::~Inst_VOP3__V_LSHLREV_B32() |
| { |
| } // ~Inst_VOP3__V_LSHLREV_B32 |
| |
| // D.u = S1.u << S0.u[4:0]. |
| void |
| Inst_VOP3__V_LSHLREV_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src1[lane] << bits(src0[lane], 4, 0); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_AND_B32::Inst_VOP3__V_AND_B32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_and_b32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_AND_B32 |
| |
| Inst_VOP3__V_AND_B32::~Inst_VOP3__V_AND_B32() |
| { |
| } // ~Inst_VOP3__V_AND_B32 |
| |
| // D.u = S0.u & S1.u. |
| // Input and output modifiers not supported. |
| void |
| Inst_VOP3__V_AND_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src0[lane] & src1[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_OR_B32::Inst_VOP3__V_OR_B32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_or_b32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_OR_B32 |
| |
| Inst_VOP3__V_OR_B32::~Inst_VOP3__V_OR_B32() |
| { |
| } // ~Inst_VOP3__V_OR_B32 |
| |
| // D.u = S0.u | S1.u. |
| // Input and output modifiers not supported. |
| void |
| Inst_VOP3__V_OR_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src0[lane] | src1[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_XOR_B32::Inst_VOP3__V_XOR_B32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_xor_b32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_XOR_B32 |
| |
| Inst_VOP3__V_XOR_B32::~Inst_VOP3__V_XOR_B32() |
| { |
| } // ~Inst_VOP3__V_XOR_B32 |
| |
| // D.u = S0.u ^ S1.u. |
| // Input and output modifiers not supported. |
| void |
| Inst_VOP3__V_XOR_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src0[lane] ^ src1[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MAC_F32::Inst_VOP3__V_MAC_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_mac_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| setFlag(MAC); |
| } // Inst_VOP3__V_MAC_F32 |
| |
| Inst_VOP3__V_MAC_F32::~Inst_VOP3__V_MAC_F32() |
| { |
| } // ~Inst_VOP3__V_MAC_F32 |
| |
| // D.f = S0.f * S1.f + D.f. |
| void |
| Inst_VOP3__V_MAC_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| vdst.read(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_ADD_U32::Inst_VOP3__V_ADD_U32(InFmt_VOP3_SDST_ENC *iFmt) |
| : Inst_VOP3_SDST_ENC(iFmt, "v_add_u32") |
| { |
| setFlag(ALU); |
| setFlag(WritesVCC); |
| } // Inst_VOP3__V_ADD_U32 |
| |
| Inst_VOP3__V_ADD_U32::~Inst_VOP3__V_ADD_U32() |
| { |
| } // ~Inst_VOP3__V_ADD_U32 |
| |
| // D.u = S0.u + S1.u; |
| // VCC[threadId] = (S0.u + S1.u >= 0x800000000ULL ? 1 : 0) is an UNSIGNED |
| // overflow or carry-out. |
| // In VOP3 the VCC destination may be an arbitrary SGPR-pair. |
| void |
| Inst_VOP3__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| ScalarOperandU64 vcc(gpuDynInst, instData.SDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src0[lane] + src1[lane]; |
| vcc.setBit(lane, ((VecElemU64)src0[lane] |
| + (VecElemU64)src1[lane]) >= 0x100000000ULL ? 1 : 0); |
| } |
| } |
| |
| vdst.write(); |
| vcc.write(); |
| } |
| |
| Inst_VOP3__V_SUB_U32::Inst_VOP3__V_SUB_U32(InFmt_VOP3_SDST_ENC *iFmt) |
| : Inst_VOP3_SDST_ENC(iFmt, "v_sub_u32") |
| { |
| setFlag(ALU); |
| setFlag(WritesVCC); |
| } // Inst_VOP3__V_SUB_U32 |
| |
| Inst_VOP3__V_SUB_U32::~Inst_VOP3__V_SUB_U32() |
| { |
| } // ~Inst_VOP3__V_SUB_U32 |
| |
| // D.u = S0.u - S1.u; |
| // VCC[threadId] = (S1.u > S0.u ? 1 : 0) is an UNSIGNED overflow or |
| // carry-out. |
| // In VOP3 the VCC destination may be an arbitrary SGPR-pair. |
| void |
| Inst_VOP3__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| ScalarOperandU64 vcc(gpuDynInst, instData.SDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src0[lane] - src1[lane]; |
| vcc.setBit(lane, src1[lane] > src0[lane] ? 1 : 0); |
| } |
| } |
| |
| vdst.write(); |
| vcc.write(); |
| } |
| |
| Inst_VOP3__V_SUBREV_U32::Inst_VOP3__V_SUBREV_U32( |
| InFmt_VOP3_SDST_ENC *iFmt) |
| : Inst_VOP3_SDST_ENC(iFmt, "v_subrev_u32") |
| { |
| setFlag(ALU); |
| setFlag(WritesVCC); |
| } // Inst_VOP3__V_SUBREV_U32 |
| |
| Inst_VOP3__V_SUBREV_U32::~Inst_VOP3__V_SUBREV_U32() |
| { |
| } // ~Inst_VOP3__V_SUBREV_U32 |
| |
| // D.u = S1.u - S0.u; |
| // VCC[threadId] = (S0.u > S1.u ? 1 : 0) is an UNSIGNED overflow or |
| // carry-out. |
| // In VOP3 the VCC destination may be an arbitrary SGPR-pair. |
| void |
| Inst_VOP3__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| ScalarOperandU64 vcc(gpuDynInst, instData.SDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src1[lane] - src0[lane]; |
| vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); |
| } |
| } |
| |
| vdst.write(); |
| vcc.write(); |
| } |
| |
| Inst_VOP3__V_ADDC_U32::Inst_VOP3__V_ADDC_U32(InFmt_VOP3_SDST_ENC *iFmt) |
| : Inst_VOP3_SDST_ENC(iFmt, "v_addc_u32") |
| { |
| setFlag(ALU); |
| setFlag(WritesVCC); |
| setFlag(ReadsVCC); |
| } // Inst_VOP3__V_ADDC_U32 |
| |
| Inst_VOP3__V_ADDC_U32::~Inst_VOP3__V_ADDC_U32() |
| { |
| } // ~Inst_VOP3__V_ADDC_U32 |
| |
| // D.u = S0.u + S1.u + VCC[threadId]; |
| // VCC[threadId] = (S0.u + S1.u + VCC[threadId] >= 0x100000000ULL ? 1 : 0) |
| // is an UNSIGNED overflow. |
| // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC |
| // source comes from the SGPR-pair at S2.u. |
| void |
| Inst_VOP3__V_ADDC_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| vcc.read(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src0[lane] + src1[lane] |
| + bits(vcc.rawData(), lane); |
| sdst.setBit(lane, ((VecElemU64)src0[lane] |
| + (VecElemU64)src1[lane] |
| + (VecElemU64)bits(vcc.rawData(), lane)) |
| >= 0x100000000 ? 1 : 0); |
| } |
| } |
| |
| vdst.write(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_SUBB_U32::Inst_VOP3__V_SUBB_U32(InFmt_VOP3_SDST_ENC *iFmt) |
| : Inst_VOP3_SDST_ENC(iFmt, "v_subb_u32") |
| { |
| setFlag(ALU); |
| setFlag(WritesVCC); |
| setFlag(ReadsVCC); |
| } // Inst_VOP3__V_SUBB_U32 |
| |
| Inst_VOP3__V_SUBB_U32::~Inst_VOP3__V_SUBB_U32() |
| { |
| } // ~Inst_VOP3__V_SUBB_U32 |
| |
| // D.u = S0.u - S1.u - VCC[threadId]; |
| // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED |
| // overflow. |
| // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC |
| // source comes from the SGPR-pair at S2.u. |
| void |
| Inst_VOP3__V_SUBB_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2); |
| ScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| vcc.read(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src0[lane] - src1[lane] |
| - bits(vcc.rawData(), lane); |
| sdst.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane)) |
| > src0[lane] ? 1 : 0); |
| } |
| } |
| |
| vdst.write(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_SUBBREV_U32::Inst_VOP3__V_SUBBREV_U32( |
| InFmt_VOP3_SDST_ENC *iFmt) |
| : Inst_VOP3_SDST_ENC(iFmt, "v_subbrev_u32") |
| { |
| setFlag(ALU); |
| setFlag(WritesVCC); |
| setFlag(ReadsVCC); |
| } // Inst_VOP3__V_SUBBREV_U32 |
| |
| Inst_VOP3__V_SUBBREV_U32::~Inst_VOP3__V_SUBBREV_U32() |
| { |
| } // ~Inst_VOP3__V_SUBBREV_U32 |
| |
| // D.u = S1.u - S0.u - VCC[threadId]; |
| // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED |
| // overflow. |
| // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC |
| // source comes from the SGPR-pair at S2.u. |
| void |
| Inst_VOP3__V_SUBBREV_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstScalarOperandU64 sdst(gpuDynInst, instData.SDST); |
| ScalarOperandU64 vcc(gpuDynInst, extData.SRC2); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| vcc.read(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src1[lane] - src0[lane] |
| - bits(vcc.rawData(), lane); |
| sdst.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane)) |
| > src0[lane] ? 1 : 0); |
| } |
| } |
| |
| vdst.write(); |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_ADD_F16::Inst_VOP3__V_ADD_F16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_add_f16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_ADD_F16 |
| |
| Inst_VOP3__V_ADD_F16::~Inst_VOP3__V_ADD_F16() |
| { |
| } // ~Inst_VOP3__V_ADD_F16 |
| |
| // D.f16 = S0.f16 + S1.f16. |
| void |
| Inst_VOP3__V_ADD_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_SUB_F16::Inst_VOP3__V_SUB_F16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_sub_f16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_SUB_F16 |
| |
| Inst_VOP3__V_SUB_F16::~Inst_VOP3__V_SUB_F16() |
| { |
| } // ~Inst_VOP3__V_SUB_F16 |
| |
| // D.f16 = S0.f16 - S1.f16. |
| void |
| Inst_VOP3__V_SUB_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_SUBREV_F16::Inst_VOP3__V_SUBREV_F16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_subrev_f16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_SUBREV_F16 |
| |
| Inst_VOP3__V_SUBREV_F16::~Inst_VOP3__V_SUBREV_F16() |
| { |
| } // ~Inst_VOP3__V_SUBREV_F16 |
| |
| // D.f16 = S1.f16 - S0.f16. |
| void |
| Inst_VOP3__V_SUBREV_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_MUL_F16::Inst_VOP3__V_MUL_F16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_mul_f16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_MUL_F16 |
| |
| Inst_VOP3__V_MUL_F16::~Inst_VOP3__V_MUL_F16() |
| { |
| } // ~Inst_VOP3__V_MUL_F16 |
| |
| // D.f16 = S0.f16 * S1.f16. |
| void |
| Inst_VOP3__V_MUL_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_MAC_F16::Inst_VOP3__V_MAC_F16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_mac_f16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| setFlag(MAC); |
| } // Inst_VOP3__V_MAC_F16 |
| |
| Inst_VOP3__V_MAC_F16::~Inst_VOP3__V_MAC_F16() |
| { |
| } // ~Inst_VOP3__V_MAC_F16 |
| |
| // D.f16 = S0.f16 * S1.f16 + D.f16. |
| void |
| Inst_VOP3__V_MAC_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_ADD_U16::Inst_VOP3__V_ADD_U16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_add_u16", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_ADD_U16 |
| |
| Inst_VOP3__V_ADD_U16::~Inst_VOP3__V_ADD_U16() |
| { |
| } // ~Inst_VOP3__V_ADD_U16 |
| |
| // D.u16 = S0.u16 + S1.u16. |
| void |
| Inst_VOP3__V_ADD_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); |
| VecOperandU16 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src0[lane] + src1[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_SUB_U16::Inst_VOP3__V_SUB_U16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_sub_u16", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_SUB_U16 |
| |
| Inst_VOP3__V_SUB_U16::~Inst_VOP3__V_SUB_U16() |
| { |
| } // ~Inst_VOP3__V_SUB_U16 |
| |
| // D.u16 = S0.u16 - S1.u16. |
| void |
| Inst_VOP3__V_SUB_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); |
| VecOperandU16 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src0[lane] - src1[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_SUBREV_U16::Inst_VOP3__V_SUBREV_U16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_subrev_u16", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_SUBREV_U16 |
| |
| Inst_VOP3__V_SUBREV_U16::~Inst_VOP3__V_SUBREV_U16() |
| { |
| } // ~Inst_VOP3__V_SUBREV_U16 |
| |
| // D.u16 = S1.u16 - S0.u16. |
| void |
| Inst_VOP3__V_SUBREV_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); |
| VecOperandU16 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src1[lane] - src0[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MUL_LO_U16::Inst_VOP3__V_MUL_LO_U16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_mul_lo_u16", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_MUL_LO_U16 |
| |
| Inst_VOP3__V_MUL_LO_U16::~Inst_VOP3__V_MUL_LO_U16() |
| { |
| } // ~Inst_VOP3__V_MUL_LO_U16 |
| |
| // D.u16 = S0.u16 * S1.u16. |
| void |
| Inst_VOP3__V_MUL_LO_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); |
| VecOperandU16 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src0[lane] * src1[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_LSHLREV_B16::Inst_VOP3__V_LSHLREV_B16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_lshlrev_b16", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_LSHLREV_B16 |
| |
| Inst_VOP3__V_LSHLREV_B16::~Inst_VOP3__V_LSHLREV_B16() |
| { |
| } // ~Inst_VOP3__V_LSHLREV_B16 |
| |
| // D.u[15:0] = S1.u[15:0] << S0.u[3:0]. |
| void |
| Inst_VOP3__V_LSHLREV_B16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); |
| VecOperandU16 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src1[lane] << bits(src0[lane], 3, 0); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_LSHRREV_B16::Inst_VOP3__V_LSHRREV_B16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_lshrrev_b16", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_LSHRREV_B16 |
| |
| Inst_VOP3__V_LSHRREV_B16::~Inst_VOP3__V_LSHRREV_B16() |
| { |
| } // ~Inst_VOP3__V_LSHRREV_B16 |
| |
| // D.u[15:0] = S1.u[15:0] >> S0.u[3:0]. |
| // The vacated bits are set to zero. |
| void |
| Inst_VOP3__V_LSHRREV_B16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); |
| VecOperandU16 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_ASHRREV_I16::Inst_VOP3__V_ASHRREV_I16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_ashrrev_i16", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_ASHRREV_I16 |
| |
| Inst_VOP3__V_ASHRREV_I16::~Inst_VOP3__V_ASHRREV_I16() |
| { |
| } // ~Inst_VOP3__V_ASHRREV_I16 |
| |
| // D.i[15:0] = signext(S1.i[15:0]) >> S0.i[3:0]. |
| // The vacated bits are set to the sign bit of the input value. |
| void |
| Inst_VOP3__V_ASHRREV_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); |
| VecOperandI16 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MAX_F16::Inst_VOP3__V_MAX_F16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_max_f16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_MAX_F16 |
| |
| Inst_VOP3__V_MAX_F16::~Inst_VOP3__V_MAX_F16() |
| { |
| } // ~Inst_VOP3__V_MAX_F16 |
| |
| // D.f16 = max(S0.f16, S1.f16). |
| void |
| Inst_VOP3__V_MAX_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_MIN_F16::Inst_VOP3__V_MIN_F16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_min_f16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_MIN_F16 |
| |
| Inst_VOP3__V_MIN_F16::~Inst_VOP3__V_MIN_F16() |
| { |
| } // ~Inst_VOP3__V_MIN_F16 |
| |
| // D.f16 = min(S0.f16, S1.f16). |
| void |
| Inst_VOP3__V_MIN_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_MAX_U16::Inst_VOP3__V_MAX_U16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_max_u16", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_MAX_U16 |
| |
| Inst_VOP3__V_MAX_U16::~Inst_VOP3__V_MAX_U16() |
| { |
| } // ~Inst_VOP3__V_MAX_U16 |
| |
| // D.u[15:0] = max(S0.u[15:0], S1.u[15:0]). |
| void |
| Inst_VOP3__V_MAX_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); |
| VecOperandU16 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::max(src0[lane], src1[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MAX_I16::Inst_VOP3__V_MAX_I16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_max_i16", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_MAX_I16 |
| |
| Inst_VOP3__V_MAX_I16::~Inst_VOP3__V_MAX_I16() |
| { |
| } // ~Inst_VOP3__V_MAX_I16 |
| |
| // D.i[15:0] = max(S0.i[15:0], S1.i[15:0]). |
| void |
| Inst_VOP3__V_MAX_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); |
| VecOperandI16 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::max(src0[lane], src1[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MIN_U16::Inst_VOP3__V_MIN_U16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_min_u16", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_MIN_U16 |
| |
| Inst_VOP3__V_MIN_U16::~Inst_VOP3__V_MIN_U16() |
| { |
| } // ~Inst_VOP3__V_MIN_U16 |
| |
| // D.u[15:0] = min(S0.u[15:0], S1.u[15:0]). |
| void |
| Inst_VOP3__V_MIN_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); |
| VecOperandU16 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::min(src0[lane], src1[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MIN_I16::Inst_VOP3__V_MIN_I16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_min_i16", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_MIN_I16 |
| |
| Inst_VOP3__V_MIN_I16::~Inst_VOP3__V_MIN_I16() |
| { |
| } // ~Inst_VOP3__V_MIN_I16 |
| |
| // D.i[15:0] = min(S0.i[15:0], S1.i[15:0]). |
| void |
| Inst_VOP3__V_MIN_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); |
| VecOperandI16 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::min(src0[lane], src1[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_LDEXP_F16::Inst_VOP3__V_LDEXP_F16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_ldexp_f16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_LDEXP_F16 |
| |
| Inst_VOP3__V_LDEXP_F16::~Inst_VOP3__V_LDEXP_F16() |
| { |
| } // ~Inst_VOP3__V_LDEXP_F16 |
| |
| // D.f16 = S0.f16 * (2 ** S1.i16). |
| void |
| Inst_VOP3__V_LDEXP_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_NOP::Inst_VOP3__V_NOP(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_nop", false) |
| { |
| setFlag(Nop); |
| setFlag(ALU); |
| } // Inst_VOP3__V_NOP |
| |
| Inst_VOP3__V_NOP::~Inst_VOP3__V_NOP() |
| { |
| } // ~Inst_VOP3__V_NOP |
| |
| // Do nothing. |
| void |
| Inst_VOP3__V_NOP::execute(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_VOP3__V_MOV_B32::Inst_VOP3__V_MOV_B32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_mov_b32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_MOV_B32 |
| |
| Inst_VOP3__V_MOV_B32::~Inst_VOP3__V_MOV_B32() |
| { |
| } // ~Inst_VOP3__V_MOV_B32 |
| |
| // D.u = S0.u. |
| // Input and output modifiers not supported; this is an untyped operation. |
| void |
| Inst_VOP3__V_MOV_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src(gpuDynInst, extData.SRC0); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_CVT_I32_F64::Inst_VOP3__V_CVT_I32_F64(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cvt_i32_f64", false) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CVT_I32_F64 |
| |
| Inst_VOP3__V_CVT_I32_F64::~Inst_VOP3__V_CVT_I32_F64() |
| { |
| } // ~Inst_VOP3__V_CVT_I32_F64 |
| |
| // D.i = (int)S0.d. |
| // Out-of-range floating point values (including infinity) saturate. NaN |
| // is converted to 0. |
| void |
| Inst_VOP3__V_CVT_I32_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src(gpuDynInst, extData.SRC0); |
| VecOperandI32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| int exp; |
| std::frexp(src[lane],&exp); |
| if (std::isnan(src[lane])) { |
| vdst[lane] = 0; |
| } else if (std::isinf(src[lane]) || exp > 30) { |
| if (std::signbit(src[lane])) { |
| vdst[lane] = INT_MIN; |
| } else { |
| vdst[lane] = INT_MAX; |
| } |
| } else { |
| vdst[lane] = (VecElemI32)src[lane]; |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_CVT_F64_I32::Inst_VOP3__V_CVT_F64_I32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cvt_f64_i32", false) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CVT_F64_I32 |
| |
| Inst_VOP3__V_CVT_F64_I32::~Inst_VOP3__V_CVT_F64_I32() |
| { |
| } // ~Inst_VOP3__V_CVT_F64_I32 |
| |
| // D.d = (double)S0.i. |
| void |
| Inst_VOP3__V_CVT_F64_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src(gpuDynInst, extData.SRC0); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = (VecElemF64)src[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_CVT_F32_I32::Inst_VOP3__V_CVT_F32_I32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cvt_f32_i32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CVT_F32_I32 |
| |
| Inst_VOP3__V_CVT_F32_I32::~Inst_VOP3__V_CVT_F32_I32() |
| { |
| } // ~Inst_VOP3__V_CVT_F32_I32 |
| |
| // D.f = (float)S0.i. |
| void |
| Inst_VOP3__V_CVT_F32_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| VecOperandI32 src(gpuDynInst, extData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = (VecElemF32)src[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_CVT_F32_U32::Inst_VOP3__V_CVT_F32_U32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cvt_f32_u32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CVT_F32_U32 |
| |
| Inst_VOP3__V_CVT_F32_U32::~Inst_VOP3__V_CVT_F32_U32() |
| { |
| } // ~Inst_VOP3__V_CVT_F32_U32 |
| |
| // D.f = (float)S0.u. |
| void |
| Inst_VOP3__V_CVT_F32_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src(gpuDynInst, extData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = (VecElemF32)src[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_CVT_U32_F32::Inst_VOP3__V_CVT_U32_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cvt_u32_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CVT_U32_F32 |
| |
| Inst_VOP3__V_CVT_U32_F32::~Inst_VOP3__V_CVT_U32_F32() |
| { |
| } // ~Inst_VOP3__V_CVT_U32_F32 |
| |
| // D.u = (unsigned)S0.f. |
| // Out-of-range floating point values (including infinity) saturate. NaN |
| // is converted to 0. |
| void |
| Inst_VOP3__V_CVT_U32_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, extData.SRC0); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| int exp; |
| std::frexp(src[lane],&exp); |
| if (std::isnan(src[lane])) { |
| vdst[lane] = 0; |
| } else if (std::isinf(src[lane])) { |
| if (std::signbit(src[lane])) { |
| vdst[lane] = 0; |
| } else { |
| vdst[lane] = UINT_MAX; |
| } |
| } else if (exp > 31) { |
| vdst[lane] = UINT_MAX; |
| } else { |
| vdst[lane] = (VecElemU32)src[lane]; |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_CVT_I32_F32::Inst_VOP3__V_CVT_I32_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cvt_i32_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CVT_I32_F32 |
| |
| Inst_VOP3__V_CVT_I32_F32::~Inst_VOP3__V_CVT_I32_F32() |
| { |
| } // ~Inst_VOP3__V_CVT_I32_F32 |
| |
| // D.i = (int)S0.f. |
| // Out-of-range floating point values (including infinity) saturate. NaN |
| // is converted to 0. |
| void |
| Inst_VOP3__V_CVT_I32_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, extData.SRC0); |
| VecOperandI32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| int exp; |
| std::frexp(src[lane],&exp); |
| if (std::isnan(src[lane])) { |
| vdst[lane] = 0; |
| } else if (std::isinf(src[lane]) || exp > 30) { |
| if (std::signbit(src[lane])) { |
| vdst[lane] = INT_MIN; |
| } else { |
| vdst[lane] = INT_MAX; |
| } |
| } else { |
| vdst[lane] = (VecElemI32)src[lane]; |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MOV_FED_B32::Inst_VOP3__V_MOV_FED_B32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_mov_fed_b32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_MOV_FED_B32 |
| |
| Inst_VOP3__V_MOV_FED_B32::~Inst_VOP3__V_MOV_FED_B32() |
| { |
| } // ~Inst_VOP3__V_MOV_FED_B32 |
| |
| // D.u = S0.u; |
| // Input and output modifiers not supported; this is an untyped operation. |
| void |
| Inst_VOP3__V_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CVT_F16_F32::Inst_VOP3__V_CVT_F16_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cvt_f16_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CVT_F16_F32 |
| |
| Inst_VOP3__V_CVT_F16_F32::~Inst_VOP3__V_CVT_F16_F32() |
| { |
| } // ~Inst_VOP3__V_CVT_F16_F32 |
| |
| // D.f16 = flt32_to_flt16(S0.f). |
| void |
| Inst_VOP3__V_CVT_F16_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CVT_F32_F16::Inst_VOP3__V_CVT_F32_F16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cvt_f32_f16", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CVT_F32_F16 |
| |
| Inst_VOP3__V_CVT_F32_F16::~Inst_VOP3__V_CVT_F32_F16() |
| { |
| } // ~Inst_VOP3__V_CVT_F32_F16 |
| |
| // D.f = flt16_to_flt32(S0.f16). |
| void |
| Inst_VOP3__V_CVT_F32_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CVT_RPI_I32_F32::Inst_VOP3__V_CVT_RPI_I32_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cvt_rpi_i32_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CVT_RPI_I32_F32 |
| |
| Inst_VOP3__V_CVT_RPI_I32_F32::~Inst_VOP3__V_CVT_RPI_I32_F32() |
| { |
| } // ~Inst_VOP3__V_CVT_RPI_I32_F32 |
| |
| // D.i = (int)floor(S0.f + 0.5). |
| void |
| Inst_VOP3__V_CVT_RPI_I32_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, extData.SRC0); |
| VecOperandI32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_CVT_FLR_I32_F32::Inst_VOP3__V_CVT_FLR_I32_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cvt_flr_i32_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CVT_FLR_I32_F32 |
| |
| Inst_VOP3__V_CVT_FLR_I32_F32::~Inst_VOP3__V_CVT_FLR_I32_F32() |
| { |
| } // ~Inst_VOP3__V_CVT_FLR_I32_F32 |
| |
| // D.i = (int)floor(S0.f). |
| void |
| Inst_VOP3__V_CVT_FLR_I32_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, extData.SRC0); |
| VecOperandI32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = (VecElemI32)std::floor(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_CVT_OFF_F32_I4::Inst_VOP3__V_CVT_OFF_F32_I4(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cvt_off_f32_i4", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CVT_OFF_F32_I4 |
| |
| Inst_VOP3__V_CVT_OFF_F32_I4::~Inst_VOP3__V_CVT_OFF_F32_I4() |
| { |
| } // ~Inst_VOP3__V_CVT_OFF_F32_I4 |
| |
| // 4-bit signed int to 32-bit float. |
| void |
| Inst_VOP3__V_CVT_OFF_F32_I4::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CVT_F32_F64::Inst_VOP3__V_CVT_F32_F64(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cvt_f32_f64", false) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CVT_F32_F64 |
| |
| Inst_VOP3__V_CVT_F32_F64::~Inst_VOP3__V_CVT_F32_F64() |
| { |
| } // ~Inst_VOP3__V_CVT_F32_F64 |
| |
| // D.f = (float)S0.d. |
| void |
| Inst_VOP3__V_CVT_F32_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src(gpuDynInst, extData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = (VecElemF32)src[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_CVT_F64_F32::Inst_VOP3__V_CVT_F64_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cvt_f64_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CVT_F64_F32 |
| |
| Inst_VOP3__V_CVT_F64_F32::~Inst_VOP3__V_CVT_F64_F32() |
| { |
| } // ~Inst_VOP3__V_CVT_F64_F32 |
| |
| // D.d = (double)S0.f. |
| void |
| Inst_VOP3__V_CVT_F64_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, extData.SRC0); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = (VecElemF64)src[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_CVT_F32_UBYTE0::Inst_VOP3__V_CVT_F32_UBYTE0(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cvt_f32_ubyte0", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CVT_F32_UBYTE0 |
| |
| Inst_VOP3__V_CVT_F32_UBYTE0::~Inst_VOP3__V_CVT_F32_UBYTE0() |
| { |
| } // ~Inst_VOP3__V_CVT_F32_UBYTE0 |
| |
| // D.f = (float)(S0.u[7:0]). |
| void |
| Inst_VOP3__V_CVT_F32_UBYTE0::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src(gpuDynInst, extData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = (VecElemF32)bits(src[lane], 7, 0); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_CVT_F32_UBYTE1::Inst_VOP3__V_CVT_F32_UBYTE1(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cvt_f32_ubyte1", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CVT_F32_UBYTE1 |
| |
| Inst_VOP3__V_CVT_F32_UBYTE1::~Inst_VOP3__V_CVT_F32_UBYTE1() |
| { |
| } // ~Inst_VOP3__V_CVT_F32_UBYTE1 |
| |
| // D.f = (float)(S0.u[15:8]). |
| void |
| Inst_VOP3__V_CVT_F32_UBYTE1::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src(gpuDynInst, extData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = (VecElemF32)bits(src[lane], 15, 8); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_CVT_F32_UBYTE2::Inst_VOP3__V_CVT_F32_UBYTE2(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cvt_f32_ubyte2", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CVT_F32_UBYTE2 |
| |
| Inst_VOP3__V_CVT_F32_UBYTE2::~Inst_VOP3__V_CVT_F32_UBYTE2() |
| { |
| } // ~Inst_VOP3__V_CVT_F32_UBYTE2 |
| |
| // D.f = (float)(S0.u[23:16]). |
| void |
| Inst_VOP3__V_CVT_F32_UBYTE2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src(gpuDynInst, extData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = (VecElemF32)bits(src[lane], 23, 16); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_CVT_F32_UBYTE3::Inst_VOP3__V_CVT_F32_UBYTE3(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cvt_f32_ubyte3", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CVT_F32_UBYTE3 |
| |
| Inst_VOP3__V_CVT_F32_UBYTE3::~Inst_VOP3__V_CVT_F32_UBYTE3() |
| { |
| } // ~Inst_VOP3__V_CVT_F32_UBYTE3 |
| |
| // D.f = (float)(S0.u[31:24]). |
| void |
| Inst_VOP3__V_CVT_F32_UBYTE3::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src(gpuDynInst, extData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = (VecElemF32)bits(src[lane], 31, 24); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_CVT_U32_F64::Inst_VOP3__V_CVT_U32_F64(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cvt_u32_f64", false) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CVT_U32_F64 |
| |
| Inst_VOP3__V_CVT_U32_F64::~Inst_VOP3__V_CVT_U32_F64() |
| { |
| } // ~Inst_VOP3__V_CVT_U32_F64 |
| |
| // D.u = (unsigned)S0.d. |
| // Out-of-range floating point values (including infinity) saturate. NaN |
| // is converted to 0. |
| void |
| Inst_VOP3__V_CVT_U32_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src(gpuDynInst, extData.SRC0); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| int exp; |
| std::frexp(src[lane],&exp); |
| if (std::isnan(src[lane])) { |
| vdst[lane] = 0; |
| } else if (std::isinf(src[lane])) { |
| if (std::signbit(src[lane])) { |
| vdst[lane] = 0; |
| } else { |
| vdst[lane] = UINT_MAX; |
| } |
| } else if (exp > 31) { |
| vdst[lane] = UINT_MAX; |
| } else { |
| vdst[lane] = (VecElemU32)src[lane]; |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_CVT_F64_U32::Inst_VOP3__V_CVT_F64_U32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cvt_f64_u32", false) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CVT_F64_U32 |
| |
| Inst_VOP3__V_CVT_F64_U32::~Inst_VOP3__V_CVT_F64_U32() |
| { |
| } // ~Inst_VOP3__V_CVT_F64_U32 |
| |
| // D.d = (double)S0.u. |
| void |
| Inst_VOP3__V_CVT_F64_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src(gpuDynInst, extData.SRC0); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = (VecElemF64)src[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_TRUNC_F64::Inst_VOP3__V_TRUNC_F64(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_trunc_f64", false) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_TRUNC_F64 |
| |
| Inst_VOP3__V_TRUNC_F64::~Inst_VOP3__V_TRUNC_F64() |
| { |
| } // ~Inst_VOP3__V_TRUNC_F64 |
| |
| // D.d = trunc(S0.d), return integer part of S0.d. |
| void |
| Inst_VOP3__V_TRUNC_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src(gpuDynInst, extData.SRC0); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::trunc(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_CEIL_F64::Inst_VOP3__V_CEIL_F64(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_ceil_f64", false) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_CEIL_F64 |
| |
| Inst_VOP3__V_CEIL_F64::~Inst_VOP3__V_CEIL_F64() |
| { |
| } // ~Inst_VOP3__V_CEIL_F64 |
| |
| // D.d = ceil(S0.d); |
| void |
| Inst_VOP3__V_CEIL_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src(gpuDynInst, extData.SRC0); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::ceil(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_RNDNE_F64::Inst_VOP3__V_RNDNE_F64(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_rndne_f64", false) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_RNDNE_F64 |
| |
| Inst_VOP3__V_RNDNE_F64::~Inst_VOP3__V_RNDNE_F64() |
| { |
| } // ~Inst_VOP3__V_RNDNE_F64 |
| |
| // D.d = round_nearest_even(S0.d). |
| void |
| Inst_VOP3__V_RNDNE_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src(gpuDynInst, extData.SRC0); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = roundNearestEven(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_FLOOR_F64::Inst_VOP3__V_FLOOR_F64(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_floor_f64", false) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_FLOOR_F64 |
| |
| Inst_VOP3__V_FLOOR_F64::~Inst_VOP3__V_FLOOR_F64() |
| { |
| } // ~Inst_VOP3__V_FLOOR_F64 |
| |
| // D.d = floor(S0.d); |
| void |
| Inst_VOP3__V_FLOOR_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src(gpuDynInst, extData.SRC0); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::floor(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_FRACT_F32::Inst_VOP3__V_FRACT_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_fract_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_FRACT_F32 |
| |
| Inst_VOP3__V_FRACT_F32::~Inst_VOP3__V_FRACT_F32() |
| { |
| } // ~Inst_VOP3__V_FRACT_F32 |
| |
| // D.f = modf(S0.f). |
| void |
| Inst_VOP3__V_FRACT_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, extData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| VecElemF32 int_part(0.0); |
| vdst[lane] = std::modf(src[lane], &int_part); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_TRUNC_F32::Inst_VOP3__V_TRUNC_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_trunc_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_TRUNC_F32 |
| |
| Inst_VOP3__V_TRUNC_F32::~Inst_VOP3__V_TRUNC_F32() |
| { |
| } // ~Inst_VOP3__V_TRUNC_F32 |
| |
| // D.f = trunc(S0.f), return integer part of S0.f. |
| void |
| Inst_VOP3__V_TRUNC_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, extData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::trunc(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_CEIL_F32::Inst_VOP3__V_CEIL_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_ceil_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CEIL_F32 |
| |
| Inst_VOP3__V_CEIL_F32::~Inst_VOP3__V_CEIL_F32() |
| { |
| } // ~Inst_VOP3__V_CEIL_F32 |
| |
| // D.f = ceil(S0.f); |
| void |
| Inst_VOP3__V_CEIL_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, extData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::ceil(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_RNDNE_F32::Inst_VOP3__V_RNDNE_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_rndne_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_RNDNE_F32 |
| |
| Inst_VOP3__V_RNDNE_F32::~Inst_VOP3__V_RNDNE_F32() |
| { |
| } // ~Inst_VOP3__V_RNDNE_F32 |
| |
| // D.f = round_nearest_even(S0.f). |
| void |
| Inst_VOP3__V_RNDNE_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, extData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = roundNearestEven(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_FLOOR_F32::Inst_VOP3__V_FLOOR_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_floor_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_FLOOR_F32 |
| |
| Inst_VOP3__V_FLOOR_F32::~Inst_VOP3__V_FLOOR_F32() |
| { |
| } // ~Inst_VOP3__V_FLOOR_F32 |
| |
| // D.f = floor(S0.f); |
| void |
| Inst_VOP3__V_FLOOR_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, extData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::floor(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_EXP_F32::Inst_VOP3__V_EXP_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_exp_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_EXP_F32 |
| |
| Inst_VOP3__V_EXP_F32::~Inst_VOP3__V_EXP_F32() |
| { |
| } // ~Inst_VOP3__V_EXP_F32 |
| |
| // D.f = pow(2.0, S0.f). |
| void |
| Inst_VOP3__V_EXP_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, extData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::pow(2.0, src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_LOG_F32::Inst_VOP3__V_LOG_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_log_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_LOG_F32 |
| |
| Inst_VOP3__V_LOG_F32::~Inst_VOP3__V_LOG_F32() |
| { |
| } // ~Inst_VOP3__V_LOG_F32 |
| |
| // D.f = log2(S0.f). |
| void |
| Inst_VOP3__V_LOG_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, extData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::log2(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_RCP_F32::Inst_VOP3__V_RCP_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_rcp_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_RCP_F32 |
| |
| Inst_VOP3__V_RCP_F32::~Inst_VOP3__V_RCP_F32() |
| { |
| } // ~Inst_VOP3__V_RCP_F32 |
| |
| // D.f = 1.0 / S0.f. |
| void |
| Inst_VOP3__V_RCP_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, extData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = 1.0 / src[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_RCP_IFLAG_F32::Inst_VOP3__V_RCP_IFLAG_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_rcp_iflag_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_RCP_IFLAG_F32 |
| |
| Inst_VOP3__V_RCP_IFLAG_F32::~Inst_VOP3__V_RCP_IFLAG_F32() |
| { |
| } // ~Inst_VOP3__V_RCP_IFLAG_F32 |
| |
| // D.f = 1.0 / S0.f. |
| void |
| Inst_VOP3__V_RCP_IFLAG_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, extData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = 1.0 / src[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_RSQ_F32::Inst_VOP3__V_RSQ_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_rsq_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_RSQ_F32 |
| |
| Inst_VOP3__V_RSQ_F32::~Inst_VOP3__V_RSQ_F32() |
| { |
| } // ~Inst_VOP3__V_RSQ_F32 |
| |
| // D.f = 1.0 / sqrt(S0.f). |
| void |
| Inst_VOP3__V_RSQ_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, extData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = 1.0 / std::sqrt(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_RCP_F64::Inst_VOP3__V_RCP_F64(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_rcp_f64", false) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_RCP_F64 |
| |
| Inst_VOP3__V_RCP_F64::~Inst_VOP3__V_RCP_F64() |
| { |
| } // ~Inst_VOP3__V_RCP_F64 |
| |
| // D.d = 1.0 / S0.d. |
| void |
| Inst_VOP3__V_RCP_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src(gpuDynInst, extData.SRC0); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| if (std::fpclassify(src[lane]) == FP_ZERO) { |
| vdst[lane] = +INFINITY; |
| } else if (std::isnan(src[lane])) { |
| vdst[lane] = NAN; |
| } else if (std::isinf(src[lane])) { |
| if (std::signbit(src[lane])) { |
| vdst[lane] = -0.0; |
| } else { |
| vdst[lane] = 0.0; |
| } |
| } else { |
| vdst[lane] = 1.0 / src[lane]; |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_RSQ_F64::Inst_VOP3__V_RSQ_F64(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_rsq_f64", false) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_RSQ_F64 |
| |
| Inst_VOP3__V_RSQ_F64::~Inst_VOP3__V_RSQ_F64() |
| { |
| } // ~Inst_VOP3__V_RSQ_F64 |
| |
| // D.d = 1.0 / sqrt(S0.d). |
| void |
| Inst_VOP3__V_RSQ_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src(gpuDynInst, extData.SRC0); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| if (std::fpclassify(src[lane]) == FP_ZERO) { |
| vdst[lane] = +INFINITY; |
| } else if (std::isnan(src[lane])) { |
| vdst[lane] = NAN; |
| } else if (std::isinf(src[lane]) && !std::signbit(src[lane])) { |
| vdst[lane] = 0.0; |
| } else if (std::signbit(src[lane])) { |
| vdst[lane] = NAN; |
| } else { |
| vdst[lane] = 1.0 / std::sqrt(src[lane]); |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_SQRT_F32::Inst_VOP3__V_SQRT_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_sqrt_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_SQRT_F32 |
| |
| Inst_VOP3__V_SQRT_F32::~Inst_VOP3__V_SQRT_F32() |
| { |
| } // ~Inst_VOP3__V_SQRT_F32 |
| |
| // D.f = sqrt(S0.f). |
| void |
| Inst_VOP3__V_SQRT_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, extData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::sqrt(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_SQRT_F64::Inst_VOP3__V_SQRT_F64(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_sqrt_f64", false) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_SQRT_F64 |
| |
| Inst_VOP3__V_SQRT_F64::~Inst_VOP3__V_SQRT_F64() |
| { |
| } // ~Inst_VOP3__V_SQRT_F64 |
| |
| // D.d = sqrt(S0.d). |
| void |
| Inst_VOP3__V_SQRT_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src(gpuDynInst, extData.SRC0); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::sqrt(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_SIN_F32::Inst_VOP3__V_SIN_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_sin_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_SIN_F32 |
| |
| Inst_VOP3__V_SIN_F32::~Inst_VOP3__V_SIN_F32() |
| { |
| } // ~Inst_VOP3__V_SIN_F32 |
| |
| // D.f = sin(S0.f * 2 * PI). |
| void |
| Inst_VOP3__V_SIN_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, extData.SRC0); |
| ConstScalarOperandF32 pi(gpuDynInst, REG_PI); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| pi.read(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::sin(src[lane] * 2 * pi.rawData()); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_COS_F32::Inst_VOP3__V_COS_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cos_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_COS_F32 |
| |
| Inst_VOP3__V_COS_F32::~Inst_VOP3__V_COS_F32() |
| { |
| } // ~Inst_VOP3__V_COS_F32 |
| |
| // D.f = cos(S0.f * 2 * PI). |
| void |
| Inst_VOP3__V_COS_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, extData.SRC0); |
| ConstScalarOperandF32 pi(gpuDynInst, REG_PI); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| pi.read(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::cos(src[lane] * 2 * pi.rawData()); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_NOT_B32::Inst_VOP3__V_NOT_B32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_not_b32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_NOT_B32 |
| |
| Inst_VOP3__V_NOT_B32::~Inst_VOP3__V_NOT_B32() |
| { |
| } // ~Inst_VOP3__V_NOT_B32 |
| |
| // D.u = ~S0.u. |
| // Input and output modifiers not supported. |
| void |
| Inst_VOP3__V_NOT_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src(gpuDynInst, extData.SRC0); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = ~src[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_BFREV_B32::Inst_VOP3__V_BFREV_B32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_bfrev_b32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_BFREV_B32 |
| |
| Inst_VOP3__V_BFREV_B32::~Inst_VOP3__V_BFREV_B32() |
| { |
| } // ~Inst_VOP3__V_BFREV_B32 |
| |
| // D.u[31:0] = S0.u[0:31], bitfield reverse. |
| // Input and output modifiers not supported. |
| void |
| Inst_VOP3__V_BFREV_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src(gpuDynInst, extData.SRC0); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = reverseBits(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_FFBH_U32::Inst_VOP3__V_FFBH_U32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_ffbh_u32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_FFBH_U32 |
| |
| Inst_VOP3__V_FFBH_U32::~Inst_VOP3__V_FFBH_U32() |
| { |
| } // ~Inst_VOP3__V_FFBH_U32 |
| |
| // D.u = position of first 1 in S0.u from MSB; |
| // D.u = 0xffffffff if S0.u == 0. |
| void |
| Inst_VOP3__V_FFBH_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src(gpuDynInst, extData.SRC0); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = findFirstOneMsb(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_FFBL_B32::Inst_VOP3__V_FFBL_B32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_ffbl_b32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_FFBL_B32 |
| |
| Inst_VOP3__V_FFBL_B32::~Inst_VOP3__V_FFBL_B32() |
| { |
| } // ~Inst_VOP3__V_FFBL_B32 |
| |
| // D.u = position of first 1 in S0.u from LSB; |
| // D.u = 0xffffffff if S0.u == 0. |
| void |
| Inst_VOP3__V_FFBL_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src(gpuDynInst, extData.SRC0); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = findFirstOne(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_FFBH_I32::Inst_VOP3__V_FFBH_I32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_ffbh_i32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_FFBH_I32 |
| |
| Inst_VOP3__V_FFBH_I32::~Inst_VOP3__V_FFBH_I32() |
| { |
| } // ~Inst_VOP3__V_FFBH_I32 |
| |
| // D.u = position of first bit different from sign bit in S0.i from MSB; |
| // D.u = 0xffffffff if S0.i == 0 or S0.i == 0xffffffff. |
| void |
| Inst_VOP3__V_FFBH_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src(gpuDynInst, extData.SRC0); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = firstOppositeSignBit(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_FREXP_EXP_I32_F64::Inst_VOP3__V_FREXP_EXP_I32_F64( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_frexp_exp_i32_f64", false) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_FREXP_EXP_I32_F64 |
| |
| Inst_VOP3__V_FREXP_EXP_I32_F64::~Inst_VOP3__V_FREXP_EXP_I32_F64() |
| { |
| } // ~Inst_VOP3__V_FREXP_EXP_I32_F64 |
| |
| // See V_FREXP_EXP_I32_F32. |
| void |
| Inst_VOP3__V_FREXP_EXP_I32_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src(gpuDynInst, extData.SRC0); |
| VecOperandI32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| if (std::isinf(src[lane]) || std::isnan(src[lane])) { |
| vdst[lane] = 0; |
| } else { |
| VecElemI32 exp(0); |
| std::frexp(src[lane], &exp); |
| vdst[lane] = exp; |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_FREXP_MANT_F64::Inst_VOP3__V_FREXP_MANT_F64(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_frexp_mant_f64", false) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_FREXP_MANT_F64 |
| |
| Inst_VOP3__V_FREXP_MANT_F64::~Inst_VOP3__V_FREXP_MANT_F64() |
| { |
| } // ~Inst_VOP3__V_FREXP_MANT_F64 |
| |
| void |
| Inst_VOP3__V_FREXP_MANT_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src(gpuDynInst, extData.SRC0); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| VecElemI32 exp(0); |
| vdst[lane] = std::frexp(src[lane], &exp); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_FRACT_F64::Inst_VOP3__V_FRACT_F64(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_fract_f64", false) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_FRACT_F64 |
| |
| Inst_VOP3__V_FRACT_F64::~Inst_VOP3__V_FRACT_F64() |
| { |
| } // ~Inst_VOP3__V_FRACT_F64 |
| |
| void |
| Inst_VOP3__V_FRACT_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src(gpuDynInst, extData.SRC0); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| VecElemF32 int_part(0.0); |
| vdst[lane] = std::modf(src[lane], &int_part); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_FREXP_EXP_I32_F32::Inst_VOP3__V_FREXP_EXP_I32_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_frexp_exp_i32_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_FREXP_EXP_I32_F32 |
| |
| Inst_VOP3__V_FREXP_EXP_I32_F32::~Inst_VOP3__V_FREXP_EXP_I32_F32() |
| { |
| } // ~Inst_VOP3__V_FREXP_EXP_I32_F32 |
| |
| // frexp(S0.f, Exponenti(S0.f)) |
| // if (S0.f == INF || S0.f == NAN) then D.i = 0; |
| // else D.i = Exponent(S0.f) |
| void |
| Inst_VOP3__V_FREXP_EXP_I32_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, extData.SRC0); |
| VecOperandI32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| if (std::isinf(src[lane])|| std::isnan(src[lane])) { |
| vdst[lane] = 0; |
| } else { |
| VecElemI32 exp(0); |
| std::frexp(src[lane], &exp); |
| vdst[lane] = exp; |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_FREXP_MANT_F32::Inst_VOP3__V_FREXP_MANT_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_frexp_mant_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_FREXP_MANT_F32 |
| |
| Inst_VOP3__V_FREXP_MANT_F32::~Inst_VOP3__V_FREXP_MANT_F32() |
| { |
| } // ~Inst_VOP3__V_FREXP_MANT_F32 |
| |
| // if (S0.f == INF || S0.f == NAN) then D.f = S0.f; |
| // else D.f = Mantissa(S0.f). |
| void |
| Inst_VOP3__V_FREXP_MANT_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, extData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| if (std::isinf(src[lane]) || std::isnan(src[lane])) { |
| vdst[lane] = src[lane]; |
| } else { |
| VecElemI32 exp(0); |
| vdst[lane] = std::frexp(src[lane], &exp); |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_CLREXCP::Inst_VOP3__V_CLREXCP(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_clrexcp", false) |
| { |
| } // Inst_VOP3__V_CLREXCP |
| |
| Inst_VOP3__V_CLREXCP::~Inst_VOP3__V_CLREXCP() |
| { |
| } // ~Inst_VOP3__V_CLREXCP |
| |
| void |
| Inst_VOP3__V_CLREXCP::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CVT_F16_U16::Inst_VOP3__V_CVT_F16_U16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cvt_f16_u16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CVT_F16_U16 |
| |
| Inst_VOP3__V_CVT_F16_U16::~Inst_VOP3__V_CVT_F16_U16() |
| { |
| } // ~Inst_VOP3__V_CVT_F16_U16 |
| |
| // D.f16 = uint16_to_flt16(S.u16). |
| void |
| Inst_VOP3__V_CVT_F16_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CVT_F16_I16::Inst_VOP3__V_CVT_F16_I16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cvt_f16_i16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CVT_F16_I16 |
| |
| Inst_VOP3__V_CVT_F16_I16::~Inst_VOP3__V_CVT_F16_I16() |
| { |
| } // ~Inst_VOP3__V_CVT_F16_I16 |
| |
| // D.f16 = int16_to_flt16(S.i16). |
| void |
| Inst_VOP3__V_CVT_F16_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CVT_U16_F16::Inst_VOP3__V_CVT_U16_F16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cvt_u16_f16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CVT_U16_F16 |
| |
| Inst_VOP3__V_CVT_U16_F16::~Inst_VOP3__V_CVT_U16_F16() |
| { |
| } // ~Inst_VOP3__V_CVT_U16_F16 |
| |
| // D.u16 = flt16_to_uint16(S.f16). |
| void |
| Inst_VOP3__V_CVT_U16_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CVT_I16_F16::Inst_VOP3__V_CVT_I16_F16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cvt_i16_f16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CVT_I16_F16 |
| |
| Inst_VOP3__V_CVT_I16_F16::~Inst_VOP3__V_CVT_I16_F16() |
| { |
| } // ~Inst_VOP3__V_CVT_I16_F16 |
| |
| // D.i16 = flt16_to_int16(S.f16). |
| void |
| Inst_VOP3__V_CVT_I16_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_RCP_F16::Inst_VOP3__V_RCP_F16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_rcp_f16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_RCP_F16 |
| |
| Inst_VOP3__V_RCP_F16::~Inst_VOP3__V_RCP_F16() |
| { |
| } // ~Inst_VOP3__V_RCP_F16 |
| |
| // if (S0.f16 == 1.0f) |
| // D.f16 = 1.0f; |
| // else |
| // D.f16 = 1 / S0.f16. |
| void |
| Inst_VOP3__V_RCP_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_SQRT_F16::Inst_VOP3__V_SQRT_F16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_sqrt_f16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_SQRT_F16 |
| |
| Inst_VOP3__V_SQRT_F16::~Inst_VOP3__V_SQRT_F16() |
| { |
| } // ~Inst_VOP3__V_SQRT_F16 |
| |
| // if (S0.f16 == 1.0f) |
| // D.f16 = 1.0f; |
| // else |
| // D.f16 = sqrt(S0.f16). |
| void |
| Inst_VOP3__V_SQRT_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_RSQ_F16::Inst_VOP3__V_RSQ_F16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_rsq_f16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_RSQ_F16 |
| |
| Inst_VOP3__V_RSQ_F16::~Inst_VOP3__V_RSQ_F16() |
| { |
| } // ~Inst_VOP3__V_RSQ_F16 |
| |
| // if (S0.f16 == 1.0f) |
| // D.f16 = 1.0f; |
| // else |
| // D.f16 = 1 / sqrt(S0.f16). |
| void |
| Inst_VOP3__V_RSQ_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_LOG_F16::Inst_VOP3__V_LOG_F16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_log_f16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_LOG_F16 |
| |
| Inst_VOP3__V_LOG_F16::~Inst_VOP3__V_LOG_F16() |
| { |
| } // ~Inst_VOP3__V_LOG_F16 |
| |
| // if (S0.f16 == 1.0f) |
| // D.f16 = 0.0f; |
| // else |
| // D.f16 = log2(S0.f16). |
| void |
| Inst_VOP3__V_LOG_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_EXP_F16::Inst_VOP3__V_EXP_F16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_exp_f16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_EXP_F16 |
| |
| Inst_VOP3__V_EXP_F16::~Inst_VOP3__V_EXP_F16() |
| { |
| } // ~Inst_VOP3__V_EXP_F16 |
| |
| // if (S0.f16 == 0.0f) |
| // D.f16 = 1.0f; |
| // else |
| // D.f16 = pow(2.0, S0.f16). |
| void |
| Inst_VOP3__V_EXP_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_FREXP_MANT_F16::Inst_VOP3__V_FREXP_MANT_F16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_frexp_mant_f16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_FREXP_MANT_F16 |
| |
| Inst_VOP3__V_FREXP_MANT_F16::~Inst_VOP3__V_FREXP_MANT_F16() |
| { |
| } // ~Inst_VOP3__V_FREXP_MANT_F16 |
| |
| // if (S0.f16 == +-INF || S0.f16 == NAN) |
| // D.f16 = S0.f16; |
| // else |
| // D.f16 = mantissa(S0.f16). |
| void |
| Inst_VOP3__V_FREXP_MANT_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_FREXP_EXP_I16_F16::Inst_VOP3__V_FREXP_EXP_I16_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_frexp_exp_i16_f16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_FREXP_EXP_I16_F16 |
| |
| Inst_VOP3__V_FREXP_EXP_I16_F16::~Inst_VOP3__V_FREXP_EXP_I16_F16() |
| { |
| } // ~Inst_VOP3__V_FREXP_EXP_I16_F16 |
| |
| void |
| Inst_VOP3__V_FREXP_EXP_I16_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_FLOOR_F16::Inst_VOP3__V_FLOOR_F16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_floor_f16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_FLOOR_F16 |
| |
| Inst_VOP3__V_FLOOR_F16::~Inst_VOP3__V_FLOOR_F16() |
| { |
| } // ~Inst_VOP3__V_FLOOR_F16 |
| |
| // D.f16 = floor(S0.f16); |
| void |
| Inst_VOP3__V_FLOOR_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CEIL_F16::Inst_VOP3__V_CEIL_F16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_ceil_f16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_CEIL_F16 |
| |
| Inst_VOP3__V_CEIL_F16::~Inst_VOP3__V_CEIL_F16() |
| { |
| } // ~Inst_VOP3__V_CEIL_F16 |
| |
| // D.f16 = ceil(S0.f16); |
| void |
| Inst_VOP3__V_CEIL_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_TRUNC_F16::Inst_VOP3__V_TRUNC_F16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_trunc_f16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_TRUNC_F16 |
| |
| Inst_VOP3__V_TRUNC_F16::~Inst_VOP3__V_TRUNC_F16() |
| { |
| } // ~Inst_VOP3__V_TRUNC_F16 |
| |
| // D.f16 = trunc(S0.f16). |
| void |
| Inst_VOP3__V_TRUNC_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_RNDNE_F16::Inst_VOP3__V_RNDNE_F16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_rndne_f16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_RNDNE_F16 |
| |
| Inst_VOP3__V_RNDNE_F16::~Inst_VOP3__V_RNDNE_F16() |
| { |
| } // ~Inst_VOP3__V_RNDNE_F16 |
| |
| // D.f16 = roundNearestEven(S0.f16); |
| void |
| Inst_VOP3__V_RNDNE_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_FRACT_F16::Inst_VOP3__V_FRACT_F16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_fract_f16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_FRACT_F16 |
| |
| Inst_VOP3__V_FRACT_F16::~Inst_VOP3__V_FRACT_F16() |
| { |
| } // ~Inst_VOP3__V_FRACT_F16 |
| |
| // D.f16 = S0.f16 + -floor(S0.f16). |
| void |
| Inst_VOP3__V_FRACT_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_SIN_F16::Inst_VOP3__V_SIN_F16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_sin_f16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_SIN_F16 |
| |
| Inst_VOP3__V_SIN_F16::~Inst_VOP3__V_SIN_F16() |
| { |
| } // ~Inst_VOP3__V_SIN_F16 |
| |
| // D.f16 = sin(S0.f16 * 2 * PI). |
| void |
| Inst_VOP3__V_SIN_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_COS_F16::Inst_VOP3__V_COS_F16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cos_f16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_COS_F16 |
| |
| Inst_VOP3__V_COS_F16::~Inst_VOP3__V_COS_F16() |
| { |
| } // ~Inst_VOP3__V_COS_F16 |
| |
| // D.f16 = cos(S0.f16 * 2 * PI). |
| void |
| Inst_VOP3__V_COS_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_EXP_LEGACY_F32::Inst_VOP3__V_EXP_LEGACY_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_exp_legacy_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_EXP_LEGACY_F32 |
| |
| Inst_VOP3__V_EXP_LEGACY_F32::~Inst_VOP3__V_EXP_LEGACY_F32() |
| { |
| } // ~Inst_VOP3__V_EXP_LEGACY_F32 |
| |
| // D.f = pow(2.0, S0.f) |
| void |
| Inst_VOP3__V_EXP_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, extData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::pow(2.0, src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_LOG_LEGACY_F32::Inst_VOP3__V_LOG_LEGACY_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_log_legacy_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_LOG_LEGACY_F32 |
| |
| Inst_VOP3__V_LOG_LEGACY_F32::~Inst_VOP3__V_LOG_LEGACY_F32() |
| { |
| } // ~Inst_VOP3__V_LOG_LEGACY_F32 |
| |
| // D.f = log2(S0.f). |
| void |
| Inst_VOP3__V_LOG_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src(gpuDynInst, extData.SRC0); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::log2(src[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MAD_LEGACY_F32::Inst_VOP3__V_MAD_LEGACY_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_mad_legacy_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| setFlag(MAD); |
| } // Inst_VOP3__V_MAD_LEGACY_F32 |
| |
| Inst_VOP3__V_MAD_LEGACY_F32::~Inst_VOP3__V_MAD_LEGACY_F32() |
| { |
| } // ~Inst_VOP3__V_MAD_LEGACY_F32 |
| |
| // D.f = S0.f * S1.f + S2.f |
| void |
| Inst_VOP3__V_MAD_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (instData.ABS & 0x4) { |
| src2.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| if (extData.NEG & 0x4) { |
| src2.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MAD_F32::Inst_VOP3__V_MAD_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_mad_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| setFlag(MAD); |
| } // Inst_VOP3__V_MAD_F32 |
| |
| Inst_VOP3__V_MAD_F32::~Inst_VOP3__V_MAD_F32() |
| { |
| } // ~Inst_VOP3__V_MAD_F32 |
| |
| // D.f = S0.f * S1.f + S2.f. |
| void |
| Inst_VOP3__V_MAD_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (instData.ABS & 0x4) { |
| src2.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| if (extData.NEG & 0x4) { |
| src2.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MAD_I32_I24::Inst_VOP3__V_MAD_I32_I24(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_mad_i32_i24", false) |
| { |
| setFlag(ALU); |
| setFlag(MAD); |
| } // Inst_VOP3__V_MAD_I32_I24 |
| |
| Inst_VOP3__V_MAD_I32_I24::~Inst_VOP3__V_MAD_I32_I24() |
| { |
| } // ~Inst_VOP3__V_MAD_I32_I24 |
| |
| // D.i = S0.i[23:0] * S1.i[23:0] + S2.i. |
| void |
| Inst_VOP3__V_MAD_I32_I24::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandI32 src2(gpuDynInst, extData.SRC2); |
| VecOperandI32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = szext<24>(src0[lane]) |
| * szext<24>(src1[lane]) + src2[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MAD_U32_U24::Inst_VOP3__V_MAD_U32_U24(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_mad_u32_u24", false) |
| { |
| setFlag(ALU); |
| setFlag(MAD); |
| } // Inst_VOP3__V_MAD_U32_U24 |
| |
| Inst_VOP3__V_MAD_U32_U24::~Inst_VOP3__V_MAD_U32_U24() |
| { |
| } // ~Inst_VOP3__V_MAD_U32_U24 |
| |
| // D.u = S0.u[23:0] * S1.u[23:0] + S2.u. |
| void |
| Inst_VOP3__V_MAD_U32_U24::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = bits(src0[lane], 23, 0) * bits(src1[lane], 23, 0) |
| + src2[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_CUBEID_F32::Inst_VOP3__V_CUBEID_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cubeid_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CUBEID_F32 |
| |
| Inst_VOP3__V_CUBEID_F32::~Inst_VOP3__V_CUBEID_F32() |
| { |
| } // ~Inst_VOP3__V_CUBEID_F32 |
| |
| void |
| Inst_VOP3__V_CUBEID_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CUBESC_F32::Inst_VOP3__V_CUBESC_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cubesc_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CUBESC_F32 |
| |
| Inst_VOP3__V_CUBESC_F32::~Inst_VOP3__V_CUBESC_F32() |
| { |
| } // ~Inst_VOP3__V_CUBESC_F32 |
| |
| void |
| Inst_VOP3__V_CUBESC_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CUBETC_F32::Inst_VOP3__V_CUBETC_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cubetc_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CUBETC_F32 |
| |
| Inst_VOP3__V_CUBETC_F32::~Inst_VOP3__V_CUBETC_F32() |
| { |
| } // ~Inst_VOP3__V_CUBETC_F32 |
| |
| void |
| Inst_VOP3__V_CUBETC_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CUBEMA_F32::Inst_VOP3__V_CUBEMA_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cubema_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CUBEMA_F32 |
| |
| Inst_VOP3__V_CUBEMA_F32::~Inst_VOP3__V_CUBEMA_F32() |
| { |
| } // ~Inst_VOP3__V_CUBEMA_F32 |
| |
| void |
| Inst_VOP3__V_CUBEMA_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_BFE_U32::Inst_VOP3__V_BFE_U32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_bfe_u32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_BFE_U32 |
| |
| Inst_VOP3__V_BFE_U32::~Inst_VOP3__V_BFE_U32() |
| { |
| } // ~Inst_VOP3__V_BFE_U32 |
| |
| // D.u = (S0.u >> S1.u[4:0]) & ((1 << S2.u[4:0]) - 1). |
| // Bitfield extract with S0 = data, S1 = field_offset, S2 = field_width. |
| void |
| Inst_VOP3__V_BFE_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = (src0[lane] >> bits(src1[lane], 4, 0)) |
| & ((1 << bits(src2[lane], 4, 0)) - 1); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_BFE_I32::Inst_VOP3__V_BFE_I32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_bfe_i32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_BFE_I32 |
| |
| Inst_VOP3__V_BFE_I32::~Inst_VOP3__V_BFE_I32() |
| { |
| } // ~Inst_VOP3__V_BFE_I32 |
| |
| // D.i = (S0.i >> S1.u[4:0]) & ((1 << S2.u[4:0]) - 1). |
| // Bitfield extract with S0 = data, S1 = field_offset, S2 = field_width. |
| void |
| Inst_VOP3__V_BFE_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); |
| VecOperandI32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = (src0[lane] >> bits(src1[lane], 4, 0)) |
| & ((1 << bits(src2[lane], 4, 0)) - 1); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_BFI_B32::Inst_VOP3__V_BFI_B32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_bfi_b32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_BFI_B32 |
| |
| Inst_VOP3__V_BFI_B32::~Inst_VOP3__V_BFI_B32() |
| { |
| } // ~Inst_VOP3__V_BFI_B32 |
| |
| // D.u = (S0.u & S1.u) | (~S0.u & S2.u); bitfield insert. |
| void |
| Inst_VOP3__V_BFI_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = (src0[lane] & src1[lane]) | (~src0[lane] |
| & src2[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_FMA_F32::Inst_VOP3__V_FMA_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_fma_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| setFlag(FMA); |
| } // Inst_VOP3__V_FMA_F32 |
| |
| Inst_VOP3__V_FMA_F32::~Inst_VOP3__V_FMA_F32() |
| { |
| } // ~Inst_VOP3__V_FMA_F32 |
| |
| // D.f = S0.f * S1.f + S2.f. |
| void |
| Inst_VOP3__V_FMA_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (instData.ABS & 0x4) { |
| src2.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| if (extData.NEG & 0x4) { |
| src2.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_FMA_F64::Inst_VOP3__V_FMA_F64(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_fma_f64", false) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| setFlag(FMA); |
| } // Inst_VOP3__V_FMA_F64 |
| |
| Inst_VOP3__V_FMA_F64::~Inst_VOP3__V_FMA_F64() |
| { |
| } // ~Inst_VOP3__V_FMA_F64 |
| |
| // D.d = S0.d * S1.d + S2.d. |
| void |
| Inst_VOP3__V_FMA_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandF64 src2(gpuDynInst, extData.SRC2); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (instData.ABS & 0x4) { |
| src2.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| if (extData.NEG & 0x4) { |
| src2.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_LERP_U8::Inst_VOP3__V_LERP_U8(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_lerp_u8", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_LERP_U8 |
| |
| Inst_VOP3__V_LERP_U8::~Inst_VOP3__V_LERP_U8() |
| { |
| } // ~Inst_VOP3__V_LERP_U8 |
| |
| // D.u = ((S0.u[31:24] + S1.u[31:24] + S2.u[24]) >> 1) << 24 |
| // D.u += ((S0.u[23:16] + S1.u[23:16] + S2.u[16]) >> 1) << 16; |
| // D.u += ((S0.u[15:8] + S1.u[15:8] + S2.u[8]) >> 1) << 8; |
| // D.u += ((S0.u[7:0] + S1.u[7:0] + S2.u[0]) >> 1). |
| void |
| Inst_VOP3__V_LERP_U8::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = ((bits(src0[lane], 31, 24) |
| + bits(src1[lane], 31, 24) + bits(src2[lane], 24)) >> 1) |
| << 24; |
| vdst[lane] += ((bits(src0[lane], 23, 16) |
| + bits(src1[lane], 23, 16) + bits(src2[lane], 16)) >> 1) |
| << 16; |
| vdst[lane] += ((bits(src0[lane], 15, 8) |
| + bits(src1[lane], 15, 8) + bits(src2[lane], 8)) >> 1) |
| << 8; |
| vdst[lane] += ((bits(src0[lane], 7, 0) + bits(src1[lane], 7, 0) |
| + bits(src2[lane], 0)) >> 1); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_ALIGNBIT_B32::Inst_VOP3__V_ALIGNBIT_B32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_alignbit_b32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_ALIGNBIT_B32 |
| |
| Inst_VOP3__V_ALIGNBIT_B32::~Inst_VOP3__V_ALIGNBIT_B32() |
| { |
| } // ~Inst_VOP3__V_ALIGNBIT_B32 |
| |
| // D.u = ({S0, S1} >> S2.u[4:0]) & 0xffffffff. |
| void |
| Inst_VOP3__V_ALIGNBIT_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| VecElemU64 src_0_1 = (((VecElemU64)src0[lane] << 32) |
| | (VecElemU64)src1[lane]); |
| vdst[lane] = (VecElemU32)((src_0_1 |
| >> (VecElemU64)bits(src2[lane], 4, 0)) & 0xffffffff); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_ALIGNBYTE_B32::Inst_VOP3__V_ALIGNBYTE_B32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_alignbyte_b32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_ALIGNBYTE_B32 |
| |
| Inst_VOP3__V_ALIGNBYTE_B32::~Inst_VOP3__V_ALIGNBYTE_B32() |
| { |
| } // ~Inst_VOP3__V_ALIGNBYTE_B32 |
| |
| // D.u = ({S0, S1} >> (8 * S2.u[4:0])) & 0xffffffff. |
| void |
| Inst_VOP3__V_ALIGNBYTE_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| VecElemU64 src_0_1 = (((VecElemU64)src0[lane] << 32) |
| | (VecElemU64)src1[lane]); |
| vdst[lane] = (VecElemU32)((src_0_1 |
| >> (8ULL * (VecElemU64)bits(src2[lane], 4, 0))) |
| & 0xffffffff); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MIN3_F32::Inst_VOP3__V_MIN3_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_min3_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_MIN3_F32 |
| |
| Inst_VOP3__V_MIN3_F32::~Inst_VOP3__V_MIN3_F32() |
| { |
| } // ~Inst_VOP3__V_MIN3_F32 |
| |
| // D.f = min(S0.f, S1.f, S2.f). |
| void |
| Inst_VOP3__V_MIN3_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (instData.ABS & 0x4) { |
| src2.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| if (extData.NEG & 0x4) { |
| src2.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| VecElemF32 min_0_1 = std::fmin(src0[lane], src1[lane]); |
| vdst[lane] = std::fmin(min_0_1, src2[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MIN3_I32::Inst_VOP3__V_MIN3_I32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_min3_i32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_MIN3_I32 |
| |
| Inst_VOP3__V_MIN3_I32::~Inst_VOP3__V_MIN3_I32() |
| { |
| } // ~Inst_VOP3__V_MIN3_I32 |
| |
| // D.i = min(S0.i, S1.i, S2.i). |
| void |
| Inst_VOP3__V_MIN3_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandI32 src2(gpuDynInst, extData.SRC2); |
| VecOperandI32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| VecElemI32 min_0_1 = std::min(src0[lane], src1[lane]); |
| vdst[lane] = std::min(min_0_1, src2[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MIN3_U32::Inst_VOP3__V_MIN3_U32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_min3_u32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_MIN3_U32 |
| |
| Inst_VOP3__V_MIN3_U32::~Inst_VOP3__V_MIN3_U32() |
| { |
| } // ~Inst_VOP3__V_MIN3_U32 |
| |
| // D.u = min(S0.u, S1.u, S2.u). |
| void |
| Inst_VOP3__V_MIN3_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| VecElemU32 min_0_1 = std::min(src0[lane], src1[lane]); |
| vdst[lane] = std::min(min_0_1, src2[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MAX3_F32::Inst_VOP3__V_MAX3_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_max3_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_MAX3_F32 |
| |
| Inst_VOP3__V_MAX3_F32::~Inst_VOP3__V_MAX3_F32() |
| { |
| } // ~Inst_VOP3__V_MAX3_F32 |
| |
| // D.f = max(S0.f, S1.f, S2.f). |
| void |
| Inst_VOP3__V_MAX3_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (instData.ABS & 0x4) { |
| src2.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| if (extData.NEG & 0x4) { |
| src2.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| VecElemF32 max_0_1 = std::fmax(src0[lane], src1[lane]); |
| vdst[lane] = std::fmax(max_0_1, src2[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MAX3_I32::Inst_VOP3__V_MAX3_I32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_max3_i32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_MAX3_I32 |
| |
| Inst_VOP3__V_MAX3_I32::~Inst_VOP3__V_MAX3_I32() |
| { |
| } // ~Inst_VOP3__V_MAX3_I32 |
| |
| // D.i = max(S0.i, S1.i, S2.i). |
| void |
| Inst_VOP3__V_MAX3_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandI32 src2(gpuDynInst, extData.SRC2); |
| VecOperandI32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| VecElemI32 max_0_1 = std::max(src0[lane], src1[lane]); |
| vdst[lane] = std::max(max_0_1, src2[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MAX3_U32::Inst_VOP3__V_MAX3_U32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_max3_u32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_MAX3_U32 |
| |
| Inst_VOP3__V_MAX3_U32::~Inst_VOP3__V_MAX3_U32() |
| { |
| } // ~Inst_VOP3__V_MAX3_U32 |
| |
| // D.u = max(S0.u, S1.u, S2.u). |
| void |
| Inst_VOP3__V_MAX3_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| VecElemU32 max_0_1 = std::max(src0[lane], src1[lane]); |
| vdst[lane] = std::max(max_0_1, src2[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MED3_F32::Inst_VOP3__V_MED3_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_med3_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_MED3_F32 |
| |
| Inst_VOP3__V_MED3_F32::~Inst_VOP3__V_MED3_F32() |
| { |
| } // ~Inst_VOP3__V_MED3_F32 |
| |
| // D.f = median(S0.f, S1.f, S2.f). |
| void |
| Inst_VOP3__V_MED3_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (instData.ABS & 0x4) { |
| src2.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| if (extData.NEG & 0x4) { |
| src2.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = median(src0[lane], src1[lane], src2[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MED3_I32::Inst_VOP3__V_MED3_I32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_med3_i32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_MED3_I32 |
| |
| Inst_VOP3__V_MED3_I32::~Inst_VOP3__V_MED3_I32() |
| { |
| } // ~Inst_VOP3__V_MED3_I32 |
| |
| // D.i = median(S0.i, S1.i, S2.i). |
| void |
| Inst_VOP3__V_MED3_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandI32 src2(gpuDynInst, extData.SRC2); |
| VecOperandI32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = median(src0[lane], src1[lane], src2[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MED3_U32::Inst_VOP3__V_MED3_U32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_med3_u32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_MED3_U32 |
| |
| Inst_VOP3__V_MED3_U32::~Inst_VOP3__V_MED3_U32() |
| { |
| } // ~Inst_VOP3__V_MED3_U32 |
| |
| // D.u = median(S0.u, S1.u, S2.u). |
| void |
| Inst_VOP3__V_MED3_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = median(src0[lane], src1[lane], src2[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_SAD_U8::Inst_VOP3__V_SAD_U8(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_sad_u8", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_SAD_U8 |
| |
| Inst_VOP3__V_SAD_U8::~Inst_VOP3__V_SAD_U8() |
| { |
| } // ~Inst_VOP3__V_SAD_U8 |
| |
| // D.u = abs(S0.i[31:24] - S1.i[31:24]) + abs(S0.i[23:16] - S1.i[23:16]) + |
| // abs(S0.i[15:8] - S1.i[15:8]) + abs(S0.i[7:0] - S1.i[7:0]) + S2.u. |
| // Sum of absolute differences with accumulation, overflow into upper bits |
| // is allowed. |
| void |
| Inst_VOP3__V_SAD_U8::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::abs(bits(src0[lane], 31, 24) |
| - bits(src1[lane], 31, 24)) |
| + std::abs(bits(src0[lane], 23, 16) |
| - bits(src1[lane], 23, 16)) |
| + std::abs(bits(src0[lane], 15, 8) |
| - bits(src1[lane], 15, 8)) |
| + std::abs(bits(src0[lane], 7, 0) |
| - bits(src1[lane], 7, 0)) + src2[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_SAD_HI_U8::Inst_VOP3__V_SAD_HI_U8(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_sad_hi_u8", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_SAD_HI_U8 |
| |
| Inst_VOP3__V_SAD_HI_U8::~Inst_VOP3__V_SAD_HI_U8() |
| { |
| } // ~Inst_VOP3__V_SAD_HI_U8 |
| |
| // D.u = (SAD_U8(S0, S1, 0) << 16) + S2.u. |
| // Sum of absolute differences with accumulation, overflow is lost. |
| void |
| Inst_VOP3__V_SAD_HI_U8::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = (((bits(src0[lane], 31, 24) |
| - bits(src1[lane], 31, 24)) + (bits(src0[lane], 23, 16) |
| - bits(src1[lane], 23, 16)) + (bits(src0[lane], 15, 8) |
| - bits(src1[lane], 15, 8)) + (bits(src0[lane], 7, 0) |
| - bits(src1[lane], 7, 0))) << 16) + src2[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_SAD_U16::Inst_VOP3__V_SAD_U16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_sad_u16", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_SAD_U16 |
| |
| Inst_VOP3__V_SAD_U16::~Inst_VOP3__V_SAD_U16() |
| { |
| } // ~Inst_VOP3__V_SAD_U16 |
| |
| // D.u = abs(S0.i[31:16] - S1.i[31:16]) + abs(S0.i[15:0] - S1.i[15:0]) |
| // + S2.u. |
| // Word SAD with accumulation. |
| void |
| Inst_VOP3__V_SAD_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::abs(bits(src0[lane], 31, 16) |
| - bits(src1[lane], 31, 16)) |
| + std::abs(bits(src0[lane], 15, 0) |
| - bits(src1[lane], 15, 0)) + src2[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_SAD_U32::Inst_VOP3__V_SAD_U32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_sad_u32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_SAD_U32 |
| |
| Inst_VOP3__V_SAD_U32::~Inst_VOP3__V_SAD_U32() |
| { |
| } // ~Inst_VOP3__V_SAD_U32 |
| |
| // D.u = abs(S0.i - S1.i) + S2.u. |
| // Dword SAD with accumulation. |
| void |
| Inst_VOP3__V_SAD_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::abs(src0[lane] - src1[lane]) + src2[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_CVT_PK_U8_F32::Inst_VOP3__V_CVT_PK_U8_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cvt_pk_u8_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CVT_PK_U8_F32 |
| |
| Inst_VOP3__V_CVT_PK_U8_F32::~Inst_VOP3__V_CVT_PK_U8_F32() |
| { |
| } // ~Inst_VOP3__V_CVT_PK_U8_F32 |
| |
| // D.u = ((flt32_to_uint8(S0.f) & 0xff) << (8 * S1.u[1:0])) |
| // | (S2.u & ~(0xff << (8 * S1.u[1:0]))). |
| // Convert floating point value S0 to 8-bit unsigned integer and pack the |
| // result into byte S1 of dword S2. |
| void |
| Inst_VOP3__V_CVT_PK_U8_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = (((VecElemU8)src0[lane] & 0xff) |
| << (8 * bits(src1[lane], 1, 0))) |
| | (src2[lane] & ~(0xff << (8 * bits(src1[lane], 1, 0)))); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_DIV_FIXUP_F32::Inst_VOP3__V_DIV_FIXUP_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_div_fixup_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_DIV_FIXUP_F32 |
| |
| Inst_VOP3__V_DIV_FIXUP_F32::~Inst_VOP3__V_DIV_FIXUP_F32() |
| { |
| } // ~Inst_VOP3__V_DIV_FIXUP_F32 |
| |
| // D.f = Divide fixup and flags -- s0.f = Quotient, s1.f = Denominator, |
| // s2.f = Numerator. |
| void |
| Inst_VOP3__V_DIV_FIXUP_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (instData.ABS & 0x4) { |
| src2.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| if (extData.NEG & 0x4) { |
| src2.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| if (std::fpclassify(src1[lane]) == FP_ZERO) { |
| if (std::signbit(src1[lane])) { |
| vdst[lane] = -INFINITY; |
| } else { |
| vdst[lane] = +INFINITY; |
| } |
| } else if (std::isnan(src2[lane]) || std::isnan(src1[lane])) { |
| vdst[lane] = NAN; |
| } else if (std::isinf(src1[lane])) { |
| if (std::signbit(src1[lane])) { |
| vdst[lane] = -INFINITY; |
| } else { |
| vdst[lane] = +INFINITY; |
| } |
| } else { |
| vdst[lane] = src2[lane] / src1[lane]; |
| } |
| } |
| } |
| |
| vdst.write(); |
| } // execute |
| // --- Inst_VOP3__V_DIV_FIXUP_F64 class methods --- |
| |
| Inst_VOP3__V_DIV_FIXUP_F64::Inst_VOP3__V_DIV_FIXUP_F64(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_div_fixup_f64", false) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_DIV_FIXUP_F64 |
| |
| Inst_VOP3__V_DIV_FIXUP_F64::~Inst_VOP3__V_DIV_FIXUP_F64() |
| { |
| } // ~Inst_VOP3__V_DIV_FIXUP_F64 |
| |
| // D.d = Divide fixup and flags -- s0.d = Quotient, s1.d = Denominator, |
| // s2.d = Numerator. |
| void |
| Inst_VOP3__V_DIV_FIXUP_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandF64 src2(gpuDynInst, extData.SRC2); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (instData.ABS & 0x4) { |
| src2.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| if (extData.NEG & 0x4) { |
| src2.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| int sign_out = std::signbit(src1[lane]) |
| ^ std::signbit(src2[lane]); |
| int exp1(0); |
| int exp2(0); |
| std::frexp(src1[lane], &exp1); |
| std::frexp(src2[lane], &exp2); |
| |
| if (std::isnan(src1[lane]) || std::isnan(src2[lane])) { |
| vdst[lane] = std::numeric_limits<VecElemF64>::quiet_NaN(); |
| } else if (std::fpclassify(src1[lane]) == FP_ZERO |
| && std::fpclassify(src2[lane]) == FP_ZERO) { |
| vdst[lane] |
| = std::numeric_limits<VecElemF64>::signaling_NaN(); |
| } else if (std::isinf(src1[lane]) && std::isinf(src2[lane])) { |
| vdst[lane] |
| = std::numeric_limits<VecElemF64>::signaling_NaN(); |
| } else if (std::fpclassify(src1[lane]) == FP_ZERO |
| || std::isinf(src2[lane])) { |
| vdst[lane] = sign_out ? -INFINITY : +INFINITY; |
| } else if (std::isinf(src1[lane]) |
| || std::fpclassify(src2[lane]) == FP_ZERO) { |
| vdst[lane] = sign_out ? -0.0 : +0.0; |
| } else if (exp2 - exp1 < -1075) { |
| vdst[lane] = src0[lane]; |
| } else if (exp1 == 2047) { |
| vdst[lane] = src0[lane]; |
| } else { |
| vdst[lane] = sign_out ? -std::fabs(src0[lane]) |
| : std::fabs(src0[lane]); |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_DIV_SCALE_F32::Inst_VOP3__V_DIV_SCALE_F32( |
| InFmt_VOP3_SDST_ENC *iFmt) |
| : Inst_VOP3_SDST_ENC(iFmt, "v_div_scale_f32") |
| { |
| setFlag(ALU); |
| setFlag(WritesVCC); |
| setFlag(F32); |
| } // Inst_VOP3__V_DIV_SCALE_F32 |
| |
| Inst_VOP3__V_DIV_SCALE_F32::~Inst_VOP3__V_DIV_SCALE_F32() |
| { |
| } // ~Inst_VOP3__V_DIV_SCALE_F32 |
| |
| // {vcc,D.f} = Divide preop and flags -- s0.f = Quotient, s1.f = |
| // Denominator, s2.f = Numerator -- s0 must equal s1 or s2. Given a |
| // numerator and denominator, this opcode will appropriately scale inputs |
| // for division to avoid subnormal terms during Newton-Raphson correction |
| // algorithm. This opcode producses a VCC flag for post-scale of quotient. |
| void |
| Inst_VOP3__V_DIV_SCALE_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); |
| ScalarOperandU64 vcc(gpuDynInst, instData.SDST); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| if (extData.NEG & 0x4) { |
| src2.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src0[lane]; |
| vcc.setBit(lane, 0); |
| } |
| } |
| |
| vcc.write(); |
| vdst.write(); |
| } // execute |
| // --- Inst_VOP3__V_DIV_SCALE_F64 class methods --- |
| |
| Inst_VOP3__V_DIV_SCALE_F64::Inst_VOP3__V_DIV_SCALE_F64( |
| InFmt_VOP3_SDST_ENC *iFmt) |
| : Inst_VOP3_SDST_ENC(iFmt, "v_div_scale_f64") |
| { |
| setFlag(ALU); |
| setFlag(WritesVCC); |
| setFlag(F64); |
| } // Inst_VOP3__V_DIV_SCALE_F64 |
| |
| Inst_VOP3__V_DIV_SCALE_F64::~Inst_VOP3__V_DIV_SCALE_F64() |
| { |
| } // ~Inst_VOP3__V_DIV_SCALE_F64 |
| |
| // {vcc,D.d} = Divide preop and flags -- s0.d = Quotient, s1.d = |
| // Denominator, s2.d = Numerator -- s0 must equal s1 or s2. Given a |
| // numerator and denominator, this opcode will appropriately scale inputs |
| // for division to avoid subnormal terms during Newton-Raphson correction |
| // algorithm. This opcode producses a VCC flag for post-scale of quotient. |
| void |
| Inst_VOP3__V_DIV_SCALE_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandF64 src2(gpuDynInst, extData.SRC2); |
| ScalarOperandU64 vcc(gpuDynInst, instData.SDST); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| if (extData.NEG & 0x4) { |
| src2.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| int exp1(0); |
| int exp2(0); |
| std::frexp(src1[lane], &exp1); |
| std::frexp(src2[lane], &exp2); |
| vcc.setBit(lane, 0); |
| |
| if (std::fpclassify(src1[lane]) == FP_ZERO |
| || std::fpclassify(src2[lane]) == FP_ZERO) { |
| vdst[lane] = NAN; |
| } else if (exp2 - exp1 >= 768) { |
| vcc.setBit(lane, 1); |
| if (src0[lane] == src1[lane]) { |
| vdst[lane] = std::ldexp(src0[lane], 128); |
| } |
| } else if (std::fpclassify(src1[lane]) == FP_SUBNORMAL) { |
| vdst[lane] = std::ldexp(src0[lane], 128); |
| } else if (std::fpclassify(1.0 / src1[lane]) == FP_SUBNORMAL |
| && std::fpclassify(src2[lane] / src1[lane]) |
| == FP_SUBNORMAL) { |
| vcc.setBit(lane, 1); |
| if (src0[lane] == src1[lane]) { |
| vdst[lane] = std::ldexp(src0[lane], 128); |
| } |
| } else if (std::fpclassify(1.0 / src1[lane]) == FP_SUBNORMAL) { |
| vdst[lane] = std::ldexp(src0[lane], -128); |
| } else if (std::fpclassify(src2[lane] / src1[lane]) |
| == FP_SUBNORMAL) { |
| vcc.setBit(lane, 1); |
| if (src0[lane] == src2[lane]) { |
| vdst[lane] = std::ldexp(src0[lane], 128); |
| } |
| } else if (exp2 <= 53) { |
| vdst[lane] = std::ldexp(src0[lane], 128); |
| } |
| } |
| } |
| |
| vcc.write(); |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_DIV_FMAS_F32::Inst_VOP3__V_DIV_FMAS_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_div_fmas_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(ReadsVCC); |
| setFlag(F32); |
| setFlag(FMA); |
| } // Inst_VOP3__V_DIV_FMAS_F32 |
| |
| Inst_VOP3__V_DIV_FMAS_F32::~Inst_VOP3__V_DIV_FMAS_F32() |
| { |
| } // ~Inst_VOP3__V_DIV_FMAS_F32 |
| |
| // D.f = Special case divide FMA with scale and flags(s0.f = Quotient, |
| // s1.f = Denominator, s2.f = Numerator) |
| void |
| Inst_VOP3__V_DIV_FMAS_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (instData.ABS & 0x4) { |
| src2.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| if (extData.NEG & 0x4) { |
| src2.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]); |
| } |
| } |
| |
| //vdst.write(); |
| } // execute |
| // --- Inst_VOP3__V_DIV_FMAS_F64 class methods --- |
| |
| Inst_VOP3__V_DIV_FMAS_F64::Inst_VOP3__V_DIV_FMAS_F64(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_div_fmas_f64", false) |
| { |
| setFlag(ALU); |
| setFlag(ReadsVCC); |
| setFlag(F64); |
| setFlag(FMA); |
| } // Inst_VOP3__V_DIV_FMAS_F64 |
| |
| Inst_VOP3__V_DIV_FMAS_F64::~Inst_VOP3__V_DIV_FMAS_F64() |
| { |
| } // ~Inst_VOP3__V_DIV_FMAS_F64 |
| |
| // D.d = Special case divide FMA with scale and flags(s0.d = Quotient, |
| // s1.d = Denominator, s2.d = Numerator) |
| void |
| Inst_VOP3__V_DIV_FMAS_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandF64 src2(gpuDynInst, extData.SRC2); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| vcc.read(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (instData.ABS & 0x4) { |
| src2.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| if (extData.NEG & 0x4) { |
| src2.negModifier(); |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| if (bits(vcc.rawData(), lane)) { |
| vdst[lane] = std::pow(2, 64) |
| * std::fma(src0[lane], src1[lane], src2[lane]); |
| } else { |
| vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]); |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MSAD_U8::Inst_VOP3__V_MSAD_U8(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_msad_u8", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_MSAD_U8 |
| |
| Inst_VOP3__V_MSAD_U8::~Inst_VOP3__V_MSAD_U8() |
| { |
| } // ~Inst_VOP3__V_MSAD_U8 |
| |
| // D.u = Masked Byte SAD with accum_lo(S0.u, S1.u, S2.u). |
| void |
| Inst_VOP3__V_MSAD_U8::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_QSAD_PK_U16_U8::Inst_VOP3__V_QSAD_PK_U16_U8(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_qsad_pk_u16_u8", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_QSAD_PK_U16_U8 |
| |
| Inst_VOP3__V_QSAD_PK_U16_U8::~Inst_VOP3__V_QSAD_PK_U16_U8() |
| { |
| } // ~Inst_VOP3__V_QSAD_PK_U16_U8 |
| |
| // D.u = Quad-Byte SAD with 16-bit packed accum_lo/hi(S0.u[63:0], |
| // S1.u[31:0], S2.u[63:0]) |
| void |
| Inst_VOP3__V_QSAD_PK_U16_U8::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_MQSAD_PK_U16_U8::Inst_VOP3__V_MQSAD_PK_U16_U8( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_mqsad_pk_u16_u8", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_MQSAD_PK_U16_U8 |
| |
| Inst_VOP3__V_MQSAD_PK_U16_U8::~Inst_VOP3__V_MQSAD_PK_U16_U8() |
| { |
| } // ~Inst_VOP3__V_MQSAD_PK_U16_U8 |
| |
| // D.u = Masked Quad-Byte SAD with 16-bit packed accum_lo/hi(S0.u[63:0], |
| // S1.u[31:0], S2.u[63:0]) |
| void |
| Inst_VOP3__V_MQSAD_PK_U16_U8::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_MQSAD_U32_U8::Inst_VOP3__V_MQSAD_U32_U8(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_mqsad_u32_u8", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_MQSAD_U32_U8 |
| |
| Inst_VOP3__V_MQSAD_U32_U8::~Inst_VOP3__V_MQSAD_U32_U8() |
| { |
| } // ~Inst_VOP3__V_MQSAD_U32_U8 |
| |
| // D.u128 = Masked Quad-Byte SAD with 32-bit accum_lo/hi(S0.u[63:0], |
| // S1.u[31:0], S2.u[127:0]) |
| void |
| Inst_VOP3__V_MQSAD_U32_U8::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_MAD_U64_U32::Inst_VOP3__V_MAD_U64_U32( |
| InFmt_VOP3_SDST_ENC *iFmt) |
| : Inst_VOP3_SDST_ENC(iFmt, "v_mad_u64_u32") |
| { |
| setFlag(ALU); |
| setFlag(WritesVCC); |
| setFlag(MAD); |
| } // Inst_VOP3__V_MAD_U64_U32 |
| |
| Inst_VOP3__V_MAD_U64_U32::~Inst_VOP3__V_MAD_U64_U32() |
| { |
| } // ~Inst_VOP3__V_MAD_U64_U32 |
| |
| // {vcc_out, D.u64} = S0.u32 * S1.u32 + S2.u64. |
| void |
| Inst_VOP3__V_MAD_U64_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandU64 src2(gpuDynInst, extData.SRC2); |
| ScalarOperandU64 vcc(gpuDynInst, instData.SDST); |
| VecOperandU64 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| vdst.read(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, muladd(vdst[lane], src0[lane], src1[lane], |
| src2[lane])); |
| } |
| } |
| |
| vcc.write(); |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MAD_I64_I32::Inst_VOP3__V_MAD_I64_I32( |
| InFmt_VOP3_SDST_ENC *iFmt) |
| : Inst_VOP3_SDST_ENC(iFmt, "v_mad_i64_i32") |
| { |
| setFlag(ALU); |
| setFlag(WritesVCC); |
| setFlag(MAD); |
| } // Inst_VOP3__V_MAD_I64_I32 |
| |
| Inst_VOP3__V_MAD_I64_I32::~Inst_VOP3__V_MAD_I64_I32() |
| { |
| } // ~Inst_VOP3__V_MAD_I64_I32 |
| |
| // {vcc_out,D.i64} = S0.i32 * S1.i32 + S2.i64. |
| void |
| Inst_VOP3__V_MAD_I64_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandI64 src2(gpuDynInst, extData.SRC2); |
| ScalarOperandU64 vcc(gpuDynInst, instData.SDST); |
| VecOperandI64 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vcc.setBit(lane, muladd(vdst[lane], src0[lane], src1[lane], |
| src2[lane])); |
| } |
| } |
| |
| vcc.write(); |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MAD_F16::Inst_VOP3__V_MAD_F16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_mad_f16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| setFlag(MAD); |
| } // Inst_VOP3__V_MAD_F16 |
| |
| Inst_VOP3__V_MAD_F16::~Inst_VOP3__V_MAD_F16() |
| { |
| } // ~Inst_VOP3__V_MAD_F16 |
| |
| // D.f16 = S0.f16 * S1.f16 + S2.f16. |
| // Supports round mode, exception flags, saturation. |
| void |
| Inst_VOP3__V_MAD_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_MAD_U16::Inst_VOP3__V_MAD_U16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_mad_u16", false) |
| { |
| setFlag(ALU); |
| setFlag(MAD); |
| } // Inst_VOP3__V_MAD_U16 |
| |
| Inst_VOP3__V_MAD_U16::~Inst_VOP3__V_MAD_U16() |
| { |
| } // ~Inst_VOP3__V_MAD_U16 |
| |
| // D.u16 = S0.u16 * S1.u16 + S2.u16. |
| // Supports saturation (unsigned 16-bit integer domain). |
| void |
| Inst_VOP3__V_MAD_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandU16 src2(gpuDynInst, extData.SRC2); |
| VecOperandU16 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src0[lane] * src1[lane] + src2[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MAD_I16::Inst_VOP3__V_MAD_I16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_mad_i16", false) |
| { |
| setFlag(ALU); |
| setFlag(MAD); |
| } // Inst_VOP3__V_MAD_I16 |
| |
| Inst_VOP3__V_MAD_I16::~Inst_VOP3__V_MAD_I16() |
| { |
| } // ~Inst_VOP3__V_MAD_I16 |
| |
| // D.i16 = S0.i16 * S1.i16 + S2.i16. |
| // Supports saturation (signed 16-bit integer domain). |
| void |
| Inst_VOP3__V_MAD_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandI16 src2(gpuDynInst, extData.SRC2); |
| VecOperandI16 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src0[lane] * src1[lane] + src2[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_PERM_B32::Inst_VOP3__V_PERM_B32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_perm_b32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_PERM_B32 |
| |
| Inst_VOP3__V_PERM_B32::~Inst_VOP3__V_PERM_B32() |
| { |
| } // ~Inst_VOP3__V_PERM_B32 |
| |
| // D.u[31:24] = permute({S0.u, S1.u}, S2.u[31:24]); |
| // D.u[23:16] = permute({S0.u, S1.u}, S2.u[23:16]); |
| // D.u[15:8] = permute({S0.u, S1.u}, S2.u[15:8]); |
| // D.u[7:0] = permute({S0.u, S1.u}, S2.u[7:0]); |
| // byte permute(byte in[8], byte sel) { |
| // if(sel>=13) then return 0xff; |
| // elsif(sel==12) then return 0x00; |
| // elsif(sel==11) then return in[7][7] * 0xff; |
| // elsif(sel==10) then return in[5][7] * 0xff; |
| // elsif(sel==9) then return in[3][7] * 0xff; |
| // elsif(sel==8) then return in[1][7] * 0xff; |
| // else return in[sel]; |
| // } |
| void |
| Inst_VOP3__V_PERM_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| src2.readSrc(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| VecElemU64 selector = (VecElemU64)src0[lane]; |
| selector = (selector << 32) | (VecElemU64)src1[lane]; |
| vdst[lane] = 0; |
| |
| DPRINTF(GCN3, "Executing v_perm_b32 src_0 0x%08x, src_1 " |
| "0x%08x, src_2 0x%08x, vdst 0x%08x\n", src0[lane], |
| src1[lane], src2[lane], vdst[lane]); |
| DPRINTF(GCN3, "Selector: 0x%08x \n", selector); |
| |
| for (int i = 0; i < 4 ; ++i) { |
| VecElemU32 permuted_val = permute(selector, 0xFF |
| & ((VecElemU32)src2[lane] >> (8 * i))); |
| vdst[lane] |= (permuted_val << i); |
| } |
| |
| DPRINTF(GCN3, "v_perm result: 0x%08x\n", vdst[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_FMA_F16::Inst_VOP3__V_FMA_F16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_fma_f16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| setFlag(FMA); |
| } // Inst_VOP3__V_FMA_F16 |
| |
| Inst_VOP3__V_FMA_F16::~Inst_VOP3__V_FMA_F16() |
| { |
| } // ~Inst_VOP3__V_FMA_F16 |
| |
| // D.f16 = S0.f16 * S1.f16 + S2.f16. |
| // Fused half precision multiply add. |
| void |
| Inst_VOP3__V_FMA_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_DIV_FIXUP_F16::Inst_VOP3__V_DIV_FIXUP_F16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_div_fixup_f16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_DIV_FIXUP_F16 |
| |
| Inst_VOP3__V_DIV_FIXUP_F16::~Inst_VOP3__V_DIV_FIXUP_F16() |
| { |
| } // ~Inst_VOP3__V_DIV_FIXUP_F16 |
| |
| // sign_out = sign(S1.f16)^sign(S2.f16); |
| // if (S2.f16 == NAN) |
| // D.f16 = Quiet(S2.f16); |
| // else if (S1.f16 == NAN) |
| // D.f16 = Quiet(S1.f16); |
| // else if (S1.f16 == S2.f16 == 0) |
| // # 0/0 |
| // D.f16 = pele_nan(0xfe00); |
| // else if (abs(S1.f16) == abs(S2.f16) == +-INF) |
| // # inf/inf |
| // D.f16 = pele_nan(0xfe00); |
| // else if (S1.f16 ==0 || abs(S2.f16) == +-INF) |
| // # x/0, or inf/y |
| // D.f16 = sign_out ? -INF : INF; |
| // else if (abs(S1.f16) == +-INF || S2.f16 == 0) |
| // # x/inf, 0/y |
| // D.f16 = sign_out ? -0 : 0; |
| // else if ((exp(S2.f16) - exp(S1.f16)) < -150) |
| // D.f16 = sign_out ? -underflow : underflow; |
| // else if (exp(S1.f16) == 255) |
| // D.f16 = sign_out ? -overflow : overflow; |
| // else |
| // D.f16 = sign_out ? -abs(S0.f16) : abs(S0.f16). |
| // Half precision division fixup. |
| // S0 = Quotient, S1 = Denominator, S3 = Numerator. |
| // Given a numerator, denominator, and quotient from a divide, this opcode |
| // will detect and apply special case numerics, touching up the quotient if |
| // necessary. This opcode also generates invalid, denorm and divide by |
| // zero exceptions caused by the division. |
| void |
| Inst_VOP3__V_DIV_FIXUP_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CVT_PKACCUM_U8_F32::Inst_VOP3__V_CVT_PKACCUM_U8_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cvt_pkaccum_u8_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CVT_PKACCUM_U8_F32 |
| |
| Inst_VOP3__V_CVT_PKACCUM_U8_F32::~Inst_VOP3__V_CVT_PKACCUM_U8_F32() |
| { |
| } // ~Inst_VOP3__V_CVT_PKACCUM_U8_F32 |
| |
| // byte = S1.u[1:0]; bit = byte * 8; |
| // D.u[bit + 7:bit] = flt32_to_uint8(S0.f); |
| // Pack converted value of S0.f into byte S1 of the destination. |
| // SQ translates to V_CVT_PK_U8_F32. |
| // Note: this opcode uses src_c to pass destination in as a source. |
| void |
| Inst_VOP3__V_CVT_PKACCUM_U8_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_INTERP_P1_F32::Inst_VOP3__V_INTERP_P1_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_interp_p1_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_INTERP_P1_F32 |
| |
| Inst_VOP3__V_INTERP_P1_F32::~Inst_VOP3__V_INTERP_P1_F32() |
| { |
| } // ~Inst_VOP3__V_INTERP_P1_F32 |
| |
| // D.f = P10 * S.f + P0; |
| void |
| Inst_VOP3__V_INTERP_P1_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_INTERP_P2_F32::Inst_VOP3__V_INTERP_P2_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_interp_p2_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_INTERP_P2_F32 |
| |
| Inst_VOP3__V_INTERP_P2_F32::~Inst_VOP3__V_INTERP_P2_F32() |
| { |
| } // ~Inst_VOP3__V_INTERP_P2_F32 |
| |
| // D.f = P20 * S.f + D.f; |
| void |
| Inst_VOP3__V_INTERP_P2_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_INTERP_MOV_F32::Inst_VOP3__V_INTERP_MOV_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_interp_mov_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_INTERP_MOV_F32 |
| |
| Inst_VOP3__V_INTERP_MOV_F32::~Inst_VOP3__V_INTERP_MOV_F32() |
| { |
| } // ~Inst_VOP3__V_INTERP_MOV_F32 |
| |
| // D.f = {P10,P20,P0}[S.u]; parameter load. |
| void |
| Inst_VOP3__V_INTERP_MOV_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_INTERP_P1LL_F16::Inst_VOP3__V_INTERP_P1LL_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_interp_p1ll_f16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_INTERP_P1LL_F16 |
| |
| Inst_VOP3__V_INTERP_P1LL_F16::~Inst_VOP3__V_INTERP_P1LL_F16() |
| { |
| } // ~Inst_VOP3__V_INTERP_P1LL_F16 |
| |
| // D.f32 = P10.f16 * S0.f32 + P0.f16. |
| void |
| Inst_VOP3__V_INTERP_P1LL_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_INTERP_P1LV_F16::Inst_VOP3__V_INTERP_P1LV_F16( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_interp_p1lv_f16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_INTERP_P1LV_F16 |
| |
| Inst_VOP3__V_INTERP_P1LV_F16::~Inst_VOP3__V_INTERP_P1LV_F16() |
| { |
| } // ~Inst_VOP3__V_INTERP_P1LV_F16 |
| |
| void |
| Inst_VOP3__V_INTERP_P1LV_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_INTERP_P2_F16::Inst_VOP3__V_INTERP_P2_F16(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_interp_p2_f16", false) |
| { |
| setFlag(ALU); |
| setFlag(F16); |
| } // Inst_VOP3__V_INTERP_P2_F16 |
| |
| Inst_VOP3__V_INTERP_P2_F16::~Inst_VOP3__V_INTERP_P2_F16() |
| { |
| } // ~Inst_VOP3__V_INTERP_P2_F16 |
| |
| // D.f16 = P20.f16 * S0.f32 + S2.f32. |
| void |
| Inst_VOP3__V_INTERP_P2_F16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_ADD_F64::Inst_VOP3__V_ADD_F64(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_add_f64", false) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_ADD_F64 |
| |
| Inst_VOP3__V_ADD_F64::~Inst_VOP3__V_ADD_F64() |
| { |
| } // ~Inst_VOP3__V_ADD_F64 |
| |
| // D.d = S0.d + S1.d. |
| void |
| Inst_VOP3__V_ADD_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| if (std::isnan(src0[lane]) || |
| std::isnan(src1[lane]) ) { |
| vdst[lane] = NAN; |
| } else if (std::isinf(src0[lane]) && |
| std::isinf(src1[lane])) { |
| if (std::signbit(src0[lane]) != |
| std::signbit(src1[lane])) { |
| vdst[lane] = NAN; |
| } else { |
| vdst[lane] = src0[lane]; |
| } |
| } else if (std::isinf(src0[lane])) { |
| vdst[lane] = src0[lane]; |
| } else if (std::isinf(src1[lane])) { |
| vdst[lane] = src1[lane]; |
| } else if (std::fpclassify(src0[lane]) == FP_SUBNORMAL || |
| std::fpclassify(src0[lane]) == FP_ZERO) { |
| if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || |
| std::fpclassify(src1[lane]) == FP_ZERO) { |
| if (std::signbit(src0[lane]) && |
| std::signbit(src1[lane])) { |
| vdst[lane] = -0.0; |
| } else { |
| vdst[lane] = 0.0; |
| } |
| } else { |
| vdst[lane] = src1[lane]; |
| } |
| } else if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || |
| std::fpclassify(src1[lane]) == FP_ZERO) { |
| if (std::fpclassify(src0[lane]) == FP_SUBNORMAL || |
| std::fpclassify(src0[lane]) == FP_ZERO) { |
| if (std::signbit(src0[lane]) && |
| std::signbit(src1[lane])) { |
| vdst[lane] = -0.0; |
| } else { |
| vdst[lane] = 0.0; |
| } |
| } else { |
| vdst[lane] = src0[lane]; |
| } |
| } else { |
| vdst[lane] = src0[lane] + src1[lane]; |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MUL_F64::Inst_VOP3__V_MUL_F64(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_mul_f64", false) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_MUL_F64 |
| |
| Inst_VOP3__V_MUL_F64::~Inst_VOP3__V_MUL_F64() |
| { |
| } // ~Inst_VOP3__V_MUL_F64 |
| |
| // D.d = S0.d * S1.d. |
| void |
| Inst_VOP3__V_MUL_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| if (std::isnan(src0[lane]) || |
| std::isnan(src1[lane])) { |
| vdst[lane] = NAN; |
| } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || |
| std::fpclassify(src0[lane]) == FP_ZERO) && |
| !std::signbit(src0[lane])) { |
| if (std::isinf(src1[lane])) { |
| vdst[lane] = NAN; |
| } else if (!std::signbit(src1[lane])) { |
| vdst[lane] = +0.0; |
| } else { |
| vdst[lane] = -0.0; |
| } |
| } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || |
| std::fpclassify(src0[lane]) == FP_ZERO) && |
| std::signbit(src0[lane])) { |
| if (std::isinf(src1[lane])) { |
| vdst[lane] = NAN; |
| } else if (std::signbit(src1[lane])) { |
| vdst[lane] = +0.0; |
| } else { |
| vdst[lane] = -0.0; |
| } |
| } else if (std::isinf(src0[lane]) && |
| !std::signbit(src0[lane])) { |
| if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || |
| std::fpclassify(src1[lane]) == FP_ZERO) { |
| vdst[lane] = NAN; |
| } else if (!std::signbit(src1[lane])) { |
| vdst[lane] = +INFINITY; |
| } else { |
| vdst[lane] = -INFINITY; |
| } |
| } else if (std::isinf(src0[lane]) && |
| std::signbit(src0[lane])) { |
| if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || |
| std::fpclassify(src1[lane]) == FP_ZERO) { |
| vdst[lane] = NAN; |
| } else if (std::signbit(src1[lane])) { |
| vdst[lane] = +INFINITY; |
| } else { |
| vdst[lane] = -INFINITY; |
| } |
| } else { |
| vdst[lane] = src0[lane] * src1[lane]; |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MIN_F64::Inst_VOP3__V_MIN_F64(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_min_f64", false) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_MIN_F64 |
| |
| Inst_VOP3__V_MIN_F64::~Inst_VOP3__V_MIN_F64() |
| { |
| } // ~Inst_VOP3__V_MIN_F64 |
| |
| // D.d = min(S0.d, S1.d). |
| void |
| Inst_VOP3__V_MIN_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::fmin(src0[lane], src1[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MAX_F64::Inst_VOP3__V_MAX_F64(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_max_f64", false) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_MAX_F64 |
| |
| Inst_VOP3__V_MAX_F64::~Inst_VOP3__V_MAX_F64() |
| { |
| } // ~Inst_VOP3__V_MAX_F64 |
| |
| // D.d = max(S0.d, S1.d). |
| void |
| Inst_VOP3__V_MAX_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (instData.ABS & 0x2) { |
| src1.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| if (extData.NEG & 0x2) { |
| src1.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::fmax(src0[lane], src1[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_LDEXP_F64::Inst_VOP3__V_LDEXP_F64(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_ldexp_f64", false) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_LDEXP_F64 |
| |
| Inst_VOP3__V_LDEXP_F64::~Inst_VOP3__V_LDEXP_F64() |
| { |
| } // ~Inst_VOP3__V_LDEXP_F64 |
| |
| // D.d = pow(S0.d, S1.i[31:0]). |
| void |
| Inst_VOP3__V_LDEXP_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| VecOperandF64 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| if (instData.ABS & 0x1) { |
| src0.absModifier(); |
| } |
| |
| if (extData.NEG & 0x1) { |
| src0.negModifier(); |
| } |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| if (std::isnan(src0[lane]) || std::isinf(src0[lane])) { |
| vdst[lane] = src0[lane]; |
| } else if (std::fpclassify(src0[lane]) == FP_SUBNORMAL |
| || std::fpclassify(src0[lane]) == FP_ZERO) { |
| if (std::signbit(src0[lane])) { |
| vdst[lane] = -0.0; |
| } else { |
| vdst[lane] = +0.0; |
| } |
| } else { |
| vdst[lane] = std::ldexp(src0[lane], src1[lane]); |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MUL_LO_U32::Inst_VOP3__V_MUL_LO_U32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_mul_lo_u32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_MUL_LO_U32 |
| |
| Inst_VOP3__V_MUL_LO_U32::~Inst_VOP3__V_MUL_LO_U32() |
| { |
| } // ~Inst_VOP3__V_MUL_LO_U32 |
| |
| // D.u = S0.u * S1.u. |
| void |
| Inst_VOP3__V_MUL_LO_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| VecElemI64 s0 = (VecElemI64)src0[lane]; |
| VecElemI64 s1 = (VecElemI64)src1[lane]; |
| vdst[lane] = (VecElemU32)((s0 * s1) & 0xffffffffLL); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MUL_HI_U32::Inst_VOP3__V_MUL_HI_U32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_mul_hi_u32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_MUL_HI_U32 |
| |
| Inst_VOP3__V_MUL_HI_U32::~Inst_VOP3__V_MUL_HI_U32() |
| { |
| } // ~Inst_VOP3__V_MUL_HI_U32 |
| |
| // D.u = (S0.u * S1.u) >> 32. |
| void |
| Inst_VOP3__V_MUL_HI_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| VecElemI64 s0 = (VecElemI64)src0[lane]; |
| VecElemI64 s1 = (VecElemI64)src1[lane]; |
| vdst[lane] |
| = (VecElemU32)(((s0 * s1) >> 32) & 0xffffffffLL); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MUL_HI_I32::Inst_VOP3__V_MUL_HI_I32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_mul_hi_i32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_MUL_HI_I32 |
| |
| Inst_VOP3__V_MUL_HI_I32::~Inst_VOP3__V_MUL_HI_I32() |
| { |
| } // ~Inst_VOP3__V_MUL_HI_I32 |
| |
| // D.i = (S0.i * S1.i) >> 32. |
| void |
| Inst_VOP3__V_MUL_HI_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); |
| VecOperandI32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| VecElemI64 s0 = (VecElemI64)src0[lane]; |
| VecElemI64 s1 = (VecElemI64)src1[lane]; |
| vdst[lane] |
| = (VecElemI32)(((s0 * s1) >> 32LL) & 0xffffffffLL); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_LDEXP_F32::Inst_VOP3__V_LDEXP_F32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_ldexp_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_LDEXP_F32 |
| |
| Inst_VOP3__V_LDEXP_F32::~Inst_VOP3__V_LDEXP_F32() |
| { |
| } // ~Inst_VOP3__V_LDEXP_F32 |
| |
| // D.f = pow(S0.f, S1.i) |
| void |
| Inst_VOP3__V_LDEXP_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); |
| VecOperandF32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = std::ldexp(src0[lane], src1[lane]); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_READLANE_B32::Inst_VOP3__V_READLANE_B32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_readlane_b32", true) |
| { |
| setFlag(ALU); |
| setFlag(IgnoreExec); |
| } // Inst_VOP3__V_READLANE_B32 |
| |
| Inst_VOP3__V_READLANE_B32::~Inst_VOP3__V_READLANE_B32() |
| { |
| } // ~Inst_VOP3__V_READLANE_B32 |
| |
| // Copy one VGPR value to one SGPR. D = SGPR-dest, S0 = Source Data (VGPR# |
| // or M0(lds-direct)), S1 = Lane Select (SGPR or M0). Ignores exec mask. |
| // Input and output modifiers not supported; this is an untyped operation. |
| void |
| Inst_VOP3__V_READLANE_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, extData.SRC1); |
| ScalarOperandU32 sdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.read(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| sdst = src0[src1.rawData() & 0x3f]; |
| |
| sdst.write(); |
| } |
| |
| Inst_VOP3__V_WRITELANE_B32::Inst_VOP3__V_WRITELANE_B32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_writelane_b32", false) |
| { |
| setFlag(ALU); |
| setFlag(IgnoreExec); |
| } // Inst_VOP3__V_WRITELANE_B32 |
| |
| Inst_VOP3__V_WRITELANE_B32::~Inst_VOP3__V_WRITELANE_B32() |
| { |
| } // ~Inst_VOP3__V_WRITELANE_B32 |
| |
| // Write value into one VGPR in one lane. D = VGPR-dest, S0 = Source Data |
| // (sgpr, m0, exec or constants), S1 = Lane Select (SGPR or M0). Ignores |
| // exec mask. Input and output modifiers not supported; this is an untyped |
| // operation. |
| void |
| Inst_VOP3__V_WRITELANE_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| ConstScalarOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstScalarOperandU32 src1(gpuDynInst, extData.SRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.read(); |
| src1.read(); |
| vdst.read(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| vdst[src1.rawData() & 0x3f] = src0.rawData(); |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_BCNT_U32_B32::Inst_VOP3__V_BCNT_U32_B32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_bcnt_u32_b32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_BCNT_U32_B32 |
| |
| Inst_VOP3__V_BCNT_U32_B32::~Inst_VOP3__V_BCNT_U32_B32() |
| { |
| } // ~Inst_VOP3__V_BCNT_U32_B32 |
| |
| // D.u = CountOneBits(S0.u) + S1.u. Bit count. |
| void |
| Inst_VOP3__V_BCNT_U32_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = popCount(src0[lane]) + src1[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_MBCNT_LO_U32_B32::Inst_VOP3__V_MBCNT_LO_U32_B32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_mbcnt_lo_u32_b32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_MBCNT_LO_U32_B32 |
| |
| Inst_VOP3__V_MBCNT_LO_U32_B32::~Inst_VOP3__V_MBCNT_LO_U32_B32() |
| { |
| } // ~Inst_VOP3__V_MBCNT_LO_U32_B32 |
| |
| // Masked bit count, ThreadPosition is the position of this thread in the |
| // wavefront (in 0..63). |
| void |
| Inst_VOP3__V_MBCNT_LO_U32_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| uint64_t threadMask = 0; |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| threadMask = ((1LL << lane) - 1LL); |
| vdst[lane] = popCount(src0[lane] & bits(threadMask, 31, 0)) + |
| src1[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } // execute |
| // --- Inst_VOP3__V_MBCNT_HI_U32_B32 class methods --- |
| |
| Inst_VOP3__V_MBCNT_HI_U32_B32::Inst_VOP3__V_MBCNT_HI_U32_B32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_mbcnt_hi_u32_b32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_MBCNT_HI_U32_B32 |
| |
| Inst_VOP3__V_MBCNT_HI_U32_B32::~Inst_VOP3__V_MBCNT_HI_U32_B32() |
| { |
| } // ~Inst_VOP3__V_MBCNT_HI_U32_B32 |
| |
| // ThreadMask = (1 << ThreadPosition) - 1; |
| // D.u = CountOneBits(S0.u & ThreadMask[63:32]) + S1.u. |
| // Masked bit count, ThreadPosition is the position of this thread in the |
| // wavefront (in 0..63). |
| void |
| Inst_VOP3__V_MBCNT_HI_U32_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| uint64_t threadMask = 0; |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| threadMask = ((1LL << lane) - 1LL); |
| vdst[lane] = popCount(src0[lane] & bits(threadMask, 63, 32)) + |
| src1[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } // execute |
| // --- Inst_VOP3__V_LSHLREV_B64 class methods --- |
| |
| Inst_VOP3__V_LSHLREV_B64::Inst_VOP3__V_LSHLREV_B64(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_lshlrev_b64", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_LSHLREV_B64 |
| |
| Inst_VOP3__V_LSHLREV_B64::~Inst_VOP3__V_LSHLREV_B64() |
| { |
| } // ~Inst_VOP3__V_LSHLREV_B64 |
| |
| // D.u64 = S1.u64 << S0.u[5:0]. |
| void |
| Inst_VOP3__V_LSHLREV_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); |
| VecOperandU64 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src1[lane] << bits(src0[lane], 5, 0); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_LSHRREV_B64::Inst_VOP3__V_LSHRREV_B64(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_lshrrev_b64", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_LSHRREV_B64 |
| |
| Inst_VOP3__V_LSHRREV_B64::~Inst_VOP3__V_LSHRREV_B64() |
| { |
| } // ~Inst_VOP3__V_LSHRREV_B64 |
| |
| // D.u64 = S1.u64 >> S0.u[5:0]. |
| // The vacated bits are set to zero. |
| void |
| Inst_VOP3__V_LSHRREV_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); |
| VecOperandU64 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = src1[lane] >> bits(src0[lane], 5, 0); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_ASHRREV_I64::Inst_VOP3__V_ASHRREV_I64(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_ashrrev_i64", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_ASHRREV_I64 |
| |
| Inst_VOP3__V_ASHRREV_I64::~Inst_VOP3__V_ASHRREV_I64() |
| { |
| } // ~Inst_VOP3__V_ASHRREV_I64 |
| |
| // D.u64 = signext(S1.u64) >> S0.u[5:0]. |
| // The vacated bits are set to the sign bit of the input value. |
| void |
| Inst_VOP3__V_ASHRREV_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); |
| VecOperandU64 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] |
| = src1[lane] >> bits(src0[lane], 5, 0); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_TRIG_PREOP_F64::Inst_VOP3__V_TRIG_PREOP_F64(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_trig_preop_f64", false) |
| { |
| setFlag(ALU); |
| setFlag(F64); |
| } // Inst_VOP3__V_TRIG_PREOP_F64 |
| |
| Inst_VOP3__V_TRIG_PREOP_F64::~Inst_VOP3__V_TRIG_PREOP_F64() |
| { |
| } // ~Inst_VOP3__V_TRIG_PREOP_F64 |
| |
| void |
| Inst_VOP3__V_TRIG_PREOP_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_BFM_B32::Inst_VOP3__V_BFM_B32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_bfm_b32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_BFM_B32 |
| |
| Inst_VOP3__V_BFM_B32::~Inst_VOP3__V_BFM_B32() |
| { |
| } // ~Inst_VOP3__V_BFM_B32 |
| |
| // D.u = ((1 << S0.u[4:0]) - 1) << S1.u[4:0]; |
| void |
| Inst_VOP3__V_BFM_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); |
| ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); |
| VecOperandU32 vdst(gpuDynInst, instData.VDST); |
| |
| src0.readSrc(); |
| src1.readSrc(); |
| |
| /** |
| * input modifiers are supported by FP operations only |
| */ |
| assert(!(instData.ABS & 0x1)); |
| assert(!(instData.ABS & 0x2)); |
| assert(!(instData.ABS & 0x4)); |
| assert(!(extData.NEG & 0x1)); |
| assert(!(extData.NEG & 0x2)); |
| assert(!(extData.NEG & 0x4)); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| vdst[lane] = ((1 << bits(src0[lane], 4, 0)) - 1) |
| << bits(src1[lane], 4, 0); |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| Inst_VOP3__V_CVT_PKNORM_I16_F32::Inst_VOP3__V_CVT_PKNORM_I16_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cvt_pknorm_i16_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CVT_PKNORM_I16_F32 |
| |
| Inst_VOP3__V_CVT_PKNORM_I16_F32::~Inst_VOP3__V_CVT_PKNORM_I16_F32() |
| { |
| } // ~Inst_VOP3__V_CVT_PKNORM_I16_F32 |
| |
| // D = {(snorm)S1.f, (snorm)S0.f}. |
| void |
| Inst_VOP3__V_CVT_PKNORM_I16_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CVT_PKNORM_U16_F32::Inst_VOP3__V_CVT_PKNORM_U16_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cvt_pknorm_u16_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CVT_PKNORM_U16_F32 |
| |
| Inst_VOP3__V_CVT_PKNORM_U16_F32::~Inst_VOP3__V_CVT_PKNORM_U16_F32() |
| { |
| } // ~Inst_VOP3__V_CVT_PKNORM_U16_F32 |
| |
| // D = {(unorm)S1.f, (unorm)S0.f}. |
| void |
| Inst_VOP3__V_CVT_PKNORM_U16_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CVT_PKRTZ_F16_F32::Inst_VOP3__V_CVT_PKRTZ_F16_F32( |
| InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cvt_pkrtz_f16_f32", false) |
| { |
| setFlag(ALU); |
| setFlag(F32); |
| } // Inst_VOP3__V_CVT_PKRTZ_F16_F32 |
| |
| Inst_VOP3__V_CVT_PKRTZ_F16_F32::~Inst_VOP3__V_CVT_PKRTZ_F16_F32() |
| { |
| } // ~Inst_VOP3__V_CVT_PKRTZ_F16_F32 |
| |
| void |
| Inst_VOP3__V_CVT_PKRTZ_F16_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CVT_PK_U16_U32::Inst_VOP3__V_CVT_PK_U16_U32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cvt_pk_u16_u32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CVT_PK_U16_U32 |
| |
| Inst_VOP3__V_CVT_PK_U16_U32::~Inst_VOP3__V_CVT_PK_U16_U32() |
| { |
| } // ~Inst_VOP3__V_CVT_PK_U16_U32 |
| |
| // D = {uint32_to_uint16(S1.u), uint32_to_uint16(S0.u)}. |
| void |
| Inst_VOP3__V_CVT_PK_U16_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_VOP3__V_CVT_PK_I16_I32::Inst_VOP3__V_CVT_PK_I16_I32(InFmt_VOP3 *iFmt) |
| : Inst_VOP3(iFmt, "v_cvt_pk_i16_i32", false) |
| { |
| setFlag(ALU); |
| } // Inst_VOP3__V_CVT_PK_I16_I32 |
| |
| Inst_VOP3__V_CVT_PK_I16_I32::~Inst_VOP3__V_CVT_PK_I16_I32() |
| { |
| } // ~Inst_VOP3__V_CVT_PK_I16_I32 |
| |
| // D = {int32_to_int16(S1.i), int32_to_int16(S0.i)}. |
| void |
| Inst_VOP3__V_CVT_PK_I16_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_ADD_U32::Inst_DS__DS_ADD_U32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_add_u32") |
| { |
| } // Inst_DS__DS_ADD_U32 |
| |
| Inst_DS__DS_ADD_U32::~Inst_DS__DS_ADD_U32() |
| { |
| } // ~Inst_DS__DS_ADD_U32 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] += DATA; |
| // RETURN_DATA = tmp. |
| void |
| Inst_DS__DS_ADD_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_SUB_U32::Inst_DS__DS_SUB_U32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_sub_u32") |
| { |
| } // Inst_DS__DS_SUB_U32 |
| |
| Inst_DS__DS_SUB_U32::~Inst_DS__DS_SUB_U32() |
| { |
| } // ~Inst_DS__DS_SUB_U32 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] -= DATA; |
| // RETURN_DATA = tmp. |
| void |
| Inst_DS__DS_SUB_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_RSUB_U32::Inst_DS__DS_RSUB_U32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_rsub_u32") |
| { |
| } // Inst_DS__DS_RSUB_U32 |
| |
| Inst_DS__DS_RSUB_U32::~Inst_DS__DS_RSUB_U32() |
| { |
| } // ~Inst_DS__DS_RSUB_U32 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = DATA - MEM[ADDR]; |
| // RETURN_DATA = tmp. |
| // Subtraction with reversed operands. |
| void |
| Inst_DS__DS_RSUB_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_INC_U32::Inst_DS__DS_INC_U32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_inc_u32") |
| { |
| } // Inst_DS__DS_INC_U32 |
| |
| Inst_DS__DS_INC_U32::~Inst_DS__DS_INC_U32() |
| { |
| } // ~Inst_DS__DS_INC_U32 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare); |
| // RETURN_DATA = tmp. |
| void |
| Inst_DS__DS_INC_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_DEC_U32::Inst_DS__DS_DEC_U32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_dec_u32") |
| { |
| } // Inst_DS__DS_DEC_U32 |
| |
| Inst_DS__DS_DEC_U32::~Inst_DS__DS_DEC_U32() |
| { |
| } // ~Inst_DS__DS_DEC_U32 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1 |
| // (unsigned compare); RETURN_DATA = tmp. |
| void |
| Inst_DS__DS_DEC_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MIN_I32::Inst_DS__DS_MIN_I32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_min_i32") |
| { |
| } // Inst_DS__DS_MIN_I32 |
| |
| Inst_DS__DS_MIN_I32::~Inst_DS__DS_MIN_I32() |
| { |
| } // ~Inst_DS__DS_MIN_I32 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare); |
| // RETURN_DATA = tmp. |
| void |
| Inst_DS__DS_MIN_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MAX_I32::Inst_DS__DS_MAX_I32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_max_i32") |
| { |
| } // Inst_DS__DS_MAX_I32 |
| |
| Inst_DS__DS_MAX_I32::~Inst_DS__DS_MAX_I32() |
| { |
| } // ~Inst_DS__DS_MAX_I32 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare); |
| // RETURN_DATA = tmp. |
| void |
| Inst_DS__DS_MAX_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MIN_U32::Inst_DS__DS_MIN_U32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_min_u32") |
| { |
| } // Inst_DS__DS_MIN_U32 |
| |
| Inst_DS__DS_MIN_U32::~Inst_DS__DS_MIN_U32() |
| { |
| } // ~Inst_DS__DS_MIN_U32 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare); |
| // RETURN_DATA = tmp. |
| void |
| Inst_DS__DS_MIN_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MAX_U32::Inst_DS__DS_MAX_U32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_max_u32") |
| { |
| } // Inst_DS__DS_MAX_U32 |
| |
| Inst_DS__DS_MAX_U32::~Inst_DS__DS_MAX_U32() |
| { |
| } // ~Inst_DS__DS_MAX_U32 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare); |
| // RETURN_DATA = tmp. |
| void |
| Inst_DS__DS_MAX_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_AND_B32::Inst_DS__DS_AND_B32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_and_b32") |
| { |
| } // Inst_DS__DS_AND_B32 |
| |
| Inst_DS__DS_AND_B32::~Inst_DS__DS_AND_B32() |
| { |
| } // ~Inst_DS__DS_AND_B32 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] &= DATA; |
| // RETURN_DATA = tmp. |
| void |
| Inst_DS__DS_AND_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_OR_B32::Inst_DS__DS_OR_B32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_or_b32") |
| { |
| } // Inst_DS__DS_OR_B32 |
| |
| Inst_DS__DS_OR_B32::~Inst_DS__DS_OR_B32() |
| { |
| } // ~Inst_DS__DS_OR_B32 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] |= DATA; |
| // RETURN_DATA = tmp. |
| void |
| Inst_DS__DS_OR_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_XOR_B32::Inst_DS__DS_XOR_B32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_xor_b32") |
| { |
| } // Inst_DS__DS_XOR_B32 |
| |
| Inst_DS__DS_XOR_B32::~Inst_DS__DS_XOR_B32() |
| { |
| } // ~Inst_DS__DS_XOR_B32 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] ^= DATA; |
| // RETURN_DATA = tmp. |
| void |
| Inst_DS__DS_XOR_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MSKOR_B32::Inst_DS__DS_MSKOR_B32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_mskor_b32") |
| { |
| } // Inst_DS__DS_MSKOR_B32 |
| |
| Inst_DS__DS_MSKOR_B32::~Inst_DS__DS_MSKOR_B32() |
| { |
| } // ~Inst_DS__DS_MSKOR_B32 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (MEM_ADDR[ADDR] & ~DATA) | DATA2; |
| // RETURN_DATA = tmp. |
| void |
| Inst_DS__DS_MSKOR_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_WRITE_B32::Inst_DS__DS_WRITE_B32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_write_b32") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| } // Inst_DS__DS_WRITE_B32 |
| |
| Inst_DS__DS_WRITE_B32::~Inst_DS__DS_WRITE_B32() |
| { |
| } // ~Inst_DS__DS_WRITE_B32 |
| |
| // MEM[ADDR] = DATA. |
| // Write dword. |
| void |
| Inst_DS__DS_WRITE_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set( |
| gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); |
| ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); |
| ConstVecOperandU32 data(gpuDynInst, extData.DATA0); |
| |
| addr.read(); |
| data.read(); |
| |
| calcAddr(gpuDynInst, addr); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane] |
| = data[lane]; |
| } |
| } |
| |
| gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); |
| |
| wf->wrLmReqsInPipe--; |
| wf->outstandingReqsWrLm++; |
| wf->outstandingReqs++; |
| wf->validateRequestCounters(); |
| } |
| |
| void |
| Inst_DS__DS_WRITE_B32::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| Addr offset0 = instData.OFFSET0; |
| Addr offset1 = instData.OFFSET1; |
| Addr offset = (offset1 << 8) | offset0; |
| |
| initMemWrite<VecElemU32>(gpuDynInst, offset); |
| } // initiateAcc |
| |
| void |
| Inst_DS__DS_WRITE_B32::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // completeAcc |
| |
| Inst_DS__DS_WRITE2_B32::Inst_DS__DS_WRITE2_B32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_write2_b32") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| } // Inst_DS__DS_WRITE2_B32 |
| |
| Inst_DS__DS_WRITE2_B32::~Inst_DS__DS_WRITE2_B32() |
| { |
| } // ~Inst_DS__DS_WRITE2_B32 |
| |
| // MEM[ADDR_BASE + OFFSET0 * 4] = DATA; |
| // MEM[ADDR_BASE + OFFSET1 * 4] = DATA2. |
| // Write 2 dwords. |
| void |
| Inst_DS__DS_WRITE2_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set( |
| gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); |
| ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); |
| ConstVecOperandU32 data0(gpuDynInst, extData.DATA0); |
| ConstVecOperandU32 data1(gpuDynInst, extData.DATA1); |
| |
| addr.read(); |
| data0.read(); |
| data1.read(); |
| |
| calcAddr(gpuDynInst, addr); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 2] |
| = data0[lane]; |
| (reinterpret_cast<VecElemU32*>( |
| gpuDynInst->d_data))[lane * 2 + 1] = data1[lane]; |
| } |
| } |
| |
| gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); |
| |
| wf->wrLmReqsInPipe--; |
| wf->outstandingReqsWrLm++; |
| wf->outstandingReqs++; |
| wf->validateRequestCounters(); |
| } |
| |
| void |
| Inst_DS__DS_WRITE2_B32::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| Addr offset0 = instData.OFFSET0 * 4; |
| Addr offset1 = instData.OFFSET1 * 4; |
| |
| initDualMemWrite<VecElemU32>(gpuDynInst, offset0, offset1); |
| } |
| |
| void |
| Inst_DS__DS_WRITE2_B32::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_DS__DS_WRITE2ST64_B32::Inst_DS__DS_WRITE2ST64_B32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_write2st64_b32") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| } // Inst_DS__DS_WRITE2ST64_B32 |
| |
| Inst_DS__DS_WRITE2ST64_B32::~Inst_DS__DS_WRITE2ST64_B32() |
| { |
| } // ~Inst_DS__DS_WRITE2ST64_B32 |
| |
| // MEM[ADDR_BASE + OFFSET0 * 4 * 64] = DATA; |
| // MEM[ADDR_BASE + OFFSET1 * 4 * 64] = DATA2; |
| // Write 2 dwords. |
| void |
| Inst_DS__DS_WRITE2ST64_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set( |
| gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); |
| ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); |
| ConstVecOperandU32 data0(gpuDynInst, extData.DATA0); |
| ConstVecOperandU32 data1(gpuDynInst, extData.DATA1); |
| |
| addr.read(); |
| data0.read(); |
| data1.read(); |
| |
| calcAddr(gpuDynInst, addr); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 2] |
| = data0[lane]; |
| (reinterpret_cast<VecElemU32*>( |
| gpuDynInst->d_data))[lane * 2 + 1] = data1[lane]; |
| } |
| } |
| |
| gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); |
| |
| wf->wrLmReqsInPipe--; |
| wf->outstandingReqsWrLm++; |
| wf->outstandingReqs++; |
| wf->validateRequestCounters(); |
| } // execute |
| |
| void |
| Inst_DS__DS_WRITE2ST64_B32::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| Addr offset0 = instData.OFFSET0 * 4 * 64; |
| Addr offset1 = instData.OFFSET1 * 4 * 64; |
| |
| initDualMemWrite<VecElemU32>(gpuDynInst, offset0, offset1); |
| } |
| |
| void |
| Inst_DS__DS_WRITE2ST64_B32::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| // --- Inst_DS__DS_CMPST_B32 class methods --- |
| |
| Inst_DS__DS_CMPST_B32::Inst_DS__DS_CMPST_B32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_cmpst_b32") |
| { |
| } // Inst_DS__DS_CMPST_B32 |
| |
| Inst_DS__DS_CMPST_B32::~Inst_DS__DS_CMPST_B32() |
| { |
| } // ~Inst_DS__DS_CMPST_B32 |
| |
| // tmp = MEM[ADDR]; |
| // src = DATA2; |
| // cmp = DATA; |
| // MEM[ADDR] = (tmp == cmp) ? src : tmp; |
| // RETURN_DATA[0] = tmp. |
| // Compare and store. |
| void |
| Inst_DS__DS_CMPST_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_CMPST_F32::Inst_DS__DS_CMPST_F32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_cmpst_f32") |
| { |
| setFlag(F32); |
| } // Inst_DS__DS_CMPST_F32 |
| |
| Inst_DS__DS_CMPST_F32::~Inst_DS__DS_CMPST_F32() |
| { |
| } // ~Inst_DS__DS_CMPST_F32 |
| |
| // tmp = MEM[ADDR]; |
| // src = DATA2; |
| // cmp = DATA; |
| // MEM[ADDR] = (tmp == cmp) ? src : tmp; |
| // RETURN_DATA[0] = tmp. |
| void |
| Inst_DS__DS_CMPST_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MIN_F32::Inst_DS__DS_MIN_F32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_min_f32") |
| { |
| setFlag(F32); |
| } // Inst_DS__DS_MIN_F32 |
| |
| Inst_DS__DS_MIN_F32::~Inst_DS__DS_MIN_F32() |
| { |
| } // ~Inst_DS__DS_MIN_F32 |
| |
| // tmp = MEM[ADDR]; |
| // src = DATA; |
| // cmp = DATA2; |
| // MEM[ADDR] = (cmp < tmp) ? src : tmp. |
| void |
| Inst_DS__DS_MIN_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MAX_F32::Inst_DS__DS_MAX_F32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_max_f32") |
| { |
| setFlag(F32); |
| } // Inst_DS__DS_MAX_F32 |
| |
| Inst_DS__DS_MAX_F32::~Inst_DS__DS_MAX_F32() |
| { |
| } // ~Inst_DS__DS_MAX_F32 |
| |
| // tmp = MEM[ADDR]; |
| // src = DATA; |
| // cmp = DATA2; |
| // MEM[ADDR] = (tmp > cmp) ? src : tmp. |
| void |
| Inst_DS__DS_MAX_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_NOP::Inst_DS__DS_NOP(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_nop") |
| { |
| setFlag(Nop); |
| } // Inst_DS__DS_NOP |
| |
| Inst_DS__DS_NOP::~Inst_DS__DS_NOP() |
| { |
| } // ~Inst_DS__DS_NOP |
| |
| // Do nothing. |
| void |
| Inst_DS__DS_NOP::execute(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_DS__DS_ADD_F32::Inst_DS__DS_ADD_F32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_add_f32") |
| { |
| setFlag(F32); |
| } // Inst_DS__DS_ADD_F32 |
| |
| Inst_DS__DS_ADD_F32::~Inst_DS__DS_ADD_F32() |
| { |
| } // ~Inst_DS__DS_ADD_F32 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] += DATA; |
| // RETURN_DATA = tmp. |
| void |
| Inst_DS__DS_ADD_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_WRITE_B8::Inst_DS__DS_WRITE_B8(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_write_b8") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| } // Inst_DS__DS_WRITE_B8 |
| |
| Inst_DS__DS_WRITE_B8::~Inst_DS__DS_WRITE_B8() |
| { |
| } // ~Inst_DS__DS_WRITE_B8 |
| |
| // MEM[ADDR] = DATA[7:0]. |
| void |
| Inst_DS__DS_WRITE_B8::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set( |
| gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); |
| ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); |
| ConstVecOperandU8 data(gpuDynInst, extData.DATA0); |
| |
| addr.read(); |
| data.read(); |
| |
| calcAddr(gpuDynInst, addr); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| (reinterpret_cast<VecElemU8*>(gpuDynInst->d_data))[lane] |
| = data[lane]; |
| } |
| } |
| |
| gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); |
| |
| wf->wrLmReqsInPipe--; |
| wf->outstandingReqsWrLm++; |
| wf->outstandingReqs++; |
| wf->validateRequestCounters(); |
| } // execute |
| |
| void |
| Inst_DS__DS_WRITE_B8::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| Addr offset0 = instData.OFFSET0; |
| Addr offset1 = instData.OFFSET1; |
| Addr offset = (offset1 << 8) | offset0; |
| |
| initMemWrite<VecElemU8>(gpuDynInst, offset); |
| } // initiateAcc |
| |
| void |
| Inst_DS__DS_WRITE_B8::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // completeAcc |
| // --- Inst_DS__DS_WRITE_B16 class methods --- |
| |
| Inst_DS__DS_WRITE_B16::Inst_DS__DS_WRITE_B16(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_write_b16") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| } // Inst_DS__DS_WRITE_B16 |
| |
| Inst_DS__DS_WRITE_B16::~Inst_DS__DS_WRITE_B16() |
| { |
| } // ~Inst_DS__DS_WRITE_B16 |
| |
| // MEM[ADDR] = DATA[15:0] |
| void |
| Inst_DS__DS_WRITE_B16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set( |
| gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); |
| ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); |
| ConstVecOperandU16 data(gpuDynInst, extData.DATA0); |
| |
| addr.read(); |
| data.read(); |
| |
| calcAddr(gpuDynInst, addr); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| (reinterpret_cast<VecElemU16*>(gpuDynInst->d_data))[lane] |
| = data[lane]; |
| } |
| } |
| |
| gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); |
| |
| wf->wrLmReqsInPipe--; |
| wf->outstandingReqsWrLm++; |
| wf->outstandingReqs++; |
| wf->validateRequestCounters(); |
| } // execute |
| |
| void |
| Inst_DS__DS_WRITE_B16::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| Addr offset0 = instData.OFFSET0; |
| Addr offset1 = instData.OFFSET1; |
| Addr offset = (offset1 << 8) | offset0; |
| |
| initMemWrite<VecElemU16>(gpuDynInst, offset); |
| } // initiateAcc |
| |
| void |
| Inst_DS__DS_WRITE_B16::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // completeAcc |
| // --- Inst_DS__DS_ADD_RTN_U32 class methods --- |
| |
| Inst_DS__DS_ADD_RTN_U32::Inst_DS__DS_ADD_RTN_U32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_add_rtn_u32") |
| { |
| } // Inst_DS__DS_ADD_RTN_U32 |
| |
| Inst_DS__DS_ADD_RTN_U32::~Inst_DS__DS_ADD_RTN_U32() |
| { |
| } // ~Inst_DS__DS_ADD_RTN_U32 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] += DATA; |
| // RETURN_DATA = tmp. |
| void |
| Inst_DS__DS_ADD_RTN_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_SUB_RTN_U32::Inst_DS__DS_SUB_RTN_U32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_sub_rtn_u32") |
| { |
| } // Inst_DS__DS_SUB_RTN_U32 |
| |
| Inst_DS__DS_SUB_RTN_U32::~Inst_DS__DS_SUB_RTN_U32() |
| { |
| } // ~Inst_DS__DS_SUB_RTN_U32 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] -= DATA; |
| // RETURN_DATA = tmp. |
| void |
| Inst_DS__DS_SUB_RTN_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_RSUB_RTN_U32::Inst_DS__DS_RSUB_RTN_U32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_rsub_rtn_u32") |
| { |
| } // Inst_DS__DS_RSUB_RTN_U32 |
| |
| Inst_DS__DS_RSUB_RTN_U32::~Inst_DS__DS_RSUB_RTN_U32() |
| { |
| } // ~Inst_DS__DS_RSUB_RTN_U32 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = DATA - MEM[ADDR]; |
| // RETURN_DATA = tmp. |
| void |
| Inst_DS__DS_RSUB_RTN_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_INC_RTN_U32::Inst_DS__DS_INC_RTN_U32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_inc_rtn_u32") |
| { |
| } // Inst_DS__DS_INC_RTN_U32 |
| |
| Inst_DS__DS_INC_RTN_U32::~Inst_DS__DS_INC_RTN_U32() |
| { |
| } // ~Inst_DS__DS_INC_RTN_U32 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare); |
| // RETURN_DATA = tmp. |
| void |
| Inst_DS__DS_INC_RTN_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_DEC_RTN_U32::Inst_DS__DS_DEC_RTN_U32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_dec_rtn_u32") |
| { |
| } // Inst_DS__DS_DEC_RTN_U32 |
| |
| Inst_DS__DS_DEC_RTN_U32::~Inst_DS__DS_DEC_RTN_U32() |
| { |
| } // ~Inst_DS__DS_DEC_RTN_U32 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1 |
| // (unsigned compare); RETURN_DATA = tmp. |
| void |
| Inst_DS__DS_DEC_RTN_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MIN_RTN_I32::Inst_DS__DS_MIN_RTN_I32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_min_rtn_i32") |
| { |
| } // Inst_DS__DS_MIN_RTN_I32 |
| |
| Inst_DS__DS_MIN_RTN_I32::~Inst_DS__DS_MIN_RTN_I32() |
| { |
| } // ~Inst_DS__DS_MIN_RTN_I32 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare); |
| // RETURN_DATA = tmp. |
| void |
| Inst_DS__DS_MIN_RTN_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MAX_RTN_I32::Inst_DS__DS_MAX_RTN_I32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_max_rtn_i32") |
| { |
| } // Inst_DS__DS_MAX_RTN_I32 |
| |
| Inst_DS__DS_MAX_RTN_I32::~Inst_DS__DS_MAX_RTN_I32() |
| { |
| } // ~Inst_DS__DS_MAX_RTN_I32 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare); |
| // RETURN_DATA = tmp. |
| void |
| Inst_DS__DS_MAX_RTN_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MIN_RTN_U32::Inst_DS__DS_MIN_RTN_U32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_min_rtn_u32") |
| { |
| } // Inst_DS__DS_MIN_RTN_U32 |
| |
| Inst_DS__DS_MIN_RTN_U32::~Inst_DS__DS_MIN_RTN_U32() |
| { |
| } // ~Inst_DS__DS_MIN_RTN_U32 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare); |
| // RETURN_DATA = tmp. |
| void |
| Inst_DS__DS_MIN_RTN_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MAX_RTN_U32::Inst_DS__DS_MAX_RTN_U32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_max_rtn_u32") |
| { |
| } // Inst_DS__DS_MAX_RTN_U32 |
| |
| Inst_DS__DS_MAX_RTN_U32::~Inst_DS__DS_MAX_RTN_U32() |
| { |
| } // ~Inst_DS__DS_MAX_RTN_U32 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare); |
| // RETURN_DATA = tmp. |
| void |
| Inst_DS__DS_MAX_RTN_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_AND_RTN_B32::Inst_DS__DS_AND_RTN_B32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_and_rtn_b32") |
| { |
| } // Inst_DS__DS_AND_RTN_B32 |
| |
| Inst_DS__DS_AND_RTN_B32::~Inst_DS__DS_AND_RTN_B32() |
| { |
| } // ~Inst_DS__DS_AND_RTN_B32 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] &= DATA; |
| // RETURN_DATA = tmp. |
| void |
| Inst_DS__DS_AND_RTN_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_OR_RTN_B32::Inst_DS__DS_OR_RTN_B32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_or_rtn_b32") |
| { |
| } // Inst_DS__DS_OR_RTN_B32 |
| |
| Inst_DS__DS_OR_RTN_B32::~Inst_DS__DS_OR_RTN_B32() |
| { |
| } // ~Inst_DS__DS_OR_RTN_B32 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] |= DATA; |
| // RETURN_DATA = tmp. |
| void |
| Inst_DS__DS_OR_RTN_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_XOR_RTN_B32::Inst_DS__DS_XOR_RTN_B32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_xor_rtn_b32") |
| { |
| } // Inst_DS__DS_XOR_RTN_B32 |
| |
| Inst_DS__DS_XOR_RTN_B32::~Inst_DS__DS_XOR_RTN_B32() |
| { |
| } // ~Inst_DS__DS_XOR_RTN_B32 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] ^= DATA; |
| // RETURN_DATA = tmp. |
| void |
| Inst_DS__DS_XOR_RTN_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MSKOR_RTN_B32::Inst_DS__DS_MSKOR_RTN_B32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_mskor_rtn_b32") |
| { |
| } // Inst_DS__DS_MSKOR_RTN_B32 |
| |
| Inst_DS__DS_MSKOR_RTN_B32::~Inst_DS__DS_MSKOR_RTN_B32() |
| { |
| } // ~Inst_DS__DS_MSKOR_RTN_B32 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (MEM_ADDR[ADDR] & ~DATA) | DATA2; |
| // RETURN_DATA = tmp. |
| void |
| Inst_DS__DS_MSKOR_RTN_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_WRXCHG_RTN_B32::Inst_DS__DS_WRXCHG_RTN_B32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_wrxchg_rtn_b32") |
| { |
| } // Inst_DS__DS_WRXCHG_RTN_B32 |
| |
| Inst_DS__DS_WRXCHG_RTN_B32::~Inst_DS__DS_WRXCHG_RTN_B32() |
| { |
| } // ~Inst_DS__DS_WRXCHG_RTN_B32 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = DATA; |
| // RETURN_DATA = tmp. |
| // Write-exchange operation. |
| void |
| Inst_DS__DS_WRXCHG_RTN_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_WRXCHG2_RTN_B32::Inst_DS__DS_WRXCHG2_RTN_B32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_wrxchg2_rtn_b32") |
| { |
| } // Inst_DS__DS_WRXCHG2_RTN_B32 |
| |
| Inst_DS__DS_WRXCHG2_RTN_B32::~Inst_DS__DS_WRXCHG2_RTN_B32() |
| { |
| } // ~Inst_DS__DS_WRXCHG2_RTN_B32 |
| |
| // Write-exchange 2 separate dwords. |
| void |
| Inst_DS__DS_WRXCHG2_RTN_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_WRXCHG2ST64_RTN_B32::Inst_DS__DS_WRXCHG2ST64_RTN_B32( |
| InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_wrxchg2st64_rtn_b32") |
| { |
| } // Inst_DS__DS_WRXCHG2ST64_RTN_B32 |
| |
| Inst_DS__DS_WRXCHG2ST64_RTN_B32::~Inst_DS__DS_WRXCHG2ST64_RTN_B32() |
| { |
| } // ~Inst_DS__DS_WRXCHG2ST64_RTN_B32 |
| |
| // Write-exchange 2 separate dwords with a stride of 64 dwords. |
| void |
| Inst_DS__DS_WRXCHG2ST64_RTN_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_CMPST_RTN_B32::Inst_DS__DS_CMPST_RTN_B32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_cmpst_rtn_b32") |
| { |
| } // Inst_DS__DS_CMPST_RTN_B32 |
| |
| Inst_DS__DS_CMPST_RTN_B32::~Inst_DS__DS_CMPST_RTN_B32() |
| { |
| } // ~Inst_DS__DS_CMPST_RTN_B32 |
| |
| // tmp = MEM[ADDR]; |
| // src = DATA2; |
| // cmp = DATA; |
| // MEM[ADDR] = (tmp == cmp) ? src : tmp; |
| // RETURN_DATA[0] = tmp. |
| // Compare and store. |
| void |
| Inst_DS__DS_CMPST_RTN_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_CMPST_RTN_F32::Inst_DS__DS_CMPST_RTN_F32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_cmpst_rtn_f32") |
| { |
| setFlag(F32); |
| } // Inst_DS__DS_CMPST_RTN_F32 |
| |
| Inst_DS__DS_CMPST_RTN_F32::~Inst_DS__DS_CMPST_RTN_F32() |
| { |
| } // ~Inst_DS__DS_CMPST_RTN_F32 |
| |
| // tmp = MEM[ADDR]; |
| // src = DATA2; |
| // cmp = DATA; |
| // MEM[ADDR] = (tmp == cmp) ? src : tmp; |
| // RETURN_DATA[0] = tmp. |
| void |
| Inst_DS__DS_CMPST_RTN_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MIN_RTN_F32::Inst_DS__DS_MIN_RTN_F32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_min_rtn_f32") |
| { |
| setFlag(F32); |
| } // Inst_DS__DS_MIN_RTN_F32 |
| |
| Inst_DS__DS_MIN_RTN_F32::~Inst_DS__DS_MIN_RTN_F32() |
| { |
| } // ~Inst_DS__DS_MIN_RTN_F32 |
| |
| // tmp = MEM[ADDR]; |
| // src = DATA; |
| // cmp = DATA2; |
| // MEM[ADDR] = (cmp < tmp) ? src : tmp. |
| void |
| Inst_DS__DS_MIN_RTN_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MAX_RTN_F32::Inst_DS__DS_MAX_RTN_F32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_max_rtn_f32") |
| { |
| setFlag(F32); |
| } // Inst_DS__DS_MAX_RTN_F32 |
| |
| Inst_DS__DS_MAX_RTN_F32::~Inst_DS__DS_MAX_RTN_F32() |
| { |
| } // ~Inst_DS__DS_MAX_RTN_F32 |
| |
| // tmp = MEM[ADDR]; |
| // src = DATA; |
| // cmp = DATA2; |
| // MEM[ADDR] = (tmp > cmp) ? src : tmp. |
| void |
| Inst_DS__DS_MAX_RTN_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_WRAP_RTN_B32::Inst_DS__DS_WRAP_RTN_B32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_wrap_rtn_b32") |
| { |
| } // Inst_DS__DS_WRAP_RTN_B32 |
| |
| Inst_DS__DS_WRAP_RTN_B32::~Inst_DS__DS_WRAP_RTN_B32() |
| { |
| } // ~Inst_DS__DS_WRAP_RTN_B32 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (tmp >= DATA) ? tmp - DATA : tmp + DATA2; |
| // RETURN_DATA = tmp. |
| void |
| Inst_DS__DS_WRAP_RTN_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_ADD_RTN_F32::Inst_DS__DS_ADD_RTN_F32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_add_rtn_f32") |
| { |
| setFlag(F32); |
| } // Inst_DS__DS_ADD_RTN_F32 |
| |
| Inst_DS__DS_ADD_RTN_F32::~Inst_DS__DS_ADD_RTN_F32() |
| { |
| } // ~Inst_DS__DS_ADD_RTN_F32 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] += DATA; |
| // RETURN_DATA = tmp. |
| void |
| Inst_DS__DS_ADD_RTN_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_DS__DS_READ_B32::Inst_DS__DS_READ_B32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_read_b32") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| } // Inst_DS__DS_READ_B32 |
| |
| Inst_DS__DS_READ_B32::~Inst_DS__DS_READ_B32() |
| { |
| } // ~Inst_DS__DS_READ_B32 |
| |
| // RETURN_DATA = MEM[ADDR]. |
| // Dword read. |
| void |
| Inst_DS__DS_READ_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set( |
| gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); |
| ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); |
| |
| addr.read(); |
| |
| calcAddr(gpuDynInst, addr); |
| |
| gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); |
| |
| wf->rdLmReqsInPipe--; |
| wf->outstandingReqsRdLm++; |
| wf->outstandingReqs++; |
| wf->validateRequestCounters(); |
| } |
| |
| void |
| Inst_DS__DS_READ_B32::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| Addr offset0 = instData.OFFSET0; |
| Addr offset1 = instData.OFFSET1; |
| Addr offset = (offset1 << 8) | offset0; |
| |
| initMemRead<VecElemU32>(gpuDynInst, offset); |
| } // initiateAcc |
| |
| void |
| Inst_DS__DS_READ_B32::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| VecOperandU32 vdst(gpuDynInst, extData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| vdst[lane] = (reinterpret_cast<VecElemU32*>( |
| gpuDynInst->d_data))[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } // completeAcc |
| |
| Inst_DS__DS_READ2_B32::Inst_DS__DS_READ2_B32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_read2_b32") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| } // Inst_DS__DS_READ2_B32 |
| |
| Inst_DS__DS_READ2_B32::~Inst_DS__DS_READ2_B32() |
| { |
| } // ~Inst_DS__DS_READ2_B32 |
| |
| // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 4]; |
| // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 4]. |
| // Read 2 dwords. |
| void |
| Inst_DS__DS_READ2_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set( |
| gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); |
| ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); |
| |
| addr.read(); |
| |
| calcAddr(gpuDynInst, addr); |
| |
| gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); |
| |
| wf->rdLmReqsInPipe--; |
| wf->outstandingReqsRdLm++; |
| wf->outstandingReqs++; |
| wf->validateRequestCounters(); |
| } |
| |
| void |
| Inst_DS__DS_READ2_B32::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| Addr offset0 = instData.OFFSET0 * 4; |
| Addr offset1 = instData.OFFSET1 * 4; |
| |
| initDualMemRead<VecElemU32>(gpuDynInst, offset0, offset1); |
| } // initiateAcc |
| |
| void |
| Inst_DS__DS_READ2_B32::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| VecOperandU32 vdst0(gpuDynInst, extData.VDST); |
| VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| vdst0[lane] = (reinterpret_cast<VecElemU32*>( |
| gpuDynInst->d_data))[lane * 2]; |
| vdst1[lane] = (reinterpret_cast<VecElemU32*>( |
| gpuDynInst->d_data))[lane * 2 + 1]; |
| } |
| } |
| |
| vdst0.write(); |
| vdst1.write(); |
| } // completeAcc |
| |
| Inst_DS__DS_READ2ST64_B32::Inst_DS__DS_READ2ST64_B32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_read2st64_b32") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| } // Inst_DS__DS_READ2ST64_B32 |
| |
| Inst_DS__DS_READ2ST64_B32::~Inst_DS__DS_READ2ST64_B32() |
| { |
| } // ~Inst_DS__DS_READ2ST64_B32 |
| |
| // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 4 * 64]; |
| // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 4 * 64]. |
| // Read 2 dwords. |
| void |
| Inst_DS__DS_READ2ST64_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set( |
| gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); |
| ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); |
| |
| addr.read(); |
| |
| calcAddr(gpuDynInst, addr); |
| |
| gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); |
| |
| wf->rdLmReqsInPipe--; |
| wf->outstandingReqsRdLm++; |
| wf->outstandingReqs++; |
| wf->validateRequestCounters(); |
| } // execute |
| |
| void |
| Inst_DS__DS_READ2ST64_B32::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| Addr offset0 = (instData.OFFSET0 * 4 * 64); |
| Addr offset1 = (instData.OFFSET1 * 4 * 64); |
| |
| initDualMemRead<VecElemU32>(gpuDynInst, offset0, offset1); |
| } |
| |
| void |
| Inst_DS__DS_READ2ST64_B32::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| VecOperandU32 vdst0(gpuDynInst, extData.VDST); |
| VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| vdst0[lane] = (reinterpret_cast<VecElemU64*>( |
| gpuDynInst->d_data))[lane * 2]; |
| vdst1[lane] = (reinterpret_cast<VecElemU64*>( |
| gpuDynInst->d_data))[lane * 2 + 1]; |
| } |
| } |
| |
| vdst0.write(); |
| vdst1.write(); |
| } |
| // --- Inst_DS__DS_READ_I8 class methods --- |
| |
| Inst_DS__DS_READ_I8::Inst_DS__DS_READ_I8(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_read_i8") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| } // Inst_DS__DS_READ_I8 |
| |
| Inst_DS__DS_READ_I8::~Inst_DS__DS_READ_I8() |
| { |
| } // ~Inst_DS__DS_READ_I8 |
| |
| // RETURN_DATA = signext(MEM[ADDR][7:0]). |
| // Signed byte read. |
| void |
| Inst_DS__DS_READ_I8::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_READ_U8::Inst_DS__DS_READ_U8(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_read_u8") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| } // Inst_DS__DS_READ_U8 |
| |
| Inst_DS__DS_READ_U8::~Inst_DS__DS_READ_U8() |
| { |
| } // ~Inst_DS__DS_READ_U8 |
| |
| // RETURN_DATA = {24'h0,MEM[ADDR][7:0]}. |
| // Unsigned byte read. |
| void |
| Inst_DS__DS_READ_U8::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set( |
| gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); |
| ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); |
| |
| addr.read(); |
| |
| calcAddr(gpuDynInst, addr); |
| |
| gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); |
| |
| wf->rdLmReqsInPipe--; |
| wf->outstandingReqsRdLm++; |
| wf->outstandingReqs++; |
| wf->validateRequestCounters(); |
| } // execute |
| |
| void |
| Inst_DS__DS_READ_U8::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| Addr offset0 = instData.OFFSET0; |
| Addr offset1 = instData.OFFSET1; |
| Addr offset = (offset1 << 8) | offset0; |
| |
| initMemRead<VecElemU8>(gpuDynInst, offset); |
| } // initiateAcc |
| |
| void |
| Inst_DS__DS_READ_U8::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| VecOperandU32 vdst(gpuDynInst, extData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| vdst[lane] = (VecElemU32)(reinterpret_cast<VecElemU8*>( |
| gpuDynInst->d_data))[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } // completeAcc |
| // --- Inst_DS__DS_READ_I16 class methods --- |
| |
| Inst_DS__DS_READ_I16::Inst_DS__DS_READ_I16(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_read_i16") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| } // Inst_DS__DS_READ_I16 |
| |
| Inst_DS__DS_READ_I16::~Inst_DS__DS_READ_I16() |
| { |
| } // ~Inst_DS__DS_READ_I16 |
| |
| // RETURN_DATA = signext(MEM[ADDR][15:0]). |
| // Signed short read. |
| void |
| Inst_DS__DS_READ_I16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_READ_U16::Inst_DS__DS_READ_U16(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_read_u16") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| } // Inst_DS__DS_READ_U16 |
| |
| Inst_DS__DS_READ_U16::~Inst_DS__DS_READ_U16() |
| { |
| } // ~Inst_DS__DS_READ_U16 |
| |
| // RETURN_DATA = {16'h0,MEM[ADDR][15:0]}. |
| // Unsigned short read. |
| void |
| Inst_DS__DS_READ_U16::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set( |
| gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); |
| ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); |
| |
| addr.read(); |
| |
| calcAddr(gpuDynInst, addr); |
| |
| gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); |
| |
| wf->rdLmReqsInPipe--; |
| wf->outstandingReqsRdLm++; |
| wf->outstandingReqs++; |
| wf->validateRequestCounters(); |
| } // execute |
| void |
| Inst_DS__DS_READ_U16::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| Addr offset0 = instData.OFFSET0; |
| Addr offset1 = instData.OFFSET1; |
| Addr offset = (offset1 << 8) | offset0; |
| |
| initMemRead<VecElemU16>(gpuDynInst, offset); |
| } // initiateAcc |
| |
| void |
| Inst_DS__DS_READ_U16::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| VecOperandU32 vdst(gpuDynInst, extData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| vdst[lane] = (VecElemU32)(reinterpret_cast<VecElemU16*>( |
| gpuDynInst->d_data))[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } // completeAcc |
| // --- Inst_DS__DS_SWIZZLE_B32 class methods --- |
| |
| Inst_DS__DS_SWIZZLE_B32::Inst_DS__DS_SWIZZLE_B32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_swizzle_b32") |
| { |
| setFlag(Load); |
| } // Inst_DS__DS_SWIZZLE_B32 |
| |
| Inst_DS__DS_SWIZZLE_B32::~Inst_DS__DS_SWIZZLE_B32() |
| { |
| } // ~Inst_DS__DS_SWIZZLE_B32 |
| |
| // RETURN_DATA = swizzle(vgpr_data, offset1:offset0). |
| // Dword swizzle, no data is written to LDS memory; |
| void |
| Inst_DS__DS_SWIZZLE_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| wf->rdLmReqsInPipe--; |
| wf->validateRequestCounters(); |
| |
| if (gpuDynInst->exec_mask.none()) { |
| return; |
| } |
| |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit() |
| ->cyclesToTicks(Cycles(24))); |
| |
| ConstVecOperandU32 data(gpuDynInst, extData.DATA0); |
| VecOperandU32 vdst(gpuDynInst, extData.VDST); |
| /** |
| * The "DS pattern" is comprised of both offset fields. That is, the |
| * swizzle pattern between lanes. Bit 15 of the DS pattern dictates |
| * which swizzle mode to use. There are two different swizzle |
| * patterns: 1) QDMode and 2) Bit-masks mode. If bit 15 is set use |
| * QDMode else use Bit-masks mode. The remaining bits dictate how to |
| * swizzle the lanes. |
| * |
| * QDMode: Chunks the lanes into 4s and swizzles among them. |
| * Bits 7:6 dictate where lane 3 (of the current chunk) |
| * gets its date, 5:4 lane 2, etc. |
| * |
| * Bit-mask: This mode breaks bits 14:0 into 3 equal-sized chunks. |
| * 14:10 is the xor_mask, 9:5 is the or_mask, and 4:0 |
| * is the and_mask. Each lane is swizzled by performing |
| * the appropriate operation using these masks. |
| */ |
| VecElemU16 ds_pattern = ((instData.OFFSET1 << 8) | instData.OFFSET0); |
| |
| data.read(); |
| |
| if (bits(ds_pattern, 15)) { |
| // QDMode |
| for (int lane = 0; lane < NumVecElemPerVecReg; lane += 4) { |
| /** |
| * This operation allows data sharing between groups |
| * of four consecutive threads. Note the increment by |
| * 4 in the for loop. |
| */ |
| if (gpuDynInst->exec_mask[lane]) { |
| int index0 = lane + bits(ds_pattern, 1, 0); |
| panic_if(index0 >= NumVecElemPerVecReg, "%s: index0 (%d) " |
| "is out of bounds.\n", gpuDynInst->disassemble(), |
| index0); |
| vdst[lane] |
| = gpuDynInst->exec_mask[index0] ? data[index0]: 0; |
| } |
| if (gpuDynInst->exec_mask[lane + 1]) { |
| int index1 = lane + bits(ds_pattern, 3, 2); |
| panic_if(index1 >= NumVecElemPerVecReg, "%s: index1 (%d) " |
| "is out of bounds.\n", gpuDynInst->disassemble(), |
| index1); |
| vdst[lane + 1] |
| = gpuDynInst->exec_mask[index1] ? data[index1]: 0; |
| } |
| if (gpuDynInst->exec_mask[lane + 2]) { |
| int index2 = lane + bits(ds_pattern, 5, 4); |
| panic_if(index2 >= NumVecElemPerVecReg, "%s: index2 (%d) " |
| "is out of bounds.\n", gpuDynInst->disassemble(), |
| index2); |
| vdst[lane + 2] |
| = gpuDynInst->exec_mask[index2] ? data[index2]: 0; |
| } |
| if (gpuDynInst->exec_mask[lane + 3]) { |
| int index3 = lane + bits(ds_pattern, 7, 6); |
| panic_if(index3 >= NumVecElemPerVecReg, "%s: index3 (%d) " |
| "is out of bounds.\n", gpuDynInst->disassemble(), |
| index3); |
| vdst[lane + 3] |
| = gpuDynInst->exec_mask[index3] ? data[index3]: 0; |
| } |
| } |
| } else { |
| // Bit Mode |
| int and_mask = bits(ds_pattern, 4, 0); |
| int or_mask = bits(ds_pattern, 9, 5); |
| int xor_mask = bits(ds_pattern, 14, 10); |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| int index = (((lane & and_mask) | or_mask) ^ xor_mask); |
| // Adjust for the next 32 lanes. |
| if (lane > 31) { |
| index += 32; |
| } |
| panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is " |
| "out of bounds.\n", gpuDynInst->disassemble(), |
| index); |
| vdst[lane] |
| = gpuDynInst->exec_mask[index] ? data[index] : 0; |
| } |
| } |
| } |
| |
| vdst.write(); |
| } // execute |
| // --- Inst_DS__DS_PERMUTE_B32 class methods --- |
| |
| Inst_DS__DS_PERMUTE_B32::Inst_DS__DS_PERMUTE_B32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_permute_b32") |
| { |
| setFlag(MemoryRef); |
| /** |
| * While this operation doesn't actually use DS storage we classify |
| * it as a load here because it does a writeback to a VGPR, which |
| * fits in better with the LDS pipeline logic. |
| */ |
| setFlag(Load); |
| } // Inst_DS__DS_PERMUTE_B32 |
| |
| Inst_DS__DS_PERMUTE_B32::~Inst_DS__DS_PERMUTE_B32() |
| { |
| } // ~Inst_DS__DS_PERMUTE_B32 |
| |
| // Forward permute. |
| void |
| Inst_DS__DS_PERMUTE_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit() |
| ->cyclesToTicks(Cycles(24))); |
| ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); |
| ConstVecOperandU32 data(gpuDynInst, extData.DATA0); |
| VecOperandU32 vdst(gpuDynInst, extData.VDST); |
| |
| addr.read(); |
| data.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| /** |
| * One of the offset fields can be used for the index. |
| * It is assumed OFFSET0 would be used, as OFFSET1 is |
| * typically only used for DS ops that operate on two |
| * disparate pieces of data. |
| */ |
| assert(!instData.OFFSET1); |
| /** |
| * The address provided is a byte address, but VGPRs are |
| * 4 bytes, so we must divide by 4 to get the actual VGPR |
| * index. Additionally, the index is calculated modulo the |
| * WF size, 64 in this case, so we simply extract bits 7-2. |
| */ |
| int index = bits(addr[lane] + instData.OFFSET0, 7, 2); |
| panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is out " |
| "of bounds.\n", gpuDynInst->disassemble(), index); |
| /** |
| * If the shuffled index corresponds to a lane that is |
| * inactive then this instruction writes a 0 to the active |
| * lane in VDST. |
| */ |
| if (wf->execMask(index)) { |
| vdst[index] = data[lane]; |
| } else { |
| vdst[index] = 0; |
| } |
| } |
| } |
| |
| vdst.write(); |
| |
| wf->decLGKMInstsIssued(); |
| wf->rdLmReqsInPipe--; |
| wf->validateRequestCounters(); |
| } // execute |
| // --- Inst_DS__DS_BPERMUTE_B32 class methods --- |
| |
| Inst_DS__DS_BPERMUTE_B32::Inst_DS__DS_BPERMUTE_B32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_bpermute_b32") |
| { |
| setFlag(MemoryRef); |
| /** |
| * While this operation doesn't actually use DS storage we classify |
| * it as a load here because it does a writeback to a VGPR, which |
| * fits in better with the LDS pipeline logic. |
| */ |
| setFlag(Load); |
| } // Inst_DS__DS_BPERMUTE_B32 |
| |
| Inst_DS__DS_BPERMUTE_B32::~Inst_DS__DS_BPERMUTE_B32() |
| { |
| } // ~Inst_DS__DS_BPERMUTE_B32 |
| |
| // Backward permute. |
| void |
| Inst_DS__DS_BPERMUTE_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit() |
| ->cyclesToTicks(Cycles(24))); |
| ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); |
| ConstVecOperandU32 data(gpuDynInst, extData.DATA0); |
| VecOperandU32 vdst(gpuDynInst, extData.VDST); |
| |
| addr.read(); |
| data.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| /** |
| * One of the offset fields can be used for the index. |
| * It is assumed OFFSET0 would be used, as OFFSET1 is |
| * typically only used for DS ops that operate on two |
| * disparate pieces of data. |
| */ |
| assert(!instData.OFFSET1); |
| /** |
| * The address provided is a byte address, but VGPRs are |
| * 4 bytes, so we must divide by 4 to get the actual VGPR |
| * index. Additionally, the index is calculated modulo the |
| * WF size, 64 in this case, so we simply extract bits 7-2. |
| */ |
| int index = bits(addr[lane] + instData.OFFSET0, 7, 2); |
| panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is out " |
| "of bounds.\n", gpuDynInst->disassemble(), index); |
| /** |
| * If the shuffled index corresponds to a lane that is |
| * inactive then this instruction writes a 0 to the active |
| * lane in VDST. |
| */ |
| if (wf->execMask(index)) { |
| vdst[lane] = data[index]; |
| } else { |
| vdst[lane] = 0; |
| } |
| } |
| } |
| |
| vdst.write(); |
| |
| wf->decLGKMInstsIssued(); |
| wf->rdLmReqsInPipe--; |
| wf->validateRequestCounters(); |
| } // execute |
| |
| // --- Inst_DS__DS_ADD_U64 class methods --- |
| |
| Inst_DS__DS_ADD_U64::Inst_DS__DS_ADD_U64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_add_u64") |
| { |
| } // Inst_DS__DS_ADD_U64 |
| |
| Inst_DS__DS_ADD_U64::~Inst_DS__DS_ADD_U64() |
| { |
| } // ~Inst_DS__DS_ADD_U64 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] += DATA[0:1]; |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_DS__DS_ADD_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_SUB_U64::Inst_DS__DS_SUB_U64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_sub_u64") |
| { |
| } // Inst_DS__DS_SUB_U64 |
| |
| Inst_DS__DS_SUB_U64::~Inst_DS__DS_SUB_U64() |
| { |
| } // ~Inst_DS__DS_SUB_U64 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] -= DATA[0:1]; |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_DS__DS_SUB_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_RSUB_U64::Inst_DS__DS_RSUB_U64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_rsub_u64") |
| { |
| } // Inst_DS__DS_RSUB_U64 |
| |
| Inst_DS__DS_RSUB_U64::~Inst_DS__DS_RSUB_U64() |
| { |
| } // ~Inst_DS__DS_RSUB_U64 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = DATA - MEM[ADDR]; |
| // RETURN_DATA = tmp. |
| // Subtraction with reversed operands. |
| void |
| Inst_DS__DS_RSUB_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_INC_U64::Inst_DS__DS_INC_U64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_inc_u64") |
| { |
| } // Inst_DS__DS_INC_U64 |
| |
| Inst_DS__DS_INC_U64::~Inst_DS__DS_INC_U64() |
| { |
| } // ~Inst_DS__DS_INC_U64 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare); |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_DS__DS_INC_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_DEC_U64::Inst_DS__DS_DEC_U64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_dec_u64") |
| { |
| } // Inst_DS__DS_DEC_U64 |
| |
| Inst_DS__DS_DEC_U64::~Inst_DS__DS_DEC_U64() |
| { |
| } // ~Inst_DS__DS_DEC_U64 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1 |
| // (unsigned compare); |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_DS__DS_DEC_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MIN_I64::Inst_DS__DS_MIN_I64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_min_i64") |
| { |
| } // Inst_DS__DS_MIN_I64 |
| |
| Inst_DS__DS_MIN_I64::~Inst_DS__DS_MIN_I64() |
| { |
| } // ~Inst_DS__DS_MIN_I64 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare); |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_DS__DS_MIN_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MAX_I64::Inst_DS__DS_MAX_I64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_max_i64") |
| { |
| } // Inst_DS__DS_MAX_I64 |
| |
| Inst_DS__DS_MAX_I64::~Inst_DS__DS_MAX_I64() |
| { |
| } // ~Inst_DS__DS_MAX_I64 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare); |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_DS__DS_MAX_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MIN_U64::Inst_DS__DS_MIN_U64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_min_u64") |
| { |
| } // Inst_DS__DS_MIN_U64 |
| |
| Inst_DS__DS_MIN_U64::~Inst_DS__DS_MIN_U64() |
| { |
| } // ~Inst_DS__DS_MIN_U64 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare); |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_DS__DS_MIN_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MAX_U64::Inst_DS__DS_MAX_U64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_max_u64") |
| { |
| } // Inst_DS__DS_MAX_U64 |
| |
| Inst_DS__DS_MAX_U64::~Inst_DS__DS_MAX_U64() |
| { |
| } // ~Inst_DS__DS_MAX_U64 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare); |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_DS__DS_MAX_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_AND_B64::Inst_DS__DS_AND_B64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_and_b64") |
| { |
| } // Inst_DS__DS_AND_B64 |
| |
| Inst_DS__DS_AND_B64::~Inst_DS__DS_AND_B64() |
| { |
| } // ~Inst_DS__DS_AND_B64 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] &= DATA[0:1]; |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_DS__DS_AND_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_OR_B64::Inst_DS__DS_OR_B64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_or_b64") |
| { |
| } // Inst_DS__DS_OR_B64 |
| |
| Inst_DS__DS_OR_B64::~Inst_DS__DS_OR_B64() |
| { |
| } // ~Inst_DS__DS_OR_B64 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] |= DATA[0:1]; |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_DS__DS_OR_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_XOR_B64::Inst_DS__DS_XOR_B64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_xor_b64") |
| { |
| } // Inst_DS__DS_XOR_B64 |
| |
| Inst_DS__DS_XOR_B64::~Inst_DS__DS_XOR_B64() |
| { |
| } // ~Inst_DS__DS_XOR_B64 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] ^= DATA[0:1]; |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_DS__DS_XOR_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MSKOR_B64::Inst_DS__DS_MSKOR_B64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_mskor_b64") |
| { |
| } // Inst_DS__DS_MSKOR_B64 |
| |
| Inst_DS__DS_MSKOR_B64::~Inst_DS__DS_MSKOR_B64() |
| { |
| } // ~Inst_DS__DS_MSKOR_B64 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (MEM_ADDR[ADDR] & ~DATA) | DATA2; |
| // RETURN_DATA = tmp. |
| void |
| Inst_DS__DS_MSKOR_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_WRITE_B64::Inst_DS__DS_WRITE_B64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_write_b64") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| } // Inst_DS__DS_WRITE_B64 |
| |
| Inst_DS__DS_WRITE_B64::~Inst_DS__DS_WRITE_B64() |
| { |
| } // ~Inst_DS__DS_WRITE_B64 |
| |
| // MEM[ADDR] = DATA. |
| // Write qword. |
| void |
| Inst_DS__DS_WRITE_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set( |
| gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); |
| ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); |
| ConstVecOperandU64 data(gpuDynInst, extData.DATA0); |
| |
| addr.read(); |
| data.read(); |
| |
| calcAddr(gpuDynInst, addr); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| (reinterpret_cast<VecElemU64*>(gpuDynInst->d_data))[lane] |
| = data[lane]; |
| } |
| } |
| |
| gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); |
| |
| wf->wrLmReqsInPipe--; |
| wf->outstandingReqsWrLm++; |
| wf->outstandingReqs++; |
| wf->validateRequestCounters(); |
| } |
| |
| void |
| Inst_DS__DS_WRITE_B64::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| Addr offset0 = instData.OFFSET0; |
| Addr offset1 = instData.OFFSET1; |
| Addr offset = (offset1 << 8) | offset0; |
| |
| initMemWrite<VecElemU64>(gpuDynInst, offset); |
| } // initiateAcc |
| |
| void |
| Inst_DS__DS_WRITE_B64::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // completeAcc |
| |
| Inst_DS__DS_WRITE2_B64::Inst_DS__DS_WRITE2_B64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_write2_b64") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| } // Inst_DS__DS_WRITE2_B64 |
| |
| Inst_DS__DS_WRITE2_B64::~Inst_DS__DS_WRITE2_B64() |
| { |
| } // ~Inst_DS__DS_WRITE2_B64 |
| |
| // MEM[ADDR_BASE + OFFSET0 * 8] = DATA; |
| // MEM[ADDR_BASE + OFFSET1 * 8] = DATA2. |
| // Write 2 qwords. |
| void |
| Inst_DS__DS_WRITE2_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set( |
| gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); |
| ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); |
| ConstVecOperandU64 data0(gpuDynInst, extData.DATA0); |
| ConstVecOperandU64 data1(gpuDynInst, extData.DATA1); |
| |
| addr.read(); |
| data0.read(); |
| data1.read(); |
| |
| calcAddr(gpuDynInst, addr); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (wf->execMask(lane)) { |
| (reinterpret_cast<VecElemU64*>( |
| gpuDynInst->d_data))[lane * 2] = data0[lane]; |
| (reinterpret_cast<VecElemU64*>( |
| gpuDynInst->d_data))[lane * 2 + 1] = data1[lane]; |
| } |
| } |
| |
| gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); |
| |
| wf->wrLmReqsInPipe--; |
| wf->outstandingReqsWrLm++; |
| wf->outstandingReqs++; |
| wf->validateRequestCounters(); |
| } |
| |
| void |
| Inst_DS__DS_WRITE2_B64::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| Addr offset0 = instData.OFFSET0 * 8; |
| Addr offset1 = instData.OFFSET1 * 8; |
| |
| initDualMemWrite<VecElemU64>(gpuDynInst, offset0, offset1); |
| } |
| |
| void |
| Inst_DS__DS_WRITE2_B64::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_DS__DS_WRITE2ST64_B64::Inst_DS__DS_WRITE2ST64_B64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_write2st64_b64") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| } // Inst_DS__DS_WRITE2ST64_B64 |
| |
| Inst_DS__DS_WRITE2ST64_B64::~Inst_DS__DS_WRITE2ST64_B64() |
| { |
| } // ~Inst_DS__DS_WRITE2ST64_B64 |
| |
| // MEM[ADDR_BASE + OFFSET0 * 8 * 64] = DATA; |
| // MEM[ADDR_BASE + OFFSET1 * 8 * 64] = DATA2; |
| // Write 2 qwords. |
| void |
| Inst_DS__DS_WRITE2ST64_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_CMPST_B64::Inst_DS__DS_CMPST_B64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_cmpst_b64") |
| { |
| } // Inst_DS__DS_CMPST_B64 |
| |
| Inst_DS__DS_CMPST_B64::~Inst_DS__DS_CMPST_B64() |
| { |
| } // ~Inst_DS__DS_CMPST_B64 |
| |
| // tmp = MEM[ADDR]; |
| // src = DATA2; |
| // cmp = DATA; |
| // MEM[ADDR] = (tmp == cmp) ? src : tmp; |
| // RETURN_DATA[0] = tmp. |
| // Compare and store. |
| void |
| Inst_DS__DS_CMPST_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_CMPST_F64::Inst_DS__DS_CMPST_F64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_cmpst_f64") |
| { |
| setFlag(F64); |
| } // Inst_DS__DS_CMPST_F64 |
| |
| Inst_DS__DS_CMPST_F64::~Inst_DS__DS_CMPST_F64() |
| { |
| } // ~Inst_DS__DS_CMPST_F64 |
| |
| // tmp = MEM[ADDR]; |
| // src = DATA2; |
| // cmp = DATA; |
| // MEM[ADDR] = (tmp == cmp) ? src : tmp; |
| // RETURN_DATA[0] = tmp. |
| void |
| Inst_DS__DS_CMPST_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MIN_F64::Inst_DS__DS_MIN_F64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_min_f64") |
| { |
| setFlag(F64); |
| } // Inst_DS__DS_MIN_F64 |
| |
| Inst_DS__DS_MIN_F64::~Inst_DS__DS_MIN_F64() |
| { |
| } // ~Inst_DS__DS_MIN_F64 |
| |
| // tmp = MEM[ADDR]; |
| // src = DATA; |
| // cmp = DATA2; |
| // MEM[ADDR] = (cmp < tmp) ? src : tmp. |
| void |
| Inst_DS__DS_MIN_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MAX_F64::Inst_DS__DS_MAX_F64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_max_f64") |
| { |
| setFlag(F64); |
| } // Inst_DS__DS_MAX_F64 |
| |
| Inst_DS__DS_MAX_F64::~Inst_DS__DS_MAX_F64() |
| { |
| } // ~Inst_DS__DS_MAX_F64 |
| |
| // tmp = MEM[ADDR]; |
| // src = DATA; |
| // cmp = DATA2; |
| // MEM[ADDR] = (tmp > cmp) ? src : tmp. |
| void |
| Inst_DS__DS_MAX_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_ADD_RTN_U64::Inst_DS__DS_ADD_RTN_U64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_add_rtn_u64") |
| { |
| } // Inst_DS__DS_ADD_RTN_U64 |
| |
| Inst_DS__DS_ADD_RTN_U64::~Inst_DS__DS_ADD_RTN_U64() |
| { |
| } // ~Inst_DS__DS_ADD_RTN_U64 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] += DATA[0:1]; |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_DS__DS_ADD_RTN_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_SUB_RTN_U64::Inst_DS__DS_SUB_RTN_U64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_sub_rtn_u64") |
| { |
| } // Inst_DS__DS_SUB_RTN_U64 |
| |
| Inst_DS__DS_SUB_RTN_U64::~Inst_DS__DS_SUB_RTN_U64() |
| { |
| } // ~Inst_DS__DS_SUB_RTN_U64 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] -= DATA[0:1]; |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_DS__DS_SUB_RTN_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_RSUB_RTN_U64::Inst_DS__DS_RSUB_RTN_U64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_rsub_rtn_u64") |
| { |
| } // Inst_DS__DS_RSUB_RTN_U64 |
| |
| Inst_DS__DS_RSUB_RTN_U64::~Inst_DS__DS_RSUB_RTN_U64() |
| { |
| } // ~Inst_DS__DS_RSUB_RTN_U64 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = DATA - MEM[ADDR]; |
| // RETURN_DATA = tmp. |
| // Subtraction with reversed operands. |
| void |
| Inst_DS__DS_RSUB_RTN_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_INC_RTN_U64::Inst_DS__DS_INC_RTN_U64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_inc_rtn_u64") |
| { |
| } // Inst_DS__DS_INC_RTN_U64 |
| |
| Inst_DS__DS_INC_RTN_U64::~Inst_DS__DS_INC_RTN_U64() |
| { |
| } // ~Inst_DS__DS_INC_RTN_U64 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare); |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_DS__DS_INC_RTN_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_DEC_RTN_U64::Inst_DS__DS_DEC_RTN_U64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_dec_rtn_u64") |
| { |
| } // Inst_DS__DS_DEC_RTN_U64 |
| |
| Inst_DS__DS_DEC_RTN_U64::~Inst_DS__DS_DEC_RTN_U64() |
| { |
| } // ~Inst_DS__DS_DEC_RTN_U64 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1 |
| // (unsigned compare); |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_DS__DS_DEC_RTN_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MIN_RTN_I64::Inst_DS__DS_MIN_RTN_I64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_min_rtn_i64") |
| { |
| } // Inst_DS__DS_MIN_RTN_I64 |
| |
| Inst_DS__DS_MIN_RTN_I64::~Inst_DS__DS_MIN_RTN_I64() |
| { |
| } // ~Inst_DS__DS_MIN_RTN_I64 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare); |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_DS__DS_MIN_RTN_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MAX_RTN_I64::Inst_DS__DS_MAX_RTN_I64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_max_rtn_i64") |
| { |
| } // Inst_DS__DS_MAX_RTN_I64 |
| |
| Inst_DS__DS_MAX_RTN_I64::~Inst_DS__DS_MAX_RTN_I64() |
| { |
| } // ~Inst_DS__DS_MAX_RTN_I64 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare); |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_DS__DS_MAX_RTN_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MIN_RTN_U64::Inst_DS__DS_MIN_RTN_U64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_min_rtn_u64") |
| { |
| } // Inst_DS__DS_MIN_RTN_U64 |
| |
| Inst_DS__DS_MIN_RTN_U64::~Inst_DS__DS_MIN_RTN_U64() |
| { |
| } // ~Inst_DS__DS_MIN_RTN_U64 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare); |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_DS__DS_MIN_RTN_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MAX_RTN_U64::Inst_DS__DS_MAX_RTN_U64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_max_rtn_u64") |
| { |
| } // Inst_DS__DS_MAX_RTN_U64 |
| |
| Inst_DS__DS_MAX_RTN_U64::~Inst_DS__DS_MAX_RTN_U64() |
| { |
| } // ~Inst_DS__DS_MAX_RTN_U64 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare); |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_DS__DS_MAX_RTN_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_AND_RTN_B64::Inst_DS__DS_AND_RTN_B64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_and_rtn_b64") |
| { |
| } // Inst_DS__DS_AND_RTN_B64 |
| |
| Inst_DS__DS_AND_RTN_B64::~Inst_DS__DS_AND_RTN_B64() |
| { |
| } // ~Inst_DS__DS_AND_RTN_B64 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] &= DATA[0:1]; |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_DS__DS_AND_RTN_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_OR_RTN_B64::Inst_DS__DS_OR_RTN_B64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_or_rtn_b64") |
| { |
| } // Inst_DS__DS_OR_RTN_B64 |
| |
| Inst_DS__DS_OR_RTN_B64::~Inst_DS__DS_OR_RTN_B64() |
| { |
| } // ~Inst_DS__DS_OR_RTN_B64 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] |= DATA[0:1]; |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_DS__DS_OR_RTN_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_XOR_RTN_B64::Inst_DS__DS_XOR_RTN_B64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_xor_rtn_b64") |
| { |
| } // Inst_DS__DS_XOR_RTN_B64 |
| |
| Inst_DS__DS_XOR_RTN_B64::~Inst_DS__DS_XOR_RTN_B64() |
| { |
| } // ~Inst_DS__DS_XOR_RTN_B64 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] ^= DATA[0:1]; |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_DS__DS_XOR_RTN_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MSKOR_RTN_B64::Inst_DS__DS_MSKOR_RTN_B64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_mskor_rtn_b64") |
| { |
| } // Inst_DS__DS_MSKOR_RTN_B64 |
| |
| Inst_DS__DS_MSKOR_RTN_B64::~Inst_DS__DS_MSKOR_RTN_B64() |
| { |
| } // ~Inst_DS__DS_MSKOR_RTN_B64 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (MEM_ADDR[ADDR] & ~DATA) | DATA2; |
| // RETURN_DATA = tmp. |
| // Masked dword OR, D0 contains the mask and D1 contains the new value. |
| void |
| Inst_DS__DS_MSKOR_RTN_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_WRXCHG_RTN_B64::Inst_DS__DS_WRXCHG_RTN_B64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_wrxchg_rtn_b64") |
| { |
| } // Inst_DS__DS_WRXCHG_RTN_B64 |
| |
| Inst_DS__DS_WRXCHG_RTN_B64::~Inst_DS__DS_WRXCHG_RTN_B64() |
| { |
| } // ~Inst_DS__DS_WRXCHG_RTN_B64 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = DATA; |
| // RETURN_DATA = tmp. |
| // Write-exchange operation. |
| void |
| Inst_DS__DS_WRXCHG_RTN_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_WRXCHG2_RTN_B64::Inst_DS__DS_WRXCHG2_RTN_B64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_wrxchg2_rtn_b64") |
| { |
| } // Inst_DS__DS_WRXCHG2_RTN_B64 |
| |
| Inst_DS__DS_WRXCHG2_RTN_B64::~Inst_DS__DS_WRXCHG2_RTN_B64() |
| { |
| } // ~Inst_DS__DS_WRXCHG2_RTN_B64 |
| |
| // Write-exchange 2 separate qwords. |
| void |
| Inst_DS__DS_WRXCHG2_RTN_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_WRXCHG2ST64_RTN_B64::Inst_DS__DS_WRXCHG2ST64_RTN_B64( |
| InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_wrxchg2st64_rtn_b64") |
| { |
| } // Inst_DS__DS_WRXCHG2ST64_RTN_B64 |
| |
| Inst_DS__DS_WRXCHG2ST64_RTN_B64::~Inst_DS__DS_WRXCHG2ST64_RTN_B64() |
| { |
| } // ~Inst_DS__DS_WRXCHG2ST64_RTN_B64 |
| |
| // Write-exchange 2 qwords with a stride of 64 qwords. |
| void |
| Inst_DS__DS_WRXCHG2ST64_RTN_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_CMPST_RTN_B64::Inst_DS__DS_CMPST_RTN_B64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_cmpst_rtn_b64") |
| { |
| } // Inst_DS__DS_CMPST_RTN_B64 |
| |
| Inst_DS__DS_CMPST_RTN_B64::~Inst_DS__DS_CMPST_RTN_B64() |
| { |
| } // ~Inst_DS__DS_CMPST_RTN_B64 |
| |
| // tmp = MEM[ADDR]; |
| // src = DATA2; |
| // cmp = DATA; |
| // MEM[ADDR] = (tmp == cmp) ? src : tmp; |
| // RETURN_DATA[0] = tmp. |
| // Compare and store. |
| void |
| Inst_DS__DS_CMPST_RTN_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_CMPST_RTN_F64::Inst_DS__DS_CMPST_RTN_F64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_cmpst_rtn_f64") |
| { |
| setFlag(F64); |
| } // Inst_DS__DS_CMPST_RTN_F64 |
| |
| Inst_DS__DS_CMPST_RTN_F64::~Inst_DS__DS_CMPST_RTN_F64() |
| { |
| } // ~Inst_DS__DS_CMPST_RTN_F64 |
| |
| // tmp = MEM[ADDR]; |
| // src = DATA2; |
| // cmp = DATA; |
| // MEM[ADDR] = (tmp == cmp) ? src : tmp; |
| // RETURN_DATA[0] = tmp. |
| void |
| Inst_DS__DS_CMPST_RTN_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MIN_RTN_F64::Inst_DS__DS_MIN_RTN_F64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_min_rtn_f64") |
| { |
| setFlag(F64); |
| } // Inst_DS__DS_MIN_RTN_F64 |
| |
| Inst_DS__DS_MIN_RTN_F64::~Inst_DS__DS_MIN_RTN_F64() |
| { |
| } // ~Inst_DS__DS_MIN_RTN_F64 |
| |
| // tmp = MEM[ADDR]; |
| // src = DATA; |
| // cmp = DATA2; |
| // MEM[ADDR] = (cmp < tmp) ? src : tmp. |
| void |
| Inst_DS__DS_MIN_RTN_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MAX_RTN_F64::Inst_DS__DS_MAX_RTN_F64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_max_rtn_f64") |
| { |
| setFlag(F64); |
| } // Inst_DS__DS_MAX_RTN_F64 |
| |
| Inst_DS__DS_MAX_RTN_F64::~Inst_DS__DS_MAX_RTN_F64() |
| { |
| } // ~Inst_DS__DS_MAX_RTN_F64 |
| |
| // tmp = MEM[ADDR]; |
| // src = DATA; |
| // cmp = DATA2; |
| // MEM[ADDR] = (tmp > cmp) ? src : tmp. |
| void |
| Inst_DS__DS_MAX_RTN_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_READ_B64::Inst_DS__DS_READ_B64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_read_b64") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| } // Inst_DS__DS_READ_B64 |
| |
| Inst_DS__DS_READ_B64::~Inst_DS__DS_READ_B64() |
| { |
| } // ~Inst_DS__DS_READ_B64 |
| |
| // RETURN_DATA = MEM[ADDR]. |
| // Read 1 qword. |
| void |
| Inst_DS__DS_READ_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set( |
| gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); |
| ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); |
| |
| addr.read(); |
| |
| calcAddr(gpuDynInst, addr); |
| |
| gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); |
| |
| wf->rdLmReqsInPipe--; |
| wf->outstandingReqsRdLm++; |
| wf->outstandingReqs++; |
| wf->validateRequestCounters(); |
| } |
| |
| void |
| Inst_DS__DS_READ_B64::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| Addr offset0 = instData.OFFSET0; |
| Addr offset1 = instData.OFFSET1; |
| Addr offset = (offset1 << 8) | offset0; |
| |
| initMemRead<VecElemU64>(gpuDynInst, offset); |
| } // initiateAcc |
| |
| void |
| Inst_DS__DS_READ_B64::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| VecOperandU64 vdst(gpuDynInst, extData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| vdst[lane] = (reinterpret_cast<VecElemU64*>( |
| gpuDynInst->d_data))[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } // completeAcc |
| |
| Inst_DS__DS_READ2_B64::Inst_DS__DS_READ2_B64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_read2_b64") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| } // Inst_DS__DS_READ2_B64 |
| |
| Inst_DS__DS_READ2_B64::~Inst_DS__DS_READ2_B64() |
| { |
| } // ~Inst_DS__DS_READ2_B64 |
| |
| // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 8]; |
| // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 8]. |
| // Read 2 qwords. |
| void |
| Inst_DS__DS_READ2_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set( |
| gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); |
| ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); |
| |
| addr.read(); |
| |
| calcAddr(gpuDynInst, addr); |
| |
| gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); |
| |
| wf->rdLmReqsInPipe--; |
| wf->outstandingReqsRdLm++; |
| wf->outstandingReqs++; |
| wf->validateRequestCounters(); |
| } |
| |
| void |
| Inst_DS__DS_READ2_B64::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| Addr offset0 = instData.OFFSET0 * 8; |
| Addr offset1 = instData.OFFSET1 * 8; |
| |
| initDualMemRead<VecElemU64>(gpuDynInst, offset0, offset1); |
| } // initiateAcc |
| |
| void |
| Inst_DS__DS_READ2_B64::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| VecOperandU64 vdst0(gpuDynInst, extData.VDST); |
| VecOperandU64 vdst1(gpuDynInst, extData.VDST + 2); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| vdst0[lane] = (reinterpret_cast<VecElemU64*>( |
| gpuDynInst->d_data))[lane * 2]; |
| vdst1[lane] = (reinterpret_cast<VecElemU64*>( |
| gpuDynInst->d_data))[lane * 2 + 1]; |
| } |
| } |
| |
| vdst0.write(); |
| vdst1.write(); |
| } // completeAcc |
| |
| Inst_DS__DS_READ2ST64_B64::Inst_DS__DS_READ2ST64_B64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_read2st64_b64") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| } // Inst_DS__DS_READ2ST64_B64 |
| |
| Inst_DS__DS_READ2ST64_B64::~Inst_DS__DS_READ2ST64_B64() |
| { |
| } // ~Inst_DS__DS_READ2ST64_B64 |
| |
| // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 8 * 64]; |
| // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 8 * 64]. |
| // Read 2 qwords. |
| void |
| Inst_DS__DS_READ2ST64_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set( |
| gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); |
| ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); |
| |
| addr.read(); |
| |
| calcAddr(gpuDynInst, addr); |
| |
| gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); |
| |
| wf->rdLmReqsInPipe--; |
| wf->outstandingReqsRdLm++; |
| wf->outstandingReqs++; |
| wf->validateRequestCounters(); |
| } |
| |
| void |
| Inst_DS__DS_READ2ST64_B64::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| Addr offset0 = (instData.OFFSET0 * 8 * 64); |
| Addr offset1 = (instData.OFFSET1 * 8 * 64); |
| |
| initDualMemRead<VecElemU64>(gpuDynInst, offset0, offset1); |
| } |
| |
| void |
| Inst_DS__DS_READ2ST64_B64::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| VecOperandU64 vdst0(gpuDynInst, extData.VDST); |
| VecOperandU64 vdst1(gpuDynInst, extData.VDST + 2); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| vdst0[lane] = (reinterpret_cast<VecElemU64*>( |
| gpuDynInst->d_data))[lane * 2]; |
| vdst1[lane] = (reinterpret_cast<VecElemU64*>( |
| gpuDynInst->d_data))[lane * 2 + 1]; |
| } |
| } |
| |
| vdst0.write(); |
| vdst1.write(); |
| } |
| |
| Inst_DS__DS_CONDXCHG32_RTN_B64::Inst_DS__DS_CONDXCHG32_RTN_B64( |
| InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_condxchg32_rtn_b64") |
| { |
| } // Inst_DS__DS_CONDXCHG32_RTN_B64 |
| |
| Inst_DS__DS_CONDXCHG32_RTN_B64::~Inst_DS__DS_CONDXCHG32_RTN_B64() |
| { |
| } // ~Inst_DS__DS_CONDXCHG32_RTN_B64 |
| |
| // Conditional write exchange. |
| void |
| Inst_DS__DS_CONDXCHG32_RTN_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_ADD_SRC2_U32::Inst_DS__DS_ADD_SRC2_U32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_add_src2_u32") |
| { |
| } // Inst_DS__DS_ADD_SRC2_U32 |
| |
| Inst_DS__DS_ADD_SRC2_U32::~Inst_DS__DS_ADD_SRC2_U32() |
| { |
| } // ~Inst_DS__DS_ADD_SRC2_U32 |
| |
| // A = ADDR_BASE; |
| // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : |
| // {offset1[6],offset1[6:0],offset0}); |
| // MEM[A] = MEM[A] + MEM[B]. |
| void |
| Inst_DS__DS_ADD_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_SUB_SRC2_U32::Inst_DS__DS_SUB_SRC2_U32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_sub_src2_u32") |
| { |
| } // Inst_DS__DS_SUB_SRC2_U32 |
| |
| Inst_DS__DS_SUB_SRC2_U32::~Inst_DS__DS_SUB_SRC2_U32() |
| { |
| } // ~Inst_DS__DS_SUB_SRC2_U32 |
| |
| // A = ADDR_BASE; |
| // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : |
| // {offset1[6],offset1[6:0],offset0}); |
| // MEM[A] = MEM[A] - MEM[B]. |
| void |
| Inst_DS__DS_SUB_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_RSUB_SRC2_U32::Inst_DS__DS_RSUB_SRC2_U32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_rsub_src2_u32") |
| { |
| } // Inst_DS__DS_RSUB_SRC2_U32 |
| |
| Inst_DS__DS_RSUB_SRC2_U32::~Inst_DS__DS_RSUB_SRC2_U32() |
| { |
| } // ~Inst_DS__DS_RSUB_SRC2_U32 |
| |
| // A = ADDR_BASE; |
| // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : |
| // {offset1[6],offset1[6:0],offset0}); |
| // MEM[A] = MEM[B] - MEM[A]. |
| void |
| Inst_DS__DS_RSUB_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_INC_SRC2_U32::Inst_DS__DS_INC_SRC2_U32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_inc_src2_u32") |
| { |
| } // Inst_DS__DS_INC_SRC2_U32 |
| |
| Inst_DS__DS_INC_SRC2_U32::~Inst_DS__DS_INC_SRC2_U32() |
| { |
| } // ~Inst_DS__DS_INC_SRC2_U32 |
| |
| // A = ADDR_BASE; |
| // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : |
| // {offset1[6],offset1[6:0],offset0}); |
| // MEM[A] = (MEM[A] >= MEM[B] ? 0 : MEM[A] + 1). |
| void |
| Inst_DS__DS_INC_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_DEC_SRC2_U32::Inst_DS__DS_DEC_SRC2_U32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_dec_src2_u32") |
| { |
| } // Inst_DS__DS_DEC_SRC2_U32 |
| |
| Inst_DS__DS_DEC_SRC2_U32::~Inst_DS__DS_DEC_SRC2_U32() |
| { |
| } // ~Inst_DS__DS_DEC_SRC2_U32 |
| |
| // A = ADDR_BASE; |
| // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : |
| // {offset1[6],offset1[6:0],offset0}); |
| // MEM[A] = (MEM[A] == 0 || MEM[A] > MEM[B] ? MEM[B] : MEM[A] - 1). |
| // Uint decrement. |
| void |
| Inst_DS__DS_DEC_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MIN_SRC2_I32::Inst_DS__DS_MIN_SRC2_I32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_min_src2_i32") |
| { |
| } // Inst_DS__DS_MIN_SRC2_I32 |
| |
| Inst_DS__DS_MIN_SRC2_I32::~Inst_DS__DS_MIN_SRC2_I32() |
| { |
| } // ~Inst_DS__DS_MIN_SRC2_I32 |
| |
| // A = ADDR_BASE; |
| // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : |
| // {offset1[6],offset1[6:0],offset0}); |
| // MEM[A] = min(MEM[A], MEM[B]). |
| void |
| Inst_DS__DS_MIN_SRC2_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MAX_SRC2_I32::Inst_DS__DS_MAX_SRC2_I32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_max_src2_i32") |
| { |
| } // Inst_DS__DS_MAX_SRC2_I32 |
| |
| Inst_DS__DS_MAX_SRC2_I32::~Inst_DS__DS_MAX_SRC2_I32() |
| { |
| } // ~Inst_DS__DS_MAX_SRC2_I32 |
| |
| // A = ADDR_BASE; |
| // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : |
| // {offset1[6],offset1[6:0],offset0}); |
| // MEM[A] = max(MEM[A], MEM[B]). |
| void |
| Inst_DS__DS_MAX_SRC2_I32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MIN_SRC2_U32::Inst_DS__DS_MIN_SRC2_U32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_min_src2_u32") |
| { |
| } // Inst_DS__DS_MIN_SRC2_U32 |
| |
| Inst_DS__DS_MIN_SRC2_U32::~Inst_DS__DS_MIN_SRC2_U32() |
| { |
| } // ~Inst_DS__DS_MIN_SRC2_U32 |
| |
| // A = ADDR_BASE; |
| // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : |
| // {offset1[6],offset1[6:0],offset0}); |
| // MEM[A] = min(MEM[A], MEM[B]). |
| void |
| Inst_DS__DS_MIN_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MAX_SRC2_U32::Inst_DS__DS_MAX_SRC2_U32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_max_src2_u32") |
| { |
| } // Inst_DS__DS_MAX_SRC2_U32 |
| |
| Inst_DS__DS_MAX_SRC2_U32::~Inst_DS__DS_MAX_SRC2_U32() |
| { |
| } // ~Inst_DS__DS_MAX_SRC2_U32 |
| |
| // A = ADDR_BASE; |
| // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : |
| // {offset1[6],offset1[6:0],offset0}); |
| // MEM[A] = max(MEM[A], MEM[B]). |
| void |
| Inst_DS__DS_MAX_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_AND_SRC2_B32::Inst_DS__DS_AND_SRC2_B32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_and_src2_b32") |
| { |
| } // Inst_DS__DS_AND_SRC2_B32 |
| |
| Inst_DS__DS_AND_SRC2_B32::~Inst_DS__DS_AND_SRC2_B32() |
| { |
| } // ~Inst_DS__DS_AND_SRC2_B32 |
| |
| // A = ADDR_BASE; |
| // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : |
| // {offset1[6],offset1[6:0],offset0}); |
| // MEM[A] = MEM[A] & MEM[B]. |
| void |
| Inst_DS__DS_AND_SRC2_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_OR_SRC2_B32::Inst_DS__DS_OR_SRC2_B32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_or_src2_b32") |
| { |
| } // Inst_DS__DS_OR_SRC2_B32 |
| |
| Inst_DS__DS_OR_SRC2_B32::~Inst_DS__DS_OR_SRC2_B32() |
| { |
| } // ~Inst_DS__DS_OR_SRC2_B32 |
| |
| // A = ADDR_BASE; |
| // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : |
| // {offset1[6],offset1[6:0],offset0}); |
| // MEM[A] = MEM[A] | MEM[B]. |
| void |
| Inst_DS__DS_OR_SRC2_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_XOR_SRC2_B32::Inst_DS__DS_XOR_SRC2_B32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_xor_src2_b32") |
| { |
| } // Inst_DS__DS_XOR_SRC2_B32 |
| |
| Inst_DS__DS_XOR_SRC2_B32::~Inst_DS__DS_XOR_SRC2_B32() |
| { |
| } // ~Inst_DS__DS_XOR_SRC2_B32 |
| |
| // A = ADDR_BASE; |
| // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : |
| // {offset1[6],offset1[6:0],offset0}); |
| // MEM[A] = MEM[A] ^ MEM[B]. |
| void |
| Inst_DS__DS_XOR_SRC2_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_WRITE_SRC2_B32::Inst_DS__DS_WRITE_SRC2_B32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_write_src2_b32") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| } // Inst_DS__DS_WRITE_SRC2_B32 |
| |
| Inst_DS__DS_WRITE_SRC2_B32::~Inst_DS__DS_WRITE_SRC2_B32() |
| { |
| } // ~Inst_DS__DS_WRITE_SRC2_B32 |
| |
| // A = ADDR_BASE; |
| // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : |
| // {offset1[6],offset1[6:0],offset0}); |
| // MEM[A] = MEM[B]. |
| // Write dword. |
| void |
| Inst_DS__DS_WRITE_SRC2_B32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MIN_SRC2_F32::Inst_DS__DS_MIN_SRC2_F32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_min_src2_f32") |
| { |
| setFlag(F32); |
| } // Inst_DS__DS_MIN_SRC2_F32 |
| |
| Inst_DS__DS_MIN_SRC2_F32::~Inst_DS__DS_MIN_SRC2_F32() |
| { |
| } // ~Inst_DS__DS_MIN_SRC2_F32 |
| |
| // A = ADDR_BASE; |
| // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : |
| // {offset1[6],offset1[6:0],offset0}); |
| // MEM[A] = (MEM[B] < MEM[A]) ? MEM[B] : MEM[A]. |
| void |
| Inst_DS__DS_MIN_SRC2_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MAX_SRC2_F32::Inst_DS__DS_MAX_SRC2_F32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_max_src2_f32") |
| { |
| setFlag(F32); |
| } // Inst_DS__DS_MAX_SRC2_F32 |
| |
| Inst_DS__DS_MAX_SRC2_F32::~Inst_DS__DS_MAX_SRC2_F32() |
| { |
| } // ~Inst_DS__DS_MAX_SRC2_F32 |
| |
| // A = ADDR_BASE; |
| // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : |
| // {offset1[6],offset1[6:0],offset0}); |
| // MEM[A] = (MEM[B] > MEM[A]) ? MEM[B] : MEM[A]. |
| void |
| Inst_DS__DS_MAX_SRC2_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_ADD_SRC2_F32::Inst_DS__DS_ADD_SRC2_F32(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_add_src2_f32") |
| { |
| setFlag(F32); |
| } // Inst_DS__DS_ADD_SRC2_F32 |
| |
| Inst_DS__DS_ADD_SRC2_F32::~Inst_DS__DS_ADD_SRC2_F32() |
| { |
| } // ~Inst_DS__DS_ADD_SRC2_F32 |
| |
| // A = ADDR_BASE; |
| // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : |
| // {offset1[6],offset1[6:0],offset0}); |
| // MEM[A] = MEM[B] + MEM[A]. |
| void |
| Inst_DS__DS_ADD_SRC2_F32::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_GWS_SEMA_RELEASE_ALL::Inst_DS__DS_GWS_SEMA_RELEASE_ALL( |
| InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_gws_sema_release_all") |
| { |
| } // Inst_DS__DS_GWS_SEMA_RELEASE_ALL |
| |
| Inst_DS__DS_GWS_SEMA_RELEASE_ALL::~Inst_DS__DS_GWS_SEMA_RELEASE_ALL() |
| { |
| } // ~Inst_DS__DS_GWS_SEMA_RELEASE_ALL |
| |
| void |
| Inst_DS__DS_GWS_SEMA_RELEASE_ALL::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_GWS_INIT::Inst_DS__DS_GWS_INIT(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_gws_init") |
| { |
| } // Inst_DS__DS_GWS_INIT |
| |
| Inst_DS__DS_GWS_INIT::~Inst_DS__DS_GWS_INIT() |
| { |
| } // ~Inst_DS__DS_GWS_INIT |
| |
| void |
| Inst_DS__DS_GWS_INIT::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_GWS_SEMA_V::Inst_DS__DS_GWS_SEMA_V(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_gws_sema_v") |
| { |
| } // Inst_DS__DS_GWS_SEMA_V |
| |
| Inst_DS__DS_GWS_SEMA_V::~Inst_DS__DS_GWS_SEMA_V() |
| { |
| } // ~Inst_DS__DS_GWS_SEMA_V |
| |
| void |
| Inst_DS__DS_GWS_SEMA_V::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_GWS_SEMA_BR::Inst_DS__DS_GWS_SEMA_BR(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_gws_sema_br") |
| { |
| } // Inst_DS__DS_GWS_SEMA_BR |
| |
| Inst_DS__DS_GWS_SEMA_BR::~Inst_DS__DS_GWS_SEMA_BR() |
| { |
| } // ~Inst_DS__DS_GWS_SEMA_BR |
| |
| void |
| Inst_DS__DS_GWS_SEMA_BR::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_GWS_SEMA_P::Inst_DS__DS_GWS_SEMA_P(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_gws_sema_p") |
| { |
| } // Inst_DS__DS_GWS_SEMA_P |
| |
| Inst_DS__DS_GWS_SEMA_P::~Inst_DS__DS_GWS_SEMA_P() |
| { |
| } // ~Inst_DS__DS_GWS_SEMA_P |
| |
| void |
| Inst_DS__DS_GWS_SEMA_P::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_GWS_BARRIER::Inst_DS__DS_GWS_BARRIER(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_gws_barrier") |
| { |
| } // Inst_DS__DS_GWS_BARRIER |
| |
| Inst_DS__DS_GWS_BARRIER::~Inst_DS__DS_GWS_BARRIER() |
| { |
| } // ~Inst_DS__DS_GWS_BARRIER |
| |
| void |
| Inst_DS__DS_GWS_BARRIER::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_CONSUME::Inst_DS__DS_CONSUME(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_consume") |
| { |
| } // Inst_DS__DS_CONSUME |
| |
| Inst_DS__DS_CONSUME::~Inst_DS__DS_CONSUME() |
| { |
| } // ~Inst_DS__DS_CONSUME |
| |
| void |
| Inst_DS__DS_CONSUME::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_APPEND::Inst_DS__DS_APPEND(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_append") |
| { |
| } // Inst_DS__DS_APPEND |
| |
| Inst_DS__DS_APPEND::~Inst_DS__DS_APPEND() |
| { |
| } // ~Inst_DS__DS_APPEND |
| |
| void |
| Inst_DS__DS_APPEND::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_ORDERED_COUNT::Inst_DS__DS_ORDERED_COUNT(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_ordered_count") |
| { |
| } // Inst_DS__DS_ORDERED_COUNT |
| |
| Inst_DS__DS_ORDERED_COUNT::~Inst_DS__DS_ORDERED_COUNT() |
| { |
| } // ~Inst_DS__DS_ORDERED_COUNT |
| |
| void |
| Inst_DS__DS_ORDERED_COUNT::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_ADD_SRC2_U64::Inst_DS__DS_ADD_SRC2_U64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_add_src2_u64") |
| { |
| } // Inst_DS__DS_ADD_SRC2_U64 |
| |
| Inst_DS__DS_ADD_SRC2_U64::~Inst_DS__DS_ADD_SRC2_U64() |
| { |
| } // ~Inst_DS__DS_ADD_SRC2_U64 |
| |
| // A = ADDR_BASE; |
| // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : |
| // {offset1[6],offset1[6:0],offset0}); |
| // MEM[A] = MEM[A] + MEM[B]. |
| void |
| Inst_DS__DS_ADD_SRC2_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_SUB_SRC2_U64::Inst_DS__DS_SUB_SRC2_U64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_sub_src2_u64") |
| { |
| } // Inst_DS__DS_SUB_SRC2_U64 |
| |
| Inst_DS__DS_SUB_SRC2_U64::~Inst_DS__DS_SUB_SRC2_U64() |
| { |
| } // ~Inst_DS__DS_SUB_SRC2_U64 |
| |
| // A = ADDR_BASE; |
| // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : |
| // {offset1[6],offset1[6:0],offset0}); |
| // MEM[A] = MEM[A] - MEM[B]. |
| void |
| Inst_DS__DS_SUB_SRC2_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_RSUB_SRC2_U64::Inst_DS__DS_RSUB_SRC2_U64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_rsub_src2_u64") |
| { |
| } // Inst_DS__DS_RSUB_SRC2_U64 |
| |
| Inst_DS__DS_RSUB_SRC2_U64::~Inst_DS__DS_RSUB_SRC2_U64() |
| { |
| } // ~Inst_DS__DS_RSUB_SRC2_U64 |
| |
| // A = ADDR_BASE; |
| // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : |
| // {offset1[6],offset1[6:0],offset0}); |
| // MEM[A] = MEM[B] - MEM[A]. |
| void |
| Inst_DS__DS_RSUB_SRC2_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_INC_SRC2_U64::Inst_DS__DS_INC_SRC2_U64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_inc_src2_u64") |
| { |
| } // Inst_DS__DS_INC_SRC2_U64 |
| |
| Inst_DS__DS_INC_SRC2_U64::~Inst_DS__DS_INC_SRC2_U64() |
| { |
| } // ~Inst_DS__DS_INC_SRC2_U64 |
| |
| // A = ADDR_BASE; |
| // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : |
| // {offset1[6],offset1[6:0],offset0}); |
| // MEM[A] = (MEM[A] >= MEM[B] ? 0 : MEM[A] + 1). |
| void |
| Inst_DS__DS_INC_SRC2_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_DEC_SRC2_U64::Inst_DS__DS_DEC_SRC2_U64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_dec_src2_u64") |
| { |
| } // Inst_DS__DS_DEC_SRC2_U64 |
| |
| Inst_DS__DS_DEC_SRC2_U64::~Inst_DS__DS_DEC_SRC2_U64() |
| { |
| } // ~Inst_DS__DS_DEC_SRC2_U64 |
| |
| // A = ADDR_BASE; |
| // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : |
| // {offset1[6],offset1[6:0],offset0}); |
| // MEM[A] = (MEM[A] == 0 || MEM[A] > MEM[B] ? MEM[B] : MEM[A] - 1). |
| // Uint decrement. |
| void |
| Inst_DS__DS_DEC_SRC2_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MIN_SRC2_I64::Inst_DS__DS_MIN_SRC2_I64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_min_src2_i64") |
| { |
| } // Inst_DS__DS_MIN_SRC2_I64 |
| |
| Inst_DS__DS_MIN_SRC2_I64::~Inst_DS__DS_MIN_SRC2_I64() |
| { |
| } // ~Inst_DS__DS_MIN_SRC2_I64 |
| |
| // A = ADDR_BASE; |
| // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : |
| // {offset1[6],offset1[6:0],offset0}); |
| // MEM[A] = min(MEM[A], MEM[B]). |
| void |
| Inst_DS__DS_MIN_SRC2_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MAX_SRC2_I64::Inst_DS__DS_MAX_SRC2_I64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_max_src2_i64") |
| { |
| } // Inst_DS__DS_MAX_SRC2_I64 |
| |
| Inst_DS__DS_MAX_SRC2_I64::~Inst_DS__DS_MAX_SRC2_I64() |
| { |
| } // ~Inst_DS__DS_MAX_SRC2_I64 |
| |
| // A = ADDR_BASE; |
| // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : |
| // {offset1[6],offset1[6:0],offset0}); |
| // MEM[A] = max(MEM[A], MEM[B]). |
| void |
| Inst_DS__DS_MAX_SRC2_I64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MIN_SRC2_U64::Inst_DS__DS_MIN_SRC2_U64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_min_src2_u64") |
| { |
| } // Inst_DS__DS_MIN_SRC2_U64 |
| |
| Inst_DS__DS_MIN_SRC2_U64::~Inst_DS__DS_MIN_SRC2_U64() |
| { |
| } // ~Inst_DS__DS_MIN_SRC2_U64 |
| |
| // A = ADDR_BASE; |
| // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : |
| // {offset1[6],offset1[6:0],offset0}); |
| // MEM[A] = min(MEM[A], MEM[B]). |
| void |
| Inst_DS__DS_MIN_SRC2_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MAX_SRC2_U64::Inst_DS__DS_MAX_SRC2_U64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_max_src2_u64") |
| { |
| } // Inst_DS__DS_MAX_SRC2_U64 |
| |
| Inst_DS__DS_MAX_SRC2_U64::~Inst_DS__DS_MAX_SRC2_U64() |
| { |
| } // ~Inst_DS__DS_MAX_SRC2_U64 |
| |
| // A = ADDR_BASE; |
| // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : |
| // {offset1[6],offset1[6:0],offset0}); |
| // MEM[A] = max(MEM[A], MEM[B]). |
| void |
| Inst_DS__DS_MAX_SRC2_U64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_AND_SRC2_B64::Inst_DS__DS_AND_SRC2_B64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_and_src2_b64") |
| { |
| } // Inst_DS__DS_AND_SRC2_B64 |
| |
| Inst_DS__DS_AND_SRC2_B64::~Inst_DS__DS_AND_SRC2_B64() |
| { |
| } // ~Inst_DS__DS_AND_SRC2_B64 |
| |
| // A = ADDR_BASE; |
| // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : |
| // {offset1[6],offset1[6:0],offset0}); |
| // MEM[A] = MEM[A] & MEM[B]. |
| void |
| Inst_DS__DS_AND_SRC2_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_OR_SRC2_B64::Inst_DS__DS_OR_SRC2_B64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_or_src2_b64") |
| { |
| } // Inst_DS__DS_OR_SRC2_B64 |
| |
| Inst_DS__DS_OR_SRC2_B64::~Inst_DS__DS_OR_SRC2_B64() |
| { |
| } // ~Inst_DS__DS_OR_SRC2_B64 |
| |
| // A = ADDR_BASE; |
| // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : |
| // {offset1[6],offset1[6:0],offset0}); |
| // MEM[A] = MEM[A] | MEM[B]. |
| void |
| Inst_DS__DS_OR_SRC2_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_XOR_SRC2_B64::Inst_DS__DS_XOR_SRC2_B64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_xor_src2_b64") |
| { |
| } // Inst_DS__DS_XOR_SRC2_B64 |
| |
| Inst_DS__DS_XOR_SRC2_B64::~Inst_DS__DS_XOR_SRC2_B64() |
| { |
| } // ~Inst_DS__DS_XOR_SRC2_B64 |
| |
| // A = ADDR_BASE; |
| // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : |
| // {offset1[6],offset1[6:0],offset0}); |
| // MEM[A] = MEM[A] ^ MEM[B]. |
| void |
| Inst_DS__DS_XOR_SRC2_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_WRITE_SRC2_B64::Inst_DS__DS_WRITE_SRC2_B64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_write_src2_b64") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| } // Inst_DS__DS_WRITE_SRC2_B64 |
| |
| Inst_DS__DS_WRITE_SRC2_B64::~Inst_DS__DS_WRITE_SRC2_B64() |
| { |
| } // ~Inst_DS__DS_WRITE_SRC2_B64 |
| |
| // A = ADDR_BASE; |
| // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : |
| // {offset1[6],offset1[6:0],offset0}); |
| // MEM[A] = MEM[B]. |
| // Write qword. |
| void |
| Inst_DS__DS_WRITE_SRC2_B64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MIN_SRC2_F64::Inst_DS__DS_MIN_SRC2_F64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_min_src2_f64") |
| { |
| setFlag(F64); |
| } // Inst_DS__DS_MIN_SRC2_F64 |
| |
| Inst_DS__DS_MIN_SRC2_F64::~Inst_DS__DS_MIN_SRC2_F64() |
| { |
| } // ~Inst_DS__DS_MIN_SRC2_F64 |
| |
| // A = ADDR_BASE; |
| // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : |
| // {offset1[6],offset1[6:0],offset0}); |
| // MEM[A] = (MEM[B] < MEM[A]) ? MEM[B] : MEM[A]. |
| void |
| Inst_DS__DS_MIN_SRC2_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_MAX_SRC2_F64::Inst_DS__DS_MAX_SRC2_F64(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_max_src2_f64") |
| { |
| setFlag(F64); |
| } // Inst_DS__DS_MAX_SRC2_F64 |
| |
| Inst_DS__DS_MAX_SRC2_F64::~Inst_DS__DS_MAX_SRC2_F64() |
| { |
| } // ~Inst_DS__DS_MAX_SRC2_F64 |
| |
| // A = ADDR_BASE; |
| // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : |
| // {offset1[6],offset1[6:0],offset0}); |
| // MEM[A] = (MEM[B] > MEM[A]) ? MEM[B] : MEM[A]. |
| void |
| Inst_DS__DS_MAX_SRC2_F64::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_WRITE_B96::Inst_DS__DS_WRITE_B96(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_write_b96") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| } // Inst_DS__DS_WRITE_B96 |
| |
| Inst_DS__DS_WRITE_B96::~Inst_DS__DS_WRITE_B96() |
| { |
| } // ~Inst_DS__DS_WRITE_B96 |
| |
| // {MEM[ADDR + 8], MEM[ADDR + 4], MEM[ADDR]} = DATA[95:0]. |
| // Tri-dword write. |
| void |
| Inst_DS__DS_WRITE_B96::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_WRITE_B128::Inst_DS__DS_WRITE_B128(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_write_b128") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| } // Inst_DS__DS_WRITE_B128 |
| |
| Inst_DS__DS_WRITE_B128::~Inst_DS__DS_WRITE_B128() |
| { |
| } // ~Inst_DS__DS_WRITE_B128 |
| |
| // {MEM[ADDR + 12], MEM[ADDR + 8], MEM[ADDR + 4], MEM[ADDR]} = DATA[127:0]. |
| // Qword write. |
| void |
| Inst_DS__DS_WRITE_B128::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_READ_B96::Inst_DS__DS_READ_B96(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_read_b96") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| } // Inst_DS__DS_READ_B96 |
| |
| Inst_DS__DS_READ_B96::~Inst_DS__DS_READ_B96() |
| { |
| } // ~Inst_DS__DS_READ_B96 |
| |
| // Tri-dword read. |
| void |
| Inst_DS__DS_READ_B96::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_DS__DS_READ_B128::Inst_DS__DS_READ_B128(InFmt_DS *iFmt) |
| : Inst_DS(iFmt, "ds_read_b128") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| } // Inst_DS__DS_READ_B128 |
| |
| Inst_DS__DS_READ_B128::~Inst_DS__DS_READ_B128() |
| { |
| } // ~Inst_DS__DS_READ_B128 |
| |
| // Qword read. |
| void |
| Inst_DS__DS_READ_B128::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_X |
| ::Inst_MUBUF__BUFFER_LOAD_FORMAT_X(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_load_format_x") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_LOAD_FORMAT_X |
| |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_X::~Inst_MUBUF__BUFFER_LOAD_FORMAT_X() |
| { |
| } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_X |
| |
| // Untyped buffer load 1 dword with format conversion. |
| void |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_X::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_XY |
| ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XY(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_load_format_xy") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XY |
| |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XY() |
| { |
| } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XY |
| |
| // Untyped buffer load 2 dwords with format conversion. |
| void |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ |
| ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_load_format_xyz") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ |
| |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ() |
| { |
| } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ |
| |
| // Untyped buffer load 3 dwords with format conversion. |
| void |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW |
| ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_load_format_xyzw") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW |
| |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW() |
| { |
| } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW |
| |
| // Untyped buffer load 4 dwords with format conversion. |
| void |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MUBUF__BUFFER_STORE_FORMAT_X |
| ::Inst_MUBUF__BUFFER_STORE_FORMAT_X(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_store_format_x") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_STORE_FORMAT_X |
| |
| Inst_MUBUF__BUFFER_STORE_FORMAT_X::~Inst_MUBUF__BUFFER_STORE_FORMAT_X() |
| { |
| } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_X |
| |
| // Untyped buffer store 1 dword with format conversion. |
| void |
| Inst_MUBUF__BUFFER_STORE_FORMAT_X::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MUBUF__BUFFER_STORE_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MUBUF__BUFFER_STORE_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MUBUF__BUFFER_STORE_FORMAT_XY |
| ::Inst_MUBUF__BUFFER_STORE_FORMAT_XY(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_store_format_xy") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_STORE_FORMAT_XY |
| |
| Inst_MUBUF__BUFFER_STORE_FORMAT_XY::~Inst_MUBUF__BUFFER_STORE_FORMAT_XY() |
| { |
| } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XY |
| |
| // Untyped buffer store 2 dwords with format conversion. |
| void |
| Inst_MUBUF__BUFFER_STORE_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MUBUF__BUFFER_STORE_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MUBUF__BUFFER_STORE_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ |
| ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_store_format_xyz") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ |
| |
| Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ() |
| { |
| } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ |
| |
| // Untyped buffer store 3 dwords with format conversion. |
| void |
| Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW |
| ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_store_format_xyzw") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW |
| |
| Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW |
| ::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW() |
| { |
| } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW |
| |
| // Untyped buffer store 4 dwords with format conversion. |
| void |
| Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X |
| ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_load_format_d16_x") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X |
| |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X |
| ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X() |
| { |
| } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X |
| |
| // Untyped buffer load 1 dword with format conversion. |
| void |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY |
| ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_load_format_d16_xy") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY |
| |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY |
| ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY() |
| { |
| } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY |
| |
| // Untyped buffer load 2 dwords with format conversion. |
| void |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::initiateAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::completeAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ |
| ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_load_format_d16_xyz") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ |
| |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ |
| ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ() |
| { |
| } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ |
| |
| // Untyped buffer load 3 dwords with format conversion. |
| void |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::initiateAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::completeAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW |
| ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_load_format_d16_xyzw") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW |
| |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW |
| ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW() |
| { |
| } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW |
| |
| // Untyped buffer load 4 dwords with format conversion. |
| void |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::initiateAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::completeAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X |
| ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_store_format_d16_x") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X |
| |
| Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X |
| ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X() |
| { |
| } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X |
| |
| // Untyped buffer store 1 dword with format conversion. |
| void |
| Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::initiateAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::completeAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY |
| ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_store_format_d16_xy") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY |
| |
| Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY |
| ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY() |
| { |
| } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY |
| |
| // Untyped buffer store 2 dwords with format conversion. |
| void |
| Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::initiateAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::completeAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ |
| ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_store_format_d16_xyz") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ |
| |
| Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ |
| ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ() |
| { |
| } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ |
| |
| // Untyped buffer store 3 dwords with format conversion. |
| void |
| Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::initiateAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::completeAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW |
| ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_store_format_d16_xyzw") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW |
| |
| Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW |
| ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW() |
| { |
| } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW |
| |
| // Untyped buffer store 4 dwords with format conversion. |
| void |
| Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::initiateAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::completeAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MUBUF__BUFFER_LOAD_UBYTE |
| ::Inst_MUBUF__BUFFER_LOAD_UBYTE(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_load_ubyte") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| if (instData.LDS) { |
| setFlag(GroupSegment); |
| } else { |
| setFlag(GlobalSegment); |
| } |
| } // Inst_MUBUF__BUFFER_LOAD_UBYTE |
| |
| Inst_MUBUF__BUFFER_LOAD_UBYTE::~Inst_MUBUF__BUFFER_LOAD_UBYTE() |
| { |
| } // ~Inst_MUBUF__BUFFER_LOAD_UBYTE |
| |
| // Untyped buffer load unsigned byte (zero extend to VGPR destination). |
| void |
| Inst_MUBUF__BUFFER_LOAD_UBYTE::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| |
| ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); |
| ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); |
| ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); |
| ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); |
| |
| rsrcDesc.read(); |
| offset.read(); |
| |
| int inst_offset = instData.OFFSET; |
| |
| if (!instData.IDXEN && !instData.OFFEN) { |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr0, addr1, rsrcDesc, offset, inst_offset); |
| } else if (!instData.IDXEN && instData.OFFEN) { |
| addr0.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr0, addr1, rsrcDesc, offset, inst_offset); |
| } else if (instData.IDXEN && !instData.OFFEN) { |
| addr0.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr1, addr0, rsrcDesc, offset, inst_offset); |
| } else { |
| addr0.read(); |
| addr1.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr1, addr0, rsrcDesc, offset, inst_offset); |
| } |
| |
| if (isLocalMem()) { |
| gpuDynInst->computeUnit()->localMemoryPipe. |
| issueRequest(gpuDynInst); |
| wf->rdLmReqsInPipe--; |
| wf->outstandingReqsRdLm++; |
| } else { |
| gpuDynInst->computeUnit()->globalMemoryPipe. |
| issueRequest(gpuDynInst); |
| wf->rdGmReqsInPipe--; |
| wf->outstandingReqsRdGm++; |
| } |
| |
| wf->outstandingReqs++; |
| wf->validateRequestCounters(); |
| } |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_UBYTE::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| initMemRead<VecElemU8>(gpuDynInst); |
| } // initiateAcc |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_UBYTE::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| VecOperandU32 vdst(gpuDynInst, extData.VDATA); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| if (!oobMask[lane]) { |
| vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU8*>( |
| gpuDynInst->d_data))[lane]); |
| } else { |
| vdst[lane] = 0; |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| |
| Inst_MUBUF__BUFFER_LOAD_SBYTE |
| ::Inst_MUBUF__BUFFER_LOAD_SBYTE(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_load_sbyte") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_LOAD_SBYTE |
| |
| Inst_MUBUF__BUFFER_LOAD_SBYTE::~Inst_MUBUF__BUFFER_LOAD_SBYTE() |
| { |
| } // ~Inst_MUBUF__BUFFER_LOAD_SBYTE |
| |
| // Untyped buffer load signed byte (sign extend to VGPR destination). |
| void |
| Inst_MUBUF__BUFFER_LOAD_SBYTE::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_SBYTE::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MUBUF__BUFFER_LOAD_USHORT |
| ::Inst_MUBUF__BUFFER_LOAD_USHORT(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_load_ushort") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| if (instData.LDS) { |
| setFlag(GroupSegment); |
| } else { |
| setFlag(GlobalSegment); |
| } |
| } // Inst_MUBUF__BUFFER_LOAD_USHORT |
| |
| Inst_MUBUF__BUFFER_LOAD_USHORT::~Inst_MUBUF__BUFFER_LOAD_USHORT() |
| { |
| } // ~Inst_MUBUF__BUFFER_LOAD_USHORT |
| |
| // Untyped buffer load unsigned short (zero extend to VGPR destination). |
| void |
| Inst_MUBUF__BUFFER_LOAD_USHORT::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| |
| ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); |
| ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); |
| ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); |
| ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); |
| |
| rsrcDesc.read(); |
| offset.read(); |
| |
| int inst_offset = instData.OFFSET; |
| |
| if (!instData.IDXEN && !instData.OFFEN) { |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr0, addr1, rsrcDesc, offset, inst_offset); |
| } else if (!instData.IDXEN && instData.OFFEN) { |
| addr0.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr0, addr1, rsrcDesc, offset, inst_offset); |
| } else if (instData.IDXEN && !instData.OFFEN) { |
| addr0.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr1, addr0, rsrcDesc, offset, inst_offset); |
| } else { |
| addr0.read(); |
| addr1.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr1, addr0, rsrcDesc, offset, inst_offset); |
| } |
| |
| if (isLocalMem()) { |
| gpuDynInst->computeUnit()->localMemoryPipe |
| .issueRequest(gpuDynInst); |
| wf->rdLmReqsInPipe--; |
| wf->outstandingReqsRdLm++; |
| } else { |
| gpuDynInst->computeUnit()->globalMemoryPipe |
| .issueRequest(gpuDynInst); |
| wf->rdGmReqsInPipe--; |
| wf->outstandingReqsRdGm++; |
| } |
| |
| wf->outstandingReqs++; |
| wf->validateRequestCounters(); |
| } |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_USHORT::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| initMemRead<VecElemU16>(gpuDynInst); |
| } // initiateAcc |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_USHORT::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| VecOperandU32 vdst(gpuDynInst, extData.VDATA); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| if (!oobMask[lane]) { |
| vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU16*>( |
| gpuDynInst->d_data))[lane]); |
| } else { |
| vdst[lane] = 0; |
| } |
| } |
| } |
| |
| vdst.write(); |
| } |
| |
| |
| Inst_MUBUF__BUFFER_LOAD_SSHORT |
| ::Inst_MUBUF__BUFFER_LOAD_SSHORT(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_load_sshort") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_LOAD_SSHORT |
| |
| Inst_MUBUF__BUFFER_LOAD_SSHORT::~Inst_MUBUF__BUFFER_LOAD_SSHORT() |
| { |
| } // ~Inst_MUBUF__BUFFER_LOAD_SSHORT |
| |
| // Untyped buffer load signed short (sign extend to VGPR destination). |
| void |
| Inst_MUBUF__BUFFER_LOAD_SSHORT::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_SSHORT::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_SSHORT::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MUBUF__BUFFER_LOAD_DWORD |
| ::Inst_MUBUF__BUFFER_LOAD_DWORD(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_load_dword") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| if (instData.LDS) { |
| setFlag(GroupSegment); |
| } else { |
| setFlag(GlobalSegment); |
| } |
| } // Inst_MUBUF__BUFFER_LOAD_DWORD |
| |
| Inst_MUBUF__BUFFER_LOAD_DWORD::~Inst_MUBUF__BUFFER_LOAD_DWORD() |
| { |
| } // ~Inst_MUBUF__BUFFER_LOAD_DWORD |
| |
| // Untyped buffer load dword. |
| void |
| Inst_MUBUF__BUFFER_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| |
| ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); |
| ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); |
| ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); |
| ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); |
| |
| rsrcDesc.read(); |
| offset.read(); |
| |
| int inst_offset = instData.OFFSET; |
| |
| if (!instData.IDXEN && !instData.OFFEN) { |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr0, addr1, rsrcDesc, offset, inst_offset); |
| } else if (!instData.IDXEN && instData.OFFEN) { |
| addr0.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr0, addr1, rsrcDesc, offset, inst_offset); |
| } else if (instData.IDXEN && !instData.OFFEN) { |
| addr0.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr1, addr0, rsrcDesc, offset, inst_offset); |
| } else { |
| addr0.read(); |
| addr1.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr1, addr0, rsrcDesc, offset, inst_offset); |
| } |
| |
| if (isLocalMem()) { |
| gpuDynInst->computeUnit()->localMemoryPipe |
| .issueRequest(gpuDynInst); |
| wf->rdLmReqsInPipe--; |
| wf->outstandingReqsRdLm++; |
| } else { |
| gpuDynInst->computeUnit()->globalMemoryPipe |
| .issueRequest(gpuDynInst); |
| wf->rdGmReqsInPipe--; |
| wf->outstandingReqsRdGm++; |
| } |
| |
| wf->outstandingReqs++; |
| wf->validateRequestCounters(); |
| } |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| initMemRead<VecElemU32>(gpuDynInst); |
| } // initiateAcc |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| VecOperandU32 vdst(gpuDynInst, extData.VDATA); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| if (!oobMask[lane]) { |
| vdst[lane] = (reinterpret_cast<VecElemU32*>( |
| gpuDynInst->d_data))[lane]; |
| } else { |
| vdst[lane] = 0; |
| } |
| } |
| } |
| |
| vdst.write(); |
| } // completeAcc |
| |
| Inst_MUBUF__BUFFER_LOAD_DWORDX2 |
| ::Inst_MUBUF__BUFFER_LOAD_DWORDX2(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_load_dwordx2") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| if (instData.LDS) { |
| setFlag(GroupSegment); |
| } else { |
| setFlag(GlobalSegment); |
| } |
| } // Inst_MUBUF__BUFFER_LOAD_DWORDX2 |
| |
| Inst_MUBUF__BUFFER_LOAD_DWORDX2::~Inst_MUBUF__BUFFER_LOAD_DWORDX2() |
| { |
| } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX2 |
| |
| // Untyped buffer load 2 dwords. |
| void |
| Inst_MUBUF__BUFFER_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| |
| ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); |
| ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); |
| ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); |
| ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); |
| |
| rsrcDesc.read(); |
| offset.read(); |
| |
| int inst_offset = instData.OFFSET; |
| |
| if (!instData.IDXEN && !instData.OFFEN) { |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr0, addr1, rsrcDesc, offset, inst_offset); |
| } else if (!instData.IDXEN && instData.OFFEN) { |
| addr0.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr0, addr1, rsrcDesc, offset, inst_offset); |
| } else if (instData.IDXEN && !instData.OFFEN) { |
| addr0.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr1, addr0, rsrcDesc, offset, inst_offset); |
| } else { |
| addr0.read(); |
| addr1.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr1, addr0, rsrcDesc, offset, inst_offset); |
| } |
| |
| if (isLocalMem()) { |
| gpuDynInst->computeUnit()->localMemoryPipe |
| .issueRequest(gpuDynInst); |
| wf->rdLmReqsInPipe--; |
| wf->outstandingReqsRdLm++; |
| } else { |
| gpuDynInst->computeUnit()->globalMemoryPipe |
| .issueRequest(gpuDynInst); |
| wf->rdGmReqsInPipe--; |
| wf->outstandingReqsRdGm++; |
| } |
| |
| wf->outstandingReqs++; |
| wf->validateRequestCounters(); |
| } // execute |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| initMemRead<2>(gpuDynInst); |
| } // initiateAcc |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| VecOperandU32 vdst0(gpuDynInst, extData.VDATA); |
| VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| if (!oobMask[lane]) { |
| vdst0[lane] = (reinterpret_cast<VecElemU32*>( |
| gpuDynInst->d_data))[lane * 2]; |
| vdst1[lane] = (reinterpret_cast<VecElemU32*>( |
| gpuDynInst->d_data))[lane * 2 + 1]; |
| } else { |
| vdst0[lane] = 0; |
| vdst1[lane] = 0; |
| } |
| } |
| } |
| |
| vdst0.write(); |
| vdst1.write(); |
| } // completeAcc |
| |
| Inst_MUBUF__BUFFER_LOAD_DWORDX3 |
| ::Inst_MUBUF__BUFFER_LOAD_DWORDX3(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_load_dwordx3") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| if (instData.LDS) { |
| setFlag(GroupSegment); |
| } else { |
| setFlag(GlobalSegment); |
| } |
| } // Inst_MUBUF__BUFFER_LOAD_DWORDX3 |
| |
| Inst_MUBUF__BUFFER_LOAD_DWORDX3::~Inst_MUBUF__BUFFER_LOAD_DWORDX3() |
| { |
| } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX3 |
| |
| // Untyped buffer load 3 dwords. |
| void |
| Inst_MUBUF__BUFFER_LOAD_DWORDX3::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| |
| ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); |
| ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); |
| ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); |
| ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); |
| |
| rsrcDesc.read(); |
| offset.read(); |
| |
| int inst_offset = instData.OFFSET; |
| |
| if (!instData.IDXEN && !instData.OFFEN) { |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr0, addr1, rsrcDesc, offset, inst_offset); |
| } else if (!instData.IDXEN && instData.OFFEN) { |
| addr0.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr0, addr1, rsrcDesc, offset, inst_offset); |
| } else if (instData.IDXEN && !instData.OFFEN) { |
| addr0.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr1, addr0, rsrcDesc, offset, inst_offset); |
| } else { |
| addr0.read(); |
| addr1.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr1, addr0, rsrcDesc, offset, inst_offset); |
| } |
| |
| if (isLocalMem()) { |
| gpuDynInst->computeUnit()->localMemoryPipe |
| .issueRequest(gpuDynInst); |
| wf->rdLmReqsInPipe--; |
| wf->outstandingReqsRdLm++; |
| } else { |
| gpuDynInst->computeUnit()->globalMemoryPipe |
| .issueRequest(gpuDynInst); |
| wf->rdGmReqsInPipe--; |
| wf->outstandingReqsRdGm++; |
| } |
| |
| wf->outstandingReqs++; |
| wf->validateRequestCounters(); |
| } // execute |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| initMemRead<3>(gpuDynInst); |
| } // initiateAcc |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| VecOperandU32 vdst0(gpuDynInst, extData.VDATA); |
| VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1); |
| VecOperandU32 vdst2(gpuDynInst, extData.VDATA + 2); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| if (!oobMask[lane]) { |
| vdst0[lane] = (reinterpret_cast<VecElemU32*>( |
| gpuDynInst->d_data))[lane * 3]; |
| vdst1[lane] = (reinterpret_cast<VecElemU32*>( |
| gpuDynInst->d_data))[lane * 3 + 1]; |
| vdst2[lane] = (reinterpret_cast<VecElemU32*>( |
| gpuDynInst->d_data))[lane * 3 + 2]; |
| } else { |
| vdst0[lane] = 0; |
| vdst1[lane] = 0; |
| vdst2[lane] = 0; |
| } |
| } |
| } |
| |
| vdst0.write(); |
| vdst1.write(); |
| vdst2.write(); |
| } // completeAcc |
| |
| Inst_MUBUF__BUFFER_LOAD_DWORDX4 |
| ::Inst_MUBUF__BUFFER_LOAD_DWORDX4(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_load_dwordx4") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| if (instData.LDS) { |
| setFlag(GroupSegment); |
| } else { |
| setFlag(GlobalSegment); |
| } |
| } // Inst_MUBUF__BUFFER_LOAD_DWORDX4 |
| |
| Inst_MUBUF__BUFFER_LOAD_DWORDX4::~Inst_MUBUF__BUFFER_LOAD_DWORDX4() |
| { |
| } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX4 |
| |
| // Untyped buffer load 4 dwords. |
| void |
| Inst_MUBUF__BUFFER_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| |
| ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); |
| ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); |
| ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); |
| ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); |
| |
| rsrcDesc.read(); |
| offset.read(); |
| |
| int inst_offset = instData.OFFSET; |
| |
| if (!instData.IDXEN && !instData.OFFEN) { |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr0, addr1, rsrcDesc, offset, inst_offset); |
| } else if (!instData.IDXEN && instData.OFFEN) { |
| addr0.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr0, addr1, rsrcDesc, offset, inst_offset); |
| } else if (instData.IDXEN && !instData.OFFEN) { |
| addr0.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr1, addr0, rsrcDesc, offset, inst_offset); |
| } else { |
| addr0.read(); |
| addr1.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr1, addr0, rsrcDesc, offset, inst_offset); |
| } |
| |
| if (isLocalMem()) { |
| gpuDynInst->computeUnit()->localMemoryPipe |
| .issueRequest(gpuDynInst); |
| wf->rdLmReqsInPipe--; |
| wf->outstandingReqsRdLm++; |
| } else { |
| gpuDynInst->computeUnit()->globalMemoryPipe |
| .issueRequest(gpuDynInst); |
| wf->rdGmReqsInPipe--; |
| wf->outstandingReqsRdGm++; |
| } |
| |
| wf->outstandingReqs++; |
| wf->validateRequestCounters(); |
| } // execute |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| initMemRead<4>(gpuDynInst); |
| } // initiateAcc |
| |
| void |
| Inst_MUBUF__BUFFER_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| VecOperandU32 vdst0(gpuDynInst, extData.VDATA); |
| VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1); |
| VecOperandU32 vdst2(gpuDynInst, extData.VDATA + 2); |
| VecOperandU32 vdst3(gpuDynInst, extData.VDATA + 3); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| if (!oobMask[lane]) { |
| vdst0[lane] = (reinterpret_cast<VecElemU32*>( |
| gpuDynInst->d_data))[lane * 4]; |
| vdst1[lane] = (reinterpret_cast<VecElemU32*>( |
| gpuDynInst->d_data))[lane * 4 + 1]; |
| vdst2[lane] = (reinterpret_cast<VecElemU32*>( |
| gpuDynInst->d_data))[lane * 4 + 2]; |
| vdst3[lane] = (reinterpret_cast<VecElemU32*>( |
| gpuDynInst->d_data))[lane * 4 + 3]; |
| } else { |
| vdst0[lane] = 0; |
| vdst1[lane] = 0; |
| vdst2[lane] = 0; |
| vdst3[lane] = 0; |
| } |
| } |
| } |
| |
| vdst0.write(); |
| vdst1.write(); |
| vdst2.write(); |
| vdst3.write(); |
| } // completeAcc |
| |
| Inst_MUBUF__BUFFER_STORE_BYTE |
| ::Inst_MUBUF__BUFFER_STORE_BYTE(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_store_byte") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| if (instData.LDS) { |
| setFlag(GroupSegment); |
| } else { |
| setFlag(GlobalSegment); |
| } |
| } // Inst_MUBUF__BUFFER_STORE_BYTE |
| |
| Inst_MUBUF__BUFFER_STORE_BYTE::~Inst_MUBUF__BUFFER_STORE_BYTE() |
| { |
| } // ~Inst_MUBUF__BUFFER_STORE_BYTE |
| |
| // Untyped buffer store byte. |
| void |
| Inst_MUBUF__BUFFER_STORE_BYTE::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| |
| ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); |
| ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); |
| ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); |
| ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); |
| |
| rsrcDesc.read(); |
| offset.read(); |
| |
| int inst_offset = instData.OFFSET; |
| |
| if (!instData.IDXEN && !instData.OFFEN) { |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr0, addr1, rsrcDesc, offset, inst_offset); |
| } else if (!instData.IDXEN && instData.OFFEN) { |
| addr0.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr0, addr1, rsrcDesc, offset, inst_offset); |
| } else if (instData.IDXEN && !instData.OFFEN) { |
| addr0.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr1, addr0, rsrcDesc, offset, inst_offset); |
| } else { |
| addr0.read(); |
| addr1.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr1, addr0, rsrcDesc, offset, inst_offset); |
| } |
| |
| if (isLocalMem()) { |
| gpuDynInst->computeUnit()->localMemoryPipe |
| .issueRequest(gpuDynInst); |
| wf->wrLmReqsInPipe--; |
| wf->outstandingReqsWrLm++; |
| } else { |
| gpuDynInst->computeUnit()->globalMemoryPipe |
| .issueRequest(gpuDynInst); |
| wf->wrGmReqsInPipe--; |
| wf->outstandingReqsWrGm++; |
| } |
| |
| wf->outstandingReqs++; |
| wf->validateRequestCounters(); |
| } |
| |
| void |
| Inst_MUBUF__BUFFER_STORE_BYTE::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| ConstVecOperandI8 data(gpuDynInst, extData.VDATA); |
| data.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| (reinterpret_cast<VecElemI8*>(gpuDynInst->d_data))[lane] |
| = data[lane]; |
| } |
| } |
| |
| initMemWrite<VecElemI8>(gpuDynInst); |
| } // initiateAcc |
| |
| void |
| Inst_MUBUF__BUFFER_STORE_BYTE::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MUBUF__BUFFER_STORE_SHORT |
| ::Inst_MUBUF__BUFFER_STORE_SHORT(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_store_short") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| if (instData.LDS) { |
| setFlag(GroupSegment); |
| } else { |
| setFlag(GlobalSegment); |
| } |
| } // Inst_MUBUF__BUFFER_STORE_SHORT |
| |
| Inst_MUBUF__BUFFER_STORE_SHORT::~Inst_MUBUF__BUFFER_STORE_SHORT() |
| { |
| } // ~Inst_MUBUF__BUFFER_STORE_SHORT |
| |
| // Untyped buffer store short. |
| void |
| Inst_MUBUF__BUFFER_STORE_SHORT::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| |
| ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); |
| ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); |
| ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); |
| ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); |
| |
| rsrcDesc.read(); |
| offset.read(); |
| |
| int inst_offset = instData.OFFSET; |
| |
| if (!instData.IDXEN && !instData.OFFEN) { |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr0, addr1, rsrcDesc, offset, inst_offset); |
| } else if (!instData.IDXEN && instData.OFFEN) { |
| addr0.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr0, addr1, rsrcDesc, offset, inst_offset); |
| } else if (instData.IDXEN && !instData.OFFEN) { |
| addr0.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr1, addr0, rsrcDesc, offset, inst_offset); |
| } else { |
| addr0.read(); |
| addr1.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr1, addr0, rsrcDesc, offset, inst_offset); |
| } |
| |
| if (isLocalMem()) { |
| gpuDynInst->computeUnit()->localMemoryPipe |
| .issueRequest(gpuDynInst); |
| wf->wrLmReqsInPipe--; |
| wf->outstandingReqsWrLm++; |
| } else { |
| gpuDynInst->computeUnit()->globalMemoryPipe |
| .issueRequest(gpuDynInst); |
| wf->wrGmReqsInPipe--; |
| wf->outstandingReqsWrGm++; |
| } |
| |
| wf->outstandingReqs++; |
| wf->validateRequestCounters(); |
| } |
| |
| void |
| Inst_MUBUF__BUFFER_STORE_SHORT::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| ConstVecOperandI16 data(gpuDynInst, extData.VDATA); |
| data.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| (reinterpret_cast<VecElemI16*>(gpuDynInst->d_data))[lane] |
| = data[lane]; |
| } |
| } |
| |
| initMemWrite<VecElemI16>(gpuDynInst); |
| } // initiateAcc |
| |
| void |
| Inst_MUBUF__BUFFER_STORE_SHORT::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MUBUF__BUFFER_STORE_DWORD:: |
| Inst_MUBUF__BUFFER_STORE_DWORD(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_store_dword") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| if (instData.LDS) { |
| setFlag(GroupSegment); |
| } else { |
| setFlag(GlobalSegment); |
| } |
| } // Inst_MUBUF__BUFFER_STORE_DWORD |
| |
| Inst_MUBUF__BUFFER_STORE_DWORD::~Inst_MUBUF__BUFFER_STORE_DWORD() |
| { |
| } // ~Inst_MUBUF__BUFFER_STORE_DWORD |
| |
| // Untyped buffer store dword. |
| void |
| Inst_MUBUF__BUFFER_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| |
| ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); |
| ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); |
| ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); |
| ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); |
| |
| rsrcDesc.read(); |
| offset.read(); |
| |
| int inst_offset = instData.OFFSET; |
| |
| if (!instData.IDXEN && !instData.OFFEN) { |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr0, addr1, rsrcDesc, offset, inst_offset); |
| } else if (!instData.IDXEN && instData.OFFEN) { |
| addr0.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr0, addr1, rsrcDesc, offset, inst_offset); |
| } else if (instData.IDXEN && !instData.OFFEN) { |
| addr0.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr1, addr0, rsrcDesc, offset, inst_offset); |
| } else { |
| addr0.read(); |
| addr1.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr1, addr0, rsrcDesc, offset, inst_offset); |
| } |
| |
| if (isLocalMem()) { |
| gpuDynInst->computeUnit()->localMemoryPipe |
| .issueRequest(gpuDynInst); |
| wf->wrLmReqsInPipe--; |
| wf->outstandingReqsWrLm++; |
| } else { |
| gpuDynInst->computeUnit()->globalMemoryPipe |
| .issueRequest(gpuDynInst); |
| wf->wrGmReqsInPipe--; |
| wf->outstandingReqsWrGm++; |
| } |
| |
| wf->outstandingReqs++; |
| wf->validateRequestCounters(); |
| } |
| |
| void |
| Inst_MUBUF__BUFFER_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| ConstVecOperandU32 data(gpuDynInst, extData.VDATA); |
| data.read(); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane] |
| = data[lane]; |
| } |
| } |
| |
| initMemWrite<VecElemU32>(gpuDynInst); |
| } // initiateAcc |
| |
| void |
| Inst_MUBUF__BUFFER_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // completeAcc |
| |
| Inst_MUBUF__BUFFER_STORE_DWORDX2 |
| ::Inst_MUBUF__BUFFER_STORE_DWORDX2(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_store_dwordx2") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| if (instData.LDS) { |
| setFlag(GroupSegment); |
| } else { |
| setFlag(GlobalSegment); |
| } |
| } // Inst_MUBUF__BUFFER_STORE_DWORDX2 |
| |
| Inst_MUBUF__BUFFER_STORE_DWORDX2::~Inst_MUBUF__BUFFER_STORE_DWORDX2() |
| { |
| } // ~Inst_MUBUF__BUFFER_STORE_DWORDX2 |
| |
| // Untyped buffer store 2 dwords. |
| void |
| Inst_MUBUF__BUFFER_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| |
| ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); |
| ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); |
| ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); |
| ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); |
| ConstVecOperandU32 data0(gpuDynInst, extData.VDATA); |
| ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1); |
| |
| rsrcDesc.read(); |
| offset.read(); |
| data0.read(); |
| data1.read(); |
| |
| int inst_offset = instData.OFFSET; |
| |
| if (!instData.IDXEN && !instData.OFFEN) { |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr0, addr1, rsrcDesc, offset, inst_offset); |
| } else if (!instData.IDXEN && instData.OFFEN) { |
| addr0.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr0, addr1, rsrcDesc, offset, inst_offset); |
| } else if (instData.IDXEN && !instData.OFFEN) { |
| addr0.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr1, addr0, rsrcDesc, offset, inst_offset); |
| } else { |
| addr0.read(); |
| addr1.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr1, addr0, rsrcDesc, offset, inst_offset); |
| } |
| |
| if (isLocalMem()) { |
| gpuDynInst->computeUnit()->localMemoryPipe |
| .issueRequest(gpuDynInst); |
| wf->wrLmReqsInPipe--; |
| wf->outstandingReqsWrLm++; |
| } else { |
| gpuDynInst->computeUnit()->globalMemoryPipe |
| .issueRequest(gpuDynInst); |
| wf->wrGmReqsInPipe--; |
| wf->outstandingReqsWrGm++; |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 4] |
| = data0[lane]; |
| (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*4 + 1] |
| = data1[lane]; |
| } |
| } |
| |
| wf->outstandingReqs++; |
| wf->validateRequestCounters(); |
| } // execute |
| |
| void |
| Inst_MUBUF__BUFFER_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| initMemWrite<2>(gpuDynInst); |
| } // initiateAcc |
| |
| void |
| Inst_MUBUF__BUFFER_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // completeAcc |
| |
| Inst_MUBUF__BUFFER_STORE_DWORDX3 |
| ::Inst_MUBUF__BUFFER_STORE_DWORDX3(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_store_dwordx3") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| if (instData.LDS) { |
| setFlag(GroupSegment); |
| } else { |
| setFlag(GlobalSegment); |
| } |
| } // Inst_MUBUF__BUFFER_STORE_DWORDX3 |
| |
| Inst_MUBUF__BUFFER_STORE_DWORDX3::~Inst_MUBUF__BUFFER_STORE_DWORDX3() |
| { |
| } // ~Inst_MUBUF__BUFFER_STORE_DWORDX3 |
| |
| // Untyped buffer store 3 dwords. |
| void |
| Inst_MUBUF__BUFFER_STORE_DWORDX3::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| |
| ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); |
| ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); |
| ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); |
| ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); |
| ConstVecOperandU32 data0(gpuDynInst, extData.VDATA); |
| ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1); |
| ConstVecOperandU32 data2(gpuDynInst, extData.VDATA + 2); |
| |
| rsrcDesc.read(); |
| offset.read(); |
| data0.read(); |
| data1.read(); |
| data2.read(); |
| |
| int inst_offset = instData.OFFSET; |
| |
| if (!instData.IDXEN && !instData.OFFEN) { |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr0, addr1, rsrcDesc, offset, inst_offset); |
| } else if (!instData.IDXEN && instData.OFFEN) { |
| addr0.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr0, addr1, rsrcDesc, offset, inst_offset); |
| } else if (instData.IDXEN && !instData.OFFEN) { |
| addr0.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr1, addr0, rsrcDesc, offset, inst_offset); |
| } else { |
| addr0.read(); |
| addr1.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr1, addr0, rsrcDesc, offset, inst_offset); |
| } |
| |
| if (isLocalMem()) { |
| gpuDynInst->computeUnit()->localMemoryPipe |
| .issueRequest(gpuDynInst); |
| wf->wrLmReqsInPipe--; |
| wf->outstandingReqsWrLm++; |
| } else { |
| gpuDynInst->computeUnit()->globalMemoryPipe |
| .issueRequest(gpuDynInst); |
| wf->wrGmReqsInPipe--; |
| wf->outstandingReqsWrGm++; |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 4] |
| = data0[lane]; |
| (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*4 + 1] |
| = data1[lane]; |
| (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*4 + 2] |
| = data2[lane]; |
| } |
| } |
| |
| wf->outstandingReqs++; |
| wf->validateRequestCounters(); |
| } // execute |
| |
| void |
| Inst_MUBUF__BUFFER_STORE_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| initMemWrite<3>(gpuDynInst); |
| } // initiateAcc |
| |
| void |
| Inst_MUBUF__BUFFER_STORE_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // completeAcc |
| |
| Inst_MUBUF__BUFFER_STORE_DWORDX4 |
| ::Inst_MUBUF__BUFFER_STORE_DWORDX4(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_store_dwordx4") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| if (instData.LDS) { |
| setFlag(GroupSegment); |
| } else { |
| setFlag(GlobalSegment); |
| } |
| } // Inst_MUBUF__BUFFER_STORE_DWORDX4 |
| |
| Inst_MUBUF__BUFFER_STORE_DWORDX4::~Inst_MUBUF__BUFFER_STORE_DWORDX4() |
| { |
| } // ~Inst_MUBUF__BUFFER_STORE_DWORDX4 |
| |
| // Untyped buffer store 4 dwords. |
| void |
| Inst_MUBUF__BUFFER_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| |
| ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); |
| ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); |
| ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); |
| ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); |
| ConstVecOperandU32 data0(gpuDynInst, extData.VDATA); |
| ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1); |
| ConstVecOperandU32 data2(gpuDynInst, extData.VDATA + 2); |
| ConstVecOperandU32 data3(gpuDynInst, extData.VDATA + 3); |
| |
| rsrcDesc.read(); |
| offset.read(); |
| data0.read(); |
| data1.read(); |
| data2.read(); |
| data3.read(); |
| |
| int inst_offset = instData.OFFSET; |
| |
| if (!instData.IDXEN && !instData.OFFEN) { |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr0, addr1, rsrcDesc, offset, inst_offset); |
| } else if (!instData.IDXEN && instData.OFFEN) { |
| addr0.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr0, addr1, rsrcDesc, offset, inst_offset); |
| } else if (instData.IDXEN && !instData.OFFEN) { |
| addr0.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr1, addr0, rsrcDesc, offset, inst_offset); |
| } else { |
| addr0.read(); |
| addr1.read(); |
| calcAddr<ConstVecOperandU32, ConstVecOperandU32, |
| ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst, |
| addr1, addr0, rsrcDesc, offset, inst_offset); |
| } |
| |
| if (isLocalMem()) { |
| gpuDynInst->computeUnit()->localMemoryPipe |
| .issueRequest(gpuDynInst); |
| wf->wrLmReqsInPipe--; |
| wf->outstandingReqsWrLm++; |
| } else { |
| gpuDynInst->computeUnit()->globalMemoryPipe |
| .issueRequest(gpuDynInst); |
| wf->wrGmReqsInPipe--; |
| wf->outstandingReqsWrGm++; |
| } |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 4] |
| = data0[lane]; |
| (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*4 + 1] |
| = data1[lane]; |
| (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*4 + 2] |
| = data2[lane]; |
| (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*4 + 3] |
| = data3[lane]; |
| } |
| } |
| |
| wf->outstandingReqs++; |
| wf->validateRequestCounters(); |
| } // execute |
| |
| void |
| Inst_MUBUF__BUFFER_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| initMemWrite<4>(gpuDynInst); |
| } // initiateAcc |
| |
| void |
| Inst_MUBUF__BUFFER_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // completeAcc |
| |
| Inst_MUBUF__BUFFER_STORE_LDS_DWORD |
| ::Inst_MUBUF__BUFFER_STORE_LDS_DWORD(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_store_lds_dword") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_STORE_LDS_DWORD |
| |
| Inst_MUBUF__BUFFER_STORE_LDS_DWORD::~Inst_MUBUF__BUFFER_STORE_LDS_DWORD() |
| { |
| } // ~Inst_MUBUF__BUFFER_STORE_LDS_DWORD |
| |
| // Store one DWORD from LDS memory to system memory without utilizing |
| // VGPRs. |
| void |
| Inst_MUBUF__BUFFER_STORE_LDS_DWORD::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MUBUF__BUFFER_WBINVL1::Inst_MUBUF__BUFFER_WBINVL1(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_wbinvl1") |
| { |
| setFlag(MemoryRef); |
| setFlag(GPUStaticInst::MemSync); |
| setFlag(GlobalSegment); |
| setFlag(MemSync); |
| } // Inst_MUBUF__BUFFER_WBINVL1 |
| |
| Inst_MUBUF__BUFFER_WBINVL1::~Inst_MUBUF__BUFFER_WBINVL1() |
| { |
| } // ~Inst_MUBUF__BUFFER_WBINVL1 |
| |
| // Write back and invalidate the shader L1. |
| // Always returns ACK to shader. |
| void |
| Inst_MUBUF__BUFFER_WBINVL1::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| |
| if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { |
| gpuDynInst->computeUnit()->globalMemoryPipe. |
| issueRequest(gpuDynInst); |
| wf->wrGmReqsInPipe--; |
| wf->rdGmReqsInPipe--; |
| |
| wf->outstandingReqsWrGm++; |
| wf->outstandingReqsRdGm++; |
| } else { |
| fatal("Non global flat instructions not implemented yet.\n"); |
| } |
| |
| wf->outstandingReqs++; |
| wf->validateRequestCounters(); |
| } |
| |
| void |
| Inst_MUBUF__BUFFER_WBINVL1::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| injectGlobalMemFence(gpuDynInst); |
| } // initiateAcc |
| |
| void |
| Inst_MUBUF__BUFFER_WBINVL1::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // completeAcc |
| |
| Inst_MUBUF__BUFFER_WBINVL1_VOL |
| ::Inst_MUBUF__BUFFER_WBINVL1_VOL(InFmt_MUBUF*iFmt) |
| : Inst_MUBUF(iFmt, "buffer_wbinvl1_vol") { |
| /** |
| * This instruction is same as buffer_wbinvl1 instruction except this |
| * instruction only invalidate L1 shader line with MTYPE for system |
| * or group coherence. Since L1 do not differentiate between its cache |
| * lines, this instruction currently behaves (and implemented ) |
| * exactly like buffer_wbinvl1 instruction. |
| */ |
| setFlag(MemoryRef); |
| setFlag(GPUStaticInst::MemSync); |
| setFlag(GlobalSegment); |
| setFlag(MemSync); |
| } // Inst_MUBUF__BUFFER_WBINVL1_VOL |
| |
| Inst_MUBUF__BUFFER_WBINVL1_VOL::~Inst_MUBUF__BUFFER_WBINVL1_VOL() |
| { |
| } // ~Inst_MUBUF__BUFFER_WBINVL1_VOL |
| |
| // Write back and invalidate the shader L1 only for lines that are marked |
| // volatile. Always returns ACK to shader. |
| void |
| Inst_MUBUF__BUFFER_WBINVL1_VOL::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| |
| if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { |
| gpuDynInst->computeUnit()->globalMemoryPipe. |
| issueRequest(gpuDynInst); |
| wf->wrGmReqsInPipe--; |
| wf->rdGmReqsInPipe--; |
| |
| wf->outstandingReqsWrGm++; |
| wf->outstandingReqsRdGm++; |
| } else { |
| fatal("Non global flat instructions not implemented yet.\n"); |
| } |
| |
| wf->outstandingReqs++; |
| wf->validateRequestCounters(); |
| } |
| void |
| Inst_MUBUF__BUFFER_WBINVL1_VOL::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| injectGlobalMemFence(gpuDynInst); |
| } // initiateAcc |
| void |
| Inst_MUBUF__BUFFER_WBINVL1_VOL::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // completeAcc |
| |
| Inst_MUBUF__BUFFER_ATOMIC_SWAP |
| ::Inst_MUBUF__BUFFER_ATOMIC_SWAP(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_atomic_swap") |
| { |
| setFlag(AtomicExch); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } // if |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_ATOMIC_SWAP |
| |
| Inst_MUBUF__BUFFER_ATOMIC_SWAP::~Inst_MUBUF__BUFFER_ATOMIC_SWAP() |
| { |
| } // ~Inst_MUBUF__BUFFER_ATOMIC_SWAP |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = DATA; |
| // RETURN_DATA = tmp. |
| void |
| Inst_MUBUF__BUFFER_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP |
| ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_atomic_cmpswap") |
| { |
| setFlag(AtomicCAS); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP |
| |
| Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP() |
| { |
| } // ~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP |
| |
| // tmp = MEM[ADDR]; |
| // src = DATA[0]; |
| // cmp = DATA[1]; |
| // MEM[ADDR] = (tmp == cmp) ? src : tmp; |
| // RETURN_DATA[0] = tmp. |
| void |
| Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MUBUF__BUFFER_ATOMIC_ADD |
| ::Inst_MUBUF__BUFFER_ATOMIC_ADD(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_atomic_add") |
| { |
| setFlag(AtomicAdd); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } // if |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_ATOMIC_ADD |
| |
| Inst_MUBUF__BUFFER_ATOMIC_ADD::~Inst_MUBUF__BUFFER_ATOMIC_ADD() |
| { |
| } // ~Inst_MUBUF__BUFFER_ATOMIC_ADD |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] += DATA; |
| // RETURN_DATA = tmp. |
| void |
| Inst_MUBUF__BUFFER_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MUBUF__BUFFER_ATOMIC_SUB |
| ::Inst_MUBUF__BUFFER_ATOMIC_SUB(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_atomic_sub") |
| { |
| setFlag(AtomicSub); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_ATOMIC_SUB |
| |
| Inst_MUBUF__BUFFER_ATOMIC_SUB::~Inst_MUBUF__BUFFER_ATOMIC_SUB() |
| { |
| } // ~Inst_MUBUF__BUFFER_ATOMIC_SUB |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] -= DATA; |
| // RETURN_DATA = tmp. |
| void |
| Inst_MUBUF__BUFFER_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MUBUF__BUFFER_ATOMIC_SMIN |
| ::Inst_MUBUF__BUFFER_ATOMIC_SMIN(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_atomic_smin") |
| { |
| setFlag(AtomicMin); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_ATOMIC_SMIN |
| |
| Inst_MUBUF__BUFFER_ATOMIC_SMIN::~Inst_MUBUF__BUFFER_ATOMIC_SMIN() |
| { |
| } // ~Inst_MUBUF__BUFFER_ATOMIC_SMIN |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare); |
| // RETURN_DATA = tmp. |
| void |
| Inst_MUBUF__BUFFER_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MUBUF__BUFFER_ATOMIC_UMIN |
| ::Inst_MUBUF__BUFFER_ATOMIC_UMIN(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_atomic_umin") |
| { |
| setFlag(AtomicMin); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_ATOMIC_UMIN |
| |
| Inst_MUBUF__BUFFER_ATOMIC_UMIN::~Inst_MUBUF__BUFFER_ATOMIC_UMIN() |
| { |
| } // ~Inst_MUBUF__BUFFER_ATOMIC_UMIN |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare); |
| // RETURN_DATA = tmp. |
| void |
| Inst_MUBUF__BUFFER_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MUBUF__BUFFER_ATOMIC_SMAX |
| ::Inst_MUBUF__BUFFER_ATOMIC_SMAX(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_atomic_smax") |
| { |
| setFlag(AtomicMax); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_ATOMIC_SMAX |
| |
| Inst_MUBUF__BUFFER_ATOMIC_SMAX::~Inst_MUBUF__BUFFER_ATOMIC_SMAX() |
| { |
| } // ~Inst_MUBUF__BUFFER_ATOMIC_SMAX |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare); |
| // RETURN_DATA = tmp. |
| void |
| Inst_MUBUF__BUFFER_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MUBUF__BUFFER_ATOMIC_UMAX |
| ::Inst_MUBUF__BUFFER_ATOMIC_UMAX(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_atomic_umax") |
| { |
| setFlag(AtomicMax); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } // if |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_ATOMIC_UMAX |
| |
| Inst_MUBUF__BUFFER_ATOMIC_UMAX::~Inst_MUBUF__BUFFER_ATOMIC_UMAX() |
| { |
| } // ~Inst_MUBUF__BUFFER_ATOMIC_UMAX |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare); |
| // RETURN_DATA = tmp. |
| void |
| Inst_MUBUF__BUFFER_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MUBUF__BUFFER_ATOMIC_AND |
| ::Inst_MUBUF__BUFFER_ATOMIC_AND(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_atomic_and") |
| { |
| setFlag(AtomicAnd); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_ATOMIC_AND |
| |
| Inst_MUBUF__BUFFER_ATOMIC_AND::~Inst_MUBUF__BUFFER_ATOMIC_AND() |
| { |
| } // ~Inst_MUBUF__BUFFER_ATOMIC_AND |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] &= DATA; |
| // RETURN_DATA = tmp. |
| void |
| Inst_MUBUF__BUFFER_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MUBUF__BUFFER_ATOMIC_OR |
| ::Inst_MUBUF__BUFFER_ATOMIC_OR(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_atomic_or") |
| { |
| setFlag(AtomicOr); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_ATOMIC_OR |
| |
| Inst_MUBUF__BUFFER_ATOMIC_OR::~Inst_MUBUF__BUFFER_ATOMIC_OR() |
| { |
| } // ~Inst_MUBUF__BUFFER_ATOMIC_OR |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] |= DATA; |
| // RETURN_DATA = tmp. |
| void |
| Inst_MUBUF__BUFFER_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MUBUF__BUFFER_ATOMIC_XOR |
| ::Inst_MUBUF__BUFFER_ATOMIC_XOR(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_atomic_xor") |
| { |
| setFlag(AtomicXor); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_ATOMIC_XOR |
| |
| Inst_MUBUF__BUFFER_ATOMIC_XOR::~Inst_MUBUF__BUFFER_ATOMIC_XOR() |
| { |
| } // ~Inst_MUBUF__BUFFER_ATOMIC_XOR |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] ^= DATA; |
| // RETURN_DATA = tmp. |
| void |
| Inst_MUBUF__BUFFER_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MUBUF__BUFFER_ATOMIC_INC |
| ::Inst_MUBUF__BUFFER_ATOMIC_INC(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_atomic_inc") |
| { |
| setFlag(AtomicInc); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_ATOMIC_INC |
| |
| Inst_MUBUF__BUFFER_ATOMIC_INC::~Inst_MUBUF__BUFFER_ATOMIC_INC() |
| { |
| } // ~Inst_MUBUF__BUFFER_ATOMIC_INC |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare); |
| // RETURN_DATA = tmp. |
| void |
| Inst_MUBUF__BUFFER_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MUBUF__BUFFER_ATOMIC_DEC |
| ::Inst_MUBUF__BUFFER_ATOMIC_DEC(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_atomic_dec") |
| { |
| setFlag(AtomicDec); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_ATOMIC_DEC |
| |
| Inst_MUBUF__BUFFER_ATOMIC_DEC::~Inst_MUBUF__BUFFER_ATOMIC_DEC() |
| { |
| } // ~Inst_MUBUF__BUFFER_ATOMIC_DEC |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1 |
| // (unsigned compare); RETURN_DATA = tmp. |
| void |
| Inst_MUBUF__BUFFER_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2 |
| ::Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_atomic_swap_x2") |
| { |
| setFlag(AtomicExch); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2 |
| |
| Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2::~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2() |
| { |
| } // ~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = DATA[0:1]; |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 |
| ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_atomic_cmpswap_x2") |
| { |
| setFlag(AtomicCAS); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 |
| |
| Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 |
| ::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2() |
| { |
| } // ~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 |
| |
| // tmp = MEM[ADDR]; |
| // src = DATA[0:1]; |
| // cmp = DATA[2:3]; |
| // MEM[ADDR] = (tmp == cmp) ? src : tmp; |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MUBUF__BUFFER_ATOMIC_ADD_X2 |
| ::Inst_MUBUF__BUFFER_ATOMIC_ADD_X2(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_atomic_add_x2") |
| { |
| setFlag(AtomicAdd); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_ATOMIC_ADD_X2 |
| |
| Inst_MUBUF__BUFFER_ATOMIC_ADD_X2::~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2() |
| { |
| } // ~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] += DATA[0:1]; |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_MUBUF__BUFFER_ATOMIC_ADD_X2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MUBUF__BUFFER_ATOMIC_SUB_X2 |
| ::Inst_MUBUF__BUFFER_ATOMIC_SUB_X2(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_atomic_sub_x2") |
| { |
| setFlag(AtomicSub); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_ATOMIC_SUB_X2 |
| |
| Inst_MUBUF__BUFFER_ATOMIC_SUB_X2::~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2() |
| { |
| } // ~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] -= DATA[0:1]; |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_MUBUF__BUFFER_ATOMIC_SUB_X2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2 |
| ::Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_atomic_smin_x2") |
| { |
| setFlag(AtomicMin); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2 |
| |
| Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2::~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2() |
| { |
| } // ~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare); |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2 |
| ::Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_atomic_umin_x2") |
| { |
| setFlag(AtomicMin); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2 |
| |
| Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2::~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2() |
| { |
| } // ~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare); |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2 |
| ::Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_atomic_smax_x2") |
| { |
| setFlag(AtomicMax); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2 |
| |
| Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2::~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2() |
| { |
| } // ~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare); |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2 |
| ::Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_atomic_umax_x2") |
| { |
| setFlag(AtomicMax); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2 |
| |
| Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2::~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2() |
| { |
| } // ~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare); |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MUBUF__BUFFER_ATOMIC_AND_X2 |
| ::Inst_MUBUF__BUFFER_ATOMIC_AND_X2(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_atomic_and_x2") |
| { |
| setFlag(AtomicAnd); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_ATOMIC_AND_X2 |
| |
| Inst_MUBUF__BUFFER_ATOMIC_AND_X2::~Inst_MUBUF__BUFFER_ATOMIC_AND_X2() |
| { |
| } // ~Inst_MUBUF__BUFFER_ATOMIC_AND_X2 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] &= DATA[0:1]; |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_MUBUF__BUFFER_ATOMIC_AND_X2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MUBUF__BUFFER_ATOMIC_OR_X2 |
| ::Inst_MUBUF__BUFFER_ATOMIC_OR_X2(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_atomic_or_x2") |
| { |
| setFlag(AtomicOr); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_ATOMIC_OR_X2 |
| |
| Inst_MUBUF__BUFFER_ATOMIC_OR_X2::~Inst_MUBUF__BUFFER_ATOMIC_OR_X2() |
| { |
| } // ~Inst_MUBUF__BUFFER_ATOMIC_OR_X2 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] |= DATA[0:1]; |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_MUBUF__BUFFER_ATOMIC_OR_X2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MUBUF__BUFFER_ATOMIC_XOR_X2 |
| ::Inst_MUBUF__BUFFER_ATOMIC_XOR_X2(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_atomic_xor_x2") |
| { |
| setFlag(AtomicXor); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_ATOMIC_XOR_X2 |
| |
| Inst_MUBUF__BUFFER_ATOMIC_XOR_X2::~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2() |
| { |
| } // ~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] ^= DATA[0:1]; |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_MUBUF__BUFFER_ATOMIC_XOR_X2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MUBUF__BUFFER_ATOMIC_INC_X2 |
| ::Inst_MUBUF__BUFFER_ATOMIC_INC_X2(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_atomic_inc_x2") |
| { |
| setFlag(AtomicInc); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_ATOMIC_INC_X2 |
| |
| Inst_MUBUF__BUFFER_ATOMIC_INC_X2::~Inst_MUBUF__BUFFER_ATOMIC_INC_X2() |
| { |
| } // ~Inst_MUBUF__BUFFER_ATOMIC_INC_X2 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare); |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_MUBUF__BUFFER_ATOMIC_INC_X2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MUBUF__BUFFER_ATOMIC_DEC_X2 |
| ::Inst_MUBUF__BUFFER_ATOMIC_DEC_X2(InFmt_MUBUF *iFmt) |
| : Inst_MUBUF(iFmt, "buffer_atomic_dec_x2") |
| { |
| setFlag(AtomicDec); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MUBUF__BUFFER_ATOMIC_DEC_X2 |
| |
| Inst_MUBUF__BUFFER_ATOMIC_DEC_X2::~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2() |
| { |
| } // ~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1 |
| // (unsigned compare); |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_MUBUF__BUFFER_ATOMIC_DEC_X2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_X |
| ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_X(InFmt_MTBUF *iFmt) |
| : Inst_MTBUF(iFmt, "tbuffer_load_format_x") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| setFlag(GlobalSegment); |
| } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_X |
| |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_X() |
| { |
| } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_X |
| |
| // Typed buffer load 1 dword with format conversion. |
| void |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY |
| ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY(InFmt_MTBUF *iFmt) |
| : Inst_MTBUF(iFmt, "tbuffer_load_format_xy") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| setFlag(GlobalSegment); |
| } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY |
| |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY() |
| { |
| } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY |
| |
| // Typed buffer load 2 dwords with format conversion. |
| void |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ |
| ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ(InFmt_MTBUF *iFmt) |
| : Inst_MTBUF(iFmt, "tbuffer_load_format_xyz") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| setFlag(GlobalSegment); |
| } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ |
| |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ() |
| { |
| } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ |
| |
| // Typed buffer load 3 dwords with format conversion. |
| void |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW |
| ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW(InFmt_MTBUF *iFmt) |
| : Inst_MTBUF(iFmt, "tbuffer_load_format_xyzw") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| setFlag(GlobalSegment); |
| } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW |
| |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW |
| ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW() |
| { |
| } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW |
| |
| // Typed buffer load 4 dwords with format conversion. |
| void |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_X |
| ::Inst_MTBUF__TBUFFER_STORE_FORMAT_X(InFmt_MTBUF *iFmt) |
| : Inst_MTBUF(iFmt, "tbuffer_store_format_x") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| setFlag(GlobalSegment); |
| } // Inst_MTBUF__TBUFFER_STORE_FORMAT_X |
| |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_X::~Inst_MTBUF__TBUFFER_STORE_FORMAT_X() |
| { |
| } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_X |
| |
| // Typed buffer store 1 dword with format conversion. |
| void |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_X::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_XY |
| ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XY(InFmt_MTBUF *iFmt) |
| : Inst_MTBUF(iFmt, "tbuffer_store_format_xy") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| setFlag(GlobalSegment); |
| } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XY |
| |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XY() |
| { |
| } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XY |
| |
| // Typed buffer store 2 dwords with format conversion. |
| void |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ |
| ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ(InFmt_MTBUF *iFmt) |
| : Inst_MTBUF(iFmt, "tbuffer_store_format_xyz") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| setFlag(GlobalSegment); |
| } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ |
| |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ |
| ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ() |
| { |
| } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ |
| |
| // Typed buffer store 3 dwords with format conversion. |
| void |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW |
| ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW(InFmt_MTBUF *iFmt) |
| : Inst_MTBUF(iFmt, "tbuffer_store_format_xyzw") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| setFlag(GlobalSegment); |
| } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW |
| |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW |
| ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW() |
| { |
| } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW |
| |
| // Typed buffer store 4 dwords with format conversion. |
| void |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::initiateAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::completeAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X |
| ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X(InFmt_MTBUF *iFmt) |
| : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_x") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| setFlag(GlobalSegment); |
| } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X |
| |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X:: |
| ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X() |
| { |
| } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X |
| |
| // Typed buffer load 1 dword with format conversion. |
| void |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::initiateAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::completeAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY |
| ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY(InFmt_MTBUF *iFmt) |
| : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xy") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| setFlag(GlobalSegment); |
| } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY |
| |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY |
| ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY() |
| { |
| } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY |
| |
| // Typed buffer load 2 dwords with format conversion. |
| void |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::initiateAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::completeAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ |
| ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ( |
| InFmt_MTBUF *iFmt) |
| : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xyz") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| setFlag(GlobalSegment); |
| } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ |
| |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ |
| ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ() |
| { |
| } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ |
| |
| // Typed buffer load 3 dwords with format conversion. |
| void |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::initiateAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::completeAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW |
| ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW( |
| InFmt_MTBUF *iFmt) |
| : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xyzw") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| setFlag(GlobalSegment); |
| } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW |
| |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW |
| ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW() |
| { |
| } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW |
| |
| // Typed buffer load 4 dwords with format conversion. |
| void |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::initiateAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::completeAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X |
| ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X(InFmt_MTBUF *iFmt) |
| : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_x") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| setFlag(GlobalSegment); |
| } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X |
| |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X |
| ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X() |
| { |
| } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X |
| |
| // Typed buffer store 1 dword with format conversion. |
| void |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::initiateAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::completeAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY |
| ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY(InFmt_MTBUF *iFmt) |
| : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xy") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| setFlag(GlobalSegment); |
| } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY |
| |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY |
| ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY() |
| { |
| } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY |
| |
| // Typed buffer store 2 dwords with format conversion. |
| void |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::initiateAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::completeAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ |
| ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ(InFmt_MTBUF *iFmt) |
| : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xyz") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| setFlag(GlobalSegment); |
| } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ |
| |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ |
| ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ() |
| { |
| } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ |
| |
| // Typed buffer store 3 dwords with format conversion. |
| void |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::initiateAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::completeAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW |
| ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW(InFmt_MTBUF *iFmt) |
| : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xyzw") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| setFlag(GlobalSegment); |
| } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW |
| |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW |
| ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW() |
| { |
| } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW |
| |
| // Typed buffer store 4 dwords with format conversion. |
| void |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::execute( |
| GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::initiateAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::completeAcc( |
| GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MIMG__IMAGE_LOAD::Inst_MIMG__IMAGE_LOAD(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_load") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_LOAD |
| |
| Inst_MIMG__IMAGE_LOAD::~Inst_MIMG__IMAGE_LOAD() |
| { |
| } // ~Inst_MIMG__IMAGE_LOAD |
| |
| // Image memory load with format conversion specified |
| void |
| Inst_MIMG__IMAGE_LOAD::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MIMG__IMAGE_LOAD::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MIMG__IMAGE_LOAD::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MIMG__IMAGE_LOAD_MIP::Inst_MIMG__IMAGE_LOAD_MIP(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_load_mip") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_LOAD_MIP |
| |
| Inst_MIMG__IMAGE_LOAD_MIP::~Inst_MIMG__IMAGE_LOAD_MIP() |
| { |
| } // ~Inst_MIMG__IMAGE_LOAD_MIP |
| |
| void |
| Inst_MIMG__IMAGE_LOAD_MIP::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MIMG__IMAGE_LOAD_MIP::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MIMG__IMAGE_LOAD_MIP::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MIMG__IMAGE_LOAD_PCK::Inst_MIMG__IMAGE_LOAD_PCK(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_load_pck") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_LOAD_PCK |
| |
| Inst_MIMG__IMAGE_LOAD_PCK::~Inst_MIMG__IMAGE_LOAD_PCK() |
| { |
| } // ~Inst_MIMG__IMAGE_LOAD_PCK |
| |
| void |
| Inst_MIMG__IMAGE_LOAD_PCK::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MIMG__IMAGE_LOAD_PCK::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MIMG__IMAGE_LOAD_PCK::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MIMG__IMAGE_LOAD_PCK_SGN::Inst_MIMG__IMAGE_LOAD_PCK_SGN( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_load_pck_sgn") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_LOAD_PCK_SGN |
| |
| Inst_MIMG__IMAGE_LOAD_PCK_SGN::~Inst_MIMG__IMAGE_LOAD_PCK_SGN() |
| { |
| } // ~Inst_MIMG__IMAGE_LOAD_PCK_SGN |
| |
| // Image memory load with with no format conversion and sign extension |
| void |
| Inst_MIMG__IMAGE_LOAD_PCK_SGN::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MIMG__IMAGE_LOAD_PCK_SGN::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MIMG__IMAGE_LOAD_PCK_SGN::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MIMG__IMAGE_LOAD_MIP_PCK::Inst_MIMG__IMAGE_LOAD_MIP_PCK( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_load_mip_pck") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_LOAD_MIP_PCK |
| |
| Inst_MIMG__IMAGE_LOAD_MIP_PCK::~Inst_MIMG__IMAGE_LOAD_MIP_PCK() |
| { |
| } // ~Inst_MIMG__IMAGE_LOAD_MIP_PCK |
| |
| // Image memory load with user-supplied mip level, no format conversion |
| void |
| Inst_MIMG__IMAGE_LOAD_MIP_PCK::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MIMG__IMAGE_LOAD_MIP_PCK::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MIMG__IMAGE_LOAD_MIP_PCK::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_load_mip_pck_sgn") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN |
| |
| Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::~Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN() |
| { |
| } // ~Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN |
| |
| // Image memory load with user-supplied mip level, no format conversion. |
| void |
| Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MIMG__IMAGE_STORE::Inst_MIMG__IMAGE_STORE(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_store") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_STORE |
| |
| Inst_MIMG__IMAGE_STORE::~Inst_MIMG__IMAGE_STORE() |
| { |
| } // ~Inst_MIMG__IMAGE_STORE |
| |
| // Image memory store with format conversion specified |
| void |
| Inst_MIMG__IMAGE_STORE::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MIMG__IMAGE_STORE::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MIMG__IMAGE_STORE::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MIMG__IMAGE_STORE_MIP::Inst_MIMG__IMAGE_STORE_MIP(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_store_mip") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_STORE_MIP |
| |
| Inst_MIMG__IMAGE_STORE_MIP::~Inst_MIMG__IMAGE_STORE_MIP() |
| { |
| } // ~Inst_MIMG__IMAGE_STORE_MIP |
| |
| void |
| Inst_MIMG__IMAGE_STORE_MIP::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MIMG__IMAGE_STORE_MIP::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MIMG__IMAGE_STORE_MIP::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MIMG__IMAGE_STORE_PCK::Inst_MIMG__IMAGE_STORE_PCK(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_store_pck") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_STORE_PCK |
| |
| Inst_MIMG__IMAGE_STORE_PCK::~Inst_MIMG__IMAGE_STORE_PCK() |
| { |
| } // ~Inst_MIMG__IMAGE_STORE_PCK |
| |
| // Image memory store of packed data without format conversion. |
| void |
| Inst_MIMG__IMAGE_STORE_PCK::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MIMG__IMAGE_STORE_PCK::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MIMG__IMAGE_STORE_PCK::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MIMG__IMAGE_STORE_MIP_PCK::Inst_MIMG__IMAGE_STORE_MIP_PCK( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_store_mip_pck") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_STORE_MIP_PCK |
| |
| Inst_MIMG__IMAGE_STORE_MIP_PCK::~Inst_MIMG__IMAGE_STORE_MIP_PCK() |
| { |
| } // ~Inst_MIMG__IMAGE_STORE_MIP_PCK |
| |
| // Image memory store of packed data without format conversion |
| void |
| Inst_MIMG__IMAGE_STORE_MIP_PCK::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| void |
| Inst_MIMG__IMAGE_STORE_MIP_PCK::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| void |
| Inst_MIMG__IMAGE_STORE_MIP_PCK::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| Inst_MIMG__IMAGE_GET_RESINFO::Inst_MIMG__IMAGE_GET_RESINFO( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_get_resinfo") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_GET_RESINFO |
| |
| Inst_MIMG__IMAGE_GET_RESINFO::~Inst_MIMG__IMAGE_GET_RESINFO() |
| { |
| } // ~Inst_MIMG__IMAGE_GET_RESINFO |
| |
| void |
| Inst_MIMG__IMAGE_GET_RESINFO::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MIMG__IMAGE_ATOMIC_SWAP::Inst_MIMG__IMAGE_ATOMIC_SWAP( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_atomic_swap") |
| { |
| setFlag(AtomicExch); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_ATOMIC_SWAP |
| |
| Inst_MIMG__IMAGE_ATOMIC_SWAP::~Inst_MIMG__IMAGE_ATOMIC_SWAP() |
| { |
| } // ~Inst_MIMG__IMAGE_ATOMIC_SWAP |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = DATA; |
| // RETURN_DATA = tmp. |
| void |
| Inst_MIMG__IMAGE_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::Inst_MIMG__IMAGE_ATOMIC_CMPSWAP( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_atomic_cmpswap") |
| { |
| setFlag(AtomicCAS); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_ATOMIC_CMPSWAP |
| |
| Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::~Inst_MIMG__IMAGE_ATOMIC_CMPSWAP() |
| { |
| } // ~Inst_MIMG__IMAGE_ATOMIC_CMPSWAP |
| |
| // tmp = MEM[ADDR]; |
| // src = DATA[0]; |
| // cmp = DATA[1]; |
| // MEM[ADDR] = (tmp == cmp) ? src : tmp; |
| // RETURN_DATA[0] = tmp. |
| void |
| Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MIMG__IMAGE_ATOMIC_ADD::Inst_MIMG__IMAGE_ATOMIC_ADD(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_atomic_add") |
| { |
| setFlag(AtomicAdd); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_ATOMIC_ADD |
| |
| Inst_MIMG__IMAGE_ATOMIC_ADD::~Inst_MIMG__IMAGE_ATOMIC_ADD() |
| { |
| } // ~Inst_MIMG__IMAGE_ATOMIC_ADD |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] += DATA; |
| // RETURN_DATA = tmp. |
| void |
| Inst_MIMG__IMAGE_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_MIMG__IMAGE_ATOMIC_SUB::Inst_MIMG__IMAGE_ATOMIC_SUB(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_atomic_sub") |
| { |
| setFlag(AtomicSub); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_ATOMIC_SUB |
| |
| Inst_MIMG__IMAGE_ATOMIC_SUB::~Inst_MIMG__IMAGE_ATOMIC_SUB() |
| { |
| } // ~Inst_MIMG__IMAGE_ATOMIC_SUB |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] -= DATA; |
| // RETURN_DATA = tmp. |
| void |
| Inst_MIMG__IMAGE_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_atomic_smin: sets AtomicMin, then AtomicReturn or AtomicNoReturn |
| // depending on the GLC bit, then MemoryRef and GlobalSegment. |
| Inst_MIMG__IMAGE_ATOMIC_SMIN::Inst_MIMG__IMAGE_ATOMIC_SMIN( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_atomic_smin") |
| { |
| setFlag(AtomicMin); |
| setFlag(instData.GLC ? AtomicReturn : AtomicNoReturn); |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_ATOMIC_SMIN |
| |
| Inst_MIMG__IMAGE_ATOMIC_SMIN::~Inst_MIMG__IMAGE_ATOMIC_SMIN() = default; |
| |
| // Documented operation (execute() itself only calls panicUnimplemented()): |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare); |
| // RETURN_DATA = tmp. |
| void |
| Inst_MIMG__IMAGE_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_atomic_umin: sets AtomicMin, then AtomicReturn or AtomicNoReturn |
| // depending on the GLC bit, then MemoryRef and GlobalSegment. |
| Inst_MIMG__IMAGE_ATOMIC_UMIN::Inst_MIMG__IMAGE_ATOMIC_UMIN( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_atomic_umin") |
| { |
| setFlag(AtomicMin); |
| setFlag(instData.GLC ? AtomicReturn : AtomicNoReturn); |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_ATOMIC_UMIN |
| |
| Inst_MIMG__IMAGE_ATOMIC_UMIN::~Inst_MIMG__IMAGE_ATOMIC_UMIN() = default; |
| |
| // Documented operation (execute() itself only calls panicUnimplemented()): |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare); |
| // RETURN_DATA = tmp. |
| void |
| Inst_MIMG__IMAGE_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_atomic_smax: sets AtomicMax, then AtomicReturn or AtomicNoReturn |
| // depending on the GLC bit, then MemoryRef and GlobalSegment. |
| Inst_MIMG__IMAGE_ATOMIC_SMAX::Inst_MIMG__IMAGE_ATOMIC_SMAX( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_atomic_smax") |
| { |
| setFlag(AtomicMax); |
| setFlag(instData.GLC ? AtomicReturn : AtomicNoReturn); |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_ATOMIC_SMAX |
| |
| Inst_MIMG__IMAGE_ATOMIC_SMAX::~Inst_MIMG__IMAGE_ATOMIC_SMAX() = default; |
| |
| // Documented operation (execute() itself only calls panicUnimplemented()): |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare); |
| // RETURN_DATA = tmp. |
| void |
| Inst_MIMG__IMAGE_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_atomic_umax: sets AtomicMax, then AtomicReturn or AtomicNoReturn |
| // depending on the GLC bit, then MemoryRef and GlobalSegment. |
| Inst_MIMG__IMAGE_ATOMIC_UMAX::Inst_MIMG__IMAGE_ATOMIC_UMAX( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_atomic_umax") |
| { |
| setFlag(AtomicMax); |
| setFlag(instData.GLC ? AtomicReturn : AtomicNoReturn); |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_ATOMIC_UMAX |
| |
| Inst_MIMG__IMAGE_ATOMIC_UMAX::~Inst_MIMG__IMAGE_ATOMIC_UMAX() = default; |
| |
| // Documented operation (execute() itself only calls panicUnimplemented()): |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare); |
| // RETURN_DATA = tmp. |
| void |
| Inst_MIMG__IMAGE_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_atomic_and: sets AtomicAnd, then AtomicReturn or AtomicNoReturn |
| // depending on the GLC bit, then MemoryRef and GlobalSegment. |
| Inst_MIMG__IMAGE_ATOMIC_AND::Inst_MIMG__IMAGE_ATOMIC_AND(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_atomic_and") |
| { |
| setFlag(AtomicAnd); |
| setFlag(instData.GLC ? AtomicReturn : AtomicNoReturn); |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_ATOMIC_AND |
| |
| Inst_MIMG__IMAGE_ATOMIC_AND::~Inst_MIMG__IMAGE_ATOMIC_AND() = default; |
| |
| // Documented operation (execute() itself only calls panicUnimplemented()): |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] &= DATA; |
| // RETURN_DATA = tmp. |
| void |
| Inst_MIMG__IMAGE_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_atomic_or: sets AtomicOr, then AtomicReturn or AtomicNoReturn |
| // depending on the GLC bit, then MemoryRef and GlobalSegment. |
| Inst_MIMG__IMAGE_ATOMIC_OR::Inst_MIMG__IMAGE_ATOMIC_OR(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_atomic_or") |
| { |
| setFlag(AtomicOr); |
| setFlag(instData.GLC ? AtomicReturn : AtomicNoReturn); |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_ATOMIC_OR |
| |
| Inst_MIMG__IMAGE_ATOMIC_OR::~Inst_MIMG__IMAGE_ATOMIC_OR() = default; |
| |
| // Documented operation (execute() itself only calls panicUnimplemented()): |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] |= DATA; |
| // RETURN_DATA = tmp. |
| void |
| Inst_MIMG__IMAGE_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_atomic_xor: sets AtomicXor, then AtomicReturn or AtomicNoReturn |
| // depending on the GLC bit, then MemoryRef and GlobalSegment. |
| Inst_MIMG__IMAGE_ATOMIC_XOR::Inst_MIMG__IMAGE_ATOMIC_XOR(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_atomic_xor") |
| { |
| setFlag(AtomicXor); |
| setFlag(instData.GLC ? AtomicReturn : AtomicNoReturn); |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_ATOMIC_XOR |
| |
| Inst_MIMG__IMAGE_ATOMIC_XOR::~Inst_MIMG__IMAGE_ATOMIC_XOR() = default; |
| |
| // Documented operation (execute() itself only calls panicUnimplemented()): |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] ^= DATA; |
| // RETURN_DATA = tmp. |
| void |
| Inst_MIMG__IMAGE_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_atomic_inc: sets AtomicInc, then AtomicReturn or AtomicNoReturn |
| // depending on the GLC bit, then MemoryRef and GlobalSegment. |
| Inst_MIMG__IMAGE_ATOMIC_INC::Inst_MIMG__IMAGE_ATOMIC_INC(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_atomic_inc") |
| { |
| setFlag(AtomicInc); |
| setFlag(instData.GLC ? AtomicReturn : AtomicNoReturn); |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_ATOMIC_INC |
| |
| Inst_MIMG__IMAGE_ATOMIC_INC::~Inst_MIMG__IMAGE_ATOMIC_INC() = default; |
| |
| // Documented operation (execute() itself only calls panicUnimplemented()): |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare); |
| // RETURN_DATA = tmp. |
| void |
| Inst_MIMG__IMAGE_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_atomic_dec: sets AtomicDec, then AtomicReturn or AtomicNoReturn |
| // depending on the GLC bit, then MemoryRef and GlobalSegment. |
| Inst_MIMG__IMAGE_ATOMIC_DEC::Inst_MIMG__IMAGE_ATOMIC_DEC(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_atomic_dec") |
| { |
| setFlag(AtomicDec); |
| setFlag(instData.GLC ? AtomicReturn : AtomicNoReturn); |
| setFlag(MemoryRef); |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_ATOMIC_DEC |
| |
| Inst_MIMG__IMAGE_ATOMIC_DEC::~Inst_MIMG__IMAGE_ATOMIC_DEC() = default; |
| |
| // Documented operation (execute() itself only calls panicUnimplemented()): |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1 |
| // (unsigned compare); RETURN_DATA = tmp. |
| void |
| Inst_MIMG__IMAGE_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_SAMPLE::Inst_MIMG__IMAGE_SAMPLE(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE |
| |
| Inst_MIMG__IMAGE_SAMPLE::~Inst_MIMG__IMAGE_SAMPLE() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_cl: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_SAMPLE_CL::Inst_MIMG__IMAGE_SAMPLE_CL(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_cl") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_CL |
| |
| Inst_MIMG__IMAGE_SAMPLE_CL::~Inst_MIMG__IMAGE_SAMPLE_CL() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_CL::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_d: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_SAMPLE_D::Inst_MIMG__IMAGE_SAMPLE_D(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_d") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_D |
| |
| Inst_MIMG__IMAGE_SAMPLE_D::~Inst_MIMG__IMAGE_SAMPLE_D() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_D::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_d_cl: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_SAMPLE_D_CL::Inst_MIMG__IMAGE_SAMPLE_D_CL( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_d_cl") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_D_CL |
| |
| Inst_MIMG__IMAGE_SAMPLE_D_CL::~Inst_MIMG__IMAGE_SAMPLE_D_CL() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_D_CL::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_l: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_SAMPLE_L::Inst_MIMG__IMAGE_SAMPLE_L(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_l") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_L |
| |
| Inst_MIMG__IMAGE_SAMPLE_L::~Inst_MIMG__IMAGE_SAMPLE_L() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_L::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_b: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_SAMPLE_B::Inst_MIMG__IMAGE_SAMPLE_B(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_b") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_B |
| |
| Inst_MIMG__IMAGE_SAMPLE_B::~Inst_MIMG__IMAGE_SAMPLE_B() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_B::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_b_cl: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_SAMPLE_B_CL::Inst_MIMG__IMAGE_SAMPLE_B_CL( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_b_cl") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_B_CL |
| |
| Inst_MIMG__IMAGE_SAMPLE_B_CL::~Inst_MIMG__IMAGE_SAMPLE_B_CL() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_B_CL::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_lz: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_SAMPLE_LZ::Inst_MIMG__IMAGE_SAMPLE_LZ(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_lz") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_LZ |
| |
| Inst_MIMG__IMAGE_SAMPLE_LZ::~Inst_MIMG__IMAGE_SAMPLE_LZ() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_LZ::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_c: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_SAMPLE_C::Inst_MIMG__IMAGE_SAMPLE_C(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_c") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_C |
| |
| Inst_MIMG__IMAGE_SAMPLE_C::~Inst_MIMG__IMAGE_SAMPLE_C() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_C::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_c_cl: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_SAMPLE_C_CL::Inst_MIMG__IMAGE_SAMPLE_C_CL( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_c_cl") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_C_CL |
| |
| Inst_MIMG__IMAGE_SAMPLE_C_CL::~Inst_MIMG__IMAGE_SAMPLE_C_CL() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_C_CL::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_c_d: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_SAMPLE_C_D::Inst_MIMG__IMAGE_SAMPLE_C_D(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_c_d") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_C_D |
| |
| Inst_MIMG__IMAGE_SAMPLE_C_D::~Inst_MIMG__IMAGE_SAMPLE_C_D() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_C_D::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_c_d_cl: this constructor body sets only the GlobalSegment |
| // flag. |
| Inst_MIMG__IMAGE_SAMPLE_C_D_CL::Inst_MIMG__IMAGE_SAMPLE_C_D_CL( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_c_d_cl") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_C_D_CL |
| |
| Inst_MIMG__IMAGE_SAMPLE_C_D_CL::~Inst_MIMG__IMAGE_SAMPLE_C_D_CL() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_C_D_CL::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_c_l: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_SAMPLE_C_L::Inst_MIMG__IMAGE_SAMPLE_C_L(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_c_l") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_C_L |
| |
| Inst_MIMG__IMAGE_SAMPLE_C_L::~Inst_MIMG__IMAGE_SAMPLE_C_L() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_C_L::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_c_b: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_SAMPLE_C_B::Inst_MIMG__IMAGE_SAMPLE_C_B(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_c_b") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_C_B |
| |
| Inst_MIMG__IMAGE_SAMPLE_C_B::~Inst_MIMG__IMAGE_SAMPLE_C_B() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_C_B::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_c_b_cl: this constructor body sets only the GlobalSegment |
| // flag. |
| Inst_MIMG__IMAGE_SAMPLE_C_B_CL::Inst_MIMG__IMAGE_SAMPLE_C_B_CL( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_c_b_cl") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_C_B_CL |
| |
| Inst_MIMG__IMAGE_SAMPLE_C_B_CL::~Inst_MIMG__IMAGE_SAMPLE_C_B_CL() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_C_B_CL::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_c_lz: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_SAMPLE_C_LZ::Inst_MIMG__IMAGE_SAMPLE_C_LZ( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_c_lz") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_C_LZ |
| |
| Inst_MIMG__IMAGE_SAMPLE_C_LZ::~Inst_MIMG__IMAGE_SAMPLE_C_LZ() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_C_LZ::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_o: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_SAMPLE_O::Inst_MIMG__IMAGE_SAMPLE_O(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_O |
| |
| Inst_MIMG__IMAGE_SAMPLE_O::~Inst_MIMG__IMAGE_SAMPLE_O() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_cl_o: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_SAMPLE_CL_O::Inst_MIMG__IMAGE_SAMPLE_CL_O( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_cl_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_CL_O |
| |
| Inst_MIMG__IMAGE_SAMPLE_CL_O::~Inst_MIMG__IMAGE_SAMPLE_CL_O() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_CL_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_d_o: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_SAMPLE_D_O::Inst_MIMG__IMAGE_SAMPLE_D_O(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_d_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_D_O |
| |
| Inst_MIMG__IMAGE_SAMPLE_D_O::~Inst_MIMG__IMAGE_SAMPLE_D_O() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_D_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_d_cl_o: this constructor body sets only the GlobalSegment |
| // flag. |
| Inst_MIMG__IMAGE_SAMPLE_D_CL_O::Inst_MIMG__IMAGE_SAMPLE_D_CL_O( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_d_cl_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_D_CL_O |
| |
| Inst_MIMG__IMAGE_SAMPLE_D_CL_O::~Inst_MIMG__IMAGE_SAMPLE_D_CL_O() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_D_CL_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_l_o: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_SAMPLE_L_O::Inst_MIMG__IMAGE_SAMPLE_L_O(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_l_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_L_O |
| |
| Inst_MIMG__IMAGE_SAMPLE_L_O::~Inst_MIMG__IMAGE_SAMPLE_L_O() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_L_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_b_o: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_SAMPLE_B_O::Inst_MIMG__IMAGE_SAMPLE_B_O(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_b_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_B_O |
| |
| Inst_MIMG__IMAGE_SAMPLE_B_O::~Inst_MIMG__IMAGE_SAMPLE_B_O() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_B_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_b_cl_o: this constructor body sets only the GlobalSegment |
| // flag. |
| Inst_MIMG__IMAGE_SAMPLE_B_CL_O::Inst_MIMG__IMAGE_SAMPLE_B_CL_O( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_b_cl_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_B_CL_O |
| |
| Inst_MIMG__IMAGE_SAMPLE_B_CL_O::~Inst_MIMG__IMAGE_SAMPLE_B_CL_O() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_B_CL_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_lz_o: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_SAMPLE_LZ_O::Inst_MIMG__IMAGE_SAMPLE_LZ_O( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_lz_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_LZ_O |
| |
| Inst_MIMG__IMAGE_SAMPLE_LZ_O::~Inst_MIMG__IMAGE_SAMPLE_LZ_O() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_LZ_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_c_o: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_SAMPLE_C_O::Inst_MIMG__IMAGE_SAMPLE_C_O(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_c_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_C_O |
| |
| Inst_MIMG__IMAGE_SAMPLE_C_O::~Inst_MIMG__IMAGE_SAMPLE_C_O() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_C_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_c_cl_o: this constructor body sets only the GlobalSegment |
| // flag. |
| Inst_MIMG__IMAGE_SAMPLE_C_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_CL_O( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_c_cl_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_C_CL_O |
| |
| Inst_MIMG__IMAGE_SAMPLE_C_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_CL_O() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_C_CL_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_c_d_o: this constructor body sets only the GlobalSegment |
| // flag. |
| Inst_MIMG__IMAGE_SAMPLE_C_D_O::Inst_MIMG__IMAGE_SAMPLE_C_D_O( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_c_d_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_C_D_O |
| |
| Inst_MIMG__IMAGE_SAMPLE_C_D_O::~Inst_MIMG__IMAGE_SAMPLE_C_D_O() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_C_D_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_c_d_cl_o: this constructor body sets only the GlobalSegment |
| // flag. |
| Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_c_d_cl_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O |
| |
| Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O() |
| = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_c_l_o: this constructor body sets only the GlobalSegment |
| // flag. |
| Inst_MIMG__IMAGE_SAMPLE_C_L_O::Inst_MIMG__IMAGE_SAMPLE_C_L_O( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_c_l_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_C_L_O |
| |
| Inst_MIMG__IMAGE_SAMPLE_C_L_O::~Inst_MIMG__IMAGE_SAMPLE_C_L_O() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_C_L_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_c_b_o: this constructor body sets only the GlobalSegment |
| // flag. |
| Inst_MIMG__IMAGE_SAMPLE_C_B_O::Inst_MIMG__IMAGE_SAMPLE_C_B_O( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_c_b_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_C_B_O |
| |
| Inst_MIMG__IMAGE_SAMPLE_C_B_O::~Inst_MIMG__IMAGE_SAMPLE_C_B_O() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_C_B_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_c_b_cl_o: this constructor body sets only the GlobalSegment |
| // flag. |
| Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_c_b_cl_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O |
| |
| Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O() |
| = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_c_lz_o: this constructor body sets only the GlobalSegment |
| // flag. |
| Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::Inst_MIMG__IMAGE_SAMPLE_C_LZ_O( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_c_lz_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_C_LZ_O |
| |
| Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::~Inst_MIMG__IMAGE_SAMPLE_C_LZ_O() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_gather4: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_GATHER4::Inst_MIMG__IMAGE_GATHER4(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_gather4") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_GATHER4 |
| |
| Inst_MIMG__IMAGE_GATHER4::~Inst_MIMG__IMAGE_GATHER4() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_GATHER4::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_gather4_cl: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_GATHER4_CL::Inst_MIMG__IMAGE_GATHER4_CL(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_gather4_cl") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_GATHER4_CL |
| |
| Inst_MIMG__IMAGE_GATHER4_CL::~Inst_MIMG__IMAGE_GATHER4_CL() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_GATHER4_CL::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_gather4_l: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_GATHER4_L::Inst_MIMG__IMAGE_GATHER4_L(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_gather4_l") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_GATHER4_L |
| |
| Inst_MIMG__IMAGE_GATHER4_L::~Inst_MIMG__IMAGE_GATHER4_L() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_GATHER4_L::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_gather4_b: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_GATHER4_B::Inst_MIMG__IMAGE_GATHER4_B(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_gather4_b") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_GATHER4_B |
| |
| Inst_MIMG__IMAGE_GATHER4_B::~Inst_MIMG__IMAGE_GATHER4_B() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_GATHER4_B::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_gather4_b_cl: this constructor body sets only the GlobalSegment |
| // flag. |
| Inst_MIMG__IMAGE_GATHER4_B_CL::Inst_MIMG__IMAGE_GATHER4_B_CL( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_gather4_b_cl") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_GATHER4_B_CL |
| |
| Inst_MIMG__IMAGE_GATHER4_B_CL::~Inst_MIMG__IMAGE_GATHER4_B_CL() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_GATHER4_B_CL::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_gather4_lz: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_GATHER4_LZ::Inst_MIMG__IMAGE_GATHER4_LZ(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_gather4_lz") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_GATHER4_LZ |
| |
| Inst_MIMG__IMAGE_GATHER4_LZ::~Inst_MIMG__IMAGE_GATHER4_LZ() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_GATHER4_LZ::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_gather4_c: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_GATHER4_C::Inst_MIMG__IMAGE_GATHER4_C(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_gather4_c") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_GATHER4_C |
| |
| Inst_MIMG__IMAGE_GATHER4_C::~Inst_MIMG__IMAGE_GATHER4_C() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_GATHER4_C::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_gather4_c_cl: this constructor body sets only the GlobalSegment |
| // flag. |
| Inst_MIMG__IMAGE_GATHER4_C_CL::Inst_MIMG__IMAGE_GATHER4_C_CL( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_gather4_c_cl") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_GATHER4_C_CL |
| |
| Inst_MIMG__IMAGE_GATHER4_C_CL::~Inst_MIMG__IMAGE_GATHER4_C_CL() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_GATHER4_C_CL::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_gather4_c_l: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_GATHER4_C_L::Inst_MIMG__IMAGE_GATHER4_C_L( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_gather4_c_l") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_GATHER4_C_L |
| |
| Inst_MIMG__IMAGE_GATHER4_C_L::~Inst_MIMG__IMAGE_GATHER4_C_L() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_GATHER4_C_L::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_gather4_c_b: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_GATHER4_C_B::Inst_MIMG__IMAGE_GATHER4_C_B( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_gather4_c_b") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_GATHER4_C_B |
| |
| Inst_MIMG__IMAGE_GATHER4_C_B::~Inst_MIMG__IMAGE_GATHER4_C_B() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_GATHER4_C_B::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_gather4_c_b_cl: this constructor body sets only the GlobalSegment |
| // flag. |
| Inst_MIMG__IMAGE_GATHER4_C_B_CL::Inst_MIMG__IMAGE_GATHER4_C_B_CL( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_gather4_c_b_cl") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_GATHER4_C_B_CL |
| |
| Inst_MIMG__IMAGE_GATHER4_C_B_CL::~Inst_MIMG__IMAGE_GATHER4_C_B_CL() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_GATHER4_C_B_CL::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_gather4_c_lz: this constructor body sets only the GlobalSegment |
| // flag. |
| Inst_MIMG__IMAGE_GATHER4_C_LZ::Inst_MIMG__IMAGE_GATHER4_C_LZ( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_gather4_c_lz") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_GATHER4_C_LZ |
| |
| Inst_MIMG__IMAGE_GATHER4_C_LZ::~Inst_MIMG__IMAGE_GATHER4_C_LZ() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_GATHER4_C_LZ::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_gather4_o: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_GATHER4_O::Inst_MIMG__IMAGE_GATHER4_O(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_gather4_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_GATHER4_O |
| |
| Inst_MIMG__IMAGE_GATHER4_O::~Inst_MIMG__IMAGE_GATHER4_O() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_GATHER4_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_gather4_cl_o: this constructor body sets only the GlobalSegment |
| // flag. |
| Inst_MIMG__IMAGE_GATHER4_CL_O::Inst_MIMG__IMAGE_GATHER4_CL_O( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_gather4_cl_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_GATHER4_CL_O |
| |
| Inst_MIMG__IMAGE_GATHER4_CL_O::~Inst_MIMG__IMAGE_GATHER4_CL_O() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_GATHER4_CL_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_gather4_l_o: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_GATHER4_L_O::Inst_MIMG__IMAGE_GATHER4_L_O( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_gather4_l_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_GATHER4_L_O |
| |
| Inst_MIMG__IMAGE_GATHER4_L_O::~Inst_MIMG__IMAGE_GATHER4_L_O() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_GATHER4_L_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_gather4_b_o: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_GATHER4_B_O::Inst_MIMG__IMAGE_GATHER4_B_O( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_gather4_b_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_GATHER4_B_O |
| |
| Inst_MIMG__IMAGE_GATHER4_B_O::~Inst_MIMG__IMAGE_GATHER4_B_O() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_GATHER4_B_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_gather4_b_cl_o: this constructor body sets only the GlobalSegment |
| // flag. |
| Inst_MIMG__IMAGE_GATHER4_B_CL_O::Inst_MIMG__IMAGE_GATHER4_B_CL_O( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_gather4_b_cl_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_GATHER4_B_CL_O |
| |
| Inst_MIMG__IMAGE_GATHER4_B_CL_O::~Inst_MIMG__IMAGE_GATHER4_B_CL_O() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_GATHER4_B_CL_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_gather4_lz_o: this constructor body sets only the GlobalSegment |
| // flag. |
| Inst_MIMG__IMAGE_GATHER4_LZ_O::Inst_MIMG__IMAGE_GATHER4_LZ_O( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_gather4_lz_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_GATHER4_LZ_O |
| |
| Inst_MIMG__IMAGE_GATHER4_LZ_O::~Inst_MIMG__IMAGE_GATHER4_LZ_O() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_GATHER4_LZ_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_gather4_c_o: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_GATHER4_C_O::Inst_MIMG__IMAGE_GATHER4_C_O( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_gather4_c_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_GATHER4_C_O |
| |
| Inst_MIMG__IMAGE_GATHER4_C_O::~Inst_MIMG__IMAGE_GATHER4_C_O() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_GATHER4_C_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_gather4_c_cl_o: this constructor body sets only the GlobalSegment |
| // flag. |
| Inst_MIMG__IMAGE_GATHER4_C_CL_O::Inst_MIMG__IMAGE_GATHER4_C_CL_O( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_gather4_c_cl_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_GATHER4_C_CL_O |
| |
| Inst_MIMG__IMAGE_GATHER4_C_CL_O::~Inst_MIMG__IMAGE_GATHER4_C_CL_O() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_GATHER4_C_CL_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_gather4_c_l_o: this constructor body sets only the GlobalSegment |
| // flag. |
| Inst_MIMG__IMAGE_GATHER4_C_L_O::Inst_MIMG__IMAGE_GATHER4_C_L_O( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_gather4_c_l_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_GATHER4_C_L_O |
| |
| Inst_MIMG__IMAGE_GATHER4_C_L_O::~Inst_MIMG__IMAGE_GATHER4_C_L_O() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_GATHER4_C_L_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_gather4_c_b_o: this constructor body sets only the GlobalSegment |
| // flag. |
| Inst_MIMG__IMAGE_GATHER4_C_B_O::Inst_MIMG__IMAGE_GATHER4_C_B_O( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_gather4_c_b_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_GATHER4_C_B_O |
| |
| Inst_MIMG__IMAGE_GATHER4_C_B_O::~Inst_MIMG__IMAGE_GATHER4_C_B_O() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_GATHER4_C_B_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_gather4_c_b_cl_o: this constructor body sets only the GlobalSegment |
| // flag. |
| Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::Inst_MIMG__IMAGE_GATHER4_C_B_CL_O( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_gather4_c_b_cl_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_GATHER4_C_B_CL_O |
| |
| Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::~Inst_MIMG__IMAGE_GATHER4_C_B_CL_O() |
| = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_gather4_c_lz_o: this constructor body sets only the GlobalSegment |
| // flag. |
| Inst_MIMG__IMAGE_GATHER4_C_LZ_O::Inst_MIMG__IMAGE_GATHER4_C_LZ_O( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_gather4_c_lz_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_GATHER4_C_LZ_O |
| |
| Inst_MIMG__IMAGE_GATHER4_C_LZ_O::~Inst_MIMG__IMAGE_GATHER4_C_LZ_O() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_GATHER4_C_LZ_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_get_lod: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_GET_LOD::Inst_MIMG__IMAGE_GET_LOD(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_get_lod") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_GET_LOD |
| |
| Inst_MIMG__IMAGE_GET_LOD::~Inst_MIMG__IMAGE_GET_LOD() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_GET_LOD::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_cd: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_SAMPLE_CD::Inst_MIMG__IMAGE_SAMPLE_CD(InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_cd") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_CD |
| |
| Inst_MIMG__IMAGE_SAMPLE_CD::~Inst_MIMG__IMAGE_SAMPLE_CD() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_CD::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_cd_cl: this constructor body sets only the GlobalSegment |
| // flag. |
| Inst_MIMG__IMAGE_SAMPLE_CD_CL::Inst_MIMG__IMAGE_SAMPLE_CD_CL( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_cd_cl") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_CD_CL |
| |
| Inst_MIMG__IMAGE_SAMPLE_CD_CL::~Inst_MIMG__IMAGE_SAMPLE_CD_CL() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_CD_CL::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // image_sample_c_cd: this constructor body sets only the GlobalSegment flag. |
| Inst_MIMG__IMAGE_SAMPLE_C_CD::Inst_MIMG__IMAGE_SAMPLE_C_CD( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_c_cd") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_C_CD |
| |
| Inst_MIMG__IMAGE_SAMPLE_C_CD::~Inst_MIMG__IMAGE_SAMPLE_C_CD() = default; |
| |
| // Stub: execute() only calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_C_CD::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // Builds the image_sample_c_cd_cl instruction from the given InFmt_MIMG |
| // encoding and sets its GlobalSegment flag. |
| Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::Inst_MIMG__IMAGE_SAMPLE_C_CD_CL( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_c_cd_cl") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_C_CD_CL |
| |
| // Destructor: no cleanup is performed, so it is explicitly defaulted. |
| Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL() |
| = default; // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL |
| |
| // Not modeled: executing this instruction calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // Builds the image_sample_cd_o instruction from the given InFmt_MIMG |
| // encoding and sets its GlobalSegment flag. |
| Inst_MIMG__IMAGE_SAMPLE_CD_O::Inst_MIMG__IMAGE_SAMPLE_CD_O( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_cd_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_CD_O |
| |
| // Destructor: no cleanup is performed, so it is explicitly defaulted. |
| Inst_MIMG__IMAGE_SAMPLE_CD_O::~Inst_MIMG__IMAGE_SAMPLE_CD_O() = default; |
| |
| // Not modeled: executing this instruction calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_CD_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // Builds the image_sample_cd_cl_o instruction from the given InFmt_MIMG |
| // encoding and sets its GlobalSegment flag. |
| Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::Inst_MIMG__IMAGE_SAMPLE_CD_CL_O( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_cd_cl_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_CD_CL_O |
| |
| // Destructor: no cleanup is performed, so it is explicitly defaulted. |
| Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::~Inst_MIMG__IMAGE_SAMPLE_CD_CL_O() |
| = default; // ~Inst_MIMG__IMAGE_SAMPLE_CD_CL_O |
| |
| // Not modeled: executing this instruction calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // Builds the image_sample_c_cd_o instruction from the given InFmt_MIMG |
| // encoding and sets its GlobalSegment flag. |
| Inst_MIMG__IMAGE_SAMPLE_C_CD_O::Inst_MIMG__IMAGE_SAMPLE_C_CD_O( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_c_cd_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_C_CD_O |
| |
| // Destructor: no cleanup is performed, so it is explicitly defaulted. |
| Inst_MIMG__IMAGE_SAMPLE_C_CD_O::~Inst_MIMG__IMAGE_SAMPLE_C_CD_O() |
| = default; // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_O |
| |
| // Not modeled: executing this instruction calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_C_CD_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // Builds the image_sample_c_cd_cl_o instruction from the given InFmt_MIMG |
| // encoding and sets its GlobalSegment flag. |
| Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O( |
| InFmt_MIMG *iFmt) |
| : Inst_MIMG(iFmt, "image_sample_c_cd_cl_o") |
| { |
| setFlag(GlobalSegment); |
| } // Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O |
| |
| // Destructor: no cleanup is performed, so it is explicitly defaulted. |
| Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O() |
| = default; // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O |
| |
| // Not modeled: executing this instruction calls panicUnimplemented(). |
| void |
| Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // Builds the exp instruction from the given InFmt_EXP encoding. No flags |
| // are set here. |
| Inst_EXP__EXP::Inst_EXP__EXP(InFmt_EXP *iFmt) |
| : Inst_EXP(iFmt, "exp") |
| { |
| } // Inst_EXP__EXP |
| |
| // Destructor: no cleanup is performed, so it is explicitly defaulted. |
| Inst_EXP__EXP::~Inst_EXP__EXP() = default; |
| |
| // Not modeled: executing this instruction calls panicUnimplemented(). |
| void |
| Inst_EXP__EXP::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // Builds the flat_load_ubyte instruction from the given InFmt_FLAT encoding |
| // and flags it as a memory reference (MemoryRef) that is a load (Load). |
| Inst_FLAT__FLAT_LOAD_UBYTE::Inst_FLAT__FLAT_LOAD_UBYTE(InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_load_ubyte") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| } // Inst_FLAT__FLAT_LOAD_UBYTE |
| |
| // Destructor: no cleanup is performed, so it is explicitly defaulted. |
| Inst_FLAT__FLAT_LOAD_UBYTE::~Inst_FLAT__FLAT_LOAD_UBYTE() = default; |
| |
| // Flat load of an unsigned byte (zero extend to VGPR destination). |
| // With an empty exec mask the bookkeeping counters are decremented and no |
| // memory request is made; otherwise the request is issued to the global |
| // memory pipe of the compute unit. |
| void |
| Inst_FLAT__FLAT_LOAD_UBYTE::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *const wave = gpuDynInst->wavefront(); |
| |
| if (wave->execMask().none()) { |
| // No active lanes: adjust the counters and return without issuing |
| // a memory request. Both the global (Gm) and local (Lm) in-pipe read |
| // counters are decremented. |
| wave->decVMemInstsIssued(); |
| wave->decLGKMInstsIssued(); |
| --wave->rdGmReqsInPipe; |
| --wave->rdLmReqsInPipe; |
| gpuDynInst->exec_mask = wave->execMask(); |
| wave->computeUnit->vrf[wave->simdId] |
| ->scheduleWriteOperandsFromLoad(wave, gpuDynInst); |
| return; |
| } |
| |
| gpuDynInst->execUnitId = wave->execUnitId; |
| gpuDynInst->exec_mask = wave->execMask(); |
| |
| // The instruction latency is set to one clock period of the compute unit. |
| auto *const cu = gpuDynInst->computeUnit(); |
| gpuDynInst->latency.init(cu); |
| gpuDynInst->latency.set(cu->clockPeriod()); |
| |
| // Read the 64-bit ADDR operand and pass it to calcAddr(). |
| ConstVecOperandU64 flatAddr(gpuDynInst, extData.ADDR); |
| flatAddr.read(); |
| calcAddr(gpuDynInst, flatAddr); |
| |
| // Only accesses executed as SC_GLOBAL are supported. |
| if (gpuDynInst->executedAs() != enums::SC_GLOBAL) { |
| fatal("Non global flat instructions not implemented yet.\n"); |
| } else { |
| cu->globalMemoryPipe.issueRequest(gpuDynInst); |
| --wave->rdGmReqsInPipe; |
| ++wave->outstandingReqsRdGm; |
| } |
| |
| ++wave->outstandingReqs; |
| wave->validateRequestCounters(); |
| } // execute |
| |
| // Delegates to initMemRead<VecElemU8>() (helper defined outside this |
| // chunk; presumably starts the per-lane byte read - TODO confirm). |
| void |
| Inst_FLAT__FLAT_LOAD_UBYTE::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| initMemRead<VecElemU8>(gpuDynInst); |
| } // initiateAcc |
| |
| // Copies the loaded data into the destination VGPR. For every lane enabled |
| // in the instruction's exec mask, the VecElemU8 at index `lane` of d_data is |
| // widened to VecElemU32; disabled lanes are not assigned. |
| void |
| Inst_FLAT__FLAT_LOAD_UBYTE::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| VecOperandU32 dstReg(gpuDynInst, extData.VDST); |
| const auto *const loaded = |
| reinterpret_cast<VecElemU8*>(gpuDynInst->d_data); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (!gpuDynInst->exec_mask[lane]) { |
| continue; |
| } |
| dstReg[lane] = static_cast<VecElemU32>(loaded[lane]); |
| } |
| |
| dstReg.write(); |
| } // completeAcc |
| // --- Inst_FLAT__FLAT_LOAD_SBYTE class methods --- |
| |
| // Builds the flat_load_sbyte instruction from the given InFmt_FLAT encoding |
| // and flags it as a memory reference (MemoryRef) that is a load (Load). |
| Inst_FLAT__FLAT_LOAD_SBYTE::Inst_FLAT__FLAT_LOAD_SBYTE(InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_load_sbyte") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| } // Inst_FLAT__FLAT_LOAD_SBYTE |
| |
| // Destructor: no cleanup is performed, so it is explicitly defaulted. |
| Inst_FLAT__FLAT_LOAD_SBYTE::~Inst_FLAT__FLAT_LOAD_SBYTE() = default; |
| |
| // Flat load of a signed byte (sign extend to VGPR destination). |
| // With an empty exec mask the bookkeeping counters are decremented and no |
| // memory request is made; otherwise the request is issued to the global |
| // memory pipe of the compute unit. |
| void |
| Inst_FLAT__FLAT_LOAD_SBYTE::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *const wave = gpuDynInst->wavefront(); |
| |
| if (wave->execMask().none()) { |
| // No active lanes: adjust the counters and return without issuing |
| // a memory request. Both the global (Gm) and local (Lm) in-pipe read |
| // counters are decremented. |
| wave->decVMemInstsIssued(); |
| wave->decLGKMInstsIssued(); |
| --wave->rdGmReqsInPipe; |
| --wave->rdLmReqsInPipe; |
| gpuDynInst->exec_mask = wave->execMask(); |
| wave->computeUnit->vrf[wave->simdId] |
| ->scheduleWriteOperandsFromLoad(wave, gpuDynInst); |
| return; |
| } |
| |
| gpuDynInst->execUnitId = wave->execUnitId; |
| gpuDynInst->exec_mask = wave->execMask(); |
| |
| // The instruction latency is set to one clock period of the compute unit. |
| auto *const cu = gpuDynInst->computeUnit(); |
| gpuDynInst->latency.init(cu); |
| gpuDynInst->latency.set(cu->clockPeriod()); |
| |
| // Read the 64-bit ADDR operand and pass it to calcAddr(). |
| ConstVecOperandU64 flatAddr(gpuDynInst, extData.ADDR); |
| flatAddr.read(); |
| calcAddr(gpuDynInst, flatAddr); |
| |
| // Only accesses executed as SC_GLOBAL are supported. |
| if (gpuDynInst->executedAs() != enums::SC_GLOBAL) { |
| fatal("Non global flat instructions not implemented yet.\n"); |
| } else { |
| cu->globalMemoryPipe.issueRequest(gpuDynInst); |
| --wave->rdGmReqsInPipe; |
| ++wave->outstandingReqsRdGm; |
| } |
| |
| ++wave->outstandingReqs; |
| wave->validateRequestCounters(); |
| } // execute |
| |
| // Delegates to initMemRead<VecElemI8>() (helper defined outside this |
| // chunk; presumably starts the per-lane byte read - TODO confirm). |
| void |
| Inst_FLAT__FLAT_LOAD_SBYTE::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| initMemRead<VecElemI8>(gpuDynInst); |
| } // initiateAcc |
| |
| // Copies the loaded data into the destination VGPR. For every lane enabled |
| // in the instruction's exec mask, the VecElemI8 at index `lane` of d_data is |
| // converted to VecElemI32; disabled lanes are not assigned. |
| void |
| Inst_FLAT__FLAT_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| VecOperandI32 dstReg(gpuDynInst, extData.VDST); |
| const auto *const loaded = |
| reinterpret_cast<VecElemI8*>(gpuDynInst->d_data); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (!gpuDynInst->exec_mask[lane]) { |
| continue; |
| } |
| dstReg[lane] = static_cast<VecElemI32>(loaded[lane]); |
| } |
| |
| dstReg.write(); |
| } // completeAcc |
| |
| // Builds the flat_load_ushort instruction from the given InFmt_FLAT |
| // encoding and flags it as a memory reference (MemoryRef) that is a load |
| // (Load). |
| Inst_FLAT__FLAT_LOAD_USHORT::Inst_FLAT__FLAT_LOAD_USHORT(InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_load_ushort") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| } // Inst_FLAT__FLAT_LOAD_USHORT |
| |
| // Destructor: no cleanup is performed, so it is explicitly defaulted. |
| Inst_FLAT__FLAT_LOAD_USHORT::~Inst_FLAT__FLAT_LOAD_USHORT() = default; |
| |
| // Flat load of an unsigned short (zero extend to VGPR destination). |
| // With an empty exec mask the bookkeeping counters are decremented and no |
| // memory request is made; otherwise the request is issued to the global |
| // memory pipe of the compute unit. |
| void |
| Inst_FLAT__FLAT_LOAD_USHORT::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *const wave = gpuDynInst->wavefront(); |
| |
| if (wave->execMask().none()) { |
| // No active lanes: adjust the counters and return without issuing |
| // a memory request. Both the global (Gm) and local (Lm) in-pipe read |
| // counters are decremented. |
| wave->decVMemInstsIssued(); |
| wave->decLGKMInstsIssued(); |
| --wave->rdGmReqsInPipe; |
| --wave->rdLmReqsInPipe; |
| gpuDynInst->exec_mask = wave->execMask(); |
| wave->computeUnit->vrf[wave->simdId] |
| ->scheduleWriteOperandsFromLoad(wave, gpuDynInst); |
| return; |
| } |
| |
| gpuDynInst->execUnitId = wave->execUnitId; |
| gpuDynInst->exec_mask = wave->execMask(); |
| |
| // The instruction latency is set to one clock period of the compute unit. |
| auto *const cu = gpuDynInst->computeUnit(); |
| gpuDynInst->latency.init(cu); |
| gpuDynInst->latency.set(cu->clockPeriod()); |
| |
| // Read the 64-bit ADDR operand and pass it to calcAddr(). |
| ConstVecOperandU64 flatAddr(gpuDynInst, extData.ADDR); |
| flatAddr.read(); |
| calcAddr(gpuDynInst, flatAddr); |
| |
| // Only accesses executed as SC_GLOBAL are supported. |
| if (gpuDynInst->executedAs() != enums::SC_GLOBAL) { |
| fatal("Non global flat instructions not implemented yet.\n"); |
| } else { |
| cu->globalMemoryPipe.issueRequest(gpuDynInst); |
| --wave->rdGmReqsInPipe; |
| ++wave->outstandingReqsRdGm; |
| } |
| |
| ++wave->outstandingReqs; |
| wave->validateRequestCounters(); |
| } // execute |
| |
| // Delegates to initMemRead<VecElemU16>() (helper defined outside this |
| // chunk; presumably starts the per-lane 16-bit read - TODO confirm). |
| void |
| Inst_FLAT__FLAT_LOAD_USHORT::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| initMemRead<VecElemU16>(gpuDynInst); |
| } // initiateAcc |
| |
| // Copies the loaded data into the destination VGPR. For every lane enabled |
| // in the instruction's exec mask, the VecElemU16 at index `lane` of d_data |
| // is widened to VecElemU32; disabled lanes are not assigned. |
| void |
| Inst_FLAT__FLAT_LOAD_USHORT::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| VecOperandU32 dstReg(gpuDynInst, extData.VDST); |
| const auto *const loaded = |
| reinterpret_cast<VecElemU16*>(gpuDynInst->d_data); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (!gpuDynInst->exec_mask[lane]) { |
| continue; |
| } |
| dstReg[lane] = static_cast<VecElemU32>(loaded[lane]); |
| } |
| |
| dstReg.write(); |
| } // completeAcc |
| |
| |
| // Builds the flat_load_sshort instruction from the given InFmt_FLAT |
| // encoding and flags it as a memory reference (MemoryRef) that is a load |
| // (Load). |
| Inst_FLAT__FLAT_LOAD_SSHORT::Inst_FLAT__FLAT_LOAD_SSHORT(InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_load_sshort") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| } // Inst_FLAT__FLAT_LOAD_SSHORT |
| |
| // Destructor: no cleanup is performed, so it is explicitly defaulted. |
| Inst_FLAT__FLAT_LOAD_SSHORT::~Inst_FLAT__FLAT_LOAD_SSHORT() = default; |
| |
| // Flat load of a signed short (sign extend to VGPR destination). |
| // Not modeled: executing this instruction calls panicUnimplemented(). |
| void |
| Inst_FLAT__FLAT_LOAD_SSHORT::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| // Empty: no memory access is initiated for this instruction. |
| void |
| Inst_FLAT__FLAT_LOAD_SSHORT::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // initiateAcc |
| |
| // Empty: no data is written back to a destination register here. |
| void |
| Inst_FLAT__FLAT_LOAD_SSHORT::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| // Builds the flat_load_dword instruction from the given InFmt_FLAT encoding |
| // and flags it as a memory reference (MemoryRef) that is a load (Load). |
| Inst_FLAT__FLAT_LOAD_DWORD::Inst_FLAT__FLAT_LOAD_DWORD(InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_load_dword") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| } // Inst_FLAT__FLAT_LOAD_DWORD |
| |
| // Destructor: no cleanup is performed, so it is explicitly defaulted. |
| Inst_FLAT__FLAT_LOAD_DWORD::~Inst_FLAT__FLAT_LOAD_DWORD() = default; |
| |
| // Flat load of one dword per lane. |
| // With an empty exec mask the bookkeeping counters are decremented and no |
| // memory request is made; otherwise the request is issued to the global |
| // memory pipe of the compute unit. |
| void |
| Inst_FLAT__FLAT_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *const wave = gpuDynInst->wavefront(); |
| |
| if (wave->execMask().none()) { |
| // No active lanes: adjust the counters and return without issuing |
| // a memory request. Both the global (Gm) and local (Lm) in-pipe read |
| // counters are decremented. |
| wave->decVMemInstsIssued(); |
| wave->decLGKMInstsIssued(); |
| --wave->rdGmReqsInPipe; |
| --wave->rdLmReqsInPipe; |
| gpuDynInst->exec_mask = wave->execMask(); |
| wave->computeUnit->vrf[wave->simdId] |
| ->scheduleWriteOperandsFromLoad(wave, gpuDynInst); |
| return; |
| } |
| |
| gpuDynInst->execUnitId = wave->execUnitId; |
| gpuDynInst->exec_mask = wave->execMask(); |
| |
| // The instruction latency is set to one clock period of the compute unit. |
| auto *const cu = gpuDynInst->computeUnit(); |
| gpuDynInst->latency.init(cu); |
| gpuDynInst->latency.set(cu->clockPeriod()); |
| |
| // Read the 64-bit ADDR operand and pass it to calcAddr(). |
| ConstVecOperandU64 flatAddr(gpuDynInst, extData.ADDR); |
| flatAddr.read(); |
| calcAddr(gpuDynInst, flatAddr); |
| |
| // Only accesses executed as SC_GLOBAL are supported. |
| if (gpuDynInst->executedAs() != enums::SC_GLOBAL) { |
| fatal("Non global flat instructions not implemented yet.\n"); |
| } else { |
| cu->globalMemoryPipe.issueRequest(gpuDynInst); |
| --wave->rdGmReqsInPipe; |
| ++wave->outstandingReqsRdGm; |
| } |
| |
| ++wave->outstandingReqs; |
| wave->validateRequestCounters(); |
| } // execute |
| |
| // Delegates to initMemRead<VecElemU32>() (helper defined outside this |
| // chunk; presumably starts the per-lane dword read - TODO confirm). |
| void |
| Inst_FLAT__FLAT_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| initMemRead<VecElemU32>(gpuDynInst); |
| } // initiateAcc |
| |
| // Copies the loaded data into the destination VGPR. For every lane enabled |
| // in the instruction's exec mask, the VecElemU32 at index `lane` of d_data |
| // is stored to that lane; disabled lanes are not assigned. |
| void |
| Inst_FLAT__FLAT_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| VecOperandU32 dstReg(gpuDynInst, extData.VDST); |
| const auto *const loaded = |
| reinterpret_cast<VecElemU32*>(gpuDynInst->d_data); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (!gpuDynInst->exec_mask[lane]) { |
| continue; |
| } |
| dstReg[lane] = loaded[lane]; |
| } |
| |
| dstReg.write(); |
| } // completeAcc |
| |
| // Builds the flat_load_dwordx2 instruction from the given InFmt_FLAT |
| // encoding and flags it as a memory reference (MemoryRef) that is a load |
| // (Load). |
| Inst_FLAT__FLAT_LOAD_DWORDX2::Inst_FLAT__FLAT_LOAD_DWORDX2( |
| InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_load_dwordx2") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| } // Inst_FLAT__FLAT_LOAD_DWORDX2 |
| |
| // Destructor: no cleanup is performed, so it is explicitly defaulted. |
| Inst_FLAT__FLAT_LOAD_DWORDX2::~Inst_FLAT__FLAT_LOAD_DWORDX2() = default; |
| |
| // Flat load of 2 dwords per lane. |
| // With an empty exec mask the bookkeeping counters are decremented and no |
| // memory request is made; otherwise the request is issued to the global |
| // memory pipe of the compute unit. |
| void |
| Inst_FLAT__FLAT_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *const wave = gpuDynInst->wavefront(); |
| |
| if (wave->execMask().none()) { |
| // No active lanes: adjust the counters and return without issuing |
| // a memory request. Both the global (Gm) and local (Lm) in-pipe read |
| // counters are decremented. |
| wave->decVMemInstsIssued(); |
| wave->decLGKMInstsIssued(); |
| --wave->rdGmReqsInPipe; |
| --wave->rdLmReqsInPipe; |
| gpuDynInst->exec_mask = wave->execMask(); |
| wave->computeUnit->vrf[wave->simdId] |
| ->scheduleWriteOperandsFromLoad(wave, gpuDynInst); |
| return; |
| } |
| |
| gpuDynInst->execUnitId = wave->execUnitId; |
| gpuDynInst->exec_mask = wave->execMask(); |
| |
| // The instruction latency is set to one clock period of the compute unit. |
| auto *const cu = gpuDynInst->computeUnit(); |
| gpuDynInst->latency.init(cu); |
| gpuDynInst->latency.set(cu->clockPeriod()); |
| |
| // Read the 64-bit ADDR operand and pass it to calcAddr(). |
| ConstVecOperandU64 flatAddr(gpuDynInst, extData.ADDR); |
| flatAddr.read(); |
| calcAddr(gpuDynInst, flatAddr); |
| |
| // Only accesses executed as SC_GLOBAL are supported. |
| if (gpuDynInst->executedAs() != enums::SC_GLOBAL) { |
| fatal("Non global flat instructions not implemented yet.\n"); |
| } else { |
| cu->globalMemoryPipe.issueRequest(gpuDynInst); |
| --wave->rdGmReqsInPipe; |
| ++wave->outstandingReqsRdGm; |
| } |
| |
| ++wave->outstandingReqs; |
| wave->validateRequestCounters(); |
| } // execute |
| |
| // Delegates to initMemRead<VecElemU64>() (helper defined outside this |
| // chunk; presumably starts the per-lane 64-bit read - TODO confirm). |
| void |
| Inst_FLAT__FLAT_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| initMemRead<VecElemU64>(gpuDynInst); |
| } // initiateAcc |
| |
| // Copies the loaded data into the 64-bit destination operand. For every |
| // lane enabled in the instruction's exec mask, the VecElemU64 at index |
| // `lane` of d_data is stored to that lane; disabled lanes are not assigned. |
| void |
| Inst_FLAT__FLAT_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| VecOperandU64 dstReg(gpuDynInst, extData.VDST); |
| const auto *const loaded = |
| reinterpret_cast<VecElemU64*>(gpuDynInst->d_data); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (!gpuDynInst->exec_mask[lane]) { |
| continue; |
| } |
| dstReg[lane] = loaded[lane]; |
| } |
| |
| dstReg.write(); |
| } // completeAcc |
| |
| // Builds the flat_load_dwordx3 instruction from the given InFmt_FLAT |
| // encoding and flags it as a memory reference (MemoryRef) that is a load |
| // (Load). |
| Inst_FLAT__FLAT_LOAD_DWORDX3::Inst_FLAT__FLAT_LOAD_DWORDX3( |
| InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_load_dwordx3") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| } // Inst_FLAT__FLAT_LOAD_DWORDX3 |
| |
| // Destructor: no cleanup is performed, so it is explicitly defaulted. |
| Inst_FLAT__FLAT_LOAD_DWORDX3::~Inst_FLAT__FLAT_LOAD_DWORDX3() = default; |
| |
| // Flat load of 3 dwords per lane. |
| // With an empty exec mask the bookkeeping counters are decremented and no |
| // memory request is made; otherwise the request is issued to the global |
| // memory pipe of the compute unit. |
| void |
| Inst_FLAT__FLAT_LOAD_DWORDX3::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *const wave = gpuDynInst->wavefront(); |
| |
| if (wave->execMask().none()) { |
| // No active lanes: adjust the counters and return without issuing |
| // a memory request. Both the global (Gm) and local (Lm) in-pipe read |
| // counters are decremented. |
| wave->decVMemInstsIssued(); |
| wave->decLGKMInstsIssued(); |
| --wave->rdGmReqsInPipe; |
| --wave->rdLmReqsInPipe; |
| gpuDynInst->exec_mask = wave->execMask(); |
| wave->computeUnit->vrf[wave->simdId] |
| ->scheduleWriteOperandsFromLoad(wave, gpuDynInst); |
| return; |
| } |
| |
| gpuDynInst->execUnitId = wave->execUnitId; |
| gpuDynInst->exec_mask = wave->execMask(); |
| |
| // The instruction latency is set to one clock period of the compute unit. |
| auto *const cu = gpuDynInst->computeUnit(); |
| gpuDynInst->latency.init(cu); |
| gpuDynInst->latency.set(cu->clockPeriod()); |
| |
| // Read the 64-bit ADDR operand and pass it to calcAddr(). |
| ConstVecOperandU64 flatAddr(gpuDynInst, extData.ADDR); |
| flatAddr.read(); |
| calcAddr(gpuDynInst, flatAddr); |
| |
| // Only accesses executed as SC_GLOBAL are supported. |
| if (gpuDynInst->executedAs() != enums::SC_GLOBAL) { |
| fatal("Non global flat instructions not implemented yet.\n"); |
| } else { |
| cu->globalMemoryPipe.issueRequest(gpuDynInst); |
| --wave->rdGmReqsInPipe; |
| ++wave->outstandingReqsRdGm; |
| } |
| |
| ++wave->outstandingReqs; |
| wave->validateRequestCounters(); |
| } // execute |
| |
| // Delegates to initMemRead<3>() (helper defined outside this chunk; the |
| // template argument is presumably the number of dwords per lane - TODO |
| // confirm). |
| void |
| Inst_FLAT__FLAT_LOAD_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| initMemRead<3>(gpuDynInst); |
| } // initiateAcc |
| |
| // Copies the loaded data into three consecutive destination VGPRs (VDST, |
| // VDST + 1, VDST + 2). d_data is read as VecElemU32 values with three |
| // consecutive entries per lane, starting at index lane * 3. Lanes disabled |
| // in the exec mask are not assigned. |
| void |
| Inst_FLAT__FLAT_LOAD_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| VecOperandU32 dstReg0(gpuDynInst, extData.VDST); |
| VecOperandU32 dstReg1(gpuDynInst, extData.VDST + 1); |
| VecOperandU32 dstReg2(gpuDynInst, extData.VDST + 2); |
| const auto *const loaded = |
| reinterpret_cast<VecElemU32*>(gpuDynInst->d_data); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (!gpuDynInst->exec_mask[lane]) { |
| continue; |
| } |
| const VecElemU32 *const laneData = loaded + lane * 3; |
| dstReg0[lane] = laneData[0]; |
| dstReg1[lane] = laneData[1]; |
| dstReg2[lane] = laneData[2]; |
| } |
| |
| dstReg0.write(); |
| dstReg1.write(); |
| dstReg2.write(); |
| } // completeAcc |
| |
| // Builds the flat_load_dwordx4 instruction from the given InFmt_FLAT |
| // encoding and flags it as a memory reference (MemoryRef) that is a load |
| // (Load). |
| Inst_FLAT__FLAT_LOAD_DWORDX4::Inst_FLAT__FLAT_LOAD_DWORDX4( |
| InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_load_dwordx4") |
| { |
| setFlag(MemoryRef); |
| setFlag(Load); |
| } // Inst_FLAT__FLAT_LOAD_DWORDX4 |
| |
| // Destructor: no cleanup is performed, so it is explicitly defaulted. |
| Inst_FLAT__FLAT_LOAD_DWORDX4::~Inst_FLAT__FLAT_LOAD_DWORDX4() = default; |
| |
| // Flat load of 4 dwords per lane. |
| // With an empty exec mask the bookkeeping counters are decremented and no |
| // memory request is made; otherwise the request is issued to the global |
| // memory pipe of the compute unit. |
| void |
| Inst_FLAT__FLAT_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *const wave = gpuDynInst->wavefront(); |
| |
| if (wave->execMask().none()) { |
| // No active lanes: adjust the counters and return without issuing |
| // a memory request. Both the global (Gm) and local (Lm) in-pipe read |
| // counters are decremented. |
| wave->decVMemInstsIssued(); |
| wave->decLGKMInstsIssued(); |
| --wave->rdGmReqsInPipe; |
| --wave->rdLmReqsInPipe; |
| gpuDynInst->exec_mask = wave->execMask(); |
| wave->computeUnit->vrf[wave->simdId] |
| ->scheduleWriteOperandsFromLoad(wave, gpuDynInst); |
| return; |
| } |
| |
| gpuDynInst->execUnitId = wave->execUnitId; |
| gpuDynInst->exec_mask = wave->execMask(); |
| |
| // The instruction latency is set to one clock period of the compute unit. |
| auto *const cu = gpuDynInst->computeUnit(); |
| gpuDynInst->latency.init(cu); |
| gpuDynInst->latency.set(cu->clockPeriod()); |
| |
| // Read the 64-bit ADDR operand and pass it to calcAddr(). |
| ConstVecOperandU64 flatAddr(gpuDynInst, extData.ADDR); |
| flatAddr.read(); |
| calcAddr(gpuDynInst, flatAddr); |
| |
| // Only accesses executed as SC_GLOBAL are supported. |
| if (gpuDynInst->executedAs() != enums::SC_GLOBAL) { |
| fatal("Non global flat instructions not implemented yet.\n"); |
| } else { |
| cu->globalMemoryPipe.issueRequest(gpuDynInst); |
| --wave->rdGmReqsInPipe; |
| ++wave->outstandingReqsRdGm; |
| } |
| |
| ++wave->outstandingReqs; |
| wave->validateRequestCounters(); |
| } // execute |
| |
| // Delegates to initMemRead<4>() (helper defined outside this chunk; the |
| // template argument is presumably the number of dwords per lane - TODO |
| // confirm). |
| void |
| Inst_FLAT__FLAT_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| initMemRead<4>(gpuDynInst); |
| } // initiateAcc |
| |
| // Copies the loaded data into four consecutive destination VGPRs (VDST |
| // through VDST + 3). d_data is read as VecElemU32 values with four |
| // consecutive entries per lane, starting at index lane * 4. Lanes disabled |
| // in the exec mask are not assigned. |
| void |
| Inst_FLAT__FLAT_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| VecOperandU32 dstReg0(gpuDynInst, extData.VDST); |
| VecOperandU32 dstReg1(gpuDynInst, extData.VDST + 1); |
| VecOperandU32 dstReg2(gpuDynInst, extData.VDST + 2); |
| VecOperandU32 dstReg3(gpuDynInst, extData.VDST + 3); |
| const auto *const loaded = |
| reinterpret_cast<VecElemU32*>(gpuDynInst->d_data); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (!gpuDynInst->exec_mask[lane]) { |
| continue; |
| } |
| const VecElemU32 *const laneData = loaded + lane * 4; |
| dstReg0[lane] = laneData[0]; |
| dstReg1[lane] = laneData[1]; |
| dstReg2[lane] = laneData[2]; |
| dstReg3[lane] = laneData[3]; |
| } |
| |
| dstReg0.write(); |
| dstReg1.write(); |
| dstReg2.write(); |
| dstReg3.write(); |
| } // completeAcc |
| |
| // Builds the flat_store_byte instruction from the given InFmt_FLAT encoding |
| // and flags it as a memory reference (MemoryRef) that is a store (Store). |
| Inst_FLAT__FLAT_STORE_BYTE::Inst_FLAT__FLAT_STORE_BYTE(InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_store_byte") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| } // Inst_FLAT__FLAT_STORE_BYTE |
| |
| // Destructor: no cleanup is performed, so it is explicitly defaulted. |
| Inst_FLAT__FLAT_STORE_BYTE::~Inst_FLAT__FLAT_STORE_BYTE() = default; |
| |
| // Flat store of one byte per lane. |
| // With an empty exec mask the bookkeeping counters are decremented and no |
| // memory request is made; otherwise the request is issued to the global |
| // memory pipe of the compute unit. |
| void |
| Inst_FLAT__FLAT_STORE_BYTE::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *const wave = gpuDynInst->wavefront(); |
| |
| if (wave->execMask().none()) { |
| // No active lanes: adjust the counters and return without issuing |
| // a memory request. Both the global (Gm) and local (Lm) in-pipe write |
| // counters are decremented. |
| wave->decVMemInstsIssued(); |
| wave->decLGKMInstsIssued(); |
| --wave->wrGmReqsInPipe; |
| --wave->wrLmReqsInPipe; |
| return; |
| } |
| |
| gpuDynInst->execUnitId = wave->execUnitId; |
| gpuDynInst->exec_mask = wave->execMask(); |
| |
| // The instruction latency is set to one clock period of the compute unit. |
| auto *const cu = gpuDynInst->computeUnit(); |
| gpuDynInst->latency.init(cu); |
| gpuDynInst->latency.set(cu->clockPeriod()); |
| |
| // Read the 64-bit ADDR operand and pass it to calcAddr(). |
| ConstVecOperandU64 flatAddr(gpuDynInst, extData.ADDR); |
| flatAddr.read(); |
| calcAddr(gpuDynInst, flatAddr); |
| |
| // Only accesses executed as SC_GLOBAL are supported. |
| if (gpuDynInst->executedAs() != enums::SC_GLOBAL) { |
| fatal("Non global flat instructions not implemented yet.\n"); |
| } else { |
| cu->globalMemoryPipe.issueRequest(gpuDynInst); |
| --wave->wrGmReqsInPipe; |
| ++wave->outstandingReqsWrGm; |
| } |
| |
| ++wave->outstandingReqs; |
| wave->validateRequestCounters(); |
| } // execute |
| |
| // Stages the store data and starts the write: for every lane enabled in the |
| // exec mask, the DATA operand's element is copied into d_data (viewed as |
| // VecElemU8, indexed by lane), then initMemWrite<VecElemU8>() is called. |
| void |
| Inst_FLAT__FLAT_STORE_BYTE::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| ConstVecOperandU8 srcData(gpuDynInst, extData.DATA); |
| srcData.read(); |
| |
| auto *const staged = reinterpret_cast<VecElemU8*>(gpuDynInst->d_data); |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (!gpuDynInst->exec_mask[lane]) { |
| continue; |
| } |
| staged[lane] = srcData[lane]; |
| } |
| |
| initMemWrite<VecElemU8>(gpuDynInst); |
| } // initiateAcc |
| |
| // Empty: no work is performed when the store completes. |
| void |
| Inst_FLAT__FLAT_STORE_BYTE::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } |
| |
| // Builds the flat_store_short instruction from the given InFmt_FLAT |
| // encoding and flags it as a memory reference (MemoryRef) that is a store |
| // (Store). |
| Inst_FLAT__FLAT_STORE_SHORT::Inst_FLAT__FLAT_STORE_SHORT(InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_store_short") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| } // Inst_FLAT__FLAT_STORE_SHORT |
| |
| // Destructor: no cleanup is performed, so it is explicitly defaulted. |
| Inst_FLAT__FLAT_STORE_SHORT::~Inst_FLAT__FLAT_STORE_SHORT() = default; |
| |
| // Flat store of one short per lane. |
| // With an empty exec mask the bookkeeping counters are decremented and no |
| // memory request is made; otherwise the request is issued to the global |
| // memory pipe of the compute unit. |
| void |
| Inst_FLAT__FLAT_STORE_SHORT::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *const wave = gpuDynInst->wavefront(); |
| |
| if (wave->execMask().none()) { |
| // No active lanes: adjust the counters and return without issuing |
| // a memory request. Both the global (Gm) and local (Lm) in-pipe write |
| // counters are decremented. |
| wave->decVMemInstsIssued(); |
| wave->decLGKMInstsIssued(); |
| --wave->wrGmReqsInPipe; |
| --wave->wrLmReqsInPipe; |
| return; |
| } |
| |
| gpuDynInst->execUnitId = wave->execUnitId; |
| gpuDynInst->exec_mask = wave->execMask(); |
| |
| // The instruction latency is set to one clock period of the compute unit. |
| auto *const cu = gpuDynInst->computeUnit(); |
| gpuDynInst->latency.init(cu); |
| gpuDynInst->latency.set(cu->clockPeriod()); |
| |
| // Read the 64-bit ADDR operand and pass it to calcAddr(). |
| ConstVecOperandU64 flatAddr(gpuDynInst, extData.ADDR); |
| flatAddr.read(); |
| calcAddr(gpuDynInst, flatAddr); |
| |
| // Only accesses executed as SC_GLOBAL are supported. |
| if (gpuDynInst->executedAs() != enums::SC_GLOBAL) { |
| fatal("Non global flat instructions not implemented yet.\n"); |
| } else { |
| cu->globalMemoryPipe.issueRequest(gpuDynInst); |
| --wave->wrGmReqsInPipe; |
| ++wave->outstandingReqsWrGm; |
| } |
| |
| ++wave->outstandingReqs; |
| wave->validateRequestCounters(); |
| } // execute |
| |
| // Stages the store data and starts the write: for every lane enabled in the |
| // exec mask, the DATA operand's element is copied into d_data (viewed as |
| // VecElemU16, indexed by lane), then initMemWrite<VecElemU16>() is called. |
| void |
| Inst_FLAT__FLAT_STORE_SHORT::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| ConstVecOperandU16 srcData(gpuDynInst, extData.DATA); |
| srcData.read(); |
| |
| auto *const staged = reinterpret_cast<VecElemU16*>(gpuDynInst->d_data); |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (!gpuDynInst->exec_mask[lane]) { |
| continue; |
| } |
| staged[lane] = srcData[lane]; |
| } |
| |
| initMemWrite<VecElemU16>(gpuDynInst); |
| } // initiateAcc |
| |
| // Empty: no work is performed when the store completes. |
| void |
| Inst_FLAT__FLAT_STORE_SHORT::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // completeAcc |
| |
| // Builds the flat_store_dword instruction from the given InFmt_FLAT |
| // encoding and flags it as a memory reference (MemoryRef) that is a store |
| // (Store). |
| Inst_FLAT__FLAT_STORE_DWORD::Inst_FLAT__FLAT_STORE_DWORD(InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_store_dword") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| } // Inst_FLAT__FLAT_STORE_DWORD |
| |
| // Destructor: no cleanup is performed, so it is explicitly defaulted. |
| Inst_FLAT__FLAT_STORE_DWORD::~Inst_FLAT__FLAT_STORE_DWORD() = default; |
| |
| // Flat store of one dword per lane. |
| // With an empty exec mask the bookkeeping counters are decremented and no |
| // memory request is made; otherwise the request is issued to the global |
| // memory pipe of the compute unit. |
| void |
| Inst_FLAT__FLAT_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *const wave = gpuDynInst->wavefront(); |
| |
| if (wave->execMask().none()) { |
| // No active lanes: adjust the counters and return without issuing |
| // a memory request. Both the global (Gm) and local (Lm) in-pipe write |
| // counters are decremented. |
| wave->decVMemInstsIssued(); |
| wave->decLGKMInstsIssued(); |
| --wave->wrGmReqsInPipe; |
| --wave->wrLmReqsInPipe; |
| return; |
| } |
| |
| gpuDynInst->execUnitId = wave->execUnitId; |
| gpuDynInst->exec_mask = wave->execMask(); |
| |
| // The instruction latency is set to one clock period of the compute unit. |
| auto *const cu = gpuDynInst->computeUnit(); |
| gpuDynInst->latency.init(cu); |
| gpuDynInst->latency.set(cu->clockPeriod()); |
| |
| // Read the 64-bit ADDR operand and pass it to calcAddr(). |
| ConstVecOperandU64 flatAddr(gpuDynInst, extData.ADDR); |
| flatAddr.read(); |
| calcAddr(gpuDynInst, flatAddr); |
| |
| // Only accesses executed as SC_GLOBAL are supported. |
| if (gpuDynInst->executedAs() != enums::SC_GLOBAL) { |
| fatal("Non global flat instructions not implemented yet.\n"); |
| } else { |
| cu->globalMemoryPipe.issueRequest(gpuDynInst); |
| --wave->wrGmReqsInPipe; |
| ++wave->outstandingReqsWrGm; |
| } |
| |
| ++wave->outstandingReqs; |
| wave->validateRequestCounters(); |
| } // execute |
| |
| // Stages the store data and starts the write: for every lane enabled in the |
| // exec mask, the DATA operand's element is copied into d_data (viewed as |
| // VecElemU32, indexed by lane), then initMemWrite<VecElemU32>() is called. |
| void |
| Inst_FLAT__FLAT_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| ConstVecOperandU32 srcData(gpuDynInst, extData.DATA); |
| srcData.read(); |
| |
| auto *const staged = reinterpret_cast<VecElemU32*>(gpuDynInst->d_data); |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (!gpuDynInst->exec_mask[lane]) { |
| continue; |
| } |
| staged[lane] = srcData[lane]; |
| } |
| |
| initMemWrite<VecElemU32>(gpuDynInst); |
| } // initiateAcc |
| |
| // Empty: no work is performed when the store completes. |
| void |
| Inst_FLAT__FLAT_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // completeAcc |
| |
| // Builds the flat_store_dwordx2 instruction from the given InFmt_FLAT |
| // encoding and flags it as a memory reference (MemoryRef) that is a store |
| // (Store). |
| Inst_FLAT__FLAT_STORE_DWORDX2::Inst_FLAT__FLAT_STORE_DWORDX2( |
| InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_store_dwordx2") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| } // Inst_FLAT__FLAT_STORE_DWORDX2 |
| |
| // Destructor: no cleanup is performed, so it is explicitly defaulted. |
| Inst_FLAT__FLAT_STORE_DWORDX2::~Inst_FLAT__FLAT_STORE_DWORDX2() = default; |
| |
| // Flat store of 2 dwords per lane. |
| // With an empty exec mask the bookkeeping counters are decremented and no |
| // memory request is made; otherwise the request is issued to the global |
| // memory pipe of the compute unit. |
| void |
| Inst_FLAT__FLAT_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *const wave = gpuDynInst->wavefront(); |
| |
| if (wave->execMask().none()) { |
| // No active lanes: adjust the counters and return without issuing |
| // a memory request. Both the global (Gm) and local (Lm) in-pipe write |
| // counters are decremented. |
| wave->decVMemInstsIssued(); |
| wave->decLGKMInstsIssued(); |
| --wave->wrGmReqsInPipe; |
| --wave->wrLmReqsInPipe; |
| return; |
| } |
| |
| gpuDynInst->execUnitId = wave->execUnitId; |
| gpuDynInst->exec_mask = wave->execMask(); |
| |
| // The instruction latency is set to one clock period of the compute unit. |
| auto *const cu = gpuDynInst->computeUnit(); |
| gpuDynInst->latency.init(cu); |
| gpuDynInst->latency.set(cu->clockPeriod()); |
| |
| // Read the 64-bit ADDR operand and pass it to calcAddr(). |
| ConstVecOperandU64 flatAddr(gpuDynInst, extData.ADDR); |
| flatAddr.read(); |
| calcAddr(gpuDynInst, flatAddr); |
| |
| // Only accesses executed as SC_GLOBAL are supported. |
| if (gpuDynInst->executedAs() != enums::SC_GLOBAL) { |
| fatal("Non global flat instructions not implemented yet.\n"); |
| } else { |
| cu->globalMemoryPipe.issueRequest(gpuDynInst); |
| --wave->wrGmReqsInPipe; |
| ++wave->outstandingReqsWrGm; |
| } |
| |
| ++wave->outstandingReqs; |
| wave->validateRequestCounters(); |
| } // execute |
| |
| // Stages the store data and starts the write: for every lane enabled in the |
| // exec mask, the 64-bit DATA operand's element is copied into d_data (viewed |
| // as VecElemU64, indexed by lane), then initMemWrite<VecElemU64>() is called. |
| void |
| Inst_FLAT__FLAT_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| ConstVecOperandU64 srcData(gpuDynInst, extData.DATA); |
| srcData.read(); |
| |
| auto *const staged = reinterpret_cast<VecElemU64*>(gpuDynInst->d_data); |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (!gpuDynInst->exec_mask[lane]) { |
| continue; |
| } |
| staged[lane] = srcData[lane]; |
| } |
| |
| initMemWrite<VecElemU64>(gpuDynInst); |
| } // initiateAcc |
| |
| // Empty: no work is performed when the store completes. |
| void |
| Inst_FLAT__FLAT_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // completeAcc |
| |
| // Builds the flat_store_dwordx3 instruction from the given InFmt_FLAT |
| // encoding and flags it as a memory reference (MemoryRef) that is a store |
| // (Store). |
| Inst_FLAT__FLAT_STORE_DWORDX3::Inst_FLAT__FLAT_STORE_DWORDX3( |
| InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_store_dwordx3") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| } // Inst_FLAT__FLAT_STORE_DWORDX3 |
| |
| // Destructor: no cleanup is performed, so it is explicitly defaulted. |
| Inst_FLAT__FLAT_STORE_DWORDX3::~Inst_FLAT__FLAT_STORE_DWORDX3() = default; |
| |
| // Flat store of 3 dwords per lane. |
| // With an empty exec mask the bookkeeping counters are decremented and no |
| // memory request is made; otherwise the request is issued to the global |
| // memory pipe of the compute unit. |
| void |
| Inst_FLAT__FLAT_STORE_DWORDX3::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *const wave = gpuDynInst->wavefront(); |
| |
| if (wave->execMask().none()) { |
| // No active lanes: adjust the counters and return without issuing |
| // a memory request. Both the global (Gm) and local (Lm) in-pipe write |
| // counters are decremented. |
| wave->decVMemInstsIssued(); |
| wave->decLGKMInstsIssued(); |
| --wave->wrGmReqsInPipe; |
| --wave->wrLmReqsInPipe; |
| return; |
| } |
| |
| gpuDynInst->execUnitId = wave->execUnitId; |
| gpuDynInst->exec_mask = wave->execMask(); |
| |
| // The instruction latency is set to one clock period of the compute unit. |
| auto *const cu = gpuDynInst->computeUnit(); |
| gpuDynInst->latency.init(cu); |
| gpuDynInst->latency.set(cu->clockPeriod()); |
| |
| // Read the 64-bit ADDR operand and pass it to calcAddr(). |
| ConstVecOperandU64 flatAddr(gpuDynInst, extData.ADDR); |
| flatAddr.read(); |
| calcAddr(gpuDynInst, flatAddr); |
| |
| // Only accesses executed as SC_GLOBAL are supported. |
| if (gpuDynInst->executedAs() != enums::SC_GLOBAL) { |
| fatal("Non global flat instructions not implemented yet.\n"); |
| } else { |
| cu->globalMemoryPipe.issueRequest(gpuDynInst); |
| --wave->wrGmReqsInPipe; |
| ++wave->outstandingReqsWrGm; |
| } |
| |
| ++wave->outstandingReqs; |
| wave->validateRequestCounters(); |
| } // execute |
| |
| // Stages the store data and starts the write. The three source VGPRs (DATA, |
| // DATA + 1, DATA + 2) are interleaved into d_data as VecElemU32 values, |
| // three consecutive entries per enabled lane starting at index lane * 3; |
| // then initMemWrite<3>() is called. |
| void |
| Inst_FLAT__FLAT_STORE_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| ConstVecOperandU32 srcData0(gpuDynInst, extData.DATA); |
| ConstVecOperandU32 srcData1(gpuDynInst, extData.DATA + 1); |
| ConstVecOperandU32 srcData2(gpuDynInst, extData.DATA + 2); |
| |
| srcData0.read(); |
| srcData1.read(); |
| srcData2.read(); |
| |
| auto *const staged = reinterpret_cast<VecElemU32*>(gpuDynInst->d_data); |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (!gpuDynInst->exec_mask[lane]) { |
| continue; |
| } |
| VecElemU32 *const laneSlot = staged + lane * 3; |
| laneSlot[0] = srcData0[lane]; |
| laneSlot[1] = srcData1[lane]; |
| laneSlot[2] = srcData2[lane]; |
| } |
| |
| initMemWrite<3>(gpuDynInst); |
| } // initiateAcc |
| |
| // Empty: no work is performed when the store completes. |
| void |
| Inst_FLAT__FLAT_STORE_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // completeAcc |
| |
| // Builds the flat_store_dwordx4 instruction from the given InFmt_FLAT |
| // encoding and flags it as a memory reference (MemoryRef) that is a store |
| // (Store). |
| Inst_FLAT__FLAT_STORE_DWORDX4::Inst_FLAT__FLAT_STORE_DWORDX4( |
| InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_store_dwordx4") |
| { |
| setFlag(MemoryRef); |
| setFlag(Store); |
| } // Inst_FLAT__FLAT_STORE_DWORDX4 |
| |
| // Destructor: no cleanup is performed, so it is explicitly defaulted. |
| Inst_FLAT__FLAT_STORE_DWORDX4::~Inst_FLAT__FLAT_STORE_DWORDX4() = default; |
| |
| // Flat store of 4 dwords per lane. |
| // With an empty exec mask the bookkeeping counters are decremented and no |
| // memory request is made; otherwise the request is issued to the global |
| // memory pipe of the compute unit. |
| void |
| Inst_FLAT__FLAT_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *const wave = gpuDynInst->wavefront(); |
| |
| if (wave->execMask().none()) { |
| // No active lanes: adjust the counters and return without issuing |
| // a memory request. Both the global (Gm) and local (Lm) in-pipe write |
| // counters are decremented. |
| wave->decVMemInstsIssued(); |
| wave->decLGKMInstsIssued(); |
| --wave->wrGmReqsInPipe; |
| --wave->wrLmReqsInPipe; |
| return; |
| } |
| |
| gpuDynInst->execUnitId = wave->execUnitId; |
| gpuDynInst->exec_mask = wave->execMask(); |
| |
| // The instruction latency is set to one clock period of the compute unit. |
| auto *const cu = gpuDynInst->computeUnit(); |
| gpuDynInst->latency.init(cu); |
| gpuDynInst->latency.set(cu->clockPeriod()); |
| |
| // Read the 64-bit ADDR operand and pass it to calcAddr(). |
| ConstVecOperandU64 flatAddr(gpuDynInst, extData.ADDR); |
| flatAddr.read(); |
| calcAddr(gpuDynInst, flatAddr); |
| |
| // Only accesses executed as SC_GLOBAL are supported. |
| if (gpuDynInst->executedAs() != enums::SC_GLOBAL) { |
| fatal("Non global flat instructions not implemented yet.\n"); |
| } else { |
| cu->globalMemoryPipe.issueRequest(gpuDynInst); |
| --wave->wrGmReqsInPipe; |
| ++wave->outstandingReqsWrGm; |
| } |
| |
| ++wave->outstandingReqs; |
| wave->validateRequestCounters(); |
| } // execute |
| |
| // Stages the store data and starts the write. The four source VGPRs (DATA |
| // through DATA + 3) are interleaved into d_data as VecElemU32 values, four |
| // consecutive entries per enabled lane starting at index lane * 4; then |
| // initMemWrite<4>() is called. |
| void |
| Inst_FLAT__FLAT_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| ConstVecOperandU32 srcData0(gpuDynInst, extData.DATA); |
| ConstVecOperandU32 srcData1(gpuDynInst, extData.DATA + 1); |
| ConstVecOperandU32 srcData2(gpuDynInst, extData.DATA + 2); |
| ConstVecOperandU32 srcData3(gpuDynInst, extData.DATA + 3); |
| |
| srcData0.read(); |
| srcData1.read(); |
| srcData2.read(); |
| srcData3.read(); |
| |
| auto *const staged = reinterpret_cast<VecElemU32*>(gpuDynInst->d_data); |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (!gpuDynInst->exec_mask[lane]) { |
| continue; |
| } |
| VecElemU32 *const laneSlot = staged + lane * 4; |
| laneSlot[0] = srcData0[lane]; |
| laneSlot[1] = srcData1[lane]; |
| laneSlot[2] = srcData2[lane]; |
| laneSlot[3] = srcData3[lane]; |
| } |
| |
| initMemWrite<4>(gpuDynInst); |
| } // initiateAcc |
| |
| // Empty: no work is performed when the store completes. |
| void |
| Inst_FLAT__FLAT_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| } // completeAcc |
| |
| // Builds the flat_atomic_swap instruction from the given InFmt_FLAT |
| // encoding. Flags are set in this order: AtomicExch, then AtomicReturn when |
| // the GLC bit of the encoding is set (AtomicNoReturn otherwise), then |
| // MemoryRef. |
| Inst_FLAT__FLAT_ATOMIC_SWAP::Inst_FLAT__FLAT_ATOMIC_SWAP(InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_atomic_swap") |
| { |
| setFlag(AtomicExch); |
| setFlag(instData.GLC ? AtomicReturn : AtomicNoReturn); |
| setFlag(MemoryRef); |
| } // Inst_FLAT__FLAT_ATOMIC_SWAP |
| |
| // Destructor: no cleanup is performed, so it is explicitly defaulted. |
| Inst_FLAT__FLAT_ATOMIC_SWAP::~Inst_FLAT__FLAT_ATOMIC_SWAP() = default; |
| |
| // Atomic swap: |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = DATA; |
| // RETURN_DATA = tmp. |
| // With an empty exec mask the bookkeeping counters are decremented and no |
| // memory request is made. Otherwise the request is issued to the global |
| // memory pipe and the per-lane DATA values are copied into a_data. |
| void |
| Inst_FLAT__FLAT_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *const wave = gpuDynInst->wavefront(); |
| |
| if (wave->execMask().none()) { |
| // No active lanes: adjust the counters and return without issuing |
| // a memory request. Read and write in-pipe counters are decremented |
| // for both the global (Gm) and local (Lm) paths. |
| wave->decVMemInstsIssued(); |
| wave->decLGKMInstsIssued(); |
| --wave->wrGmReqsInPipe; |
| --wave->rdGmReqsInPipe; |
| --wave->wrLmReqsInPipe; |
| --wave->rdLmReqsInPipe; |
| // Only when GLC is set is the register file scheduled for a write. |
| if (instData.GLC) { |
| gpuDynInst->exec_mask = wave->execMask(); |
| wave->computeUnit->vrf[wave->simdId] |
| ->scheduleWriteOperandsFromLoad(wave, gpuDynInst); |
| } |
| return; |
| } |
| |
| gpuDynInst->execUnitId = wave->execUnitId; |
| gpuDynInst->exec_mask = wave->execMask(); |
| |
| // The instruction latency is set to one clock period of the compute unit. |
| auto *const cu = gpuDynInst->computeUnit(); |
| gpuDynInst->latency.init(cu); |
| gpuDynInst->latency.set(cu->clockPeriod()); |
| |
| // Read the 64-bit ADDR operand and pass it to calcAddr(). |
| ConstVecOperandU64 flatAddr(gpuDynInst, extData.ADDR); |
| flatAddr.read(); |
| calcAddr(gpuDynInst, flatAddr); |
| |
| // Queried after calcAddr(), matching the original statement order. |
| const auto aperture = gpuDynInst->executedAs(); |
| if (aperture != enums::SC_GLOBAL && aperture != enums::SC_PRIVATE) { |
| fatal("Non global flat instructions not implemented yet.\n"); |
| } else { |
| // TODO: additional address computation required for scratch |
| panic_if(aperture == enums::SC_PRIVATE, |
| "Flats to private aperture not tested yet\n"); |
| cu->globalMemoryPipe.issueRequest(gpuDynInst); |
| --wave->wrGmReqsInPipe; |
| ++wave->outstandingReqsWrGm; |
| --wave->rdGmReqsInPipe; |
| ++wave->outstandingReqsRdGm; |
| } |
| |
| ++wave->outstandingReqs; |
| wave->validateRequestCounters(); |
| |
| // Copy the swap value of each enabled lane into a_data. |
| ConstVecOperandU32 swapData(gpuDynInst, extData.DATA); |
| swapData.read(); |
| |
| auto *const atomicSrc = reinterpret_cast<VecElemU32*>(gpuDynInst->a_data); |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (!gpuDynInst->exec_mask[lane]) { |
| continue; |
| } |
| atomicSrc[lane] = swapData[lane]; |
| } |
| } // execute |
| |
// Delegates the access to initAtomicAccess, instantiated for 32-bit
// (VecElemU32) elements.
void
Inst_FLAT__FLAT_ATOMIC_SWAP::initiateAcc(GPUDynInstPtr gpuDynInst)
{
    initAtomicAccess<VecElemU32>(gpuDynInst);
} // initiateAcc
| |
| void |
| Inst_FLAT__FLAT_ATOMIC_SWAP::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| if (isAtomicRet()) { |
| VecOperandU32 vdst(gpuDynInst, extData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| vdst[lane] = (reinterpret_cast<VecElemU32*>( |
| gpuDynInst->d_data))[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| } // completeAcc |
| |
| // --- Inst_FLAT__FLAT_ATOMIC_CMPSWAP class methods --- |
| |
| Inst_FLAT__FLAT_ATOMIC_CMPSWAP |
| ::Inst_FLAT__FLAT_ATOMIC_CMPSWAP(InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_atomic_cmpswap") |
| { |
| setFlag(AtomicCAS); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } // if |
| setFlag(MemoryRef); |
| } // Inst_FLAT__FLAT_ATOMIC_CMPSWAP |
| |
| Inst_FLAT__FLAT_ATOMIC_CMPSWAP::~Inst_FLAT__FLAT_ATOMIC_CMPSWAP() |
| { |
| } // ~Inst_FLAT__FLAT_ATOMIC_CMPSWAP |
| |
| // tmp = MEM[ADDR]; |
| // src = DATA[0]; |
| // cmp = DATA[1]; |
| // MEM[ADDR] = (tmp == cmp) ? src : tmp; |
| // RETURN_DATA[0] = tmp. |
| void |
| Inst_FLAT__FLAT_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| |
| if (wf->execMask().none()) { |
| wf->decVMemInstsIssued(); |
| wf->decLGKMInstsIssued(); |
| wf->wrGmReqsInPipe--; |
| wf->rdGmReqsInPipe--; |
| wf->wrLmReqsInPipe--; |
| wf->rdLmReqsInPipe--; |
| if (instData.GLC) { |
| gpuDynInst->exec_mask = wf->execMask(); |
| wf->computeUnit->vrf[wf->simdId]-> |
| scheduleWriteOperandsFromLoad(wf, gpuDynInst); |
| } |
| return; |
| } |
| |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| |
| ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); |
| ConstVecOperandU32 data(gpuDynInst, extData.DATA); |
| ConstVecOperandU32 cmp(gpuDynInst, extData.DATA + 1); |
| |
| addr.read(); |
| data.read(); |
| cmp.read(); |
| |
| calcAddr(gpuDynInst, addr); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| (reinterpret_cast<VecElemU32*>(gpuDynInst->x_data))[lane] |
| = data[lane]; |
| (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane] |
| = cmp[lane]; |
| } |
| } |
| |
| if (gpuDynInst->executedAs() == enums::SC_GLOBAL || |
| gpuDynInst->executedAs() == enums::SC_PRIVATE) { |
| /** |
| * TODO: If you encounter this panic, just remove this panic |
| * and restart the simulation. It should just work fine but |
| * this is to warn user that this path is never tested although |
| * all the necessary logic is implemented |
| */ |
| panic_if(gpuDynInst->executedAs() == enums::SC_PRIVATE, |
| "Flats to private aperture not tested yet\n"); |
| gpuDynInst->computeUnit()->globalMemoryPipe. |
| issueRequest(gpuDynInst); |
| wf->wrGmReqsInPipe--; |
| wf->outstandingReqsWrGm++; |
| wf->rdGmReqsInPipe--; |
| wf->outstandingReqsRdGm++; |
| } else { |
| fatal("Non global flat instructions not implemented yet.\n"); |
| } |
| |
| gpuDynInst->wavefront()->outstandingReqs++; |
| gpuDynInst->wavefront()->validateRequestCounters(); |
| } |
| |
// Delegates the access to initAtomicAccess, instantiated for 32-bit
// (VecElemU32) elements.
void
Inst_FLAT__FLAT_ATOMIC_CMPSWAP::initiateAcc(GPUDynInstPtr gpuDynInst)
{
    initAtomicAccess<VecElemU32>(gpuDynInst);
} // initiateAcc
| |
| void |
| Inst_FLAT__FLAT_ATOMIC_CMPSWAP::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| if (isAtomicRet()) { |
| VecOperandU32 vdst(gpuDynInst, extData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| vdst[lane] = (reinterpret_cast<VecElemU32*>( |
| gpuDynInst->d_data))[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| } // completeAcc |
| |
| Inst_FLAT__FLAT_ATOMIC_ADD::Inst_FLAT__FLAT_ATOMIC_ADD(InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_atomic_add") |
| { |
| setFlag(AtomicAdd); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } // if |
| setFlag(MemoryRef); |
| } // Inst_FLAT__FLAT_ATOMIC_ADD |
| |
| Inst_FLAT__FLAT_ATOMIC_ADD::~Inst_FLAT__FLAT_ATOMIC_ADD() |
| { |
| } // ~Inst_FLAT__FLAT_ATOMIC_ADD |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] += DATA; |
| // RETURN_DATA = tmp. |
| void |
| Inst_FLAT__FLAT_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| |
| if (wf->execMask().none()) { |
| wf->decVMemInstsIssued(); |
| wf->decLGKMInstsIssued(); |
| wf->wrGmReqsInPipe--; |
| wf->rdGmReqsInPipe--; |
| wf->wrLmReqsInPipe--; |
| wf->rdLmReqsInPipe--; |
| if (instData.GLC) { |
| gpuDynInst->exec_mask = wf->execMask(); |
| wf->computeUnit->vrf[wf->simdId]-> |
| scheduleWriteOperandsFromLoad(wf, gpuDynInst); |
| } |
| return; |
| } |
| |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| |
| ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); |
| ConstVecOperandU32 data(gpuDynInst, extData.DATA); |
| |
| addr.read(); |
| data.read(); |
| |
| calcAddr(gpuDynInst, addr); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane] |
| = data[lane]; |
| } |
| } |
| |
| if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { |
| gpuDynInst->computeUnit()->globalMemoryPipe. |
| issueRequest(gpuDynInst); |
| wf->wrGmReqsInPipe--; |
| wf->outstandingReqsWrGm++; |
| wf->rdGmReqsInPipe--; |
| wf->outstandingReqsRdGm++; |
| } else { |
| fatal("Non global flat instructions not implemented yet.\n"); |
| } |
| |
| gpuDynInst->wavefront()->outstandingReqs++; |
| gpuDynInst->wavefront()->validateRequestCounters(); |
| } |
| |
// Delegates the access to initAtomicAccess, instantiated for 32-bit
// (VecElemU32) elements.
void
Inst_FLAT__FLAT_ATOMIC_ADD::initiateAcc(GPUDynInstPtr gpuDynInst)
{
    initAtomicAccess<VecElemU32>(gpuDynInst);
} // initiateAcc
| |
| void |
| Inst_FLAT__FLAT_ATOMIC_ADD::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| if (isAtomicRet()) { |
| VecOperandU32 vdst(gpuDynInst, extData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| vdst[lane] = (reinterpret_cast<VecElemU32*>( |
| gpuDynInst->d_data))[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| } // completeAcc |
| |
| Inst_FLAT__FLAT_ATOMIC_SUB::Inst_FLAT__FLAT_ATOMIC_SUB(InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_atomic_sub") |
| { |
| setFlag(AtomicSub); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } // if |
| setFlag(MemoryRef); |
| } // Inst_FLAT__FLAT_ATOMIC_SUB |
| |
| Inst_FLAT__FLAT_ATOMIC_SUB::~Inst_FLAT__FLAT_ATOMIC_SUB() |
| { |
| } // ~Inst_FLAT__FLAT_ATOMIC_SUB |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] -= DATA; |
| // RETURN_DATA = tmp. |
| void |
| Inst_FLAT__FLAT_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| |
| if (wf->execMask().none()) { |
| wf->decVMemInstsIssued(); |
| wf->decLGKMInstsIssued(); |
| wf->wrGmReqsInPipe--; |
| wf->rdGmReqsInPipe--; |
| wf->wrLmReqsInPipe--; |
| wf->rdLmReqsInPipe--; |
| if (instData.GLC) { |
| gpuDynInst->exec_mask = wf->execMask(); |
| wf->computeUnit->vrf[wf->simdId]-> |
| scheduleWriteOperandsFromLoad(wf, gpuDynInst); |
| } |
| return; |
| } |
| |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| |
| ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); |
| ConstVecOperandU32 data(gpuDynInst, extData.DATA); |
| |
| addr.read(); |
| data.read(); |
| |
| calcAddr(gpuDynInst, addr); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane] |
| = data[lane]; |
| } |
| } |
| |
| if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { |
| gpuDynInst->computeUnit()->globalMemoryPipe. |
| issueRequest(gpuDynInst); |
| wf->wrGmReqsInPipe--; |
| wf->outstandingReqsWrGm++; |
| wf->rdGmReqsInPipe--; |
| wf->outstandingReqsRdGm++; |
| } else { |
| fatal("Non global flat instructions not implemented yet.\n"); |
| } |
| |
| gpuDynInst->wavefront()->outstandingReqs++; |
| gpuDynInst->wavefront()->validateRequestCounters(); |
| } |
// Delegates the access to initAtomicAccess, instantiated for 32-bit
// (VecElemU32) elements.
void
Inst_FLAT__FLAT_ATOMIC_SUB::initiateAcc(GPUDynInstPtr gpuDynInst)
{
    initAtomicAccess<VecElemU32>(gpuDynInst);
} // initiateAcc
| |
| void |
| Inst_FLAT__FLAT_ATOMIC_SUB::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| if (isAtomicRet()) { |
| VecOperandU32 vdst(gpuDynInst, extData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| vdst[lane] = (reinterpret_cast<VecElemU32*>( |
| gpuDynInst->d_data))[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| } // completeAcc |
| |
| Inst_FLAT__FLAT_ATOMIC_SMIN::Inst_FLAT__FLAT_ATOMIC_SMIN(InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_atomic_smin") |
| { |
| setFlag(AtomicMin); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| } // Inst_FLAT__FLAT_ATOMIC_SMIN |
| |
| Inst_FLAT__FLAT_ATOMIC_SMIN::~Inst_FLAT__FLAT_ATOMIC_SMIN() |
| { |
| } // ~Inst_FLAT__FLAT_ATOMIC_SMIN |
| |
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
// RETURN_DATA = tmp.
//
// Not implemented: executing this instruction calls
// panicUnimplemented().
void
Inst_FLAT__FLAT_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
}
| |
| Inst_FLAT__FLAT_ATOMIC_UMIN::Inst_FLAT__FLAT_ATOMIC_UMIN(InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_atomic_umin") |
| { |
| setFlag(AtomicMin); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| } // Inst_FLAT__FLAT_ATOMIC_UMIN |
| |
| Inst_FLAT__FLAT_ATOMIC_UMIN::~Inst_FLAT__FLAT_ATOMIC_UMIN() |
| { |
| } // ~Inst_FLAT__FLAT_ATOMIC_UMIN |
| |
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
// RETURN_DATA = tmp.
//
// Not implemented: executing this instruction calls
// panicUnimplemented().
void
Inst_FLAT__FLAT_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
}
| |
| Inst_FLAT__FLAT_ATOMIC_SMAX::Inst_FLAT__FLAT_ATOMIC_SMAX(InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_atomic_smax") |
| { |
| setFlag(AtomicMax); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| } // Inst_FLAT__FLAT_ATOMIC_SMAX |
| |
| Inst_FLAT__FLAT_ATOMIC_SMAX::~Inst_FLAT__FLAT_ATOMIC_SMAX() |
| { |
| } // ~Inst_FLAT__FLAT_ATOMIC_SMAX |
| |
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
// RETURN_DATA = tmp.
//
// Not implemented: executing this instruction calls
// panicUnimplemented().
void
Inst_FLAT__FLAT_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
}
| |
| Inst_FLAT__FLAT_ATOMIC_UMAX::Inst_FLAT__FLAT_ATOMIC_UMAX(InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_atomic_umax") |
| { |
| setFlag(AtomicMax); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| } // Inst_FLAT__FLAT_ATOMIC_UMAX |
| |
| Inst_FLAT__FLAT_ATOMIC_UMAX::~Inst_FLAT__FLAT_ATOMIC_UMAX() |
| { |
| } // ~Inst_FLAT__FLAT_ATOMIC_UMAX |
| |
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
// RETURN_DATA = tmp.
//
// Not implemented: executing this instruction calls
// panicUnimplemented().
void
Inst_FLAT__FLAT_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
}
| |
| Inst_FLAT__FLAT_ATOMIC_AND::Inst_FLAT__FLAT_ATOMIC_AND(InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_atomic_and") |
| { |
| setFlag(AtomicAnd); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| } // Inst_FLAT__FLAT_ATOMIC_AND |
| |
| Inst_FLAT__FLAT_ATOMIC_AND::~Inst_FLAT__FLAT_ATOMIC_AND() |
| { |
| } // ~Inst_FLAT__FLAT_ATOMIC_AND |
| |
// tmp = MEM[ADDR];
// MEM[ADDR] &= DATA;
// RETURN_DATA = tmp.
//
// Not implemented: executing this instruction calls
// panicUnimplemented().
void
Inst_FLAT__FLAT_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
}
| |
| Inst_FLAT__FLAT_ATOMIC_OR::Inst_FLAT__FLAT_ATOMIC_OR(InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_atomic_or") |
| { |
| setFlag(AtomicOr); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| } // Inst_FLAT__FLAT_ATOMIC_OR |
| |
| Inst_FLAT__FLAT_ATOMIC_OR::~Inst_FLAT__FLAT_ATOMIC_OR() |
| { |
| } // ~Inst_FLAT__FLAT_ATOMIC_OR |
| |
// tmp = MEM[ADDR];
// MEM[ADDR] |= DATA;
// RETURN_DATA = tmp.
//
// Not implemented: executing this instruction calls
// panicUnimplemented().
void
Inst_FLAT__FLAT_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
}
| |
| Inst_FLAT__FLAT_ATOMIC_XOR::Inst_FLAT__FLAT_ATOMIC_XOR(InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_atomic_xor") |
| { |
| setFlag(AtomicXor); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| } // Inst_FLAT__FLAT_ATOMIC_XOR |
| |
| Inst_FLAT__FLAT_ATOMIC_XOR::~Inst_FLAT__FLAT_ATOMIC_XOR() |
| { |
| } // ~Inst_FLAT__FLAT_ATOMIC_XOR |
| |
// tmp = MEM[ADDR];
// MEM[ADDR] ^= DATA;
// RETURN_DATA = tmp.
//
// Not implemented: executing this instruction calls
// panicUnimplemented().
void
Inst_FLAT__FLAT_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
}
| |
| Inst_FLAT__FLAT_ATOMIC_INC::Inst_FLAT__FLAT_ATOMIC_INC(InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_atomic_inc") |
| { |
| setFlag(AtomicInc); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| } // Inst_FLAT__FLAT_ATOMIC_INC |
| |
| Inst_FLAT__FLAT_ATOMIC_INC::~Inst_FLAT__FLAT_ATOMIC_INC() |
| { |
| } // ~Inst_FLAT__FLAT_ATOMIC_INC |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare); |
| // RETURN_DATA = tmp. |
| void |
| Inst_FLAT__FLAT_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| |
| if (wf->execMask().none()) { |
| wf->decVMemInstsIssued(); |
| wf->decLGKMInstsIssued(); |
| wf->wrGmReqsInPipe--; |
| wf->rdGmReqsInPipe--; |
| wf->wrLmReqsInPipe--; |
| wf->rdLmReqsInPipe--; |
| if (instData.GLC) { |
| gpuDynInst->exec_mask = wf->execMask(); |
| wf->computeUnit->vrf[wf->simdId]-> |
| scheduleWriteOperandsFromLoad(wf, gpuDynInst); |
| } |
| return; |
| } |
| |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| |
| ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); |
| ConstVecOperandU32 data(gpuDynInst, extData.DATA); |
| |
| addr.read(); |
| data.read(); |
| |
| calcAddr(gpuDynInst, addr); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane] |
| = data[lane]; |
| } |
| } |
| |
| if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { |
| gpuDynInst->computeUnit()->globalMemoryPipe. |
| issueRequest(gpuDynInst); |
| wf->wrGmReqsInPipe--; |
| wf->outstandingReqsWrGm++; |
| wf->rdGmReqsInPipe--; |
| wf->outstandingReqsRdGm++; |
| } else { |
| fatal("Non global flat instructions not implemented yet.\n"); |
| } |
| |
| gpuDynInst->wavefront()->outstandingReqs++; |
| gpuDynInst->wavefront()->validateRequestCounters(); |
| } |
| |
// Delegates the access to initAtomicAccess, instantiated for 32-bit
// (VecElemU32) elements.
void
Inst_FLAT__FLAT_ATOMIC_INC::initiateAcc(GPUDynInstPtr gpuDynInst)
{
    initAtomicAccess<VecElemU32>(gpuDynInst);
} // initiateAcc
| |
| void |
| Inst_FLAT__FLAT_ATOMIC_INC::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| if (isAtomicRet()) { |
| VecOperandU32 vdst(gpuDynInst, extData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| vdst[lane] = (reinterpret_cast<VecElemU32*>( |
| gpuDynInst->d_data))[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| } // completeAcc |
| |
| Inst_FLAT__FLAT_ATOMIC_DEC::Inst_FLAT__FLAT_ATOMIC_DEC(InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_atomic_dec") |
| { |
| setFlag(AtomicDec); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| } // Inst_FLAT__FLAT_ATOMIC_DEC |
| |
| Inst_FLAT__FLAT_ATOMIC_DEC::~Inst_FLAT__FLAT_ATOMIC_DEC() |
| { |
| } // ~Inst_FLAT__FLAT_ATOMIC_DEC |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1 |
| // (unsigned compare); RETURN_DATA = tmp. |
| void |
| Inst_FLAT__FLAT_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| |
| if (wf->execMask().none()) { |
| wf->decVMemInstsIssued(); |
| wf->decLGKMInstsIssued(); |
| wf->wrGmReqsInPipe--; |
| wf->rdGmReqsInPipe--; |
| wf->wrLmReqsInPipe--; |
| wf->rdLmReqsInPipe--; |
| if (instData.GLC) { |
| gpuDynInst->exec_mask = wf->execMask(); |
| wf->computeUnit->vrf[wf->simdId]-> |
| scheduleWriteOperandsFromLoad(wf, gpuDynInst); |
| } |
| return; |
| } |
| |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| |
| ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); |
| ConstVecOperandU32 data(gpuDynInst, extData.DATA); |
| |
| addr.read(); |
| data.read(); |
| |
| calcAddr(gpuDynInst, addr); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane] |
| = data[lane]; |
| } |
| } |
| |
| if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { |
| gpuDynInst->computeUnit()->globalMemoryPipe. |
| issueRequest(gpuDynInst); |
| wf->wrGmReqsInPipe--; |
| wf->outstandingReqsWrGm++; |
| wf->rdGmReqsInPipe--; |
| wf->outstandingReqsRdGm++; |
| } else { |
| fatal("Non global flat instructions not implemented yet.\n"); |
| } |
| |
| gpuDynInst->wavefront()->outstandingReqs++; |
| gpuDynInst->wavefront()->validateRequestCounters(); |
| } |
| |
// Delegates the access to initAtomicAccess, instantiated for 32-bit
// (VecElemU32) elements.
void
Inst_FLAT__FLAT_ATOMIC_DEC::initiateAcc(GPUDynInstPtr gpuDynInst)
{
    initAtomicAccess<VecElemU32>(gpuDynInst);
} // initiateAcc
| |
| void |
| Inst_FLAT__FLAT_ATOMIC_DEC::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| if (isAtomicRet()) { |
| VecOperandU32 vdst(gpuDynInst, extData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| vdst[lane] = (reinterpret_cast<VecElemU32*>( |
| gpuDynInst->d_data))[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| } // completeAcc |
| |
| Inst_FLAT__FLAT_ATOMIC_SWAP_X2::Inst_FLAT__FLAT_ATOMIC_SWAP_X2( |
| InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_atomic_swap_x2") |
| { |
| setFlag(AtomicExch); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| } // Inst_FLAT__FLAT_ATOMIC_SWAP_X2 |
| |
| Inst_FLAT__FLAT_ATOMIC_SWAP_X2::~Inst_FLAT__FLAT_ATOMIC_SWAP_X2() |
| { |
| } // ~Inst_FLAT__FLAT_ATOMIC_SWAP_X2 |
| |
// tmp = MEM[ADDR];
// MEM[ADDR] = DATA[0:1];
// RETURN_DATA[0:1] = tmp.
//
// Not implemented: executing this instruction calls
// panicUnimplemented().
void
Inst_FLAT__FLAT_ATOMIC_SWAP_X2::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
}
| |
| Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2( |
| InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_atomic_cmpswap_x2") |
| { |
| setFlag(AtomicCAS); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| } // Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2 |
| |
| Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::~Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2() |
| { |
| } // ~Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2 |
| |
| // tmp = MEM[ADDR]; |
| // src = DATA[0:1]; |
| // cmp = DATA[2:3]; |
| // MEM[ADDR] = (tmp == cmp) ? src : tmp; |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| |
| if (wf->execMask().none()) { |
| wf->decVMemInstsIssued(); |
| wf->decLGKMInstsIssued(); |
| wf->wrGmReqsInPipe--; |
| wf->rdGmReqsInPipe--; |
| wf->wrLmReqsInPipe--; |
| wf->rdLmReqsInPipe--; |
| if (instData.GLC) { |
| gpuDynInst->exec_mask = wf->execMask(); |
| wf->computeUnit->vrf[wf->simdId]-> |
| scheduleWriteOperandsFromLoad(wf, gpuDynInst); |
| } |
| return; |
| } |
| |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| |
| ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); |
| ConstVecOperandU64 data(gpuDynInst, extData.DATA); |
| ConstVecOperandU64 cmp(gpuDynInst, extData.DATA + 2); |
| |
| addr.read(); |
| data.read(); |
| cmp.read(); |
| |
| calcAddr(gpuDynInst, addr); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| (reinterpret_cast<VecElemU64*>(gpuDynInst->x_data))[lane] |
| = data[lane]; |
| (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane] |
| = cmp[lane]; |
| } |
| } |
| |
| if (gpuDynInst->executedAs() == enums::SC_GLOBAL || |
| gpuDynInst->executedAs() == enums::SC_PRIVATE) { |
| /** |
| * TODO: If you encounter this panic, just remove this panic |
| * and restart the simulation. It should just work fine but |
| * this is to warn user that this path is never tested although |
| * all the necessary logic is implemented |
| */ |
| panic_if(gpuDynInst->executedAs() == enums::SC_PRIVATE, |
| "Flats to private aperture not tested yet\n"); |
| gpuDynInst->computeUnit()->globalMemoryPipe. |
| issueRequest(gpuDynInst); |
| wf->wrGmReqsInPipe--; |
| wf->outstandingReqsWrGm++; |
| wf->rdGmReqsInPipe--; |
| wf->outstandingReqsRdGm++; |
| } else { |
| fatal("Non global flat instructions not implemented yet.\n"); |
| } |
| |
| gpuDynInst->wavefront()->outstandingReqs++; |
| gpuDynInst->wavefront()->validateRequestCounters(); |
| } |
| |
// Delegates the access to initAtomicAccess, instantiated for 64-bit
// (VecElemU64) elements.
void
Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
{
    initAtomicAccess<VecElemU64>(gpuDynInst);
} // initiateAcc
| |
| void |
| Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| if (isAtomicRet()) { |
| VecOperandU64 vdst(gpuDynInst, extData.VDST); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| vdst[lane] = (reinterpret_cast<VecElemU64*>( |
| gpuDynInst->d_data))[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| } // completeAcc |
| |
| Inst_FLAT__FLAT_ATOMIC_ADD_X2::Inst_FLAT__FLAT_ATOMIC_ADD_X2( |
| InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_atomic_add_x2") |
| { |
| setFlag(AtomicAdd); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| } // Inst_FLAT__FLAT_ATOMIC_ADD_X2 |
| |
| Inst_FLAT__FLAT_ATOMIC_ADD_X2::~Inst_FLAT__FLAT_ATOMIC_ADD_X2() |
| { |
| } // ~Inst_FLAT__FLAT_ATOMIC_ADD_X2 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] += DATA[0:1]; |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_FLAT__FLAT_ATOMIC_ADD_X2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| |
| if (wf->execMask().none()) { |
| wf->decVMemInstsIssued(); |
| wf->decLGKMInstsIssued(); |
| wf->wrGmReqsInPipe--; |
| wf->rdGmReqsInPipe--; |
| wf->wrLmReqsInPipe--; |
| wf->rdLmReqsInPipe--; |
| if (instData.GLC) { |
| gpuDynInst->exec_mask = wf->execMask(); |
| wf->computeUnit->vrf[wf->simdId]-> |
| scheduleWriteOperandsFromLoad(wf, gpuDynInst); |
| } |
| return; |
| } |
| |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| |
| ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); |
| ConstVecOperandU64 data(gpuDynInst, extData.DATA); |
| |
| addr.read(); |
| data.read(); |
| |
| calcAddr(gpuDynInst, addr); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane] |
| = data[lane]; |
| } |
| } |
| |
| if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { |
| gpuDynInst->computeUnit()->globalMemoryPipe. |
| issueRequest(gpuDynInst); |
| wf->wrGmReqsInPipe--; |
| wf->outstandingReqsWrGm++; |
| wf->rdGmReqsInPipe--; |
| wf->outstandingReqsRdGm++; |
| } else { |
| fatal("Non global flat instructions not implemented yet.\n"); |
| } |
| |
| gpuDynInst->wavefront()->outstandingReqs++; |
| gpuDynInst->wavefront()->validateRequestCounters(); |
| } |
| |
// Delegates the access to initAtomicAccess, instantiated for 64-bit
// (VecElemU64) elements.
void
Inst_FLAT__FLAT_ATOMIC_ADD_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
{
    initAtomicAccess<VecElemU64>(gpuDynInst);
} // initiateAcc
| |
| void |
| Inst_FLAT__FLAT_ATOMIC_ADD_X2::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| if (isAtomicRet()) { |
| VecOperandU64 vdst(gpuDynInst, extData.VDST); |
| |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| vdst[lane] = (reinterpret_cast<VecElemU64*>( |
| gpuDynInst->d_data))[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| } // completeAcc |
| |
| Inst_FLAT__FLAT_ATOMIC_SUB_X2::Inst_FLAT__FLAT_ATOMIC_SUB_X2( |
| InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_atomic_sub_x2") |
| { |
| setFlag(AtomicSub); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| } // Inst_FLAT__FLAT_ATOMIC_SUB_X2 |
| |
| Inst_FLAT__FLAT_ATOMIC_SUB_X2::~Inst_FLAT__FLAT_ATOMIC_SUB_X2() |
| { |
| } // ~Inst_FLAT__FLAT_ATOMIC_SUB_X2 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] -= DATA[0:1]; |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_FLAT__FLAT_ATOMIC_SUB_X2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| |
| if (wf->execMask().none()) { |
| wf->decVMemInstsIssued(); |
| wf->decLGKMInstsIssued(); |
| wf->wrGmReqsInPipe--; |
| wf->rdGmReqsInPipe--; |
| wf->wrLmReqsInPipe--; |
| wf->rdLmReqsInPipe--; |
| if (instData.GLC) { |
| gpuDynInst->exec_mask = wf->execMask(); |
| wf->computeUnit->vrf[wf->simdId]-> |
| scheduleWriteOperandsFromLoad(wf, gpuDynInst); |
| } |
| return; |
| } |
| |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| |
| ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); |
| ConstVecOperandU64 data(gpuDynInst, extData.DATA); |
| |
| addr.read(); |
| data.read(); |
| |
| calcAddr(gpuDynInst, addr); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane] |
| = data[lane]; |
| } |
| } |
| |
| if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { |
| gpuDynInst->computeUnit()->globalMemoryPipe. |
| issueRequest(gpuDynInst); |
| wf->wrGmReqsInPipe--; |
| wf->outstandingReqsWrGm++; |
| wf->rdGmReqsInPipe--; |
| wf->outstandingReqsRdGm++; |
| } else { |
| fatal("Non global flat instructions not implemented yet.\n"); |
| } |
| |
| gpuDynInst->wavefront()->outstandingReqs++; |
| gpuDynInst->wavefront()->validateRequestCounters(); |
| } |
| |
// Delegates the access to initAtomicAccess, instantiated for 64-bit
// (VecElemU64) elements.
void
Inst_FLAT__FLAT_ATOMIC_SUB_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
{
    initAtomicAccess<VecElemU64>(gpuDynInst);
} // initiateAcc
| |
| void |
| Inst_FLAT__FLAT_ATOMIC_SUB_X2::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| if (isAtomicRet()) { |
| VecOperandU64 vdst(gpuDynInst, extData.VDST); |
| |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| vdst[lane] = (reinterpret_cast<VecElemU64*>( |
| gpuDynInst->d_data))[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| } // completeAcc |
| |
| Inst_FLAT__FLAT_ATOMIC_SMIN_X2::Inst_FLAT__FLAT_ATOMIC_SMIN_X2( |
| InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_atomic_smin_x2") |
| { |
| setFlag(AtomicMin); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| } // Inst_FLAT__FLAT_ATOMIC_SMIN_X2 |
| |
| Inst_FLAT__FLAT_ATOMIC_SMIN_X2::~Inst_FLAT__FLAT_ATOMIC_SMIN_X2() |
| { |
| } // ~Inst_FLAT__FLAT_ATOMIC_SMIN_X2 |
| |
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
// RETURN_DATA[0:1] = tmp.
//
// Not implemented: executing this instruction calls
// panicUnimplemented().
void
Inst_FLAT__FLAT_ATOMIC_SMIN_X2::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
}
| |
| Inst_FLAT__FLAT_ATOMIC_UMIN_X2::Inst_FLAT__FLAT_ATOMIC_UMIN_X2( |
| InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_atomic_umin_x2") |
| { |
| setFlag(AtomicMin); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| } // Inst_FLAT__FLAT_ATOMIC_UMIN_X2 |
| |
| Inst_FLAT__FLAT_ATOMIC_UMIN_X2::~Inst_FLAT__FLAT_ATOMIC_UMIN_X2() |
| { |
| } // ~Inst_FLAT__FLAT_ATOMIC_UMIN_X2 |
| |
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
// RETURN_DATA[0:1] = tmp.
//
// Not implemented: executing this instruction calls
// panicUnimplemented().
void
Inst_FLAT__FLAT_ATOMIC_UMIN_X2::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
}
| |
| Inst_FLAT__FLAT_ATOMIC_SMAX_X2::Inst_FLAT__FLAT_ATOMIC_SMAX_X2( |
| InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_atomic_smax_x2") |
| { |
| setFlag(AtomicMax); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| } // Inst_FLAT__FLAT_ATOMIC_SMAX_X2 |
| |
| Inst_FLAT__FLAT_ATOMIC_SMAX_X2::~Inst_FLAT__FLAT_ATOMIC_SMAX_X2() |
| { |
| } // ~Inst_FLAT__FLAT_ATOMIC_SMAX_X2 |
| |
// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
// RETURN_DATA[0:1] = tmp.
//
// Not implemented: executing this instruction calls
// panicUnimplemented().
void
Inst_FLAT__FLAT_ATOMIC_SMAX_X2::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
}
| |
| Inst_FLAT__FLAT_ATOMIC_UMAX_X2::Inst_FLAT__FLAT_ATOMIC_UMAX_X2( |
| InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_atomic_umax_x2") |
| { |
| setFlag(AtomicMax); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| } // Inst_FLAT__FLAT_ATOMIC_UMAX_X2 |
| |
| Inst_FLAT__FLAT_ATOMIC_UMAX_X2::~Inst_FLAT__FLAT_ATOMIC_UMAX_X2() |
| { |
| } // ~Inst_FLAT__FLAT_ATOMIC_UMAX_X2 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare); |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_FLAT__FLAT_ATOMIC_UMAX_X2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_FLAT__FLAT_ATOMIC_AND_X2::Inst_FLAT__FLAT_ATOMIC_AND_X2( |
| InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_atomic_and_x2") |
| { |
| setFlag(AtomicAnd); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| } // Inst_FLAT__FLAT_ATOMIC_AND_X2 |
| |
| Inst_FLAT__FLAT_ATOMIC_AND_X2::~Inst_FLAT__FLAT_ATOMIC_AND_X2() |
| { |
| } // ~Inst_FLAT__FLAT_ATOMIC_AND_X2 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] &= DATA[0:1]; |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_FLAT__FLAT_ATOMIC_AND_X2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_FLAT__FLAT_ATOMIC_OR_X2::Inst_FLAT__FLAT_ATOMIC_OR_X2( |
| InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_atomic_or_x2") |
| { |
| setFlag(AtomicOr); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| } // Inst_FLAT__FLAT_ATOMIC_OR_X2 |
| |
| Inst_FLAT__FLAT_ATOMIC_OR_X2::~Inst_FLAT__FLAT_ATOMIC_OR_X2() |
| { |
| } // ~Inst_FLAT__FLAT_ATOMIC_OR_X2 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] |= DATA[0:1]; |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_FLAT__FLAT_ATOMIC_OR_X2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_FLAT__FLAT_ATOMIC_XOR_X2::Inst_FLAT__FLAT_ATOMIC_XOR_X2( |
| InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_atomic_xor_x2") |
| { |
| setFlag(AtomicXor); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| } // Inst_FLAT__FLAT_ATOMIC_XOR_X2 |
| |
| Inst_FLAT__FLAT_ATOMIC_XOR_X2::~Inst_FLAT__FLAT_ATOMIC_XOR_X2() |
| { |
| } // ~Inst_FLAT__FLAT_ATOMIC_XOR_X2 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] ^= DATA[0:1]; |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_FLAT__FLAT_ATOMIC_XOR_X2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| panicUnimplemented(); |
| } |
| |
| Inst_FLAT__FLAT_ATOMIC_INC_X2::Inst_FLAT__FLAT_ATOMIC_INC_X2( |
| InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_atomic_inc_x2") |
| { |
| setFlag(AtomicInc); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| } // Inst_FLAT__FLAT_ATOMIC_INC_X2 |
| |
| Inst_FLAT__FLAT_ATOMIC_INC_X2::~Inst_FLAT__FLAT_ATOMIC_INC_X2() |
| { |
| } // ~Inst_FLAT__FLAT_ATOMIC_INC_X2 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare); |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_FLAT__FLAT_ATOMIC_INC_X2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| |
| if (wf->execMask().none()) { |
| wf->decVMemInstsIssued(); |
| wf->decLGKMInstsIssued(); |
| wf->wrGmReqsInPipe--; |
| wf->rdGmReqsInPipe--; |
| wf->wrLmReqsInPipe--; |
| wf->rdLmReqsInPipe--; |
| if (instData.GLC) { |
| gpuDynInst->exec_mask = wf->execMask(); |
| wf->computeUnit->vrf[wf->simdId]-> |
| scheduleWriteOperandsFromLoad(wf, gpuDynInst); |
| } |
| return; |
| } |
| |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| |
| ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); |
| ConstVecOperandU64 data(gpuDynInst, extData.DATA); |
| |
| addr.read(); |
| data.read(); |
| |
| calcAddr(gpuDynInst, addr); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane] |
| = data[lane]; |
| } |
| } |
| |
| if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { |
| gpuDynInst->computeUnit()->globalMemoryPipe. |
| issueRequest(gpuDynInst); |
| wf->wrGmReqsInPipe--; |
| wf->outstandingReqsWrGm++; |
| wf->rdGmReqsInPipe--; |
| wf->outstandingReqsRdGm++; |
| } else { |
| fatal("Non global flat instructions not implemented yet.\n"); |
| } |
| |
| gpuDynInst->wavefront()->outstandingReqs++; |
| gpuDynInst->wavefront()->validateRequestCounters(); |
| } |
| |
| void |
| Inst_FLAT__FLAT_ATOMIC_INC_X2::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| initAtomicAccess<VecElemU64>(gpuDynInst); |
| } // initiateAcc |
| |
| void |
| Inst_FLAT__FLAT_ATOMIC_INC_X2::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| if (isAtomicRet()) { |
| VecOperandU64 vdst(gpuDynInst, extData.VDST); |
| |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| vdst[lane] = (reinterpret_cast<VecElemU64*>( |
| gpuDynInst->d_data))[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| } // completeAcc |
| |
| Inst_FLAT__FLAT_ATOMIC_DEC_X2::Inst_FLAT__FLAT_ATOMIC_DEC_X2( |
| InFmt_FLAT *iFmt) |
| : Inst_FLAT(iFmt, "flat_atomic_dec_x2") |
| { |
| setFlag(AtomicDec); |
| if (instData.GLC) { |
| setFlag(AtomicReturn); |
| } else { |
| setFlag(AtomicNoReturn); |
| } |
| setFlag(MemoryRef); |
| } // Inst_FLAT__FLAT_ATOMIC_DEC_X2 |
| |
| Inst_FLAT__FLAT_ATOMIC_DEC_X2::~Inst_FLAT__FLAT_ATOMIC_DEC_X2() |
| { |
| } // ~Inst_FLAT__FLAT_ATOMIC_DEC_X2 |
| |
| // tmp = MEM[ADDR]; |
| // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1 |
| // (unsigned compare); |
| // RETURN_DATA[0:1] = tmp. |
| void |
| Inst_FLAT__FLAT_ATOMIC_DEC_X2::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wf = gpuDynInst->wavefront(); |
| |
| if (wf->execMask().none()) { |
| wf->decVMemInstsIssued(); |
| wf->decLGKMInstsIssued(); |
| wf->wrGmReqsInPipe--; |
| wf->rdGmReqsInPipe--; |
| wf->wrLmReqsInPipe--; |
| wf->rdLmReqsInPipe--; |
| if (instData.GLC) { |
| gpuDynInst->exec_mask = wf->execMask(); |
| wf->computeUnit->vrf[wf->simdId]-> |
| scheduleWriteOperandsFromLoad(wf, gpuDynInst); |
| } |
| return; |
| } |
| |
| gpuDynInst->execUnitId = wf->execUnitId; |
| gpuDynInst->exec_mask = wf->execMask(); |
| gpuDynInst->latency.init(gpuDynInst->computeUnit()); |
| gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); |
| |
| ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); |
| ConstVecOperandU64 data(gpuDynInst, extData.DATA); |
| |
| addr.read(); |
| data.read(); |
| |
| calcAddr(gpuDynInst, addr); |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane] |
| = data[lane]; |
| } |
| } |
| |
| if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { |
| gpuDynInst->computeUnit()->globalMemoryPipe. |
| issueRequest(gpuDynInst); |
| wf->wrGmReqsInPipe--; |
| wf->outstandingReqsWrGm++; |
| wf->rdGmReqsInPipe--; |
| wf->outstandingReqsRdGm++; |
| } else { |
| fatal("Non global flat instructions not implemented yet.\n"); |
| } |
| |
| gpuDynInst->wavefront()->outstandingReqs++; |
| gpuDynInst->wavefront()->validateRequestCounters(); |
| } |
| |
| void |
| Inst_FLAT__FLAT_ATOMIC_DEC_X2::initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| initAtomicAccess<VecElemU64>(gpuDynInst); |
| } // initiateAcc |
| |
| void |
| Inst_FLAT__FLAT_ATOMIC_DEC_X2::completeAcc(GPUDynInstPtr gpuDynInst) |
| { |
| if (isAtomicRet()) { |
| VecOperandU64 vdst(gpuDynInst, extData.VDST); |
| |
| |
| for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { |
| if (gpuDynInst->exec_mask[lane]) { |
| vdst[lane] = (reinterpret_cast<VecElemU64*>( |
| gpuDynInst->d_data))[lane]; |
| } |
| } |
| |
| vdst.write(); |
| } |
| } // completeAcc |
| } // namespace Gcn3ISA |
| } // namespace gem5 |