| /* |
| * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. |
| * All rights reserved. |
| * |
| * For use for simulation and test purposes only |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright notice, |
| * this list of conditions and the following disclaimer. |
| * |
| * 2. Redistributions in binary form must reproduce the above copyright notice, |
| * this list of conditions and the following disclaimer in the documentation |
| * and/or other materials provided with the distribution. |
| * |
| * 3. Neither the name of the copyright holder nor the names of its contributors |
| * may be used to endorse or promote products derived from this software |
| * without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
| * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| * POSSIBILITY OF SUCH DAMAGE. |
| * |
| * Author: Steve Reinhardt |
| */ |
| |
| #ifndef __ARCH_HSAIL_INSTS_DECL_HH__ |
| #define __ARCH_HSAIL_INSTS_DECL_HH__ |
| |
| #include <cmath> |
| |
| #include "arch/hsail/insts/gpu_static_inst.hh" |
| #include "arch/hsail/operand.hh" |
| #include "debug/HSAIL.hh" |
| #include "gpu-compute/gpu_dyn_inst.hh" |
| #include "gpu-compute/shader.hh" |
| |
| namespace HsailISA |
| { |
// Bundles a destination-operand class with its matching source-operand
// class, so instruction templates can name both through one parameter.
template<typename _DestOperand, typename _SrcOperand>
class HsailOperandType
{
  public:
    using DestOperand = _DestOperand;
    using SrcOperand = _SrcOperand;
};
| |
| typedef HsailOperandType<CRegOperand, CRegOrImmOperand> CRegOperandType; |
| typedef HsailOperandType<SRegOperand, SRegOrImmOperand> SRegOperandType; |
| typedef HsailOperandType<DRegOperand, DRegOrImmOperand> DRegOperandType; |
| |
// The IsBits parameter serves only to disambiguate the B* types from
// the U* types, which otherwise would be identical (and
// indistinguishable).
| template<typename _OperandType, typename _CType, Enums::MemType _memType, |
| vgpr_type _vgprType, int IsBits=0> |
| class HsailDataType |
| { |
| public: |
| typedef _OperandType OperandType; |
| typedef _CType CType; |
| static const Enums::MemType memType = _memType; |
| static const vgpr_type vgprType = _vgprType; |
| static const char *label; |
| }; |
| |
| typedef HsailDataType<CRegOperandType, bool, Enums::M_U8, VT_32, 1> B1; |
| typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32, 1> B8; |
| |
| typedef HsailDataType<SRegOperandType, uint16_t, |
| Enums::M_U16, VT_32, 1> B16; |
| |
| typedef HsailDataType<SRegOperandType, uint32_t, |
| Enums::M_U32, VT_32, 1> B32; |
| |
| typedef HsailDataType<DRegOperandType, uint64_t, |
| Enums::M_U64, VT_64, 1> B64; |
| |
| typedef HsailDataType<SRegOperandType, int8_t, Enums::M_S8, VT_32> S8; |
| typedef HsailDataType<SRegOperandType, int16_t, Enums::M_S16, VT_32> S16; |
| typedef HsailDataType<SRegOperandType, int32_t, Enums::M_S32, VT_32> S32; |
| typedef HsailDataType<DRegOperandType, int64_t, Enums::M_S64, VT_64> S64; |
| |
| typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32> U8; |
| typedef HsailDataType<SRegOperandType, uint16_t, Enums::M_U16, VT_32> U16; |
| typedef HsailDataType<SRegOperandType, uint32_t, Enums::M_U32, VT_32> U32; |
| typedef HsailDataType<DRegOperandType, uint64_t, Enums::M_U64, VT_64> U64; |
| |
| typedef HsailDataType<SRegOperandType, float, Enums::M_F32, VT_32> F32; |
| typedef HsailDataType<DRegOperandType, double, Enums::M_F64, VT_64> F64; |
| |
| template<typename DestOperandType, typename SrcOperandType, |
| int NumSrcOperands> |
| class CommonInstBase : public HsailGPUStaticInst |
| { |
| protected: |
| typename DestOperandType::DestOperand dest; |
| typename SrcOperandType::SrcOperand src[NumSrcOperands]; |
| |
| void |
| generateDisassembly() |
| { |
| disassembly = csprintf("%s%s %s", opcode, opcode_suffix(), |
| dest.disassemble()); |
| |
| for (int i = 0; i < NumSrcOperands; ++i) { |
| disassembly += ","; |
| disassembly += src[i].disassemble(); |
| } |
| } |
| |
| virtual std::string opcode_suffix() = 0; |
| |
| public: |
| CommonInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, |
| const char *opcode) |
| : HsailGPUStaticInst(obj, opcode) |
| { |
| setFlag(ALU); |
| |
| unsigned op_offs = obj->getOperandPtr(ib->operands, 0); |
| |
| dest.init(op_offs, obj); |
| |
| for (int i = 0; i < NumSrcOperands; ++i) { |
| op_offs = obj->getOperandPtr(ib->operands, i + 1); |
| src[i].init(op_offs, obj); |
| } |
| } |
| |
| bool isVectorRegister(int operandIndex) { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| if (operandIndex < NumSrcOperands) |
| return src[operandIndex].isVectorRegister(); |
| else |
| return dest.isVectorRegister(); |
| } |
| bool isCondRegister(int operandIndex) { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| if (operandIndex < NumSrcOperands) |
| return src[operandIndex].isCondRegister(); |
| else |
| return dest.isCondRegister(); |
| } |
| bool isScalarRegister(int operandIndex) { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| if (operandIndex < NumSrcOperands) |
| return src[operandIndex].isScalarRegister(); |
| else |
| return dest.isScalarRegister(); |
| } |
| bool isSrcOperand(int operandIndex) { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| if (operandIndex < NumSrcOperands) |
| return true; |
| return false; |
| } |
| |
| bool isDstOperand(int operandIndex) { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| if (operandIndex >= NumSrcOperands) |
| return true; |
| return false; |
| } |
| int getOperandSize(int operandIndex) { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| if (operandIndex < NumSrcOperands) |
| return src[operandIndex].opSize(); |
| else |
| return dest.opSize(); |
| } |
| int |
| getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) |
| { |
| assert(operandIndex >= 0 && operandIndex < getNumOperands()); |
| |
| if (operandIndex < NumSrcOperands) |
| return src[operandIndex].regIndex(); |
| else |
| return dest.regIndex(); |
| } |
| int numSrcRegOperands() { |
| int operands = 0; |
| for (int i = 0; i < NumSrcOperands; i++) { |
| if (src[i].isVectorRegister()) { |
| operands++; |
| } |
| } |
| return operands; |
| } |
| int numDstRegOperands() { return dest.isVectorRegister(); } |
| int getNumOperands() { return NumSrcOperands + 1; } |
| }; |
| |
| template<typename DataType, int NumSrcOperands> |
| class ArithInst : public CommonInstBase<typename DataType::OperandType, |
| typename DataType::OperandType, |
| NumSrcOperands> |
| { |
| public: |
| std::string opcode_suffix() { return csprintf("_%s", DataType::label); } |
| |
| ArithInst(const Brig::BrigInstBase *ib, const BrigObject *obj, |
| const char *opcode) |
| : CommonInstBase<typename DataType::OperandType, |
| typename DataType::OperandType, |
| NumSrcOperands>(ib, obj, opcode) |
| { |
| } |
| }; |
| |
| template<typename DestOperandType, typename Src0OperandType, |
| typename Src1OperandType, typename Src2OperandType> |
| class ThreeNonUniformSourceInstBase : public HsailGPUStaticInst |
| { |
| protected: |
| typename DestOperandType::DestOperand dest; |
| typename Src0OperandType::SrcOperand src0; |
| typename Src1OperandType::SrcOperand src1; |
| typename Src2OperandType::SrcOperand src2; |
| |
| void |
| generateDisassembly() |
| { |
| disassembly = csprintf("%s %s,%s,%s,%s", opcode, dest.disassemble(), |
| src0.disassemble(), src1.disassemble(), |
| src2.disassemble()); |
| } |
| |
| public: |
| ThreeNonUniformSourceInstBase(const Brig::BrigInstBase *ib, |
| const BrigObject *obj, |
| const char *opcode) |
| : HsailGPUStaticInst(obj, opcode) |
| { |
| setFlag(ALU); |
| |
| unsigned op_offs = obj->getOperandPtr(ib->operands, 0); |
| dest.init(op_offs, obj); |
| |
| op_offs = obj->getOperandPtr(ib->operands, 1); |
| src0.init(op_offs, obj); |
| |
| op_offs = obj->getOperandPtr(ib->operands, 2); |
| src1.init(op_offs, obj); |
| |
| op_offs = obj->getOperandPtr(ib->operands, 3); |
| src2.init(op_offs, obj); |
| } |
| |
| bool isVectorRegister(int operandIndex) { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| if (!operandIndex) |
| return src0.isVectorRegister(); |
| else if (operandIndex == 1) |
| return src1.isVectorRegister(); |
| else if (operandIndex == 2) |
| return src2.isVectorRegister(); |
| else |
| return dest.isVectorRegister(); |
| } |
| bool isCondRegister(int operandIndex) { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| if (!operandIndex) |
| return src0.isCondRegister(); |
| else if (operandIndex == 1) |
| return src1.isCondRegister(); |
| else if (operandIndex == 2) |
| return src2.isCondRegister(); |
| else |
| return dest.isCondRegister(); |
| } |
| bool isScalarRegister(int operandIndex) { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| if (!operandIndex) |
| return src0.isScalarRegister(); |
| else if (operandIndex == 1) |
| return src1.isScalarRegister(); |
| else if (operandIndex == 2) |
| return src2.isScalarRegister(); |
| else |
| return dest.isScalarRegister(); |
| } |
| bool isSrcOperand(int operandIndex) { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| if (operandIndex < 3) |
| return true; |
| else |
| return false; |
| } |
| bool isDstOperand(int operandIndex) { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| if (operandIndex >= 3) |
| return true; |
| else |
| return false; |
| } |
| int getOperandSize(int operandIndex) { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| if (!operandIndex) |
| return src0.opSize(); |
| else if (operandIndex == 1) |
| return src1.opSize(); |
| else if (operandIndex == 2) |
| return src2.opSize(); |
| else |
| return dest.opSize(); |
| } |
| |
| int |
| getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) |
| { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| if (!operandIndex) |
| return src0.regIndex(); |
| else if (operandIndex == 1) |
| return src1.regIndex(); |
| else if (operandIndex == 2) |
| return src2.regIndex(); |
| else |
| return dest.regIndex(); |
| } |
| |
| int numSrcRegOperands() { |
| int operands = 0; |
| if (src0.isVectorRegister()) { |
| operands++; |
| } |
| if (src1.isVectorRegister()) { |
| operands++; |
| } |
| if (src2.isVectorRegister()) { |
| operands++; |
| } |
| return operands; |
| } |
| int numDstRegOperands() { return dest.isVectorRegister(); } |
| int getNumOperands() { return 4; } |
| }; |
| |
| template<typename DestDataType, typename Src0DataType, |
| typename Src1DataType, typename Src2DataType> |
| class ThreeNonUniformSourceInst : |
| public ThreeNonUniformSourceInstBase<typename DestDataType::OperandType, |
| typename Src0DataType::OperandType, |
| typename Src1DataType::OperandType, |
| typename Src2DataType::OperandType> |
| { |
| public: |
| typedef typename DestDataType::CType DestCType; |
| typedef typename Src0DataType::CType Src0CType; |
| typedef typename Src1DataType::CType Src1CType; |
| typedef typename Src2DataType::CType Src2CType; |
| |
| ThreeNonUniformSourceInst(const Brig::BrigInstBase *ib, |
| const BrigObject *obj, const char *opcode) |
| : ThreeNonUniformSourceInstBase<typename DestDataType::OperandType, |
| typename Src0DataType::OperandType, |
| typename Src1DataType::OperandType, |
| typename Src2DataType::OperandType>(ib, |
| obj, opcode) |
| { |
| } |
| }; |
| |
| template<typename DataType> |
| class CmovInst : public ThreeNonUniformSourceInst<DataType, B1, |
| DataType, DataType> |
| { |
| public: |
| CmovInst(const Brig::BrigInstBase *ib, const BrigObject *obj, |
| const char *opcode) |
| : ThreeNonUniformSourceInst<DataType, B1, DataType, |
| DataType>(ib, obj, opcode) |
| { |
| } |
| }; |
| |
| template<typename DataType> |
| class ExtractInsertInst : public ThreeNonUniformSourceInst<DataType, |
| DataType, U32, |
| U32> |
| { |
| public: |
| ExtractInsertInst(const Brig::BrigInstBase *ib, const BrigObject *obj, |
| const char *opcode) |
| : ThreeNonUniformSourceInst<DataType, DataType, U32, |
| U32>(ib, obj, opcode) |
| { |
| } |
| }; |
| |
| template<typename DestOperandType, typename Src0OperandType, |
| typename Src1OperandType> |
| class TwoNonUniformSourceInstBase : public HsailGPUStaticInst |
| { |
| protected: |
| typename DestOperandType::DestOperand dest; |
| typename Src0OperandType::SrcOperand src0; |
| typename Src1OperandType::SrcOperand src1; |
| |
| void |
| generateDisassembly() |
| { |
| disassembly = csprintf("%s %s,%s,%s", opcode, dest.disassemble(), |
| src0.disassemble(), src1.disassemble()); |
| } |
| |
| |
| public: |
| TwoNonUniformSourceInstBase(const Brig::BrigInstBase *ib, |
| const BrigObject *obj, const char *opcode) |
| : HsailGPUStaticInst(obj, opcode) |
| { |
| setFlag(ALU); |
| |
| unsigned op_offs = obj->getOperandPtr(ib->operands, 0); |
| dest.init(op_offs, obj); |
| |
| op_offs = obj->getOperandPtr(ib->operands, 1); |
| src0.init(op_offs, obj); |
| |
| op_offs = obj->getOperandPtr(ib->operands, 2); |
| src1.init(op_offs, obj); |
| } |
| bool isVectorRegister(int operandIndex) { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| if (!operandIndex) |
| return src0.isVectorRegister(); |
| else if (operandIndex == 1) |
| return src1.isVectorRegister(); |
| else |
| return dest.isVectorRegister(); |
| } |
| bool isCondRegister(int operandIndex) { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| if (!operandIndex) |
| return src0.isCondRegister(); |
| else if (operandIndex == 1) |
| return src1.isCondRegister(); |
| else |
| return dest.isCondRegister(); |
| } |
| bool isScalarRegister(int operandIndex) { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| if (!operandIndex) |
| return src0.isScalarRegister(); |
| else if (operandIndex == 1) |
| return src1.isScalarRegister(); |
| else |
| return dest.isScalarRegister(); |
| } |
| bool isSrcOperand(int operandIndex) { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| if (operandIndex < 2) |
| return true; |
| else |
| return false; |
| } |
| bool isDstOperand(int operandIndex) { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| if (operandIndex >= 2) |
| return true; |
| else |
| return false; |
| } |
| int getOperandSize(int operandIndex) { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| if (!operandIndex) |
| return src0.opSize(); |
| else if (operandIndex == 1) |
| return src1.opSize(); |
| else |
| return dest.opSize(); |
| } |
| |
| int |
| getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) |
| { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| if (!operandIndex) |
| return src0.regIndex(); |
| else if (operandIndex == 1) |
| return src1.regIndex(); |
| else |
| return dest.regIndex(); |
| } |
| |
| int numSrcRegOperands() { |
| int operands = 0; |
| if (src0.isVectorRegister()) { |
| operands++; |
| } |
| if (src1.isVectorRegister()) { |
| operands++; |
| } |
| return operands; |
| } |
| int numDstRegOperands() { return dest.isVectorRegister(); } |
| int getNumOperands() { return 3; } |
| }; |
| |
| template<typename DestDataType, typename Src0DataType, |
| typename Src1DataType> |
| class TwoNonUniformSourceInst : |
| public TwoNonUniformSourceInstBase<typename DestDataType::OperandType, |
| typename Src0DataType::OperandType, |
| typename Src1DataType::OperandType> |
| { |
| public: |
| typedef typename DestDataType::CType DestCType; |
| typedef typename Src0DataType::CType Src0CType; |
| typedef typename Src1DataType::CType Src1CType; |
| |
| TwoNonUniformSourceInst(const Brig::BrigInstBase *ib, |
| const BrigObject *obj, const char *opcode) |
| : TwoNonUniformSourceInstBase<typename DestDataType::OperandType, |
| typename Src0DataType::OperandType, |
| typename Src1DataType::OperandType>(ib, |
| obj, opcode) |
| { |
| } |
| }; |
| |
// helper function for ClassInst: returns true if src0 belongs to any
// of the floating-point classes selected by the bit mask src1
// (bits 0-1: NaN, 2: -inf, 3: -normal, 4: -subnormal, 5: -0,
//  6: +0, 7: +subnormal, 8: +normal, 9: +inf).
template<typename T>
bool
fpclassify(T src0, uint32_t src1)
{
    int fpclass = std::fpclassify(src0);

    if ((src1 & 0x3) && (fpclass == FP_NAN)) {
        return true;
    }

    // Select the negative- vs. positive-class bits by the sign bit.
    // (The previous test, src0 <= -0.0, misclassified +0.0 as a
    // negative zero because +0.0 == -0.0 in FP comparison.)
    if (std::signbit(src0)) {
        if ((src1 & 0x4) && fpclass == FP_INFINITE)
            return true;
        if ((src1 & 0x8) && fpclass == FP_NORMAL)
            return true;
        if ((src1 & 0x10) && fpclass == FP_SUBNORMAL)
            return true;
        if ((src1 & 0x20) && fpclass == FP_ZERO)
            return true;
    } else {
        if ((src1 & 0x40) && fpclass == FP_ZERO)
            return true;
        if ((src1 & 0x80) && fpclass == FP_SUBNORMAL)
            return true;
        if ((src1 & 0x100) && fpclass == FP_NORMAL)
            return true;
        if ((src1 & 0x200) && fpclass == FP_INFINITE)
            return true;
    }
    return false;
}
| |
| template<typename DataType> |
| class ClassInst : public TwoNonUniformSourceInst<B1, DataType, U32> |
| { |
| public: |
| ClassInst(const Brig::BrigInstBase *ib, const BrigObject *obj, |
| const char *opcode) |
| : TwoNonUniformSourceInst<B1, DataType, U32>(ib, obj, opcode) |
| { |
| } |
| }; |
| |
| template<typename DataType> |
| class ShiftInst : public TwoNonUniformSourceInst<DataType, DataType, U32> |
| { |
| public: |
| ShiftInst(const Brig::BrigInstBase *ib, const BrigObject *obj, |
| const char *opcode) |
| : TwoNonUniformSourceInst<DataType, DataType, U32>(ib, obj, opcode) |
| { |
| } |
| }; |
| |
| // helper function for CmpInst |
| template<typename T> |
| bool |
| compare(T src0, T src1, Brig::BrigCompareOperation cmpOp) |
| { |
| using namespace Brig; |
| |
| switch (cmpOp) { |
| case BRIG_COMPARE_EQ: |
| case BRIG_COMPARE_EQU: |
| case BRIG_COMPARE_SEQ: |
| case BRIG_COMPARE_SEQU: |
| return (src0 == src1); |
| |
| case BRIG_COMPARE_NE: |
| case BRIG_COMPARE_NEU: |
| case BRIG_COMPARE_SNE: |
| case BRIG_COMPARE_SNEU: |
| return (src0 != src1); |
| |
| case BRIG_COMPARE_LT: |
| case BRIG_COMPARE_LTU: |
| case BRIG_COMPARE_SLT: |
| case BRIG_COMPARE_SLTU: |
| return (src0 < src1); |
| |
| case BRIG_COMPARE_LE: |
| case BRIG_COMPARE_LEU: |
| case BRIG_COMPARE_SLE: |
| case BRIG_COMPARE_SLEU: |
| return (src0 <= src1); |
| |
| case BRIG_COMPARE_GT: |
| case BRIG_COMPARE_GTU: |
| case BRIG_COMPARE_SGT: |
| case BRIG_COMPARE_SGTU: |
| return (src0 > src1); |
| |
| case BRIG_COMPARE_GE: |
| case BRIG_COMPARE_GEU: |
| case BRIG_COMPARE_SGE: |
| case BRIG_COMPARE_SGEU: |
| return (src0 >= src1); |
| |
| case BRIG_COMPARE_NUM: |
| case BRIG_COMPARE_SNUM: |
| return (src0 == src0) || (src1 == src1); |
| |
| case BRIG_COMPARE_NAN: |
| case BRIG_COMPARE_SNAN: |
| return (src0 != src0) || (src1 != src1); |
| |
| default: |
| fatal("Bad cmpOp value %d\n", (int)cmpOp); |
| } |
| } |
| |
// Helper for the HSAIL firstbit instruction: counts the bits from the
// MSB down to (but not including) the first significant bit.  For a
// value whose int64_t view is non-negative that is the number of
// leading zeros before the first 1; for a negative view the bits are
// inverted first, so it counts leading sign bits.  Returns -1 when no
// such bit exists (src0 == 0, or a signed value of all 1s such as -1).
// NOTE(review): an unsigned 64-bit src0 with the MSB set is treated
// as negative by the (int64_t) cast and inverted — confirm this
// matches the intended firstbit_u64 semantics.
template<typename T>
int32_t
firstbit(T src0)
{
    if (!src0)
        return -1;

    //handle positive and negative numbers
    T tmp = ((int64_t)src0 < 0) ? (~src0) : (src0);

    // every bit matched the sign bit (e.g. src0 == -1): without this
    // check the scan below would run past bit 0 and never terminate
    if (!tmp)
        return -1;

    //the starting pos is MSB
    int pos = 8 * sizeof(T) - 1;
    int cnt = 0;

    // search for the first bit set to 1, shifting in T's own width;
    // a plain (1 << pos) is int-width and undefined for pos > 31
    while (!(tmp & (static_cast<T>(1) << pos))) {
        ++cnt;
        --pos;
    }
    return cnt;
}
| |
| const char* cmpOpToString(Brig::BrigCompareOperation cmpOp); |
| |
| template<typename DestOperandType, typename SrcOperandType> |
| class CmpInstBase : public CommonInstBase<DestOperandType, SrcOperandType, |
| 2> |
| { |
| protected: |
| Brig::BrigCompareOperation cmpOp; |
| |
| public: |
| CmpInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, |
| const char *_opcode) |
| : CommonInstBase<DestOperandType, SrcOperandType, 2>(ib, obj, |
| _opcode) |
| { |
| assert(ib->base.kind == Brig::BRIG_KIND_INST_CMP); |
| Brig::BrigInstCmp *i = (Brig::BrigInstCmp*)ib; |
| cmpOp = (Brig::BrigCompareOperation)i->compare; |
| } |
| }; |
| |
| template<typename DestDataType, typename SrcDataType> |
| class CmpInst : public CmpInstBase<typename DestDataType::OperandType, |
| typename SrcDataType::OperandType> |
| { |
| public: |
| std::string |
| opcode_suffix() |
| { |
| return csprintf("_%s_%s_%s", cmpOpToString(this->cmpOp), |
| DestDataType::label, SrcDataType::label); |
| } |
| |
| CmpInst(const Brig::BrigInstBase *ib, const BrigObject *obj, |
| const char *_opcode) |
| : CmpInstBase<typename DestDataType::OperandType, |
| typename SrcDataType::OperandType>(ib, obj, _opcode) |
| { |
| } |
| }; |
| |
| template<typename DestDataType, typename SrcDataType> |
| class CvtInst : public CommonInstBase<typename DestDataType::OperandType, |
| typename SrcDataType::OperandType, 1> |
| { |
| public: |
| std::string opcode_suffix() |
| { |
| return csprintf("_%s_%s", DestDataType::label, SrcDataType::label); |
| } |
| |
| CvtInst(const Brig::BrigInstBase *ib, const BrigObject *obj, |
| const char *_opcode) |
| : CommonInstBase<typename DestDataType::OperandType, |
| typename SrcDataType::OperandType, |
| 1>(ib, obj, _opcode) |
| { |
| } |
| }; |
| |
| template<typename DestDataType, typename SrcDataType> |
| class PopcountInst : |
| public CommonInstBase<typename DestDataType::OperandType, |
| typename SrcDataType::OperandType, 1> |
| { |
| public: |
| std::string opcode_suffix() |
| { |
| return csprintf("_%s_%s", DestDataType::label, SrcDataType::label); |
| } |
| |
| PopcountInst(const Brig::BrigInstBase *ib, const BrigObject *obj, |
| const char *_opcode) |
| : CommonInstBase<typename DestDataType::OperandType, |
| typename SrcDataType::OperandType, |
| 1>(ib, obj, _opcode) |
| { |
| } |
| }; |
| |
| class Stub : public HsailGPUStaticInst |
| { |
| public: |
| Stub(const Brig::BrigInstBase *ib, const BrigObject *obj, |
| const char *_opcode) |
| : HsailGPUStaticInst(obj, _opcode) |
| { |
| } |
| |
| void generateDisassembly() override |
| { |
| disassembly = csprintf("%s", opcode); |
| } |
| |
| bool isVectorRegister(int operandIndex) override { return false; } |
| bool isCondRegister(int operandIndex) override { return false; } |
| bool isScalarRegister(int operandIndex) override { return false; } |
| bool isSrcOperand(int operandIndex) override { return false; } |
| bool isDstOperand(int operandIndex) override { return false; } |
| int getOperandSize(int operandIndex) override { return 0; } |
| |
| int |
| getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override |
| { |
| return -1; |
| } |
| |
| int numSrcRegOperands() override { return 0; } |
| int numDstRegOperands() override { return 0; } |
| int getNumOperands() override { return 0; } |
| }; |
| |
| class SpecialInstNoSrcNoDest : public HsailGPUStaticInst |
| { |
| public: |
| SpecialInstNoSrcNoDest(const Brig::BrigInstBase *ib, |
| const BrigObject *obj, const char *_opcode) |
| : HsailGPUStaticInst(obj, _opcode) |
| { |
| } |
| |
| bool isVectorRegister(int operandIndex) override { return false; } |
| bool isCondRegister(int operandIndex) override { return false; } |
| bool isScalarRegister(int operandIndex) override { return false; } |
| bool isSrcOperand(int operandIndex) override { return false; } |
| bool isDstOperand(int operandIndex) override { return false; } |
| int getOperandSize(int operandIndex) override { return 0; } |
| |
| int |
| getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override |
| { |
| return -1; |
| } |
| |
| int numSrcRegOperands() override { return 0; } |
| int numDstRegOperands() override { return 0; } |
| int getNumOperands() override { return 0; } |
| }; |
| |
| template<typename DestOperandType> |
| class SpecialInstNoSrcBase : public HsailGPUStaticInst |
| { |
| protected: |
| typename DestOperandType::DestOperand dest; |
| |
| void generateDisassembly() |
| { |
| disassembly = csprintf("%s %s", opcode, dest.disassemble()); |
| } |
| |
| public: |
| SpecialInstNoSrcBase(const Brig::BrigInstBase *ib, |
| const BrigObject *obj, const char *_opcode) |
| : HsailGPUStaticInst(obj, _opcode) |
| { |
| unsigned op_offs = obj->getOperandPtr(ib->operands, 0); |
| dest.init(op_offs, obj); |
| } |
| |
| bool isVectorRegister(int operandIndex) { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| return dest.isVectorRegister(); |
| } |
| bool isCondRegister(int operandIndex) { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| return dest.isCondRegister(); |
| } |
| bool isScalarRegister(int operandIndex) { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| return dest.isScalarRegister(); |
| } |
| bool isSrcOperand(int operandIndex) { return false; } |
| bool isDstOperand(int operandIndex) { return true; } |
| int getOperandSize(int operandIndex) { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| return dest.opSize(); |
| } |
| |
| int |
| getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) |
| { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| return dest.regIndex(); |
| } |
| |
| int numSrcRegOperands() { return 0; } |
| int numDstRegOperands() { return dest.isVectorRegister(); } |
| int getNumOperands() { return 1; } |
| }; |
| |
| template<typename DestDataType> |
| class SpecialInstNoSrc : |
| public SpecialInstNoSrcBase<typename DestDataType::OperandType> |
| { |
| public: |
| typedef typename DestDataType::CType DestCType; |
| |
| SpecialInstNoSrc(const Brig::BrigInstBase *ib, const BrigObject *obj, |
| const char *_opcode) |
| : SpecialInstNoSrcBase<typename DestDataType::OperandType>(ib, obj, |
| _opcode) |
| { |
| } |
| }; |
| |
| template<typename DestOperandType> |
| class SpecialInst1SrcBase : public HsailGPUStaticInst |
| { |
| protected: |
| typedef int SrcCType; // used in execute() template |
| |
| typename DestOperandType::DestOperand dest; |
| ImmOperand<SrcCType> src0; |
| |
| void |
| generateDisassembly() |
| { |
| disassembly = csprintf("%s %s,%s", opcode, dest.disassemble(), |
| src0.disassemble()); |
| } |
| |
| public: |
| SpecialInst1SrcBase(const Brig::BrigInstBase *ib, |
| const BrigObject *obj, const char *_opcode) |
| : HsailGPUStaticInst(obj, _opcode) |
| { |
| setFlag(ALU); |
| |
| unsigned op_offs = obj->getOperandPtr(ib->operands, 0); |
| dest.init(op_offs, obj); |
| |
| op_offs = obj->getOperandPtr(ib->operands, 1); |
| src0.init(op_offs, obj); |
| } |
| bool isVectorRegister(int operandIndex) { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| return dest.isVectorRegister(); |
| } |
| bool isCondRegister(int operandIndex) { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| return dest.isCondRegister(); |
| } |
| bool isScalarRegister(int operandIndex) { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| return dest.isScalarRegister(); |
| } |
| bool isSrcOperand(int operandIndex) { return false; } |
| bool isDstOperand(int operandIndex) { return true; } |
| int getOperandSize(int operandIndex) { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| return dest.opSize(); |
| } |
| |
| int |
| getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) |
| { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| return dest.regIndex(); |
| } |
| |
| int numSrcRegOperands() { return 0; } |
| int numDstRegOperands() { return dest.isVectorRegister(); } |
| int getNumOperands() { return 1; } |
| }; |
| |
| template<typename DestDataType> |
| class SpecialInst1Src : |
| public SpecialInst1SrcBase<typename DestDataType::OperandType> |
| { |
| public: |
| typedef typename DestDataType::CType DestCType; |
| |
| SpecialInst1Src(const Brig::BrigInstBase *ib, const BrigObject *obj, |
| const char *_opcode) |
| : SpecialInst1SrcBase<typename DestDataType::OperandType>(ib, obj, |
| _opcode) |
| { |
| } |
| }; |
| |
| class Ret : public SpecialInstNoSrcNoDest |
| { |
| public: |
| typedef SpecialInstNoSrcNoDest Base; |
| |
| Ret(const Brig::BrigInstBase *ib, const BrigObject *obj) |
| : Base(ib, obj, "ret") |
| { |
| setFlag(GPUStaticInst::Return); |
| } |
| |
| void execute(GPUDynInstPtr gpuDynInst); |
| }; |
| |
| class Barrier : public SpecialInstNoSrcNoDest |
| { |
| public: |
| typedef SpecialInstNoSrcNoDest Base; |
| uint8_t width; |
| |
| Barrier(const Brig::BrigInstBase *ib, const BrigObject *obj) |
| : Base(ib, obj, "barrier") |
| { |
| setFlag(GPUStaticInst::MemBarrier); |
| assert(ib->base.kind == Brig::BRIG_KIND_INST_BR); |
| width = (uint8_t)((Brig::BrigInstBr*)ib)->width; |
| } |
| |
| void execute(GPUDynInstPtr gpuDynInst); |
| }; |
| |
| class MemFence : public SpecialInstNoSrcNoDest |
| { |
| public: |
| typedef SpecialInstNoSrcNoDest Base; |
| |
| Brig::BrigMemoryOrder memFenceMemOrder; |
| Brig::BrigMemoryScope memFenceScopeSegGroup; |
| Brig::BrigMemoryScope memFenceScopeSegGlobal; |
| Brig::BrigMemoryScope memFenceScopeSegImage; |
| |
| MemFence(const Brig::BrigInstBase *ib, const BrigObject *obj) |
| : Base(ib, obj, "memfence") |
| { |
| assert(ib->base.kind == Brig::BRIG_KIND_INST_MEM_FENCE); |
| |
| memFenceScopeSegGlobal = (Brig::BrigMemoryScope) |
| ((Brig::BrigInstMemFence*)ib)->globalSegmentMemoryScope; |
| |
| memFenceScopeSegGroup = (Brig::BrigMemoryScope) |
| ((Brig::BrigInstMemFence*)ib)->groupSegmentMemoryScope; |
| |
| memFenceScopeSegImage = (Brig::BrigMemoryScope) |
| ((Brig::BrigInstMemFence*)ib)->imageSegmentMemoryScope; |
| |
| memFenceMemOrder = (Brig::BrigMemoryOrder) |
| ((Brig::BrigInstMemFence*)ib)->memoryOrder; |
| |
| setFlag(MemoryRef); |
| setFlag(GPUStaticInst::MemFence); |
| |
| switch (memFenceMemOrder) { |
| case Brig::BRIG_MEMORY_ORDER_NONE: |
| setFlag(NoOrder); |
| break; |
| case Brig::BRIG_MEMORY_ORDER_RELAXED: |
| setFlag(RelaxedOrder); |
| break; |
| case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE: |
| setFlag(Acquire); |
| break; |
| case Brig::BRIG_MEMORY_ORDER_SC_RELEASE: |
| setFlag(Release); |
| break; |
| case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE: |
| setFlag(AcquireRelease); |
| break; |
| default: |
| fatal("MemInst has bad BrigMemoryOrder\n"); |
| } |
| |
| // set inst flags based on scopes |
| if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE && |
| memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) { |
| setFlag(GPUStaticInst::GlobalSegment); |
| |
| /** |
| * A memory fence that has scope for |
| * both segments will use the global |
| * segment, and be executed in the |
| * global memory pipeline, therefore, |
| * we set the segment to match the |
| * global scope only |
| */ |
| switch (memFenceScopeSegGlobal) { |
| case Brig::BRIG_MEMORY_SCOPE_NONE: |
| setFlag(NoScope); |
| break; |
| case Brig::BRIG_MEMORY_SCOPE_WORKITEM: |
| setFlag(WorkitemScope); |
| break; |
| case Brig::BRIG_MEMORY_SCOPE_WORKGROUP: |
| setFlag(WorkgroupScope); |
| break; |
| case Brig::BRIG_MEMORY_SCOPE_AGENT: |
| setFlag(DeviceScope); |
| break; |
| case Brig::BRIG_MEMORY_SCOPE_SYSTEM: |
| setFlag(SystemScope); |
| break; |
| default: |
| fatal("MemFence has bad global scope type\n"); |
| } |
| } else if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE) { |
| setFlag(GPUStaticInst::GlobalSegment); |
| |
| switch (memFenceScopeSegGlobal) { |
| case Brig::BRIG_MEMORY_SCOPE_NONE: |
| setFlag(NoScope); |
| break; |
| case Brig::BRIG_MEMORY_SCOPE_WORKITEM: |
| setFlag(WorkitemScope); |
| break; |
| case Brig::BRIG_MEMORY_SCOPE_WORKGROUP: |
| setFlag(WorkgroupScope); |
| break; |
| case Brig::BRIG_MEMORY_SCOPE_AGENT: |
| setFlag(DeviceScope); |
| break; |
| case Brig::BRIG_MEMORY_SCOPE_SYSTEM: |
| setFlag(SystemScope); |
| break; |
| default: |
| fatal("MemFence has bad global scope type\n"); |
| } |
| } else if (memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) { |
| setFlag(GPUStaticInst::GroupSegment); |
| |
| switch (memFenceScopeSegGroup) { |
| case Brig::BRIG_MEMORY_SCOPE_NONE: |
| setFlag(NoScope); |
| break; |
| case Brig::BRIG_MEMORY_SCOPE_WORKITEM: |
| setFlag(WorkitemScope); |
| break; |
| case Brig::BRIG_MEMORY_SCOPE_WORKGROUP: |
| setFlag(WorkgroupScope); |
| break; |
| case Brig::BRIG_MEMORY_SCOPE_AGENT: |
| setFlag(DeviceScope); |
| break; |
| case Brig::BRIG_MEMORY_SCOPE_SYSTEM: |
| setFlag(SystemScope); |
| break; |
| default: |
| fatal("MemFence has bad group scope type\n"); |
| } |
| } else { |
| fatal("MemFence constructor: bad scope specifiers\n"); |
| } |
| } |
| |
| void |
| initiateAcc(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *wave = gpuDynInst->wavefront(); |
| wave->computeUnit->injectGlobalMemFence(gpuDynInst); |
| } |
| |
| void |
| execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *w = gpuDynInst->wavefront(); |
| // 2 cases: |
| // * memfence to a sequentially consistent memory (e.g., LDS). |
| // These can be handled as no-ops. |
| // * memfence to a relaxed consistency cache (e.g., Hermes, Viper, |
| // etc.). We send a packet, tagged with the memory order and |
| // scope, and let the GPU coalescer handle it. |
| |
| if (isGlobalSeg()) { |
| gpuDynInst->simdId = w->simdId; |
| gpuDynInst->wfSlotId = w->wfSlotId; |
| gpuDynInst->wfDynId = w->wfDynId; |
| gpuDynInst->kern_id = w->kernId; |
| gpuDynInst->cu_id = w->computeUnit->cu_id; |
| |
| gpuDynInst->useContinuation = false; |
| GlobalMemPipeline* gmp = &(w->computeUnit->globalMemoryPipe); |
| gmp->issueRequest(gpuDynInst); |
| |
| w->wrGmReqsInPipe--; |
| w->rdGmReqsInPipe--; |
| w->memReqsInPipe--; |
| w->outstandingReqs++; |
| } else if (isGroupSeg()) { |
| // no-op |
| } else { |
| fatal("MemFence execute: bad op type\n"); |
| } |
| } |
| }; |
| |
| class Call : public HsailGPUStaticInst |
| { |
| public: |
| // private helper functions |
| void calcAddr(Wavefront* w, GPUDynInstPtr m); |
| |
| void |
| generateDisassembly() |
| { |
| if (dest.disassemble() == "") { |
| disassembly = csprintf("%s %s (%s)", opcode, src0.disassemble(), |
| src1.disassemble()); |
| } else { |
| disassembly = csprintf("%s %s (%s) (%s)", opcode, |
| src0.disassemble(), dest.disassemble(), |
| src1.disassemble()); |
| } |
| } |
| |
| bool |
| isPseudoOp() |
| { |
| std::string func_name = src0.disassemble(); |
| if (func_name.find("__gem5_hsail_op") != std::string::npos) { |
| return true; |
| } |
| return false; |
| } |
| |
| // member variables |
| ListOperand dest; |
| FunctionRefOperand src0; |
| ListOperand src1; |
| HsailCode *func_ptr; |
| |
| // exec function for pseudo instructions mapped on top of call opcode |
| void execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst); |
| |
| // user-defined pseudo instructions |
| void MagicPrintLane(Wavefront *w); |
| void MagicPrintLane64(Wavefront *w); |
| void MagicPrintWF32(Wavefront *w); |
| void MagicPrintWF64(Wavefront *w); |
| void MagicPrintWFFloat(Wavefront *w); |
| void MagicSimBreak(Wavefront *w); |
| void MagicPrefixSum(Wavefront *w); |
| void MagicReduction(Wavefront *w); |
| void MagicMaskLower(Wavefront *w); |
| void MagicMaskUpper(Wavefront *w); |
| void MagicJoinWFBar(Wavefront *w); |
| void MagicWaitWFBar(Wavefront *w); |
| void MagicPanic(Wavefront *w); |
| |
| void MagicAtomicNRAddGlobalU32Reg(Wavefront *w, |
| GPUDynInstPtr gpuDynInst); |
| |
| void MagicAtomicNRAddGroupU32Reg(Wavefront *w, |
| GPUDynInstPtr gpuDynInst); |
| |
| void MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst); |
| |
| void MagicXactCasLd(Wavefront *w); |
| void MagicMostSigThread(Wavefront *w); |
| void MagicMostSigBroadcast(Wavefront *w); |
| |
| void MagicPrintWF32ID(Wavefront *w); |
| void MagicPrintWFID64(Wavefront *w); |
| |
| Call(const Brig::BrigInstBase *ib, const BrigObject *obj) |
| : HsailGPUStaticInst(obj, "call") |
| { |
| setFlag(ALU); |
| unsigned op_offs = obj->getOperandPtr(ib->operands, 0); |
| dest.init(op_offs, obj); |
| op_offs = obj->getOperandPtr(ib->operands, 1); |
| src0.init(op_offs, obj); |
| |
| func_ptr = nullptr; |
| std::string func_name = src0.disassemble(); |
| if (!isPseudoOp()) { |
| func_ptr = dynamic_cast<HsailCode*>(obj-> |
| getFunction(func_name)); |
| |
| if (!func_ptr) |
| fatal("call::exec cannot find function: %s\n", func_name); |
| } |
| |
| op_offs = obj->getOperandPtr(ib->operands, 2); |
| src1.init(op_offs, obj); |
| } |
| |
| bool isVectorRegister(int operandIndex) { return false; } |
| bool isCondRegister(int operandIndex) { return false; } |
| bool isScalarRegister(int operandIndex) { return false; } |
| bool isSrcOperand(int operandIndex) { return false; } |
| bool isDstOperand(int operandIndex) { return false; } |
| int getOperandSize(int operandIndex) { return 0; } |
| |
| int |
| getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) |
| { |
| return -1; |
| } |
| |
| void |
| execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *w = gpuDynInst->wavefront(); |
| |
| std::string func_name = src0.disassemble(); |
| if (isPseudoOp()) { |
| execPseudoInst(w, gpuDynInst); |
| } else { |
| fatal("Native HSAIL functions are not yet implemented: %s\n", |
| func_name); |
| } |
| } |
| int numSrcRegOperands() { return 0; } |
| int numDstRegOperands() { return 0; } |
| int getNumOperands() { return 2; } |
| }; |
| |
| template<typename T> T heynot(T arg) { return ~arg; } |
| template<> inline bool heynot<bool>(bool arg) { return !arg; } |
| |
| |
    /* Explicit specialization declarations for the per-type static
     * `label` member. Without these, some clang versions warn when the
     * specializations' definitions live in another translation unit.
     */
    template<> const char *B1::label;
    template<> const char *B8::label;
    template<> const char *B16::label;
    template<> const char *B32::label;
    template<> const char *B64::label;
    template<> const char *S8::label;
    template<> const char *S16::label;
    template<> const char *S32::label;
    template<> const char *S64::label;
    template<> const char *U8::label;
    template<> const char *U16::label;
    template<> const char *U32::label;
    template<> const char *U64::label;
    template<> const char *F32::label;
    template<> const char *F64::label;
| |
| } // namespace HsailISA |
| |
| #endif // __ARCH_HSAIL_INSTS_DECL_HH__ |