| /* |
| * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. |
| * All rights reserved. |
| * |
| * For use for simulation and test purposes only |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright notice, |
| * this list of conditions and the following disclaimer. |
| * |
| * 2. Redistributions in binary form must reproduce the above copyright notice, |
| * this list of conditions and the following disclaimer in the documentation |
| * and/or other materials provided with the distribution. |
| * |
| * 3. Neither the name of the copyright holder nor the names of its contributors |
| * may be used to endorse or promote products derived from this software |
| * without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
| * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| * POSSIBILITY OF SUCH DAMAGE. |
| * |
| * Author: Steve Reinhardt |
| */ |
| |
| #ifndef __ARCH_HSAIL_INSTS_BRANCH_HH__ |
| #define __ARCH_HSAIL_INSTS_BRANCH_HH__ |
| |
| #include "arch/hsail/insts/gpu_static_inst.hh" |
| #include "arch/hsail/operand.hh" |
| #include "gpu-compute/gpu_dyn_inst.hh" |
| #include "gpu-compute/wavefront.hh" |
| |
| namespace HsailISA |
| { |
| |
| // The main difference between a direct branch and an indirect branch |
| // is whether the target is a register or a label, so we can share a |
| // lot of code if we template the base implementation on that type. |
| template<typename TargetType> |
| class BrnInstBase : public HsailGPUStaticInst |
| { |
| public: |
| void generateDisassembly() override; |
| |
| Brig::BrigWidth8_t width; |
| TargetType target; |
| |
| BrnInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj) |
| : HsailGPUStaticInst(obj, "brn") |
| { |
| setFlag(Branch); |
| setFlag(UnconditionalJump); |
| width = ((Brig::BrigInstBr*)ib)->width; |
| unsigned op_offs = obj->getOperandPtr(ib->operands, 0); |
| target.init(op_offs, obj); |
| } |
| |
| uint32_t getTargetPc() override { return target.getTarget(0, 0); } |
| |
| bool isVectorRegister(int operandIndex) override { |
| assert(operandIndex >= 0 && operandIndex < getNumOperands()); |
| return target.isVectorRegister(); |
| } |
| bool isCondRegister(int operandIndex) override { |
| assert(operandIndex >= 0 && operandIndex < getNumOperands()); |
| return target.isCondRegister(); |
| } |
| bool isScalarRegister(int operandIndex) override { |
| assert(operandIndex >= 0 && operandIndex < getNumOperands()); |
| return target.isScalarRegister(); |
| } |
| |
| bool isSrcOperand(int operandIndex) override { |
| assert(operandIndex >= 0 && operandIndex < getNumOperands()); |
| return true; |
| } |
| |
| bool isDstOperand(int operandIndex) override { |
| return false; |
| } |
| |
| int getOperandSize(int operandIndex) override { |
| assert(operandIndex >= 0 && operandIndex < getNumOperands()); |
| return target.opSize(); |
| } |
| |
| int |
| getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override |
| { |
| assert(operandIndex >= 0 && operandIndex < getNumOperands()); |
| return target.regIndex(); |
| } |
| |
| int getNumOperands() override { |
| return 1; |
| } |
| |
| void execute(GPUDynInstPtr gpuDynInst) override; |
| }; |
| |
| template<typename TargetType> |
| void |
| BrnInstBase<TargetType>::generateDisassembly() |
| { |
| std::string widthClause; |
| |
| if (width != 1) { |
| widthClause = csprintf("_width(%d)", width); |
| } |
| |
| disassembly = csprintf("%s%s %s", opcode, widthClause, |
| target.disassemble()); |
| } |
| |
| template<typename TargetType> |
| void |
| BrnInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *w = gpuDynInst->wavefront(); |
| |
| if (getTargetPc() == w->rpc()) { |
| w->popFromReconvergenceStack(); |
| } else { |
| // Rpc and execution mask remain the same |
| w->pc(getTargetPc()); |
| } |
| } |
| |
| class BrnDirectInst : public BrnInstBase<LabelOperand> |
| { |
| public: |
| BrnDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) |
| : BrnInstBase<LabelOperand>(ib, obj) |
| { |
| } |
| int numSrcRegOperands() { return 0; } |
| int numDstRegOperands() { return 0; } |
| }; |
| |
| class BrnIndirectInst : public BrnInstBase<SRegOperand> |
| { |
| public: |
| BrnIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) |
| : BrnInstBase<SRegOperand>(ib, obj) |
| { |
| } |
| int numSrcRegOperands() { return target.isVectorRegister(); } |
| int numDstRegOperands() { return 0; } |
| }; |
| |
| GPUStaticInst* decodeBrn(const Brig::BrigInstBase *ib, |
| const BrigObject *obj); |
| |
| template<typename TargetType> |
| class CbrInstBase : public HsailGPUStaticInst |
| { |
| public: |
| void generateDisassembly() override; |
| |
| Brig::BrigWidth8_t width; |
| CRegOperand cond; |
| TargetType target; |
| |
| CbrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj) |
| : HsailGPUStaticInst(obj, "cbr") |
| { |
| setFlag(Branch); |
| width = ((Brig::BrigInstBr *)ib)->width; |
| unsigned op_offs = obj->getOperandPtr(ib->operands, 0); |
| cond.init(op_offs, obj); |
| op_offs = obj->getOperandPtr(ib->operands, 1); |
| target.init(op_offs, obj); |
| } |
| |
| uint32_t getTargetPc() override { return target.getTarget(0, 0); } |
| |
| void execute(GPUDynInstPtr gpuDynInst) override; |
| // Assumption: Target is operand 0, Condition Register is operand 1 |
| bool isVectorRegister(int operandIndex) override { |
| assert(operandIndex >= 0 && operandIndex < getNumOperands()); |
| if (!operandIndex) |
| return target.isVectorRegister(); |
| else |
| return false; |
| } |
| bool isCondRegister(int operandIndex) override { |
| assert(operandIndex >= 0 && operandIndex < getNumOperands()); |
| if (!operandIndex) |
| return target.isCondRegister(); |
| else |
| return true; |
| } |
| bool isScalarRegister(int operandIndex) override { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| if (!operandIndex) |
| return target.isScalarRegister(); |
| else |
| return false; |
| } |
| bool isSrcOperand(int operandIndex) override { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| if (operandIndex == 0) |
| return true; |
| return false; |
| } |
| // both Condition Register and Target are source operands |
| bool isDstOperand(int operandIndex) override { |
| return false; |
| } |
| int getOperandSize(int operandIndex) override { |
| assert(operandIndex >= 0 && operandIndex < getNumOperands()); |
| if (!operandIndex) |
| return target.opSize(); |
| else |
| return 1; |
| } |
| int |
| getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override |
| { |
| assert(operandIndex >= 0 && operandIndex < getNumOperands()); |
| if (!operandIndex) |
| return target.regIndex(); |
| else |
| return -1; |
| } |
| |
| // Operands = Target, Condition Register |
| int getNumOperands() override { |
| return 2; |
| } |
| }; |
| |
| template<typename TargetType> |
| void |
| CbrInstBase<TargetType>::generateDisassembly() |
| { |
| std::string widthClause; |
| |
| if (width != 1) { |
| widthClause = csprintf("_width(%d)", width); |
| } |
| |
| disassembly = csprintf("%s%s %s,%s", opcode, widthClause, |
| cond.disassemble(), target.disassemble()); |
| } |
| |
| template<typename TargetType> |
| void |
| CbrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *w = gpuDynInst->wavefront(); |
| |
| const uint32_t curr_pc M5_VAR_USED = w->pc(); |
| const uint32_t curr_rpc = w->rpc(); |
| const VectorMask curr_mask = w->execMask(); |
| |
| /** |
| * TODO: can we move this pop outside the instruction, and |
| * into the wavefront? |
| */ |
| w->popFromReconvergenceStack(); |
| |
| // immediate post-dominator instruction |
| const uint32_t rpc = static_cast<uint32_t>(ipdInstNum()); |
| if (curr_rpc != rpc) { |
| w->pushToReconvergenceStack(rpc, curr_rpc, curr_mask); |
| } |
| |
| // taken branch |
| const uint32_t true_pc = getTargetPc(); |
| VectorMask true_mask; |
| for (unsigned int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { |
| true_mask[lane] = cond.get<bool>(w, lane) & curr_mask[lane]; |
| } |
| |
| // not taken branch |
| const uint32_t false_pc = nextInstAddr(); |
| assert(true_pc != false_pc); |
| if (false_pc != rpc && true_mask.count() < curr_mask.count()) { |
| VectorMask false_mask = curr_mask & ~true_mask; |
| w->pushToReconvergenceStack(false_pc, rpc, false_mask); |
| } |
| |
| if (true_pc != rpc && true_mask.count()) { |
| w->pushToReconvergenceStack(true_pc, rpc, true_mask); |
| } |
| assert(w->pc() != curr_pc); |
| } |
| |
| |
| class CbrDirectInst : public CbrInstBase<LabelOperand> |
| { |
| public: |
| CbrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) |
| : CbrInstBase<LabelOperand>(ib, obj) |
| { |
| } |
| // the source operand of a conditional branch is a Condition |
| // Register which is not stored in the VRF |
| // so we do not count it as a source-register operand |
| // even though, formally, it is one. |
| int numSrcRegOperands() { return 0; } |
| int numDstRegOperands() { return 0; } |
| }; |
| |
| class CbrIndirectInst : public CbrInstBase<SRegOperand> |
| { |
| public: |
| CbrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) |
| : CbrInstBase<SRegOperand>(ib, obj) |
| { |
| } |
| // one source operand of the conditional indirect branch is a Condition |
| // register which is not stored in the VRF so we do not count it |
| // as a source-register operand even though, formally, it is one. |
| int numSrcRegOperands() { return target.isVectorRegister(); } |
| int numDstRegOperands() { return 0; } |
| }; |
| |
| GPUStaticInst* decodeCbr(const Brig::BrigInstBase *ib, |
| const BrigObject *obj); |
| |
| template<typename TargetType> |
| class BrInstBase : public HsailGPUStaticInst |
| { |
| public: |
| void generateDisassembly() override; |
| |
| ImmOperand<uint32_t> width; |
| TargetType target; |
| |
| BrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj) |
| : HsailGPUStaticInst(obj, "br") |
| { |
| setFlag(Branch); |
| setFlag(UnconditionalJump); |
| width.init(((Brig::BrigInstBr *)ib)->width, obj); |
| unsigned op_offs = obj->getOperandPtr(ib->operands, 0); |
| target.init(op_offs, obj); |
| } |
| |
| uint32_t getTargetPc() override { return target.getTarget(0, 0); } |
| |
| void execute(GPUDynInstPtr gpuDynInst) override; |
| bool isVectorRegister(int operandIndex) override { |
| assert(operandIndex >= 0 && operandIndex < getNumOperands()); |
| return target.isVectorRegister(); |
| } |
| bool isCondRegister(int operandIndex) override { |
| assert(operandIndex >= 0 && operandIndex < getNumOperands()); |
| return target.isCondRegister(); |
| } |
| bool isScalarRegister(int operandIndex) override { |
| assert(operandIndex >= 0 && operandIndex < getNumOperands()); |
| return target.isScalarRegister(); |
| } |
| bool isSrcOperand(int operandIndex) override { |
| assert((operandIndex >= 0) && (operandIndex < getNumOperands())); |
| return true; |
| } |
| bool isDstOperand(int operandIndex) override { return false; } |
| int getOperandSize(int operandIndex) override { |
| assert(operandIndex >= 0 && operandIndex < getNumOperands()); |
| return target.opSize(); |
| } |
| int |
| getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override |
| { |
| assert(operandIndex >= 0 && operandIndex < getNumOperands()); |
| return target.regIndex(); |
| } |
| int getNumOperands() override { return 1; } |
| }; |
| |
| template<typename TargetType> |
| void |
| BrInstBase<TargetType>::generateDisassembly() |
| { |
| std::string widthClause; |
| |
| if (width.bits != 1) { |
| widthClause = csprintf("_width(%d)", width.bits); |
| } |
| |
| disassembly = csprintf("%s%s %s", opcode, widthClause, |
| target.disassemble()); |
| } |
| |
| template<typename TargetType> |
| void |
| BrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *w = gpuDynInst->wavefront(); |
| |
| if (getTargetPc() == w->rpc()) { |
| w->popFromReconvergenceStack(); |
| } else { |
| // Rpc and execution mask remain the same |
| w->pc(getTargetPc()); |
| } |
| } |
| |
| class BrDirectInst : public BrInstBase<LabelOperand> |
| { |
| public: |
| BrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) |
| : BrInstBase<LabelOperand>(ib, obj) |
| { |
| } |
| |
| int numSrcRegOperands() { return 0; } |
| int numDstRegOperands() { return 0; } |
| }; |
| |
| class BrIndirectInst : public BrInstBase<SRegOperand> |
| { |
| public: |
| BrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) |
| : BrInstBase<SRegOperand>(ib, obj) |
| { |
| } |
| int numSrcRegOperands() { return target.isVectorRegister(); } |
| int numDstRegOperands() { return 0; } |
| }; |
| |
| GPUStaticInst* decodeBr(const Brig::BrigInstBase *ib, |
| const BrigObject *obj); |
| } // namespace HsailISA |
| |
| #endif // __ARCH_HSAIL_INSTS_BRANCH_HH__ |