src/arch/hsail/insts/main.cc - public/gem5 - Git at Google

 /*
  * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
  * All rights reserved.
  *
  * For use for simulation and test purposes only
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *
  * 1. Redistributions of source code must retain the above copyright notice,
  * this list of conditions and the following disclaimer.
  *
  * 2. Redistributions in binary form must reproduce the above copyright notice,
  * this list of conditions and the following disclaimer in the documentation
  * and/or other materials provided with the distribution.
  *
  * 3. Neither the name of the copyright holder nor the names of its contributors
  * may be used to endorse or promote products derived from this software
  * without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  * Author: Steve Reinhardt
  */

 #include "arch/hsail/insts/decl.hh"
 #include "debug/GPUExec.hh"
 #include "gpu-compute/dispatcher.hh"
 #include "gpu-compute/simple_pool_manager.hh"

 namespace HsailISA
 {
     // Definitions of the static `label` string for each type specialization.
     // Every label is the lowercase spelling of the corresponding type name.
     // 'B' types (presumably untyped bit values of the given width -- confirm
     // against the declarations in decl.hh)
     template<> const char *B1::label = "b1";
     template<> const char *B8::label = "b8";
     template<> const char *B16::label = "b16";
     template<> const char *B32::label = "b32";
     template<> const char *B64::label = "b64";

     // 'S' types (presumably signed integers of the given width)
     template<> const char *S8::label = "s8";
     template<> const char *S16::label = "s16";
     template<> const char *S32::label = "s32";
     template<> const char *S64::label = "s64";

     // 'U' types (presumably unsigned integers of the given width)
     template<> const char *U8::label = "u8";
     template<> const char *U16::label = "u16";
     template<> const char *U32::label = "u32";
     template<> const char *U64::label = "u64";

     // 'F' types (presumably floating point values of the given width)
     template<> const char *F32::label = "f32";
     template<> const char *F64::label = "f64";

     /**
      * Translate a BRIG compare operation into its mnemonic string.
      *
      * @param cmpOp compare operation to translate
      * @return the lowercase mnemonic for cmpOp, or "unknown" when cmpOp is
      *         not one of the operations listed in the switch below
      */
     const char*
     cmpOpToString(Brig::BrigCompareOperation cmpOp)
     {
         using namespace Brig;

         // fallback used when no case below matches
         const char *mnemonic = "unknown";

         switch (cmpOp) {
           case BRIG_COMPARE_EQ:   mnemonic = "eq";   break;
           case BRIG_COMPARE_NE:   mnemonic = "ne";   break;
           case BRIG_COMPARE_LT:   mnemonic = "lt";   break;
           case BRIG_COMPARE_LE:   mnemonic = "le";   break;
           case BRIG_COMPARE_GT:   mnemonic = "gt";   break;
           case BRIG_COMPARE_GE:   mnemonic = "ge";   break;

           case BRIG_COMPARE_EQU:  mnemonic = "equ";  break;
           case BRIG_COMPARE_NEU:  mnemonic = "neu";  break;
           case BRIG_COMPARE_LTU:  mnemonic = "ltu";  break;
           case BRIG_COMPARE_LEU:  mnemonic = "leu";  break;
           case BRIG_COMPARE_GTU:  mnemonic = "gtu";  break;
           case BRIG_COMPARE_GEU:  mnemonic = "geu";  break;

           case BRIG_COMPARE_NUM:  mnemonic = "num";  break;
           case BRIG_COMPARE_NAN:  mnemonic = "nan";  break;

           case BRIG_COMPARE_SEQ:  mnemonic = "seq";  break;
           case BRIG_COMPARE_SNE:  mnemonic = "sne";  break;
           case BRIG_COMPARE_SLT:  mnemonic = "slt";  break;
           case BRIG_COMPARE_SLE:  mnemonic = "sle";  break;
           case BRIG_COMPARE_SGT:  mnemonic = "sgt";  break;
           case BRIG_COMPARE_SGE:  mnemonic = "sge";  break;

           case BRIG_COMPARE_SEQU: mnemonic = "sequ"; break;
           case BRIG_COMPARE_SNEU: mnemonic = "sneu"; break;
           case BRIG_COMPARE_SLTU: mnemonic = "sltu"; break;
           case BRIG_COMPARE_SLEU: mnemonic = "sleu"; break;
           case BRIG_COMPARE_SGTU: mnemonic = "sgtu"; break;
           case BRIG_COMPARE_SGEU: mnemonic = "sgeu"; break;

           case BRIG_COMPARE_SNUM: mnemonic = "snum"; break;
           case BRIG_COMPARE_SNAN: mnemonic = "snan"; break;

           default:
             break;
         }

         return mnemonic;
     }

     /**
      * Execute a Ret instruction for the wavefront that owns gpuDynInst.
      *
      * Clears the init-mask bit of every lane set in the wavefront's current
      * predicate, erases every buffered instruction after the first one and
      * asks for any pending fetch to be dropped. When no init-mask bit is
      * left, the wavefront is stopped, its vector register reservation is
      * released, and then either a global memory fence is injected (LDS
      * reference count for the workgroup reached zero) or the dispatcher is
      * asked to schedule another dispatch.
      *
      * @param gpuDynInst dynamic instruction being executed; it is reused as
      *                   the packet handed to injectGlobalMemFence()
      */
     void
     Ret::execute(GPUDynInstPtr gpuDynInst)
     {
         Wavefront *wave = gpuDynInst->wavefront();

         const VectorMask &activeLanes = wave->getPred();
         auto *cu = wave->computeUnit;

         // work-items enabled in the predicate are finished: clear them
         for (int lane = 0; lane < cu->wfSize(); ++lane) {
             if (activeLanes[lane])
                 wave->initMask[lane] = 0;
         }

         // keep only the first buffered instruction; the rest were fetched
         // for work-items that have now completed
         auto &instBuf = wave->instructionBuffer;
         instBuf.erase(instBuf.begin() + 1, instBuf.end());
         if (wave->pendingFetch)
             wave->dropFetch = true;

         // some work-items are still outstanding, so the wavefront lives on
         if (!wave->initMask.none())
             return;

         wave->status = Wavefront::S_STOPPED;

         int32_t refCount =
             cu->getLds().decreaseRefCounter(wave->dispatchId, wave->wgId);

         DPRINTF(GPUExec, "CU%d: decrease ref ctr WG[%d] to [%d]\n",
                 cu->cu_id, wave->wgId, refCount);

         // give the completed wavefront's vector registers back
         cu->vectorRegsReserved[wave->simdId] -= wave->reservedVectorRegs;
         assert(cu->vectorRegsReserved[wave->simdId] >= 0);

         auto *regFile = cu->vrf[wave->simdId];
         uint32_t endIndex =
             (wave->startVgprIndex + wave->reservedVectorRegs - 1) %
             regFile->numRegs();
         regFile->manager->freeRegion(wave->startVgprIndex, endIndex);

         wave->reservedVectorRegs = 0;
         wave->startVgprIndex = 0;
         cu->completedWfs++;

         DPRINTF(GPUExec, "Doing return for CU%d: WF[%d][%d][%d]\n",
                 cu->cu_id, wave->simdId, wave->wfSlotId, wave->wfDynId);

         // non-zero reference count: just let the dispatcher carry on
         if (refCount) {
             cu->shader->dispatcher->scheduleDispatch();
             return;
         }

         setFlag(SystemScope);
         setFlag(Release);
         setFlag(GlobalSegment);

         // Tell the memory system that the kernel has completed
         // (kernel end = isKernel + isRelease)
         wave->status = Wavefront::S_RETURNING;
         gpuDynInst->useContinuation = false;
         gpuDynInst->simdId = wave->simdId;
         gpuDynInst->wfSlotId = wave->wfSlotId;
         gpuDynInst->wfDynId = wave->wfDynId;
         cu->injectGlobalMemFence(gpuDynInst, true);
     }

     /**
      * Execute a Barrier instruction for the wavefront that owns gpuDynInst.
      *
      * Sets the wavefront's barrier count to one past oldBarrierCnt and marks
      * the wavefront as stalled at the barrier.
      *
      * @param gpuDynInst dynamic instruction being executed
      */
     void
     Barrier::execute(GPUDynInstPtr gpuDynInst)
     {
         Wavefront *wave = gpuDynInst->wavefront();

         // both counters are expected to agree when the barrier is reached
         assert(wave->oldBarrierCnt == wave->barrierCnt);

         wave->stalledAtBarrier = true;
         wave->barrierCnt = wave->oldBarrierCnt + 1;
     }
 } // namespace HsailISA
	/*
	* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
	* All rights reserved.
	*
	* For use for simulation and test purposes only
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions are met:
	*
	* 1. Redistributions of source code must retain the above copyright notice,
	* this list of conditions and the following disclaimer.
	*
	* 2. Redistributions in binary form must reproduce the above copyright notice,
	* this list of conditions and the following disclaimer in the documentation
	* and/or other materials provided with the distribution.
	*
	* 3. Neither the name of the copyright holder nor the names of its contributors
	* may be used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
	* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
	* POSSIBILITY OF SUCH DAMAGE.
	*
	* Author: Steve Reinhardt
	*/

	#include "arch/hsail/insts/decl.hh"
	#include "debug/GPUExec.hh"
	#include "gpu-compute/dispatcher.hh"
	#include "gpu-compute/simple_pool_manager.hh"

	namespace HsailISA
	{
	// Definitions of the static `label` string for each type specialization.
	// Every label is the lowercase spelling of the corresponding type name.
	// 'B' types (presumably untyped bit values of the given width -- confirm
	// against the declarations in decl.hh)
	template<> const char *B1::label = "b1";
	template<> const char *B8::label = "b8";
	template<> const char *B16::label = "b16";
	template<> const char *B32::label = "b32";
	template<> const char *B64::label = "b64";

	// 'S' types (presumably signed integers of the given width)
	template<> const char *S8::label = "s8";
	template<> const char *S16::label = "s16";
	template<> const char *S32::label = "s32";
	template<> const char *S64::label = "s64";

	// 'U' types (presumably unsigned integers of the given width)
	template<> const char *U8::label = "u8";
	template<> const char *U16::label = "u16";
	template<> const char *U32::label = "u32";
	template<> const char *U64::label = "u64";

	// 'F' types (presumably floating point values of the given width)
	template<> const char *F32::label = "f32";
	template<> const char *F64::label = "f64";

	/**
	 * Translate a BRIG compare operation into its mnemonic string.
	 *
	 * @param cmpOp compare operation to translate
	 * @return the lowercase mnemonic for cmpOp, or "unknown" when cmpOp is
	 *         not one of the operations listed in the switch below
	 */
	const char*
	cmpOpToString(Brig::BrigCompareOperation cmpOp)
	{
	    using namespace Brig;

	    // fallback used when no case below matches
	    const char *mnemonic = "unknown";

	    switch (cmpOp) {
	      case BRIG_COMPARE_EQ:   mnemonic = "eq";   break;
	      case BRIG_COMPARE_NE:   mnemonic = "ne";   break;
	      case BRIG_COMPARE_LT:   mnemonic = "lt";   break;
	      case BRIG_COMPARE_LE:   mnemonic = "le";   break;
	      case BRIG_COMPARE_GT:   mnemonic = "gt";   break;
	      case BRIG_COMPARE_GE:   mnemonic = "ge";   break;

	      case BRIG_COMPARE_EQU:  mnemonic = "equ";  break;
	      case BRIG_COMPARE_NEU:  mnemonic = "neu";  break;
	      case BRIG_COMPARE_LTU:  mnemonic = "ltu";  break;
	      case BRIG_COMPARE_LEU:  mnemonic = "leu";  break;
	      case BRIG_COMPARE_GTU:  mnemonic = "gtu";  break;
	      case BRIG_COMPARE_GEU:  mnemonic = "geu";  break;

	      case BRIG_COMPARE_NUM:  mnemonic = "num";  break;
	      case BRIG_COMPARE_NAN:  mnemonic = "nan";  break;

	      case BRIG_COMPARE_SEQ:  mnemonic = "seq";  break;
	      case BRIG_COMPARE_SNE:  mnemonic = "sne";  break;
	      case BRIG_COMPARE_SLT:  mnemonic = "slt";  break;
	      case BRIG_COMPARE_SLE:  mnemonic = "sle";  break;
	      case BRIG_COMPARE_SGT:  mnemonic = "sgt";  break;
	      case BRIG_COMPARE_SGE:  mnemonic = "sge";  break;

	      case BRIG_COMPARE_SEQU: mnemonic = "sequ"; break;
	      case BRIG_COMPARE_SNEU: mnemonic = "sneu"; break;
	      case BRIG_COMPARE_SLTU: mnemonic = "sltu"; break;
	      case BRIG_COMPARE_SLEU: mnemonic = "sleu"; break;
	      case BRIG_COMPARE_SGTU: mnemonic = "sgtu"; break;
	      case BRIG_COMPARE_SGEU: mnemonic = "sgeu"; break;

	      case BRIG_COMPARE_SNUM: mnemonic = "snum"; break;
	      case BRIG_COMPARE_SNAN: mnemonic = "snan"; break;

	      default:
	        break;
	    }

	    return mnemonic;
	}

	/**
	 * Execute a Ret instruction for the wavefront that owns gpuDynInst.
	 *
	 * Clears the init-mask bit of every lane set in the wavefront's current
	 * predicate, erases every buffered instruction after the first one and
	 * asks for any pending fetch to be dropped. When no init-mask bit is
	 * left, the wavefront is stopped, its vector register reservation is
	 * released, and then either a global memory fence is injected (LDS
	 * reference count for the workgroup reached zero) or the dispatcher is
	 * asked to schedule another dispatch.
	 *
	 * @param gpuDynInst dynamic instruction being executed; it is reused as
	 *                   the packet handed to injectGlobalMemFence()
	 */
	void
	Ret::execute(GPUDynInstPtr gpuDynInst)
	{
	    Wavefront *wave = gpuDynInst->wavefront();

	    const VectorMask &activeLanes = wave->getPred();
	    auto *cu = wave->computeUnit;

	    // work-items enabled in the predicate are finished: clear them
	    for (int lane = 0; lane < cu->wfSize(); ++lane) {
	        if (activeLanes[lane])
	            wave->initMask[lane] = 0;
	    }

	    // keep only the first buffered instruction; the rest were fetched
	    // for work-items that have now completed
	    auto &instBuf = wave->instructionBuffer;
	    instBuf.erase(instBuf.begin() + 1, instBuf.end());
	    if (wave->pendingFetch)
	        wave->dropFetch = true;

	    // some work-items are still outstanding, so the wavefront lives on
	    if (!wave->initMask.none())
	        return;

	    wave->status = Wavefront::S_STOPPED;

	    int32_t refCount =
	        cu->getLds().decreaseRefCounter(wave->dispatchId, wave->wgId);

	    DPRINTF(GPUExec, "CU%d: decrease ref ctr WG[%d] to [%d]\n",
	            cu->cu_id, wave->wgId, refCount);

	    // give the completed wavefront's vector registers back
	    cu->vectorRegsReserved[wave->simdId] -= wave->reservedVectorRegs;
	    assert(cu->vectorRegsReserved[wave->simdId] >= 0);

	    auto *regFile = cu->vrf[wave->simdId];
	    uint32_t endIndex =
	        (wave->startVgprIndex + wave->reservedVectorRegs - 1) %
	        regFile->numRegs();
	    regFile->manager->freeRegion(wave->startVgprIndex, endIndex);

	    wave->reservedVectorRegs = 0;
	    wave->startVgprIndex = 0;
	    cu->completedWfs++;

	    DPRINTF(GPUExec, "Doing return for CU%d: WF[%d][%d][%d]\n",
	            cu->cu_id, wave->simdId, wave->wfSlotId, wave->wfDynId);

	    // non-zero reference count: just let the dispatcher carry on
	    if (refCount) {
	        cu->shader->dispatcher->scheduleDispatch();
	        return;
	    }

	    setFlag(SystemScope);
	    setFlag(Release);
	    setFlag(GlobalSegment);

	    // Tell the memory system that the kernel has completed
	    // (kernel end = isKernel + isRelease)
	    wave->status = Wavefront::S_RETURNING;
	    gpuDynInst->useContinuation = false;
	    gpuDynInst->simdId = wave->simdId;
	    gpuDynInst->wfSlotId = wave->wfSlotId;
	    gpuDynInst->wfDynId = wave->wfDynId;
	    cu->injectGlobalMemFence(gpuDynInst, true);
	}

	/**
	 * Execute a Barrier instruction for the wavefront that owns gpuDynInst.
	 *
	 * Sets the wavefront's barrier count to one past oldBarrierCnt and marks
	 * the wavefront as stalled at the barrier.
	 *
	 * @param gpuDynInst dynamic instruction being executed
	 */
	void
	Barrier::execute(GPUDynInstPtr gpuDynInst)
	{
	    Wavefront *wave = gpuDynInst->wavefront();

	    // both counters are expected to agree when the barrier is reached
	    assert(wave->oldBarrierCnt == wave->barrierCnt);

	    wave->stalledAtBarrier = true;
	    wave->barrierCnt = wave->oldBarrierCnt + 1;
	}
	} // namespace HsailISA