src/gpu-compute/vector_register_file.cc - arm/gem5 - Git at Google

 /*
  * Copyright (c) 2015 Advanced Micro Devices, Inc.
  * All rights reserved.
  *
  * For use for simulation and test purposes only
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *
  * 1. Redistributions of source code must retain the above copyright notice,
  * this list of conditions and the following disclaimer.
  *
  * 2. Redistributions in binary form must reproduce the above copyright notice,
  * this list of conditions and the following disclaimer in the documentation
  * and/or other materials provided with the distribution.
  *
  * 3. Neither the name of the copyright holder nor the names of its contributors
  * may be used to endorse or promote products derived from this software
  * without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  * Author: John Kalamatianos
  */

 #include "gpu-compute/vector_register_file.hh"

 #include <string>

 #include "base/logging.hh"
 #include "gpu-compute/compute_unit.hh"
 #include "gpu-compute/gpu_dyn_inst.hh"
 #include "gpu-compute/shader.hh"
 #include "gpu-compute/simple_pool_manager.hh"
 #include "gpu-compute/wavefront.hh"
 #include "params/VectorRegisterFile.hh"

 VectorRegisterFile::VectorRegisterFile(const VectorRegisterFileParams *p)
     : SimObject(p),
       manager(new SimplePoolManager(p->min_alloc, p->num_regs_per_simd)),
       simdId(p->simd_id), numRegsPerSimd(p->num_regs_per_simd),
       vgprState(new VecRegisterState())
 {
     fatal_if(numRegsPerSimd % 2, "VRF size is illegal\n");
     fatal_if(simdId < 0, "Illegal SIMD id for VRF");

     fatal_if(numRegsPerSimd % p->min_alloc, "Min VGPR region allocation is not "
              "multiple of VRF size\n");

     busy.clear();
     busy.resize(numRegsPerSimd, 0);
     nxtBusy.clear();
     nxtBusy.resize(numRegsPerSimd, 0);

     vgprState->init(numRegsPerSimd, p->wfSize);
 }

 void
 VectorRegisterFile::setParent(ComputeUnit *_computeUnit)
 {
     computeUnit = _computeUnit;
     vgprState->setParent(computeUnit);
 }

 uint8_t
 VectorRegisterFile::regNxtBusy(int idx, uint32_t operandSize) const
 {
     uint8_t status = nxtBusy.at(idx);

     if (operandSize > 4) {
         status = status | (nxtBusy.at((idx + 1) % numRegs()));
     }

     return status;
 }

 uint8_t
 VectorRegisterFile::regBusy(int idx, uint32_t operandSize) const
 {
     uint8_t status = busy.at(idx);

     if (operandSize > 4) {
         status = status | (busy.at((idx + 1) % numRegs()));
     }

     return status;
 }

 void
 VectorRegisterFile::preMarkReg(int regIdx, uint32_t operandSize, uint8_t value)
 {
     nxtBusy.at(regIdx) = value;

     if (operandSize > 4) {
         nxtBusy.at((regIdx + 1) % numRegs()) = value;
     }
 }

 void
 VectorRegisterFile::markReg(int regIdx, uint32_t operandSize, uint8_t value)
 {
     busy.at(regIdx) = value;

     if (operandSize > 4) {
         busy.at((regIdx + 1) % numRegs()) = value;
     }
 }

 bool
 VectorRegisterFile::operandsReady(Wavefront *w, GPUDynInstPtr ii) const
 {
     for (int i = 0; i < ii->getNumOperands(); ++i) {
         if (ii->isVectorRegister(i)) {
             uint32_t vgprIdx = ii->getRegisterIndex(i, ii);
             uint32_t pVgpr = w->remap(vgprIdx, ii->getOperandSize(i), 1);

             if (regBusy(pVgpr, ii->getOperandSize(i)) == 1) {
                 if (ii->isDstOperand(i)) {
                     w->numTimesBlockedDueWAXDependencies++;
                 } else if (ii->isSrcOperand(i)) {
                     w->numTimesBlockedDueRAWDependencies++;
                 }

                 return false;
             }

             if (regNxtBusy(pVgpr, ii->getOperandSize(i)) == 1) {
                 if (ii->isDstOperand(i)) {
                     w->numTimesBlockedDueWAXDependencies++;
                 } else if (ii->isSrcOperand(i)) {
                     w->numTimesBlockedDueRAWDependencies++;
                 }

                 return false;
             }
         }
     }

     return true;
 }

 void
 VectorRegisterFile::exec(GPUDynInstPtr ii, Wavefront *w)
 {
     bool loadInstr = ii->isLoad();
     bool atomicInstr = ii->isAtomic() || ii->isMemFence();

     bool loadNoArgInstr = loadInstr && !ii->isArgLoad();

     // iterate over all register destination operands
     for (int i = 0; i < ii->getNumOperands(); ++i) {
         if (ii->isVectorRegister(i) && ii->isDstOperand(i)) {
             uint32_t physReg = w->remap(ii->getRegisterIndex(i, ii),
                                         ii->getOperandSize(i), 1);

             // mark the destination vector register as busy
             markReg(physReg, ii->getOperandSize(i), 1);
             // clear the in-flight status of the destination vector register
             preMarkReg(physReg, ii->getOperandSize(i), 0);

             // FIXME: if we ever model correct timing behavior
             // for load argument instructions then we should not
             // set the destination register as busy now but when
             // the data returns. Loads and Atomics should free
             // their destination registers when the data returns,
             // not now
             if (!atomicInstr && !loadNoArgInstr) {
                 uint32_t pipeLen = ii->getOperandSize(i) <= 4 ?
                     computeUnit->spBypassLength() :
                     computeUnit->dpBypassLength();

                 // schedule an event for marking the register as ready
                 computeUnit->registerEvent(w->simdId, physReg,
                                            ii->getOperandSize(i),
                                            computeUnit->shader->tick_cnt +
                                            computeUnit->shader->ticks(pipeLen),
                                            0);
             }
         }
     }
 }

 int
 VectorRegisterFile::exec(uint64_t dynamic_id, Wavefront *w,
                          std::vector<uint32_t> &regVec, uint32_t operandSize,
                          uint64_t timestamp)
 {
     int delay = 0;

     panic_if(regVec.size() <= 0, "Illegal VGPR vector size=%d\n",
              regVec.size());

     for (int i = 0; i < regVec.size(); ++i) {
         // mark the destination VGPR as free when the timestamp expires
         computeUnit->registerEvent(w->simdId, regVec[i], operandSize,
                                    computeUnit->shader->tick_cnt + timestamp +
                                    computeUnit->shader->ticks(delay), 0);
     }

     return delay;
 }

 void
 VectorRegisterFile::updateResources(Wavefront *w, GPUDynInstPtr ii)
 {
     // iterate over all register destination operands
     for (int i = 0; i < ii->getNumOperands(); ++i) {
         if (ii->isVectorRegister(i) && ii->isDstOperand(i)) {
             uint32_t physReg = w->remap(ii->getRegisterIndex(i, ii),
                                         ii->getOperandSize(i), 1);
             // set the in-flight status of the destination vector register
             preMarkReg(physReg, ii->getOperandSize(i), 1);
         }
     }
 }

 bool
 VectorRegisterFile::vrfOperandAccessReady(uint64_t dynamic_id, Wavefront *w,
                                           GPUDynInstPtr ii,
                                           VrfAccessType accessType)
 {
     bool ready = true;

     return ready;
 }

 bool
 VectorRegisterFile::vrfOperandAccessReady(Wavefront *w, GPUDynInstPtr ii,
                                           VrfAccessType accessType)
 {
     bool ready = true;

     return ready;
 }

 VectorRegisterFile*
 VectorRegisterFileParams::create()
 {
     return new VectorRegisterFile(this);
 }
	/*
	* Copyright (c) 2015 Advanced Micro Devices, Inc.
	* All rights reserved.
	*
	* For use for simulation and test purposes only
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions are met:
	*
	* 1. Redistributions of source code must retain the above copyright notice,
	* this list of conditions and the following disclaimer.
	*
	* 2. Redistributions in binary form must reproduce the above copyright notice,
	* this list of conditions and the following disclaimer in the documentation
	* and/or other materials provided with the distribution.
	*
	* 3. Neither the name of the copyright holder nor the names of its contributors
	* may be used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
	* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
	* POSSIBILITY OF SUCH DAMAGE.
	*
	* Author: John Kalamatianos
	*/

	#include "gpu-compute/vector_register_file.hh"

	#include <string>

	#include "base/logging.hh"
	#include "gpu-compute/compute_unit.hh"
	#include "gpu-compute/gpu_dyn_inst.hh"
	#include "gpu-compute/shader.hh"
	#include "gpu-compute/simple_pool_manager.hh"
	#include "gpu-compute/wavefront.hh"
	#include "params/VectorRegisterFile.hh"

	VectorRegisterFile::VectorRegisterFile(const VectorRegisterFileParams *p)
	: SimObject(p),
	manager(new SimplePoolManager(p->min_alloc, p->num_regs_per_simd)),
	simdId(p->simd_id), numRegsPerSimd(p->num_regs_per_simd),
	vgprState(new VecRegisterState())
	{
	fatal_if(numRegsPerSimd % 2, "VRF size is illegal\n");
	fatal_if(simdId < 0, "Illegal SIMD id for VRF");

	fatal_if(numRegsPerSimd % p->min_alloc, "Min VGPR region allocation is not "
	"multiple of VRF size\n");

	busy.clear();
	busy.resize(numRegsPerSimd, 0);
	nxtBusy.clear();
	nxtBusy.resize(numRegsPerSimd, 0);

	vgprState->init(numRegsPerSimd, p->wfSize);
	}

	void
	VectorRegisterFile::setParent(ComputeUnit *_computeUnit)
	{
	computeUnit = _computeUnit;
	vgprState->setParent(computeUnit);
	}

	uint8_t
	VectorRegisterFile::regNxtBusy(int idx, uint32_t operandSize) const
	{
	uint8_t status = nxtBusy.at(idx);

	if (operandSize > 4) {
	status = status \| (nxtBusy.at((idx + 1) % numRegs()));
	}

	return status;
	}

	uint8_t
	VectorRegisterFile::regBusy(int idx, uint32_t operandSize) const
	{
	uint8_t status = busy.at(idx);

	if (operandSize > 4) {
	status = status \| (busy.at((idx + 1) % numRegs()));
	}

	return status;
	}

	void
	VectorRegisterFile::preMarkReg(int regIdx, uint32_t operandSize, uint8_t value)
	{
	nxtBusy.at(regIdx) = value;

	if (operandSize > 4) {
	nxtBusy.at((regIdx + 1) % numRegs()) = value;
	}
	}

	void
	VectorRegisterFile::markReg(int regIdx, uint32_t operandSize, uint8_t value)
	{
	busy.at(regIdx) = value;

	if (operandSize > 4) {
	busy.at((regIdx + 1) % numRegs()) = value;
	}
	}

	bool
	VectorRegisterFile::operandsReady(Wavefront *w, GPUDynInstPtr ii) const
	{
	for (int i = 0; i < ii->getNumOperands(); ++i) {
	if (ii->isVectorRegister(i)) {
	uint32_t vgprIdx = ii->getRegisterIndex(i, ii);
	uint32_t pVgpr = w->remap(vgprIdx, ii->getOperandSize(i), 1);

	if (regBusy(pVgpr, ii->getOperandSize(i)) == 1) {
	if (ii->isDstOperand(i)) {
	w->numTimesBlockedDueWAXDependencies++;
	} else if (ii->isSrcOperand(i)) {
	w->numTimesBlockedDueRAWDependencies++;
	}

	return false;
	}

	if (regNxtBusy(pVgpr, ii->getOperandSize(i)) == 1) {
	if (ii->isDstOperand(i)) {
	w->numTimesBlockedDueWAXDependencies++;
	} else if (ii->isSrcOperand(i)) {
	w->numTimesBlockedDueRAWDependencies++;
	}

	return false;
	}
	}
	}

	return true;
	}

	void
	VectorRegisterFile::exec(GPUDynInstPtr ii, Wavefront *w)
	{
	bool loadInstr = ii->isLoad();
	bool atomicInstr = ii->isAtomic() \|\| ii->isMemFence();

	bool loadNoArgInstr = loadInstr && !ii->isArgLoad();

	// iterate over all register destination operands
	for (int i = 0; i < ii->getNumOperands(); ++i) {
	if (ii->isVectorRegister(i) && ii->isDstOperand(i)) {
	uint32_t physReg = w->remap(ii->getRegisterIndex(i, ii),
	ii->getOperandSize(i), 1);

	// mark the destination vector register as busy
	markReg(physReg, ii->getOperandSize(i), 1);
	// clear the in-flight status of the destination vector register
	preMarkReg(physReg, ii->getOperandSize(i), 0);

	// FIXME: if we ever model correct timing behavior
	// for load argument instructions then we should not
	// set the destination register as busy now but when
	// the data returns. Loads and Atomics should free
	// their destination registers when the data returns,
	// not now
	if (!atomicInstr && !loadNoArgInstr) {
	uint32_t pipeLen = ii->getOperandSize(i) <= 4 ?
	computeUnit->spBypassLength() :
	computeUnit->dpBypassLength();

	// schedule an event for marking the register as ready
	computeUnit->registerEvent(w->simdId, physReg,
	ii->getOperandSize(i),
	computeUnit->shader->tick_cnt +
	computeUnit->shader->ticks(pipeLen),
	0);
	}
	}
	}
	}

	int
	VectorRegisterFile::exec(uint64_t dynamic_id, Wavefront *w,
	std::vector<uint32_t> &regVec, uint32_t operandSize,
	uint64_t timestamp)
	{
	int delay = 0;

	panic_if(regVec.size() <= 0, "Illegal VGPR vector size=%d\n",
	regVec.size());

	for (int i = 0; i < regVec.size(); ++i) {
	// mark the destination VGPR as free when the timestamp expires
	computeUnit->registerEvent(w->simdId, regVec[i], operandSize,
	computeUnit->shader->tick_cnt + timestamp +
	computeUnit->shader->ticks(delay), 0);
	}

	return delay;
	}

	void
	VectorRegisterFile::updateResources(Wavefront *w, GPUDynInstPtr ii)
	{
	// iterate over all register destination operands
	for (int i = 0; i < ii->getNumOperands(); ++i) {
	if (ii->isVectorRegister(i) && ii->isDstOperand(i)) {
	uint32_t physReg = w->remap(ii->getRegisterIndex(i, ii),
	ii->getOperandSize(i), 1);
	// set the in-flight status of the destination vector register
	preMarkReg(physReg, ii->getOperandSize(i), 1);
	}
	}
	}

	bool
	VectorRegisterFile::vrfOperandAccessReady(uint64_t dynamic_id, Wavefront *w,
	GPUDynInstPtr ii,
	VrfAccessType accessType)
	{
	bool ready = true;

	return ready;
	}

	bool
	VectorRegisterFile::vrfOperandAccessReady(Wavefront *w, GPUDynInstPtr ii,
	VrfAccessType accessType)
	{
	bool ready = true;

	return ready;
	}

	VectorRegisterFile*
	VectorRegisterFileParams::create()
	{
	return new VectorRegisterFile(this);
	}