// src/cpu/testers/gpu_ruby_test/dma_thread.cc - public/gem5 - Git at Google

 /*
  * Copyright (c) 2021 Advanced Micro Devices, Inc.
  * All rights reserved.
  *
  * For use for simulation and test purposes only
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *
  * 1. Redistributions of source code must retain the above copyright notice,
  * this list of conditions and the following disclaimer.
  *
  * 2. Redistributions in binary form must reproduce the above copyright notice,
  * this list of conditions and the following disclaimer in the documentation
  * and/or other materials provided with the distribution.
  *
  * 3. Neither the name of the copyright holder nor the names of its
  * contributors may be used to endorse or promote products derived from this
  * software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */

 #include "cpu/testers/gpu_ruby_test/dma_thread.hh"

 #include <cstring>

 #include "debug/ProtocolTest.hh"

 namespace gem5
 {

 /**
  * Build a DMA tester thread on top of the generic TesterThread.
  *
  * Sets the thread's printable name (which embeds threadId) and the
  * description of its tick event.
  *
  * @param _params parameters forwarded unchanged to TesterThread
  */
 DmaThread::DmaThread(const Params& _params)
     : TesterThread(_params)
 {
     // name used when this thread identifies itself, e.g. in debug output
     const auto id_str = std::to_string(threadId);
     threadName = "DmaThread(Thread ID " + id_str + ")";

     threadEvent.setDesc("DmaThread tick");

     // a DMA thread is scalar, so it must be configured with one lane only
     assert(numLanes == 1);
 }

 /** Nothing to release here: the destructor body is intentionally empty. */
 DmaThread::~DmaThread() = default;

 void
 DmaThread::issueLoadOps()
 {
     assert(curAction);
     assert(curAction->getType() == Episode::Action::Type::LOAD);
     // we should not have any outstanding fence or atomic op at this point
     assert(pendingFenceCount == 0);
     assert(pendingAtomicCount == 0);

     // DMA thread is a scalar thread so always set lane to zero. This allows
     // us to reuse the API for GPU threads rather than have a specific API
     // for scalar tester threads
     int lane = 0;

     Location location = curAction->getLocation(lane);
     assert(location >= AddressManager::INVALID_LOCATION);

     if (location >= 0) {
         Addr address = addrManager->getAddress(location);
         DPRINTF(ProtocolTest, "%s Episode %d: Issuing Load - Addr %s\n",
                 this->getName(), curEpisode->getEpisodeId(),
                 ruby::printAddress(address));

         int load_size = sizeof(Value);

         // for now, assert address is 4-byte aligned
         assert(address % load_size == 0);

         auto req = std::make_shared<Request>(address, load_size,
                                              0, tester->requestorId(),
                                              0, threadId, nullptr);
         req->setPaddr(address);
         req->setReqInstSeqNum(tester->getActionSeqNum());

         PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
         uint8_t* data = new uint8_t[load_size];
         pkt->dataDynamic(data);
         pkt->senderState = new ProtocolTester::SenderState(this);

         if (!port->sendTimingReq(pkt)) {
             panic("Not expected failed sendTimingReq\n");
         }

         // insert an outstanding load
         addOutstandingReqs(outstandingLoads, address, lane, location);

         // increment the number of outstanding ld_st requests
         pendingLdStCount++;
     }
 }

 /**
  * Issue the store described by the current STORE action.
  *
  * The value written is the value currently logged for the location plus
  * one. A timing WriteReq of sizeof(Value) bytes is sent through the port,
  * then the request is recorded in outstandingStores together with the
  * value being written, so that hitCallback() can update the log table
  * when the WriteResp arrives. Nothing is issued when the action's
  * location is negative.
  */
 void
 DmaThread::issueStoreOps()
 {
     assert(curAction);
     assert(curAction->getType() == Episode::Action::Type::STORE);
     // we should not have any outstanding fence or atomic op at this point
     assert(pendingFenceCount == 0);
     assert(pendingAtomicCount == 0);

     // DMA thread is a scalar thread so always set lane to zero. This allows
     // us to reuse the API for GPU threads rather than have a specific API
     // for scalar tester threads
     int lane = 0;

     Location location = curAction->getLocation(lane);
     assert(location >= AddressManager::INVALID_LOCATION);

     if (location >= 0) {
         // prepare the next value to store
         Value new_value = addrManager->getLoggedValue(location) + 1;

         Addr address = addrManager->getAddress(location);
         // must be aligned with store size
         assert(address % sizeof(Value) == 0);

         DPRINTF(ProtocolTest, "%s Episode %d: Issuing Store - Addr %s - "
                 "Value %d\n", this->getName(),
                 curEpisode->getEpisodeId(), ruby::printAddress(address),
                 new_value);

         auto req = std::make_shared<Request>(address, sizeof(Value),
                                              0, tester->requestorId(), 0,
                                              threadId, nullptr);
         req->setPaddr(address);
         req->setReqInstSeqNum(tester->getActionSeqNum());

         PacketPtr pkt = new Packet(req, MemCmd::WriteReq);

         // Copy the object representation of the value into the payload
         // buffer. std::memcpy replaces the former hand-written byte loop,
         // which compared a signed index against sizeof() and relied on a
         // C-style pointer cast; the bytes written are the same.
         uint8_t *writeData = new uint8_t[sizeof(Value)];
         std::memcpy(writeData, &new_value, sizeof(Value));
         // the buffer is handed to the packet (presumably it takes
         // ownership of it -- confirm against Packet::dataDynamic)
         pkt->dataDynamic(writeData);
         pkt->senderState = new ProtocolTester::SenderState(this);

         // a rejected request is not expected here, so treat it as fatal
         if (!port->sendTimingReq(pkt)) {
             panic("Not expecting a failed sendTimingReq\n");
         }

         // add an outstanding store
         addOutstandingReqs(outstandingStores, address, lane, location,
                            new_value);

         // increment the number of outstanding ld_st requests
         pendingLdStCount++;
     }
 }

 /**
  * Handle an ATOMIC action.
  *
  * Only logs and sanity-checks the thread state; no request is sent
  * because no DMA protocol exists with atomics.
  */
 void
 DmaThread::issueAtomicOps()
 {
     DPRINTF(ProtocolTest, "Issuing Atomic Op ...\n");

     assert(curAction != nullptr);
     assert(curAction->getType() == Episode::Action::Type::ATOMIC);

     // nothing at all may be in flight when an atomic action is reached
     assert(pendingFenceCount == 0 &&
            pendingLdStCount == 0 &&
            pendingAtomicCount == 0);

     // intentionally a no-op beyond the checks above
 }

 /**
  * Handle an ACQUIRE action.
  *
  * Only logs and sanity-checks the thread state; no request is sent
  * because acquire does not apply to DMA threads.
  */
 void
 DmaThread::issueAcquireOp()
 {
     DPRINTF(ProtocolTest, "Issuing Acquire Op ...\n");

     assert(curAction != nullptr);
     assert(curAction->getType() == Episode::Action::Type::ACQUIRE);

     // nothing at all may be in flight when an acquire action is reached
     assert(pendingFenceCount == 0 &&
            pendingLdStCount == 0 &&
            pendingAtomicCount == 0);

     // intentionally a no-op beyond the checks above
 }

 /**
  * Handle a RELEASE action.
  *
  * Only logs and sanity-checks the thread state; no request is sent
  * because release does not apply to DMA threads.
  */
 void
 DmaThread::issueReleaseOp()
 {
     DPRINTF(ProtocolTest, "Issuing Release Op ...\n");

     assert(curAction != nullptr);
     assert(curAction->getType() == Episode::Action::Type::RELEASE);

     // nothing at all may be in flight when a release action is reached
     assert(pendingFenceCount == 0 &&
            pendingLdStCount == 0 &&
            pendingAtomicCount == 0);

     // intentionally a no-op beyond the checks above
 }

 /**
  * Process the response packet of a previously issued request.
  *
  * Depending on the response command, the matching entry is popped from
  * the outstanding atomic, load or store table, the returned data is
  * validated (atomics and loads) and/or the log table is updated (atomics
  * and stores), and the corresponding pending counter is decremented. Any
  * other command is fatal. The packet and its sender state are deleted
  * here, and a wakeup is scheduled unless the thread event already is.
  *
  * @param pkt response packet; must not be null
  */
 void
 DmaThread::hitCallback(PacketPtr pkt)
 {
     assert(pkt != nullptr);
     MemCmd cmd = pkt->cmd;
     Addr resp_addr = pkt->getAddr();

     DPRINTF(ProtocolTest,
             "%s Episode %d: hitCallback - Command %s - Addr %s\n",
             this->getName(), curEpisode->getEpisodeId(),
             cmd.toString(), ruby::printAddress(resp_addr));

     if (cmd == MemCmd::SwapResp) {
         // An atomic came back. NOTE(review): issueAtomicOps() in this
         // file never sends a request, so this path is presumably never
         // taken for DMA threads -- confirm.
         assert(pendingAtomicCount > 0);
         assert(pendingLdStCount == 0);
         assert(outstandingAtomics.count(resp_addr) > 0);

         // data returned by the atomic
         Value ret_val = *(pkt->getPtr<Value>());

         // match the response to its request and check the returned value
         OutstandingReq done = popOutstandingReq(outstandingAtomics,
                                                 resp_addr);
         assert(done.lane == 0);
         validateAtomicResp(done.origLoc, done.lane, ret_val);

         // record the returned value in the log table
         addrManager->updateLogTable(done.origLoc, threadId,
                                     curEpisode->getEpisodeId(), ret_val,
                                     curTick(), 0);

         // one atomic fewer in flight
         pendingAtomicCount--;
     } else if (cmd == MemCmd::ReadResp) {
         // a load came back
         assert(pendingLdStCount > 0);
         assert(pendingAtomicCount == 0);
         assert(outstandingLoads.count(resp_addr) > 0);

         // data returned by the load
         Value ret_val = *(pkt->getPtr<Value>());
         OutstandingReq done = popOutstandingReq(outstandingLoads,
                                                 resp_addr);
         assert(done.lane == 0);
         validateLoadResp(done.origLoc, done.lane, ret_val);

         // one ld/st request fewer in flight
         pendingLdStCount--;
     } else if (cmd == MemCmd::WriteResp) {
         // a store came back
         assert(pendingLdStCount > 0);
         assert(pendingAtomicCount == 0);

         // A write response carries nothing to validate. Popping the
         // entry lets later requests that depend on this write proceed.
         // Unlike GpuWavefront, pendingLdStCount is decremented right
         // here because the write is guaranteed to have completed in
         // downstream memory.
         assert(outstandingStores.count(resp_addr) > 0);
         OutstandingReq done = popOutstandingReq(outstandingStores,
                                                 resp_addr);
         assert(done.storedValue != AddressManager::INVALID_VALUE);

         // record the value that was written in the log table
         addrManager->updateLogTable(done.origLoc, threadId,
                                     curEpisode->getEpisodeId(),
                                     done.storedValue,
                                     curTick(), 0);

         // one ld/st request fewer in flight
         pendingLdStCount--;
     } else {
         panic("UnsupportedMemCmd response type: %s",
               cmd.toString().c_str());
     }

     // the response has been fully consumed
     delete pkt->senderState;
     delete pkt;

     // remember when this thread last made progress (deadlock check)
     lastActiveCycle = curCycle();

     // a new action may be issuable now; wake up unless already scheduled
     if (!threadEvent.scheduled()) {
         scheduleWakeup();
     }
 }

 } // namespace gem5
	/*
	* Copyright (c) 2021 Advanced Micro Devices, Inc.
	* All rights reserved.
	*
	* For use for simulation and test purposes only
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions are met:
	*
	* 1. Redistributions of source code must retain the above copyright notice,
	* this list of conditions and the following disclaimer.
	*
	* 2. Redistributions in binary form must reproduce the above copyright notice,
	* this list of conditions and the following disclaimer in the documentation
	* and/or other materials provided with the distribution.
	*
	* 3. Neither the name of the copyright holder nor the names of its
	* contributors may be used to endorse or promote products derived from this
	* software without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
	* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
	* POSSIBILITY OF SUCH DAMAGE.
	*/

	#include "cpu/testers/gpu_ruby_test/dma_thread.hh"

	#include "debug/ProtocolTest.hh"

	namespace gem5
	{

	/**
	 * Build a DMA tester thread on top of the generic TesterThread.
	 *
	 * Sets the thread's printable name (which embeds threadId) and the
	 * description of its tick event.
	 *
	 * @param _params parameters forwarded unchanged to TesterThread
	 */
	DmaThread::DmaThread(const Params& _params)
	    : TesterThread(_params)
	{
	    // name used when this thread identifies itself, e.g. in debug output
	    const auto id_str = std::to_string(threadId);
	    threadName = "DmaThread(Thread ID " + id_str + ")";

	    threadEvent.setDesc("DmaThread tick");

	    // a DMA thread is scalar, so it must be configured with one lane only
	    assert(numLanes == 1);
	}

	/** Nothing to release here: the destructor body is intentionally empty. */
	DmaThread::~DmaThread() = default;

	void
	DmaThread::issueLoadOps()
	{
	assert(curAction);
	assert(curAction->getType() == Episode::Action::Type::LOAD);
	// we should not have any outstanding fence or atomic op at this point
	assert(pendingFenceCount == 0);
	assert(pendingAtomicCount == 0);

	// DMA thread is a scalar thread so always set lane to zero. This allows
	// us to reuse the API for GPU threads rather than have a specific API
	// for scalar tester threads
	int lane = 0;

	Location location = curAction->getLocation(lane);
	assert(location >= AddressManager::INVALID_LOCATION);

	if (location >= 0) {
	Addr address = addrManager->getAddress(location);
	DPRINTF(ProtocolTest, "%s Episode %d: Issuing Load - Addr %s\n",
	this->getName(), curEpisode->getEpisodeId(),
	ruby::printAddress(address));

	int load_size = sizeof(Value);

	// for now, assert address is 4-byte aligned
	assert(address % load_size == 0);

	auto req = std::make_shared<Request>(address, load_size,
	0, tester->requestorId(),
	0, threadId, nullptr);
	req->setPaddr(address);
	req->setReqInstSeqNum(tester->getActionSeqNum());

	PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
	uint8_t* data = new uint8_t[load_size];
	pkt->dataDynamic(data);
	pkt->senderState = new ProtocolTester::SenderState(this);

	if (!port->sendTimingReq(pkt)) {
	panic("Not expected failed sendTimingReq\n");
	}

	// insert an outstanding load
	addOutstandingReqs(outstandingLoads, address, lane, location);

	// increment the number of outstanding ld_st requests
	pendingLdStCount++;
	}
	}

	/**
	 * Issue the store described by the current STORE action.
	 *
	 * The value written is the value currently logged for the location plus
	 * one. A timing WriteReq of sizeof(Value) bytes is sent through the port,
	 * then the request is recorded in outstandingStores together with the
	 * value being written, so that hitCallback() can update the log table
	 * when the WriteResp arrives. Nothing is issued when the action's
	 * location is negative.
	 */
	void
	DmaThread::issueStoreOps()
	{
	    assert(curAction);
	    assert(curAction->getType() == Episode::Action::Type::STORE);
	    // we should not have any outstanding fence or atomic op at this point
	    assert(pendingFenceCount == 0);
	    assert(pendingAtomicCount == 0);

	    // DMA thread is a scalar thread so always set lane to zero. This allows
	    // us to reuse the API for GPU threads rather than have a specific API
	    // for scalar tester threads
	    int lane = 0;

	    Location location = curAction->getLocation(lane);
	    assert(location >= AddressManager::INVALID_LOCATION);

	    if (location >= 0) {
	        // prepare the next value to store
	        Value new_value = addrManager->getLoggedValue(location) + 1;

	        Addr address = addrManager->getAddress(location);
	        // must be aligned with store size
	        assert(address % sizeof(Value) == 0);

	        DPRINTF(ProtocolTest, "%s Episode %d: Issuing Store - Addr %s - "
	                "Value %d\n", this->getName(),
	                curEpisode->getEpisodeId(), ruby::printAddress(address),
	                new_value);

	        auto req = std::make_shared<Request>(address, sizeof(Value),
	                                             0, tester->requestorId(), 0,
	                                             threadId, nullptr);
	        req->setPaddr(address);
	        req->setReqInstSeqNum(tester->getActionSeqNum());

	        PacketPtr pkt = new Packet(req, MemCmd::WriteReq);

	        // Copy the object representation of the value into the payload
	        // buffer. std::memcpy replaces the former hand-written byte loop,
	        // which compared a signed index against sizeof() and relied on a
	        // C-style pointer cast; the bytes written are the same.
	        uint8_t *writeData = new uint8_t[sizeof(Value)];
	        std::memcpy(writeData, &new_value, sizeof(Value));
	        // the buffer is handed to the packet (presumably it takes
	        // ownership of it -- confirm against Packet::dataDynamic)
	        pkt->dataDynamic(writeData);
	        pkt->senderState = new ProtocolTester::SenderState(this);

	        // a rejected request is not expected here, so treat it as fatal
	        if (!port->sendTimingReq(pkt)) {
	            panic("Not expecting a failed sendTimingReq\n");
	        }

	        // add an outstanding store
	        addOutstandingReqs(outstandingStores, address, lane, location,
	                           new_value);

	        // increment the number of outstanding ld_st requests
	        pendingLdStCount++;
	    }
	}

	/**
	 * Handle an ATOMIC action.
	 *
	 * Only logs and sanity-checks the thread state; no request is sent
	 * because no DMA protocol exists with atomics.
	 */
	void
	DmaThread::issueAtomicOps()
	{
	    DPRINTF(ProtocolTest, "Issuing Atomic Op ...\n");

	    assert(curAction != nullptr);
	    assert(curAction->getType() == Episode::Action::Type::ATOMIC);

	    // nothing at all may be in flight when an atomic action is reached
	    assert(pendingFenceCount == 0 &&
	           pendingLdStCount == 0 &&
	           pendingAtomicCount == 0);

	    // intentionally a no-op beyond the checks above
	}

	/**
	 * Handle an ACQUIRE action.
	 *
	 * Only logs and sanity-checks the thread state; no request is sent
	 * because acquire does not apply to DMA threads.
	 */
	void
	DmaThread::issueAcquireOp()
	{
	    DPRINTF(ProtocolTest, "Issuing Acquire Op ...\n");

	    assert(curAction != nullptr);
	    assert(curAction->getType() == Episode::Action::Type::ACQUIRE);

	    // nothing at all may be in flight when an acquire action is reached
	    assert(pendingFenceCount == 0 &&
	           pendingLdStCount == 0 &&
	           pendingAtomicCount == 0);

	    // intentionally a no-op beyond the checks above
	}

	/**
	 * Handle a RELEASE action.
	 *
	 * Only logs and sanity-checks the thread state; no request is sent
	 * because release does not apply to DMA threads.
	 */
	void
	DmaThread::issueReleaseOp()
	{
	    DPRINTF(ProtocolTest, "Issuing Release Op ...\n");

	    assert(curAction != nullptr);
	    assert(curAction->getType() == Episode::Action::Type::RELEASE);

	    // nothing at all may be in flight when a release action is reached
	    assert(pendingFenceCount == 0 &&
	           pendingLdStCount == 0 &&
	           pendingAtomicCount == 0);

	    // intentionally a no-op beyond the checks above
	}

	/**
	 * Process the response packet of a previously issued request.
	 *
	 * Depending on the response command, the matching entry is popped from
	 * the outstanding atomic, load or store table, the returned data is
	 * validated (atomics and loads) and/or the log table is updated (atomics
	 * and stores), and the corresponding pending counter is decremented. Any
	 * other command is fatal. The packet and its sender state are deleted
	 * here, and a wakeup is scheduled unless the thread event already is.
	 *
	 * @param pkt response packet; must not be null
	 */
	void
	DmaThread::hitCallback(PacketPtr pkt)
	{
	    assert(pkt != nullptr);
	    MemCmd cmd = pkt->cmd;
	    Addr resp_addr = pkt->getAddr();

	    DPRINTF(ProtocolTest,
	            "%s Episode %d: hitCallback - Command %s - Addr %s\n",
	            this->getName(), curEpisode->getEpisodeId(),
	            cmd.toString(), ruby::printAddress(resp_addr));

	    if (cmd == MemCmd::SwapResp) {
	        // An atomic came back. NOTE(review): issueAtomicOps() in this
	        // file never sends a request, so this path is presumably never
	        // taken for DMA threads -- confirm.
	        assert(pendingAtomicCount > 0);
	        assert(pendingLdStCount == 0);
	        assert(outstandingAtomics.count(resp_addr) > 0);

	        // data returned by the atomic
	        Value ret_val = *(pkt->getPtr<Value>());

	        // match the response to its request and check the returned value
	        OutstandingReq done = popOutstandingReq(outstandingAtomics,
	                                                resp_addr);
	        assert(done.lane == 0);
	        validateAtomicResp(done.origLoc, done.lane, ret_val);

	        // record the returned value in the log table
	        addrManager->updateLogTable(done.origLoc, threadId,
	                                    curEpisode->getEpisodeId(), ret_val,
	                                    curTick(), 0);

	        // one atomic fewer in flight
	        pendingAtomicCount--;
	    } else if (cmd == MemCmd::ReadResp) {
	        // a load came back
	        assert(pendingLdStCount > 0);
	        assert(pendingAtomicCount == 0);
	        assert(outstandingLoads.count(resp_addr) > 0);

	        // data returned by the load
	        Value ret_val = *(pkt->getPtr<Value>());
	        OutstandingReq done = popOutstandingReq(outstandingLoads,
	                                                resp_addr);
	        assert(done.lane == 0);
	        validateLoadResp(done.origLoc, done.lane, ret_val);

	        // one ld/st request fewer in flight
	        pendingLdStCount--;
	    } else if (cmd == MemCmd::WriteResp) {
	        // a store came back
	        assert(pendingLdStCount > 0);
	        assert(pendingAtomicCount == 0);

	        // A write response carries nothing to validate. Popping the
	        // entry lets later requests that depend on this write proceed.
	        // Unlike GpuWavefront, pendingLdStCount is decremented right
	        // here because the write is guaranteed to have completed in
	        // downstream memory.
	        assert(outstandingStores.count(resp_addr) > 0);
	        OutstandingReq done = popOutstandingReq(outstandingStores,
	                                                resp_addr);
	        assert(done.storedValue != AddressManager::INVALID_VALUE);

	        // record the value that was written in the log table
	        addrManager->updateLogTable(done.origLoc, threadId,
	                                    curEpisode->getEpisodeId(),
	                                    done.storedValue,
	                                    curTick(), 0);

	        // one ld/st request fewer in flight
	        pendingLdStCount--;
	    } else {
	        panic("UnsupportedMemCmd response type: %s",
	              cmd.toString().c_str());
	    }

	    // the response has been fully consumed
	    delete pkt->senderState;
	    delete pkt;

	    // remember when this thread last made progress (deadlock check)
	    lastActiveCycle = curCycle();

	    // a new action may be issuable now; wake up unless already scheduled
	    if (!threadEvent.scheduled()) {
	        scheduleWakeup();
	    }
	}

	} // namespace gem5