blob: 82b0d3b252027451cdf1b3aebb3851f917986aa1 [file] [log] [blame]
/*
* Copyright (c) 2017 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Tuan Ta
*/
#include "cpu/testers/gpu_ruby_test/GpuWavefront.hh"

#include <cstring>

#include "debug/ProtocolTest.hh"
/**
 * Build a wavefront thread from its parameter object.
 *
 * Records the compute unit id, composes a printable thread name out of the
 * thread id and the CU id, and labels the thread's tick event.
 *
 * @param p parameter object; forwarded to the Thread base class and read
 *          for cu_id.
 */
GpuWavefront::GpuWavefront(const Params *p)
    : Thread(p)
{
    cuId = p->cu_id;

    // assemble "GpuWavefront(Thread ID = <tid>, CU ID = <cu>)" piece by piece
    std::string name = "GpuWavefront(Thread ID = ";
    name += std::to_string(threadId);
    name += ", CU ID = ";
    name += std::to_string(cuId);
    name += ")";
    threadName = name;

    threadEvent.setDesc("GpuWavefront tick");
}
/** Destructor: this class has no cleanup of its own to perform. */
GpuWavefront::~GpuWavefront() = default;
/**
 * Construct the GpuWavefront described by this parameter object.
 *
 * @return pointer to the newly allocated wavefront.
 */
GpuWavefront*
GpuWavefrontParams::create()
{
    GpuWavefront *wavefront = new GpuWavefront(this);
    return wavefront;
}
void
GpuWavefront::issueLoadOps()
{
assert(curAction);
assert(curAction->getType() == Episode::Action::Type::LOAD);
// we should not have any outstanding fence or atomic op at this point
assert(pendingFenceCount == 0);
assert(pendingAtomicCount == 0);
for (int lane = 0; lane < numLanes; ++lane) {
Location location = curAction->getLocation(lane);
assert(location >= AddressManager::INVALID_LOCATION);
if (location >= 0) {
Addr address = addrManager->getAddress(location);
DPRINTF(ProtocolTest, "%s Episode %d: Issuing Load - Addr %s\n",
this->getName(), curEpisode->getEpisodeId(),
printAddress(address));
int load_size = sizeof(Value);
// for now, assert address is 4-byte aligned
assert(address % load_size == 0);
Request *req = new Request(0, // asid
address, // virtual addr
load_size, // size in bytes
0, // flags
tester->masterId(), // port id
0, // pc
threadId, // thread_id
0);
req->setPaddr(address);
req->setReqInstSeqNum(tester->getActionSeqNum());
// set protocol-specific flags
setExtraRequestFlags(req);
PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
uint8_t* data = new uint8_t[load_size];
pkt->dataDynamic(data);
pkt->senderState = new ProtocolTester::SenderState(this);
if (!port->sendTimingReq(pkt)) {
panic("Not expected failed sendTimingReq\n");
}
// insert an outstanding load
addOutstandingReqs(outstandingLoads, address, lane, location);
// increment the number of outstanding ld_st requests
pendingLdStCount++;
}
}
}
/**
 * Issue one timing write request per participating lane of the current
 * STORE action.
 *
 * A lane whose location is negative is skipped (locations are asserted to
 * be no lower than AddressManager::INVALID_LOCATION). The value written is
 * the location's logged value plus one. Each request that is sent is
 * recorded in outstandingStores together with the stored value and counted
 * in pendingLdStCount. A rejected sendTimingReq is treated as fatal.
 */
void
GpuWavefront::issueStoreOps()
{
    assert(curAction);
    assert(curAction->getType() == Episode::Action::Type::STORE);
    // we should not have any outstanding fence or atomic op at this point
    assert(pendingFenceCount == 0);
    assert(pendingAtomicCount == 0);

    for (int lane = 0; lane < numLanes; ++lane) {
        Location location = curAction->getLocation(lane);
        assert(location >= AddressManager::INVALID_LOCATION);

        // this lane does not take part in the store
        if (location < 0) {
            continue;
        }

        // prepare the next value to store
        Value new_value = addrManager->getLoggedValue(location) + 1;
        Addr address = addrManager->getAddress(location);
        // must be aligned with store size
        assert(address % sizeof(Value) == 0);

        DPRINTF(ProtocolTest, "%s Episode %d: Issuing Store - Addr %s - "
                "Value %d\n", this->getName(),
                curEpisode->getEpisodeId(), printAddress(address),
                new_value);

        Request *req = new Request(0,                  // asid
                                   address,            // virtual addr
                                   sizeof(Value),      // size in bytes
                                   0,                  // flags
                                   tester->masterId(), // port id
                                   0,                  // pc
                                   threadId,           // thread_id
                                   0);
        req->setPaddr(address);
        req->setReqInstSeqNum(tester->getActionSeqNum());
        // set protocol-specific flags
        setExtraRequestFlags(req);

        PacketPtr pkt = new Packet(req, MemCmd::WriteReq);
        // copy the raw bytes of the value into the packet's data buffer;
        // memcpy replaces a hand-written byte loop that compared a signed
        // index against sizeof() and relied on a C-style pointer cast
        uint8_t *writeData = new uint8_t[sizeof(Value)];
        std::memcpy(writeData, &new_value, sizeof(Value));
        pkt->dataDynamic(writeData);
        pkt->senderState = new ProtocolTester::SenderState(this);

        if (!port->sendTimingReq(pkt)) {
            panic("Not expecting a failed sendTimingReq\n");
        }

        // add an outstanding store
        addOutstandingReqs(outstandingStores, address, lane, location,
                           new_value);
        // increment the number of outstanding ld_st requests
        pendingLdStCount++;
    }
}
void
GpuWavefront::issueAtomicOps()
{
assert(curAction);
assert(curAction->getType() == Episode::Action::Type::ATOMIC);
// we should not have any outstanding ops at this point
assert(pendingFenceCount == 0);
assert(pendingLdStCount == 0);
assert(pendingAtomicCount == 0);
// we use atomic_inc in the tester
Request::Flags flags = Request::ATOMIC_RETURN_OP;
for (int lane = 0; lane < numLanes; ++lane) {
Location location = curAction->getLocation(lane);
assert(location >= 0);
Addr address = addrManager->getAddress(location);
DPRINTF(ProtocolTest, "%s Episode %d: Issuing Atomic_Inc - Addr %s\n",
this->getName(), curEpisode->getEpisodeId(),
printAddress(address));
// must be aligned with store size
assert(address % sizeof(Value) == 0);
Request *req = new Request(0, // asid
address, // virtual addr
sizeof(Value), // size in bytes
flags, // flags
tester->masterId(), // port id
0, // pc
threadId, // thread_id
new AtomicOpInc<Value>());
req->setPaddr(address);
req->setReqInstSeqNum(tester->getActionSeqNum());
// set protocol-specific flags
setExtraRequestFlags(req);
PacketPtr pkt = new Packet(req, MemCmd::SwapReq);
uint8_t* data = new uint8_t[sizeof(Value)];
pkt->dataDynamic(data);
pkt->senderState = new ProtocolTester::SenderState(this);
if (!port->sendTimingReq(pkt)) {
panic("Not expecting failed sendTimingReq\n");
}
// add an outstanding atomic
addOutstandingReqs(outstandingAtomics, address, lane, location);
// increment the number of outstanding atomic ops
pendingAtomicCount++;
}
}
void
GpuWavefront::issueAcquireOp()
{
DPRINTF(ProtocolTest, "%s Episode %d: Issuing Acquire\n", this->getName(),
curEpisode->getEpisodeId());
assert(curAction);
assert(curAction->getType() == Episode::Action::Type::ACQUIRE);
// we should not have any outstanding ops at this point
assert(pendingFenceCount == 0);
assert(pendingLdStCount == 0);
assert(pendingAtomicCount == 0);
Request *acq_req = new Request(0,
0, // vaddr
0, // request size
0, // flags
tester->masterId(),
0,
threadId,
0);
acq_req->setPaddr(0);
acq_req->setReqInstSeqNum(tester->getActionSeqNum());
// set protocol-specific flags
setExtraRequestFlags(acq_req);
PacketPtr pkt = new Packet(acq_req, MemCmd::MemSyncReq);
pkt->senderState = new ProtocolTester::SenderState(this);
if (!port->sendTimingReq(pkt)) {
panic("Not expecting failed sendTimingReq\n");
}
// increment the number of outstanding fence requests
pendingFenceCount++;
}
void
GpuWavefront::issueReleaseOp()
{
DPRINTF(ProtocolTest, "%s Episode %d: Issuing Release\n", this->getName(),
curEpisode->getEpisodeId());
assert(curAction);
assert(curAction->getType() == Episode::Action::Type::RELEASE);
// we should not have any outstanding ops at this point
assert(pendingFenceCount == 0);
assert(pendingLdStCount == 0);
assert(pendingAtomicCount == 0);
Request *rel_req = new Request(0,
0, // vaddr
0, // request size
0, // flags
tester->masterId(),
0,
threadId,
0);
rel_req->setPaddr(0);
rel_req->setReqInstSeqNum(tester->getActionSeqNum());
// set protocol-specific flags
setExtraRequestFlags(rel_req);
PacketPtr pkt = new Packet(rel_req, MemCmd::MemSyncReq);
pkt->senderState = new ProtocolTester::SenderState(this);
if (!port->sendTimingReq(pkt)) {
panic("Not expecting failed sendTimingReq\n");
}
// increment the number of outstanding fence requests
pendingFenceCount++;
}
/**
 * Handle a response packet delivered to this wavefront.
 *
 * Dispatches on the response command:
 *  - MemSyncResp: a pending fence finished.
 *  - ReadResp:    a pending load returned; its value is validated.
 *  - WriteResp:   a pending store is popped and logged, but it stays
 *                 counted in pendingLdStCount until its completion ack.
 *  - SwapResp:    a pending atomic returned; its value is validated and
 *                 logged.
 *  - MessageResp: write completion ack; the store is no longer pending.
 * Any other command is fatal (panic).
 *
 * Afterwards the last active cycle is refreshed and a wakeup is scheduled
 * unless the thread event is already scheduled.
 *
 * @param pkt response packet; always deleted here. Its senderState and
 *            request are deleted as well, except for a WriteResp.
 */
void
GpuWavefront::hitCallback(PacketPtr pkt)
{
    assert(pkt);

    MemCmd response = pkt->cmd;
    Addr resp_addr = pkt->getAddr();

    DPRINTF(ProtocolTest, "%s Episode %d: hitCallback - Command %s - "
            "Addr %s\n", this->getName(),
            curEpisode->getEpisodeId(), response.toString(),
            printAddress(resp_addr));

    // set when the transaction continues past this callback, in which case
    // senderState and the request are not deleted below
    bool awaiting_completion = false;

    if (response == MemCmd::MemSyncResp) {
        // fence response: nothing to validate, just retire the fence
        assert(pendingFenceCount > 0);
        assert(pendingLdStCount == 0);
        assert(pendingAtomicCount == 0);

        pendingFenceCount--;
    } else if (response == MemCmd::ReadResp) {
        // load response
        assert(pendingLdStCount > 0);
        assert(pendingAtomicCount == 0);
        assert(outstandingLoads.count(resp_addr) > 0);

        // fetch the returned data and check it against the matching load
        Value loaded_value = *(pkt->getPtr<Value>());
        OutstandingReq pending_load =
            popOutstandingReq(outstandingLoads, resp_addr);
        validateLoadResp(pending_load.origLoc, pending_load.lane,
                         loaded_value);

        // the load is complete
        pendingLdStCount--;
    } else if (response == MemCmd::WriteResp) {
        // store response
        assert(pendingLdStCount > 0);
        assert(pendingAtomicCount == 0);

        // A write response needs no validation. The store is only removed
        // from the outstanding table so that later requests depending on
        // it can go ahead. pendingLdStCount is deliberately left alone:
        // the write has not completed in downstream memory yet, and the
        // counter is decremented when the write completion ack arrives.
        assert(outstandingStores.count(resp_addr) > 0);
        OutstandingReq pending_store =
            popOutstandingReq(outstandingStores, resp_addr);
        assert(pending_store.storedValue != AddressManager::INVALID_VALUE);

        // record the stored value in the log table
        addrManager->updateLogTable(pending_store.origLoc, threadId,
                                    curEpisode->getEpisodeId(),
                                    pending_store.storedValue,
                                    curTick(),
                                    cuId);

        // still waiting for the write completion ack
        awaiting_completion = true;
    } else if (response == MemCmd::SwapResp) {
        // atomic response
        assert(pendingAtomicCount > 0);
        assert(pendingLdStCount == 0);
        assert(outstandingAtomics.count(resp_addr) > 0);

        // fetch the returned data and check it against the matching atomic
        Value atomic_value = *(pkt->getPtr<Value>());
        OutstandingReq pending_atomic =
            popOutstandingReq(outstandingAtomics, resp_addr);
        validateAtomicResp(pending_atomic.origLoc, pending_atomic.lane,
                           atomic_value);

        // record the returned value in the log table
        addrManager->updateLogTable(pending_atomic.origLoc, threadId,
                                    curEpisode->getEpisodeId(), atomic_value,
                                    curTick(),
                                    cuId);

        // the atomic is complete
        pendingAtomicCount--;
    } else if (response == MemCmd::MessageResp) {
        // write completion ack: the store is finally complete
        assert(pendingLdStCount > 0);
        assert(pendingAtomicCount == 0);

        pendingLdStCount--;
    } else {
        panic("Unsupported MemCmd response type");
    }

    if (!awaiting_completion) {
        // the transaction is over, so senderState and the request can go
        delete pkt->senderState;
        delete pkt->req;
    }
    delete pkt;

    // remember when we were last active, for deadlock detection
    lastActiveCycle = curCycle();

    // an action may have become issuable; wake up unless already scheduled
    if (!threadEvent.scheduled()) {
        scheduleWakeup();
    }
}
/**
 * Hook called on every request before it is wrapped in a packet.
 *
 * This implementation does nothing: the request keeps exactly the flags
 * it was constructed with.
 *
 * @param req request about to be issued (left unmodified).
 */
void
GpuWavefront::setExtraRequestFlags(Request* req)
{
    // nothing to add to the request here
}