blob: e3922aece53095282ff64ab82b71f9fdeb7ec3c0 [file] [log] [blame]
/*
* Copyright (c) 2011-2012, 2014, 2017-2019 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 2005-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __CPU_O3_LSQ_IMPL_HH__
#define __CPU_O3_LSQ_IMPL_HH__
#include <algorithm>
#include <list>
#include <string>
#include "base/logging.hh"
#include "cpu/o3/cpu.hh"
#include "cpu/o3/lsq.hh"
#include "debug/Drain.hh"
#include "debug/Fetch.hh"
#include "debug/HtmCpu.hh"
#include "debug/LSQ.hh"
#include "debug/Writeback.hh"
#include "params/DerivO3CPU.hh"
using namespace std;
template <class Impl>
LSQ<Impl>::LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
: cpu(cpu_ptr), iewStage(iew_ptr),
_cacheBlocked(false),
cacheStorePorts(params->cacheStorePorts), usedStorePorts(0),
cacheLoadPorts(params->cacheLoadPorts), usedLoadPorts(0),
lsqPolicy(params->smtLSQPolicy),
LQEntries(params->LQEntries),
SQEntries(params->SQEntries),
maxLQEntries(maxLSQAllocation(lsqPolicy, LQEntries, params->numThreads,
params->smtLSQThreshold)),
maxSQEntries(maxLSQAllocation(lsqPolicy, SQEntries, params->numThreads,
params->smtLSQThreshold)),
dcachePort(this, cpu_ptr),
numThreads(params->numThreads)
{
assert(numThreads > 0 && numThreads <= Impl::MaxThreads);
//**********************************************/
//************ Handle SMT Parameters ***********/
//**********************************************/
/* Run SMT olicy checks. */
if (lsqPolicy == SMTQueuePolicy::Dynamic) {
DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
} else if (lsqPolicy == SMTQueuePolicy::Partitioned) {
DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: "
"%i entries per LQ | %i entries per SQ\n",
maxLQEntries,maxSQEntries);
} else if (lsqPolicy == SMTQueuePolicy::Threshold) {
assert(params->smtLSQThreshold > params->LQEntries);
assert(params->smtLSQThreshold > params->SQEntries);
DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
"%i entries per LQ | %i entries per SQ\n",
maxLQEntries,maxSQEntries);
} else {
panic("Invalid LSQ sharing policy. Options are: Dynamic, "
"Partitioned, Threshold");
}
thread.reserve(numThreads);
for (ThreadID tid = 0; tid < numThreads; tid++) {
thread.emplace_back(maxLQEntries, maxSQEntries);
thread[tid].init(cpu, iew_ptr, params, this, tid);
thread[tid].setDcachePort(&dcachePort);
}
}
template<class Impl>
std::string
LSQ<Impl>::name() const
{
return iewStage->name() + ".lsq";
}
template<class Impl>
void
LSQ<Impl>::setActiveThreads(list<ThreadID> *at_ptr)
{
activeThreads = at_ptr;
assert(activeThreads != 0);
}
template <class Impl>
void
LSQ<Impl>::drainSanityCheck() const
{
assert(isDrained());
for (ThreadID tid = 0; tid < numThreads; tid++)
thread[tid].drainSanityCheck();
}
template <class Impl>
bool
LSQ<Impl>::isDrained() const
{
bool drained(true);
if (!lqEmpty()) {
DPRINTF(Drain, "Not drained, LQ not empty.\n");
drained = false;
}
if (!sqEmpty()) {
DPRINTF(Drain, "Not drained, SQ not empty.\n");
drained = false;
}
return drained;
}
template <class Impl>
void
LSQ<Impl>::takeOverFrom()
{
usedStorePorts = 0;
_cacheBlocked = false;
for (ThreadID tid = 0; tid < numThreads; tid++) {
thread[tid].takeOverFrom();
}
}
template <class Impl>
void
LSQ<Impl>::tick()
{
// Re-issue loads which got blocked on the per-cycle load ports limit.
if (usedLoadPorts == cacheLoadPorts && !_cacheBlocked)
iewStage->cacheUnblocked();
usedLoadPorts = 0;
usedStorePorts = 0;
}
template<class Impl>
bool
LSQ<Impl>::cacheBlocked() const
{
return _cacheBlocked;
}
template<class Impl>
void
LSQ<Impl>::cacheBlocked(bool v)
{
_cacheBlocked = v;
}
template<class Impl>
bool
LSQ<Impl>::cachePortAvailable(bool is_load) const
{
bool ret;
if (is_load) {
ret = usedLoadPorts < cacheLoadPorts;
} else {
ret = usedStorePorts < cacheStorePorts;
}
return ret;
}
template<class Impl>
void
LSQ<Impl>::cachePortBusy(bool is_load)
{
assert(cachePortAvailable(is_load));
if (is_load) {
usedLoadPorts++;
} else {
usedStorePorts++;
}
}
template<class Impl>
void
LSQ<Impl>::insertLoad(const DynInstPtr &load_inst)
{
ThreadID tid = load_inst->threadNumber;
thread[tid].insertLoad(load_inst);
}
template<class Impl>
void
LSQ<Impl>::insertStore(const DynInstPtr &store_inst)
{
ThreadID tid = store_inst->threadNumber;
thread[tid].insertStore(store_inst);
}
template<class Impl>
Fault
LSQ<Impl>::executeLoad(const DynInstPtr &inst)
{
ThreadID tid = inst->threadNumber;
return thread[tid].executeLoad(inst);
}
template<class Impl>
Fault
LSQ<Impl>::executeStore(const DynInstPtr &inst)
{
ThreadID tid = inst->threadNumber;
return thread[tid].executeStore(inst);
}
template<class Impl>
void
LSQ<Impl>::writebackStores()
{
list<ThreadID>::iterator threads = activeThreads->begin();
list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
ThreadID tid = *threads++;
if (numStoresToWB(tid) > 0) {
DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores "
"available for Writeback.\n", tid, numStoresToWB(tid));
}
thread[tid].writebackStores();
}
}
template<class Impl>
bool
LSQ<Impl>::violation()
{
/* Answers: Does Anybody Have a Violation?*/
list<ThreadID>::iterator threads = activeThreads->begin();
list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
ThreadID tid = *threads++;
if (thread[tid].violation())
return true;
}
return false;
}
template <class Impl>
void
LSQ<Impl>::recvReqRetry()
{
iewStage->cacheUnblocked();
cacheBlocked(false);
for (ThreadID tid : *activeThreads) {
thread[tid].recvRetry();
}
}
template <class Impl>
void
LSQ<Impl>::completeDataAccess(PacketPtr pkt)
{
auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
thread[cpu->contextToThread(senderState->contextId())]
.completeDataAccess(pkt);
}
template <class Impl>
bool
LSQ<Impl>::recvTimingResp(PacketPtr pkt)
{
if (pkt->isError())
DPRINTF(LSQ, "Got error packet back for address: %#X\n",
pkt->getAddr());
auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
panic_if(!senderState, "Got packet back with unknown sender state\n");
thread[cpu->contextToThread(senderState->contextId())].recvTimingResp(pkt);
if (pkt->isInvalidate()) {
// This response also contains an invalidate; e.g. this can be the case
// if cmd is ReadRespWithInvalidate.
//
// The calling order between completeDataAccess and checkSnoop matters.
// By calling checkSnoop after completeDataAccess, we ensure that the
// fault set by checkSnoop is not lost. Calling writeback (more
// specifically inst->completeAcc) in completeDataAccess overwrites
// fault, and in case this instruction requires squashing (as
// determined by checkSnoop), the ReExec fault set by checkSnoop would
// be lost otherwise.
DPRINTF(LSQ, "received invalidation with response for addr:%#x\n",
pkt->getAddr());
for (ThreadID tid = 0; tid < numThreads; tid++) {
thread[tid].checkSnoop(pkt);
}
}
// Update the LSQRequest state (this may delete the request)
senderState->request()->packetReplied();
return true;
}
template <class Impl>
void
LSQ<Impl>::recvTimingSnoopReq(PacketPtr pkt)
{
DPRINTF(LSQ, "received pkt for addr:%#x %s\n", pkt->getAddr(),
pkt->cmdString());
// must be a snoop
if (pkt->isInvalidate()) {
DPRINTF(LSQ, "received invalidation for addr:%#x\n",
pkt->getAddr());
for (ThreadID tid = 0; tid < numThreads; tid++) {
thread[tid].checkSnoop(pkt);
}
}
}
template<class Impl>
int
LSQ<Impl>::getCount()
{
unsigned total = 0;
list<ThreadID>::iterator threads = activeThreads->begin();
list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
ThreadID tid = *threads++;
total += getCount(tid);
}
return total;
}
template<class Impl>
int
LSQ<Impl>::numLoads()
{
unsigned total = 0;
list<ThreadID>::iterator threads = activeThreads->begin();
list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
ThreadID tid = *threads++;
total += numLoads(tid);
}
return total;
}
template<class Impl>
int
LSQ<Impl>::numStores()
{
unsigned total = 0;
list<ThreadID>::iterator threads = activeThreads->begin();
list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
ThreadID tid = *threads++;
total += thread[tid].numStores();
}
return total;
}
template<class Impl>
unsigned
LSQ<Impl>::numFreeLoadEntries()
{
unsigned total = 0;
list<ThreadID>::iterator threads = activeThreads->begin();
list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
ThreadID tid = *threads++;
total += thread[tid].numFreeLoadEntries();
}
return total;
}
template<class Impl>
unsigned
LSQ<Impl>::numFreeStoreEntries()
{
unsigned total = 0;
list<ThreadID>::iterator threads = activeThreads->begin();
list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
ThreadID tid = *threads++;
total += thread[tid].numFreeStoreEntries();
}
return total;
}
template<class Impl>
unsigned
LSQ<Impl>::numFreeLoadEntries(ThreadID tid)
{
return thread[tid].numFreeLoadEntries();
}
template<class Impl>
unsigned
LSQ<Impl>::numFreeStoreEntries(ThreadID tid)
{
return thread[tid].numFreeStoreEntries();
}
template<class Impl>
bool
LSQ<Impl>::isFull()
{
list<ThreadID>::iterator threads = activeThreads->begin();
list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
ThreadID tid = *threads++;
if (!(thread[tid].lqFull() || thread[tid].sqFull()))
return false;
}
return true;
}
template<class Impl>
bool
LSQ<Impl>::isFull(ThreadID tid)
{
//@todo: Change to Calculate All Entries for
//Dynamic Policy
if (lsqPolicy == SMTQueuePolicy::Dynamic)
return isFull();
else
return thread[tid].lqFull() || thread[tid].sqFull();
}
template<class Impl>
bool
LSQ<Impl>::isEmpty() const
{
return lqEmpty() && sqEmpty();
}
template<class Impl>
bool
LSQ<Impl>::lqEmpty() const
{
list<ThreadID>::const_iterator threads = activeThreads->begin();
list<ThreadID>::const_iterator end = activeThreads->end();
while (threads != end) {
ThreadID tid = *threads++;
if (!thread[tid].lqEmpty())
return false;
}
return true;
}
template<class Impl>
bool
LSQ<Impl>::sqEmpty() const
{
list<ThreadID>::const_iterator threads = activeThreads->begin();
list<ThreadID>::const_iterator end = activeThreads->end();
while (threads != end) {
ThreadID tid = *threads++;
if (!thread[tid].sqEmpty())
return false;
}
return true;
}
template<class Impl>
bool
LSQ<Impl>::lqFull()
{
list<ThreadID>::iterator threads = activeThreads->begin();
list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
ThreadID tid = *threads++;
if (!thread[tid].lqFull())
return false;
}
return true;
}
template<class Impl>
bool
LSQ<Impl>::lqFull(ThreadID tid)
{
//@todo: Change to Calculate All Entries for
//Dynamic Policy
if (lsqPolicy == SMTQueuePolicy::Dynamic)
return lqFull();
else
return thread[tid].lqFull();
}
template<class Impl>
bool
LSQ<Impl>::sqFull()
{
list<ThreadID>::iterator threads = activeThreads->begin();
list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
ThreadID tid = *threads++;
if (!sqFull(tid))
return false;
}
return true;
}
template<class Impl>
bool
LSQ<Impl>::sqFull(ThreadID tid)
{
//@todo: Change to Calculate All Entries for
//Dynamic Policy
if (lsqPolicy == SMTQueuePolicy::Dynamic)
return sqFull();
else
return thread[tid].sqFull();
}
template<class Impl>
bool
LSQ<Impl>::isStalled()
{
list<ThreadID>::iterator threads = activeThreads->begin();
list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
ThreadID tid = *threads++;
if (!thread[tid].isStalled())
return false;
}
return true;
}
template<class Impl>
bool
LSQ<Impl>::isStalled(ThreadID tid)
{
if (lsqPolicy == SMTQueuePolicy::Dynamic)
return isStalled();
else
return thread[tid].isStalled();
}
template<class Impl>
bool
LSQ<Impl>::hasStoresToWB()
{
list<ThreadID>::iterator threads = activeThreads->begin();
list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
ThreadID tid = *threads++;
if (hasStoresToWB(tid))
return true;
}
return false;
}
template<class Impl>
bool
LSQ<Impl>::willWB()
{
list<ThreadID>::iterator threads = activeThreads->begin();
list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
ThreadID tid = *threads++;
if (willWB(tid))
return true;
}
return false;
}
template<class Impl>
void
LSQ<Impl>::dumpInsts() const
{
list<ThreadID>::const_iterator threads = activeThreads->begin();
list<ThreadID>::const_iterator end = activeThreads->end();
while (threads != end) {
ThreadID tid = *threads++;
thread[tid].dumpInsts();
}
}
template<class Impl>
Fault
LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
unsigned int size, Addr addr, Request::Flags flags,
uint64_t *res, AtomicOpFunctorPtr amo_op,
const std::vector<bool>& byte_enable)
{
// This comming request can be either load, store or atomic.
// Atomic request has a corresponding pointer to its atomic memory
// operation
M5_VAR_USED bool isAtomic = !isLoad && amo_op;
ThreadID tid = cpu->contextToThread(inst->contextId());
auto cacheLineSize = cpu->cacheLineSize();
bool needs_burst = transferNeedsBurst(addr, size, cacheLineSize);
LSQRequest* req = nullptr;
// Atomic requests that access data across cache line boundary are
// currently not allowed since the cache does not guarantee corresponding
// atomic memory operations to be executed atomically across a cache line.
// For ISAs such as x86 that supports cross-cache-line atomic instructions,
// the cache needs to be modified to perform atomic update to both cache
// lines. For now, such cross-line update is not supported.
assert(!isAtomic || (isAtomic && !needs_burst));
const bool htm_cmd = isLoad && (flags & Request::HTM_CMD);
if (inst->translationStarted()) {
req = inst->savedReq;
assert(req);
} else {
if (htm_cmd) {
assert(addr == 0x0lu);
assert(size == 8);
req = new HtmCmdRequest(&thread[tid], inst, flags);
} else if (needs_burst) {
req = new SplitDataRequest(&thread[tid], inst, isLoad, addr,
size, flags, data, res);
} else {
req = new SingleDataRequest(&thread[tid], inst, isLoad, addr,
size, flags, data, res, std::move(amo_op));
}
assert(req);
req->_byteEnable = byte_enable;
inst->setRequest();
req->taskId(cpu->taskId());
// There might be fault from a previous execution attempt if this is
// a strictly ordered load
inst->getFault() = NoFault;
req->initiateTranslation();
}
/* This is the place were instructions get the effAddr. */
if (req->isTranslationComplete()) {
if (req->isMemAccessRequired()) {
inst->effAddr = req->getVaddr();
inst->effSize = size;
inst->effAddrValid(true);
if (cpu->checker) {
inst->reqToVerify = std::make_shared<Request>(*req->request());
}
Fault fault;
if (isLoad)
fault = cpu->read(req, inst->lqIdx);
else
fault = cpu->write(req, data, inst->sqIdx);
// inst->getFault() may have the first-fault of a
// multi-access split request at this point.
// Overwrite that only if we got another type of fault
// (e.g. re-exec).
if (fault != NoFault)
inst->getFault() = fault;
} else if (isLoad) {
inst->setMemAccPredicate(false);
// Commit will have to clean up whatever happened. Set this
// instruction as executed.
inst->setExecuted();
}
}
if (inst->traceData)
inst->traceData->setMem(addr, size, flags);
return inst->getFault();
}
template<class Impl>
void
LSQ<Impl>::SingleDataRequest::finish(const Fault &fault, const RequestPtr &req,
ThreadContext* tc, BaseTLB::Mode mode)
{
_fault.push_back(fault);
numInTranslationFragments = 0;
numTranslatedFragments = 1;
/* If the instruction has been squahsed, let the request know
* as it may have to self-destruct. */
if (_inst->isSquashed()) {
this->squashTranslation();
} else {
_inst->strictlyOrdered(req->isStrictlyOrdered());
flags.set(Flag::TranslationFinished);
if (fault == NoFault) {
_inst->physEffAddr = req->getPaddr();
_inst->memReqFlags = req->getFlags();
if (req->isCondSwap()) {
assert(_res);
req->setExtraData(*_res);
}
setState(State::Request);
} else {
setState(State::Fault);
}
LSQRequest::_inst->fault = fault;
LSQRequest::_inst->translationCompleted(true);
}
}
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::finish(const Fault &fault, const RequestPtr &req,
ThreadContext* tc, BaseTLB::Mode mode)
{
int i;
for (i = 0; i < _requests.size() && _requests[i] != req; i++);
assert(i < _requests.size());
_fault[i] = fault;
numInTranslationFragments--;
numTranslatedFragments++;
if (fault == NoFault)
mainReq->setFlags(req->getFlags());
if (numTranslatedFragments == _requests.size()) {
if (_inst->isSquashed()) {
this->squashTranslation();
} else {
_inst->strictlyOrdered(mainReq->isStrictlyOrdered());
flags.set(Flag::TranslationFinished);
_inst->translationCompleted(true);
for (i = 0; i < _fault.size() && _fault[i] == NoFault; i++);
if (i > 0) {
_inst->physEffAddr = request(0)->getPaddr();
_inst->memReqFlags = mainReq->getFlags();
if (mainReq->isCondSwap()) {
assert (i == _fault.size());
assert(_res);
mainReq->setExtraData(*_res);
}
if (i == _fault.size()) {
_inst->fault = NoFault;
setState(State::Request);
} else {
_inst->fault = _fault[i];
setState(State::PartialFault);
}
} else {
_inst->fault = _fault[0];
setState(State::Fault);
}
}
}
}
template<class Impl>
void
LSQ<Impl>::SingleDataRequest::initiateTranslation()
{
assert(_requests.size() == 0);
this->addRequest(_addr, _size, _byteEnable);
if (_requests.size() > 0) {
_requests.back()->setReqInstSeqNum(_inst->seqNum);
_requests.back()->taskId(_taskId);
_inst->translationStarted(true);
setState(State::Translation);
flags.set(Flag::TranslationStarted);
_inst->savedReq = this;
sendFragmentToTranslation(0);
} else {
_inst->setMemAccPredicate(false);
}
}
template<class Impl>
PacketPtr
LSQ<Impl>::SplitDataRequest::mainPacket()
{
return _mainPacket;
}
template<class Impl>
RequestPtr
LSQ<Impl>::SplitDataRequest::mainRequest()
{
return mainReq;
}
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::initiateTranslation()
{
auto cacheLineSize = _port.cacheLineSize();
Addr base_addr = _addr;
Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize);
Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize);
uint32_t size_so_far = 0;
mainReq = std::make_shared<Request>(base_addr,
_size, _flags, _inst->requestorId(),
_inst->instAddr(), _inst->contextId());
mainReq->setByteEnable(_byteEnable);
// Paddr is not used in mainReq. However, we will accumulate the flags
// from the sub requests into mainReq by calling setFlags() in finish().
// setFlags() assumes that paddr is set so flip the paddr valid bit here to
// avoid a potential assert in setFlags() when we call it from finish().
mainReq->setPaddr(0);
/* Get the pre-fix, possibly unaligned. */
auto it_start = _byteEnable.begin();
auto it_end = _byteEnable.begin() + (next_addr - base_addr);
this->addRequest(base_addr, next_addr - base_addr,
std::vector<bool>(it_start, it_end));
size_so_far = next_addr - base_addr;
/* We are block aligned now, reading whole blocks. */
base_addr = next_addr;
while (base_addr != final_addr) {
auto it_start = _byteEnable.begin() + size_so_far;
auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize;
this->addRequest(base_addr, cacheLineSize,
std::vector<bool>(it_start, it_end));
size_so_far += cacheLineSize;
base_addr += cacheLineSize;
}
/* Deal with the tail. */
if (size_so_far < _size) {
auto it_start = _byteEnable.begin() + size_so_far;
auto it_end = _byteEnable.end();
this->addRequest(base_addr, _size - size_so_far,
std::vector<bool>(it_start, it_end));
}
if (_requests.size() > 0) {
/* Setup the requests and send them to translation. */
for (auto& r: _requests) {
r->setReqInstSeqNum(_inst->seqNum);
r->taskId(_taskId);
}
_inst->translationStarted(true);
setState(State::Translation);
flags.set(Flag::TranslationStarted);
this->_inst->savedReq = this;
numInTranslationFragments = 0;
numTranslatedFragments = 0;
_fault.resize(_requests.size());
for (uint32_t i = 0; i < _requests.size(); i++) {
sendFragmentToTranslation(i);
}
} else {
_inst->setMemAccPredicate(false);
}
}
template<class Impl>
void
LSQ<Impl>::LSQRequest::sendFragmentToTranslation(int i)
{
numInTranslationFragments++;
_port.dTLB()->translateTiming(
this->request(i),
this->_inst->thread->getTC(), this,
this->isLoad() ? BaseTLB::Read : BaseTLB::Write);
}
template<class Impl>
bool
LSQ<Impl>::SingleDataRequest::recvTimingResp(PacketPtr pkt)
{
assert(_numOutstandingPackets == 1);
auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
flags.set(Flag::Complete);
state->outstanding--;
assert(pkt == _packets.front());
_port.completeDataAccess(pkt);
return true;
}
template<class Impl>
bool
LSQ<Impl>::SplitDataRequest::recvTimingResp(PacketPtr pkt)
{
auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
uint32_t pktIdx = 0;
while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
pktIdx++;
assert(pktIdx < _packets.size());
numReceivedPackets++;
state->outstanding--;
if (numReceivedPackets == _packets.size()) {
flags.set(Flag::Complete);
/* Assemble packets. */
PacketPtr resp = isLoad()
? Packet::createRead(mainReq)
: Packet::createWrite(mainReq);
if (isLoad())
resp->dataStatic(_inst->memData);
else
resp->dataStatic(_data);
resp->senderState = _senderState;
_port.completeDataAccess(resp);
delete resp;
}
return true;
}
template<class Impl>
void
LSQ<Impl>::SingleDataRequest::buildPackets()
{
assert(_senderState);
/* Retries do not create new packets. */
if (_packets.size() == 0) {
_packets.push_back(
isLoad()
? Packet::createRead(request())
: Packet::createWrite(request()));
_packets.back()->dataStatic(_inst->memData);
_packets.back()->senderState = _senderState;
// hardware transactional memory
// If request originates in a transaction (not necessarily a HtmCmd),
// then the packet should be marked as such.
if (_inst->inHtmTransactionalState()) {
_packets.back()->setHtmTransactional(
_inst->getHtmTransactionUid());
DPRINTF(HtmCpu,
"HTM %s pc=0x%lx - vaddr=0x%lx - paddr=0x%lx - htmUid=%u\n",
isLoad() ? "LD" : "ST",
_inst->instAddr(),
_packets.back()->req->hasVaddr() ?
_packets.back()->req->getVaddr() : 0lu,
_packets.back()->getAddr(),
_inst->getHtmTransactionUid());
}
}
assert(_packets.size() == 1);
}
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::buildPackets()
{
/* Extra data?? */
Addr base_address = _addr;
if (_packets.size() == 0) {
/* New stuff */
if (isLoad()) {
_mainPacket = Packet::createRead(mainReq);
_mainPacket->dataStatic(_inst->memData);
// hardware transactional memory
// If request originates in a transaction,
// packet should be marked as such
if (_inst->inHtmTransactionalState()) {
_mainPacket->setHtmTransactional(
_inst->getHtmTransactionUid());
DPRINTF(HtmCpu,
"HTM LD.0 pc=0x%lx-vaddr=0x%lx-paddr=0x%lx-htmUid=%u\n",
_inst->instAddr(),
_mainPacket->req->hasVaddr() ?
_mainPacket->req->getVaddr() : 0lu,
_mainPacket->getAddr(),
_inst->getHtmTransactionUid());
}
}
for (int i = 0; i < _requests.size() && _fault[i] == NoFault; i++) {
RequestPtr r = _requests[i];
PacketPtr pkt = isLoad() ? Packet::createRead(r)
: Packet::createWrite(r);
ptrdiff_t offset = r->getVaddr() - base_address;
if (isLoad()) {
pkt->dataStatic(_inst->memData + offset);
} else {
uint8_t* req_data = new uint8_t[r->getSize()];
std::memcpy(req_data,
_inst->memData + offset,
r->getSize());
pkt->dataDynamic(req_data);
}
pkt->senderState = _senderState;
_packets.push_back(pkt);
// hardware transactional memory
// If request originates in a transaction,
// packet should be marked as such
if (_inst->inHtmTransactionalState()) {
_packets.back()->setHtmTransactional(
_inst->getHtmTransactionUid());
DPRINTF(HtmCpu,
"HTM %s.%d pc=0x%lx-vaddr=0x%lx-paddr=0x%lx-htmUid=%u\n",
isLoad() ? "LD" : "ST",
i+1,
_inst->instAddr(),
_packets.back()->req->hasVaddr() ?
_packets.back()->req->getVaddr() : 0lu,
_packets.back()->getAddr(),
_inst->getHtmTransactionUid());
}
}
}
assert(_packets.size() > 0);
}
template<class Impl>
void
LSQ<Impl>::SingleDataRequest::sendPacketToCache()
{
assert(_numOutstandingPackets == 0);
if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0)))
_numOutstandingPackets = 1;
}
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::sendPacketToCache()
{
/* Try to send the packets. */
while (numReceivedPackets + _numOutstandingPackets < _packets.size() &&
lsqUnit()->trySendPacket(isLoad(),
_packets.at(numReceivedPackets + _numOutstandingPackets))) {
_numOutstandingPackets++;
}
}
template<class Impl>
Cycles
LSQ<Impl>::SingleDataRequest::handleLocalAccess(
ThreadContext *thread, PacketPtr pkt)
{
return pkt->req->localAccessor(thread, pkt);
}
template<class Impl>
Cycles
LSQ<Impl>::SplitDataRequest::handleLocalAccess(
ThreadContext *thread, PacketPtr mainPkt)
{
Cycles delay(0);
unsigned offset = 0;
for (auto r: _requests) {
PacketPtr pkt =
new Packet(r, isLoad() ? MemCmd::ReadReq : MemCmd::WriteReq);
pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
Cycles d = r->localAccessor(thread, pkt);
if (d > delay)
delay = d;
offset += r->getSize();
delete pkt;
}
return delay;
}
template<class Impl>
bool
LSQ<Impl>::SingleDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
{
return ( (LSQRequest::_requests[0]->getPaddr() & blockMask) == blockAddr);
}
/**
* Caches may probe into the load-store queue to enforce memory ordering
* guarantees. This method supports probes by providing a mechanism to compare
* snoop messages with requests tracked by the load-store queue.
*
* Consistency models must enforce ordering constraints. TSO, for instance,
* must prevent memory reorderings except stores which are reordered after
* loads. The reordering restrictions negatively impact performance by
* cutting down on memory level parallelism. However, the core can regain
* performance by generating speculative loads. Speculative loads may issue
* without affecting correctness if precautions are taken to handle invalid
* memory orders. The load queue must squash under memory model violations.
* Memory model violations may occur when block ownership is granted to
* another core or the block cannot be accurately monitored by the load queue.
*/
template<class Impl>
bool
LSQ<Impl>::SplitDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
{
bool is_hit = false;
for (auto &r: _requests) {
/**
* The load-store queue handles partial faults which complicates this
* method. Physical addresses must be compared between requests and
* snoops. Some requests will not have a valid physical address, since
* partial faults may have outstanding translations. Therefore, the
* existence of a valid request address must be checked before
* comparing block hits. We assume no pipeline squash is needed if a
* valid request address does not exist.
*/
if (r->hasPaddr() && (r->getPaddr() & blockMask) == blockAddr) {
is_hit = true;
break;
}
}
return is_hit;
}
template <class Impl>
bool
LSQ<Impl>::DcachePort::recvTimingResp(PacketPtr pkt)
{
return lsq->recvTimingResp(pkt);
}
template <class Impl>
void
LSQ<Impl>::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
{
for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
cpu->wakeup(tid);
}
}
lsq->recvTimingSnoopReq(pkt);
}
template <class Impl>
void
LSQ<Impl>::DcachePort::recvReqRetry()
{
lsq->recvReqRetry();
}
template<class Impl>
LSQ<Impl>::HtmCmdRequest::HtmCmdRequest(LSQUnit* port,
const DynInstPtr& inst,
const Request::Flags& flags_) :
SingleDataRequest(port, inst, true, 0x0lu, 8, flags_,
nullptr, nullptr, nullptr)
{
assert(_requests.size() == 0);
this->addRequest(_addr, _size, _byteEnable);
if (_requests.size() > 0) {
_requests.back()->setReqInstSeqNum(_inst->seqNum);
_requests.back()->taskId(_taskId);
_requests.back()->setPaddr(_addr);
_requests.back()->setInstCount(_inst->getCpuPtr()->totalInsts());
_inst->strictlyOrdered(_requests.back()->isStrictlyOrdered());
_inst->fault = NoFault;
_inst->physEffAddr = _requests.back()->getPaddr();
_inst->memReqFlags = _requests.back()->getFlags();
_inst->savedReq = this;
setState(State::Translation);
} else {
panic("unexpected behaviour");
}
}
template<class Impl>
void
LSQ<Impl>::HtmCmdRequest::initiateTranslation()
{
// Transaction commands are implemented as loads to avoid significant
// changes to the cpu and memory interfaces
// The virtual and physical address uses a dummy value of 0x00
// Address translation does not really occur thus the code below
flags.set(Flag::TranslationStarted);
flags.set(Flag::TranslationFinished);
_inst->translationStarted(true);
_inst->translationCompleted(true);
setState(State::Request);
}
template<class Impl>
void
LSQ<Impl>::HtmCmdRequest::finish(const Fault &fault, const RequestPtr &req,
ThreadContext* tc, BaseTLB::Mode mode)
{
panic("unexpected behaviour");
}
#endif//__CPU_O3_LSQ_IMPL_HH__