/*
* Copyright (c) 2012-2013,2017-2022 Arm Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 2002-2005 The Regents of The University of Michigan
* Copyright (c) 2010,2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* @file
* Declaration of a request, the overall memory request consisting of
* the parts of the request that are persistent throughout the transaction.
*/
#ifndef __MEM_REQUEST_HH__
#define __MEM_REQUEST_HH__
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <limits>
#include <memory>
#include <vector>
#include "base/amo.hh"
#include "base/compiler.hh"
#include "base/extensible.hh"
#include "base/flags.hh"
#include "base/types.hh"
#include "cpu/inst_seq.hh"
#include "mem/htm.hh"
#include "sim/cur_tick.hh"
namespace gem5
{
/**
* Special TaskIds that are used for per-context-switch stats dumps
* and Cache Occupancy. Having too many tasks seems to be a problem
* with vector stats. 1024 seems to be a reasonable number that
* doesn't cause a problem with stats and is large enough for realistic
* benchmarks (Linux/Android boot, BBench, etc.).
*/
namespace context_switch_task_id
{
enum TaskId
{
MaxNormalTaskId = 1021, /* Maximum number of normal tasks */
Prefetcher = 1022, /* For cache lines brought in by prefetcher */
DMA = 1023, /* Mostly Table Walker */
Unknown = 1024,
NumTaskId
};
}
class Packet;
class Request;
class ThreadContext;
typedef std::shared_ptr<Request> RequestPtr;
typedef uint16_t RequestorID;
class Request : public Extensible<Request>
{
public:
typedef uint64_t FlagsType;
typedef uint8_t ArchFlagsType;
typedef gem5::Flags<FlagsType> Flags;
enum : FlagsType
{
/**
* Architecture specific flags.
*
* These bits in the flag field are reserved for
* architecture-specific code. For example, SPARC uses them to
* represent ASIs.
*/
ARCH_BITS = 0x000000FF,
/** The request was an instruction fetch. */
INST_FETCH = 0x00000100,
/** The virtual address is also the physical address. */
PHYSICAL = 0x00000200,
/**
* The request is to an uncacheable address.
*
* @note Uncacheable accesses may be reordered by CPU models. The
* STRICT_ORDER flag should be set if such reordering is
* undesirable.
*/
UNCACHEABLE = 0x00000400,
/**
* The request is required to be strictly ordered by <i>CPU
* models</i> and is non-speculative.
*
* A strictly ordered request is guaranteed to never be
* re-ordered or executed speculatively by a CPU model. The
* memory system may still reorder requests in caches unless
* the UNCACHEABLE flag is set as well.
*/
STRICT_ORDER = 0x00000800,
/** This request is made in privileged mode. */
PRIVILEGED = 0x00008000,
/**
* This is a write that targets and zeroes an entire
* cache block, so there is no need for a read/modify/write.
*/
CACHE_BLOCK_ZERO = 0x00010000,
/** The request should not cause a memory access. */
NO_ACCESS = 0x00080000,
/**
* This request will lock or unlock the accessed memory. When
* used with a load, the access locks the particular chunk of
* memory. When used with a store, it unlocks. The rule is
* that locked accesses have to be made up of a locked load,
* some operation on the data, and then a locked store.
*/
LOCKED_RMW = 0x00100000,
/** The request is a Load locked/store conditional. */
LLSC = 0x00200000,
/** This request is for a memory swap. */
MEM_SWAP = 0x00400000,
MEM_SWAP_COND = 0x00800000,
/** This request is a read which will be followed by a write. */
READ_MODIFY_WRITE = 0x00020000,
/** The request is a prefetch. */
PREFETCH = 0x01000000,
/** The request should be prefetched into the exclusive state. */
PF_EXCLUSIVE = 0x02000000,
/** The request should be marked as LRU. */
EVICT_NEXT = 0x04000000,
/** The request should be marked with ACQUIRE. */
ACQUIRE = 0x00020000,
/** The request should be marked with RELEASE. */
RELEASE = 0x00040000,
/** The request is an atomic that returns data. */
ATOMIC_RETURN_OP = 0x40000000,
/** The request is an atomic that does not return data. */
ATOMIC_NO_RETURN_OP = 0x80000000,
/** The request should be marked with KERNEL.
* Used to indicate the synchronization associated with a GPU kernel
* launch or completion.
*/
KERNEL = 0x00001000,
/** The request targets the secure memory space. */
SECURE = 0x10000000,
/** The request is a page table walk */
PT_WALK = 0x20000000,
/** The request invalidates a memory location */
INVALIDATE = 0x0000000100000000,
/** The request cleans a memory location */
CLEAN = 0x0000000200000000,
/** The request targets the point of unification */
DST_POU = 0x0000001000000000,
/** The request targets the point of coherence */
DST_POC = 0x0000002000000000,
/** Bits to define the destination of a request */
DST_BITS = 0x0000003000000000,
/** hardware transactional memory **/
/** The request starts a HTM transaction */
HTM_START = 0x0000010000000000,
/** The request commits a HTM transaction */
HTM_COMMIT = 0x0000020000000000,
/** The request cancels a HTM transaction */
HTM_CANCEL = 0x0000040000000000,
/** The request aborts a HTM transaction */
HTM_ABORT = 0x0000080000000000,
// What is the difference between HTM cancel and abort?
//
// HTM_CANCEL will originate from a user instruction, e.g.
// Arm's TCANCEL or x86's XABORT. This is an explicit request
// to end a transaction and restore from the last checkpoint.
//
// HTM_ABORT is an internally generated request used to synchronize
// a transaction's failure between the core and memory subsystem.
// If a transaction fails in the core, e.g. because an instruction
// within the transaction generates an exception, the core will prepare
// itself to stop fetching/executing more instructions and send an
// HTM_ABORT to the memory subsystem before restoring the checkpoint.
// Similarly, the transaction could fail in the memory subsystem and
// this will be communicated to the core via the Packet object.
// Once the core notices, it will do the same as the above and send
// an HTM_ABORT to the memory subsystem.
// An HTM_CANCEL sent to the memory subsystem will ultimately return
// to the core, which in turn will send an HTM_ABORT.
//
// This separation is necessary to ensure the disjoint components
// of the system work correctly together.
/** The Request is a TLB shootdown */
TLBI = 0x0000100000000000,
/** The Request is a TLB shootdown sync */
TLBI_SYNC = 0x0000200000000000,
/** The Request tells the CPU model that a
remote TLB Sync has been requested */
TLBI_EXT_SYNC = 0x0000400000000000,
/** The Request tells the interconnect that a
remote TLB Sync request has completed */
TLBI_EXT_SYNC_COMP = 0x0000800000000000,
/**
* These flags are *not* cleared when a Request object is
* reused (assigned a new address).
*/
STICKY_FLAGS = INST_FETCH
};
static const FlagsType STORE_NO_DATA = CACHE_BLOCK_ZERO |
CLEAN | INVALIDATE;
static const FlagsType HTM_CMD = HTM_START | HTM_COMMIT |
HTM_CANCEL | HTM_ABORT;
static const FlagsType TLBI_CMD = TLBI | TLBI_SYNC |
TLBI_EXT_SYNC | TLBI_EXT_SYNC_COMP;
/** Requestor Ids that are statically allocated
* @{*/
enum : RequestorID
{
/** This requestor id is used for writeback requests by the caches */
wbRequestorId = 0,
/**
* This requestor id is used for functional requests that
* don't come from a particular device
*/
funcRequestorId = 1,
/** This requestor id is used for message signaled interrupts */
intRequestorId = 2,
/**
* Invalid requestor id for assertion checking only. It is
* invalid behavior to ever send this id as part of a request.
*/
invldRequestorId = std::numeric_limits<RequestorID>::max()
};
/** @} */
typedef uint64_t CacheCoherenceFlagsType;
typedef gem5::Flags<CacheCoherenceFlagsType> CacheCoherenceFlags;
/**
* These bits are used to set the coherence policy for the GPU and are
* encoded in the GCN3 instructions. The GCN3 ISA defines two cache levels.
* See the AMD GCN3 ISA Architecture Manual for more details.
*
* INV_L1: L1 cache invalidation
* FLUSH_L2: L2 cache flush
*
* Invalidation means to simply discard all cache contents. This can be
* done in the L1 since it is implemented as a write-through cache and
* there are other copies elsewhere in the hierarchy.
*
* For flush the contents of the cache need to be written back to memory
* when dirty and can be discarded otherwise. This operation is more
* involved than invalidation and therefore we do not flush caches with
* redundant copies of data.
*
* SLC: System Level Coherent. Accesses are forced to miss in the L2 cache
* and are coherent with system memory.
*
* GLC: Globally Coherent. Controls how reads and writes are handled by
* the L1 cache. Global here refers to the data being visible
* globally on the GPU (i.e., visible to all WGs).
*
* For atomics, the GLC bit is used to distinguish between atomic
* return/no-return operations. These flags are used by GPUDynInst.
*/
enum : CacheCoherenceFlagsType
{
/** mem_sync_op flags */
I_CACHE_INV = 0x00000001,
INV_L1 = I_CACHE_INV,
V_CACHE_INV = 0x00000002,
K_CACHE_INV = 0x00000004,
GL1_CACHE_INV = 0x00000008,
K_CACHE_WB = 0x00000010,
FLUSH_L2 = 0x00000020,
GL2_CACHE_INV = 0x00000040,
/** user-policy flags */
SLC_BIT = 0x00000080,
DLC_BIT = 0x00000100,
GLC_BIT = 0x00000200,
/** mtype flags */
CACHED = 0x00000400,
READ_WRITE = 0x00000800,
SHARED = 0x00001000,
};
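// Illustrative sketch of how these bits might be set and queried on an
// existing request by a GPU memory pipeline; the request and flag choice
// here are hypothetical, not prescribed by gem5.
//
//     RequestPtr req = ...;  // a request with a valid vaddr or paddr
//     req->setCacheCoherenceFlags(Request::SLC_BIT | Request::GLC_BIT);
//     if (req->isSLCSet()) { ... }  // bypass both TCP and TCC
//     if (req->isGLCSet()) { ... }  // globally coherent access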
using LocalAccessor =
std::function<Cycles(ThreadContext *tc, Packet *pkt)>;
private:
typedef uint16_t PrivateFlagsType;
typedef gem5::Flags<PrivateFlagsType> PrivateFlags;
enum : PrivateFlagsType
{
/** Whether or not the size is valid. */
VALID_SIZE = 0x00000001,
/** Whether or not paddr is valid (has been written yet). */
VALID_PADDR = 0x00000002,
/** Whether or not the vaddr is valid. */
VALID_VADDR = 0x00000004,
/** Whether or not the instruction sequence number is valid. */
VALID_INST_SEQ_NUM = 0x00000008,
/** Whether or not the pc is valid. */
VALID_PC = 0x00000010,
/** Whether or not the context ID is valid. */
VALID_CONTEXT_ID = 0x00000020,
/** Whether or not the sc result is valid. */
VALID_EXTRA_DATA = 0x00000080,
/** Whether or not the stream ID and substream ID are valid. */
VALID_STREAM_ID = 0x00000100,
VALID_SUBSTREAM_ID = 0x00000200,
// hardware transactional memory
/** Whether or not the abort cause is valid. */
VALID_HTM_ABORT_CAUSE = 0x00000400,
/** Whether or not the instruction count is valid. */
VALID_INST_COUNT = 0x00000800,
/**
* These flags are *not* cleared when a Request object is reused
* (assigned a new address).
*/
STICKY_PRIVATE_FLAGS = VALID_CONTEXT_ID
};
private:
/**
* The physical address of the request. Valid only if validPaddr
* is set.
*/
Addr _paddr = 0;
/**
* The size of the request. This field must be set when vaddr or
* paddr is written via setVirt() or the physical-address constructor,
* so it is always valid as long as one of the address fields is valid.
*/
unsigned _size = 0;
/** Byte-enable mask for writes. */
std::vector<bool> _byteEnable;
/** The requestor ID which is unique in the system for all ports
* that are capable of issuing a transaction
*/
RequestorID _requestorId = invldRequestorId;
/** Flag structure for the request. */
Flags _flags;
/** Flags that control how the downstream cache system maintains coherence. */
CacheCoherenceFlags _cacheCoherenceFlags;
/** Private flags for field validity checking. */
PrivateFlags privateFlags;
/**
* The time this request was started. Used to calculate
* latencies. This field is set to curTick() any time paddr or vaddr
* is written.
*/
Tick _time = MaxTick;
/**
* The task id associated with this request
*/
uint32_t _taskId = context_switch_task_id::Unknown;
/**
* The stream ID uniquely identifies a device behind the
* SMMU/IOMMU. Each transaction arriving at the SMMU/IOMMU is
* associated with exactly one stream ID.
*/
uint32_t _streamId = 0;
/**
* The substream ID identifies an "execution context" within a
* device behind an SMMU/IOMMU. It's intended to map 1-to-1 to
* PCIe PASID (Process Address Space ID). The presence of a
* substream ID is optional.
*/
uint32_t _substreamId = 0;
/**
* For full-system GPU simulation, this determines whether a request's
* destination is system (host) memory or dGPU (device) memory.
*/
bool _systemReq = false;
/** The virtual address of the request. */
Addr _vaddr = MaxAddr;
/**
* Extra data for the request, such as the return value of
* store conditional or the compare value for a CAS. */
uint64_t _extraData = 0;
/** The context ID (for statistics, locks, and wakeups). */
ContextID _contextId = InvalidContextID;
/** program counter of initiating access; for tracing/debugging */
Addr _pc = MaxAddr;
/** Sequence number of the instruction that creates the request */
InstSeqNum _reqInstSeqNum = 0;
/** A pointer to an atomic operation */
AtomicOpFunctorPtr atomicOpFunctor = nullptr;
LocalAccessor _localAccessor;
/** The instruction count at the time this request is created */
Counter _instCount = 0;
/** The cause for HTM transaction abort */
HtmFailureFaultCause _htmAbortCause = HtmFailureFaultCause::INVALID;
public:
/**
* Minimal constructor. No fields are initialized. (Note that
* _flags and privateFlags are cleared by Flags default
* constructor.)
*/
Request() {}
/**
* Constructor for physical (e.g. device) requests. Initializes
* just the physical address, size, flags, requestor ID, and
* timestamp (to curTick()). These fields are adequate to perform
* a request.
*/
Request(Addr paddr, unsigned size, Flags flags, RequestorID id) :
_paddr(paddr), _size(size), _requestorId(id), _time(curTick())
{
_flags.set(flags);
privateFlags.set(VALID_PADDR|VALID_SIZE);
_byteEnable = std::vector<bool>(size, true);
}
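// Illustrative use of the physical constructor above (address, size, and
// requestor id are hypothetical): a device model issuing an uncacheable,
// strictly ordered 8-byte access to a fixed physical address.
//
//     RequestPtr req = std::make_shared<Request>(
//         0x1c010000, 8, Request::UNCACHEABLE | Request::STRICT_ORDER,
//         my_requestor_id);
//     assert(req->hasPaddr() && req->getSize() == 8);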
Request(Addr vaddr, unsigned size, Flags flags,
RequestorID id, Addr pc, ContextID cid,
AtomicOpFunctorPtr atomic_op=nullptr)
{
setVirt(vaddr, size, flags, id, pc, std::move(atomic_op));
setContext(cid);
_byteEnable = std::vector<bool>(size, true);
}
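// Illustrative use of the virtual constructor above (all concrete values
// hypothetical): a CPU model building a 4-byte load request tagged with
// the PC and thread context of the originating instruction.
//
//     RequestPtr req = std::make_shared<Request>(
//         load_vaddr, 4, Request::Flags(0), cpu_requestor_id,
//         inst_pc, tc->contextId());
//     // The physical address is filled in later, e.g. after translation,
//     // via setPaddr().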
Request(const Request& other)
: Extensible<Request>(other),
_paddr(other._paddr), _size(other._size),
_byteEnable(other._byteEnable),
_requestorId(other._requestorId),
_flags(other._flags),
_cacheCoherenceFlags(other._cacheCoherenceFlags),
privateFlags(other.privateFlags),
_time(other._time),
_taskId(other._taskId), _vaddr(other._vaddr),
_extraData(other._extraData), _contextId(other._contextId),
_pc(other._pc), _reqInstSeqNum(other._reqInstSeqNum),
_localAccessor(other._localAccessor),
translateDelta(other.translateDelta),
accessDelta(other.accessDelta), depth(other.depth)
{
atomicOpFunctor.reset(other.atomicOpFunctor ?
other.atomicOpFunctor->clone() : nullptr);
}
~Request() {}
/**
* Factory method for creating memory management requests, with
* unspecified addr and size.
*/
static RequestPtr
createMemManagement(Flags flags, RequestorID id)
{
auto mgmt_req = std::make_shared<Request>();
mgmt_req->_flags.set(flags);
mgmt_req->_requestorId = id;
mgmt_req->_time = curTick();
assert(mgmt_req->isMemMgmt());
return mgmt_req;
}
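// Illustrative sketch (requestor id hypothetical): creating a TLB
// shootdown request with this factory method.
//
//     RequestPtr tlbi_req =
//         Request::createMemManagement(Request::TLBI, cpu_requestor_id);
//     assert(tlbi_req->isTlbi() && tlbi_req->isMemMgmt());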
/**
* Set up Context numbers.
*/
void
setContext(ContextID context_id)
{
_contextId = context_id;
privateFlags.set(VALID_CONTEXT_ID);
}
void
setStreamId(uint32_t sid)
{
_streamId = sid;
privateFlags.set(VALID_STREAM_ID);
}
void
setSubstreamId(uint32_t ssid)
{
assert(hasStreamId());
_substreamId = ssid;
privateFlags.set(VALID_SUBSTREAM_ID);
}
/**
* Set up a virtual (e.g., CPU) request in a previously
* allocated Request object.
*/
void
setVirt(Addr vaddr, unsigned size, Flags flags, RequestorID id, Addr pc,
AtomicOpFunctorPtr amo_op=nullptr)
{
_vaddr = vaddr;
_size = size;
_requestorId = id;
_pc = pc;
_time = curTick();
_flags.clear(~STICKY_FLAGS);
_flags.set(flags);
privateFlags.clear(~STICKY_PRIVATE_FLAGS);
privateFlags.set(VALID_VADDR|VALID_SIZE|VALID_PC);
depth = 0;
accessDelta = 0;
translateDelta = 0;
atomicOpFunctor = std::move(amo_op);
_localAccessor = nullptr;
}
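// Illustrative sketch of reusing one Request object across instructions
// (values hypothetical). Only STICKY_FLAGS (INST_FETCH) and
// STICKY_PRIVATE_FLAGS (VALID_CONTEXT_ID) survive the call to setVirt();
// address, size, flags, and timing state are all rewritten.
//
//     req->setContext(tc->contextId());         // sticky across setVirt()
//     req->setVirt(vaddr, size, flags, id, pc); // resets the rest
//     assert(req->hasContextId());              // context id survived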
/**
* Set just the physical address. This is usually used to record the
* result of a translation.
*/
void
setPaddr(Addr paddr)
{
_paddr = paddr;
privateFlags.set(VALID_PADDR);
}
/**
* Generate two requests as if this request had been split into two
* pieces. The original request can't have been translated already.
*/
// TODO: this function is still required by TimingSimpleCPU - should be
// removed once TimingSimpleCPU supports arbitrarily long multi-line
// memory accesses.
void splitOnVaddr(Addr split_addr, RequestPtr &req1, RequestPtr &req2)
{
assert(hasVaddr());
assert(!hasPaddr());
assert(split_addr > _vaddr && split_addr < _vaddr + _size);
req1 = std::make_shared<Request>(*this);
req2 = std::make_shared<Request>(*this);
req1->_size = split_addr - _vaddr;
req2->_vaddr = split_addr;
req2->_size = _size - req1->_size;
req1->_byteEnable = std::vector<bool>(
_byteEnable.begin(),
_byteEnable.begin() + req1->_size);
req2->_byteEnable = std::vector<bool>(
_byteEnable.begin() + req1->_size,
_byteEnable.end());
}
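// Illustrative sketch (addresses hypothetical): splitting a request that
// straddles a cache-line boundary before it has been translated.
//
//     // req covers [0x0ffc, 0x1004), i.e. 8 bytes crossing 0x1000
//     RequestPtr req1, req2;
//     req->splitOnVaddr(0x1000, req1, req2);
//     // req1 now covers [0x0ffc, 0x1000) and req2 covers [0x1000, 0x1004);
//     // the byte-enable vector is divided between them at the same point.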
/**
* Accessor for paddr.
*/
bool
hasPaddr() const
{
return privateFlags.isSet(VALID_PADDR);
}
Addr
getPaddr() const
{
assert(hasPaddr());
return _paddr;
}
/**
* Accessor for instruction count.
*/
bool
hasInstCount() const
{
return privateFlags.isSet(VALID_INST_COUNT);
}
Counter getInstCount() const
{
assert(hasInstCount());
return _instCount;
}
void setInstCount(Counter val)
{
privateFlags.set(VALID_INST_COUNT);
_instCount = val;
}
/**
* Time for the TLB/table walker to successfully translate this request.
*/
Tick translateDelta = 0;
/**
* Access latency to complete this memory transaction not including
* translation time.
*/
Tick accessDelta = 0;
/**
* Level of the cache hierarchy where this request was responded to
* (e.g. 0 = L1; 1 = L2).
*/
mutable int depth = 0;
/**
* Accessor for size.
*/
bool
hasSize() const
{
return privateFlags.isSet(VALID_SIZE);
}
unsigned
getSize() const
{
assert(hasSize());
return _size;
}
const std::vector<bool>&
getByteEnable() const
{
return _byteEnable;
}
void
setByteEnable(const std::vector<bool>& be)
{
assert(be.size() == _size);
_byteEnable = be;
}
/**
* Returns true if the memory request is masked, which means
* there is at least one byteEnable element which is false
* (that byte is masked).
*/
bool
isMasked() const
{
return std::find(
_byteEnable.begin(),
_byteEnable.end(),
false) != _byteEnable.end();
}
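// Illustrative use of setByteEnable()/isMasked() above: masking off the
// upper half of an 8-byte write so only bytes 0-3 are stored (values
// hypothetical).
//
//     std::vector<bool> be(8, true);
//     std::fill(be.begin() + 4, be.end(), false);
//     req->setByteEnable(be);   // req->getSize() must be 8
//     assert(req->isMasked());  // at least one byte is disabled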
/** Accessor for time. */
Tick
time() const
{
assert(hasPaddr() || hasVaddr());
return _time;
}
/** Is this request for a local memory mapped resource/register? */
bool isLocalAccess() { return (bool)_localAccessor; }
/** Set the function which will enact that access. */
void setLocalAccessor(LocalAccessor acc) { _localAccessor = acc; }
/** Perform the installed local access. */
Cycles
localAccessor(ThreadContext *tc, Packet *pkt)
{
return _localAccessor(tc, pkt);
}
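// Illustrative sketch of installing and invoking a local accessor for a
// locally handled, memory-mapped resource; the lambda body and latency
// are hypothetical.
//
//     req->setLocalAccessor(
//         [](ThreadContext *tc, Packet *pkt) -> Cycles
//         {
//             // ... service the access using tc and pkt ...
//             return Cycles(1);
//         });
//     if (req->isLocalAccess()) {
//         Cycles delay = req->localAccessor(tc, pkt);
//         // ... account for delay ...
//     }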
/**
* Accessor for atomic-op functor.
*/
bool
hasAtomicOpFunctor()
{
return (bool)atomicOpFunctor;
}
AtomicOpFunctor *
getAtomicOpFunctor()
{
assert(atomicOpFunctor);
return atomicOpFunctor.get();
}
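// Illustrative sketch of attaching an atomic-op functor when the request
// is created and retrieving it where the access is performed. AtomicOpAdd
// is assumed to be one of the helpers in base/amo.hh; any
// AtomicOpFunctorPtr is handled the same way.
//
//     auto amo = std::make_unique<AtomicOpAdd<uint32_t>>(1);
//     RequestPtr req = std::make_shared<Request>(
//         vaddr, 4, Request::ATOMIC_RETURN_OP, id, pc, cid, std::move(amo));
//     if (req->hasAtomicOpFunctor())
//         (*req->getAtomicOpFunctor())(host_data_ptr);  // apply the op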
/**
* Accessor for hardware transactional memory abort cause.
*/
bool
hasHtmAbortCause() const
{
return privateFlags.isSet(VALID_HTM_ABORT_CAUSE);
}
HtmFailureFaultCause
getHtmAbortCause() const
{
assert(hasHtmAbortCause());
return _htmAbortCause;
}
void
setHtmAbortCause(HtmFailureFaultCause val)
{
assert(isHTMAbort());
privateFlags.set(VALID_HTM_ABORT_CAUSE);
_htmAbortCause = val;
}
/** Accessor for flags. */
Flags
getFlags()
{
assert(hasPaddr() || hasVaddr());
return _flags;
}
/** Note that unlike other accessors, this function sets *specific
flags* (ORs them in); it does not assign its argument to the
_flags field. Thus this method should rightly be called
setFlags() and not just flags(). */
void
setFlags(Flags flags)
{
assert(hasPaddr() || hasVaddr());
_flags.set(flags);
}
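// Illustrative sketch of the OR-in semantics described above (flag choice
// hypothetical):
//
//     req->setFlags(Request::UNCACHEABLE);
//     req->setFlags(Request::STRICT_ORDER);
//     // Both flags are now set; the second call did not clear the first.
//     assert(req->isUncacheable() && req->isStrictlyOrdered());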
void
clearFlags(Flags flags)
{
assert(hasPaddr() || hasVaddr());
_flags.clear(flags);
}
void
setCacheCoherenceFlags(CacheCoherenceFlags extraFlags)
{
// TODO: do mem_sync_op requests have valid paddr/vaddr?
assert(hasPaddr() || hasVaddr());
_cacheCoherenceFlags.set(extraFlags);
}
void
clearCacheCoherenceFlags(CacheCoherenceFlags extraFlags)
{
// TODO: do mem_sync_op requests have valid paddr/vaddr?
assert(hasPaddr() || hasVaddr());
_cacheCoherenceFlags.clear(extraFlags);
}
/** Accessor function for vaddr.*/
bool
hasVaddr() const
{
return privateFlags.isSet(VALID_VADDR);
}
Addr
getVaddr() const
{
assert(privateFlags.isSet(VALID_VADDR));
return _vaddr;
}
/** Accessor for the requestor id. */
RequestorID
requestorId() const
{
return _requestorId;
}
void
requestorId(RequestorID rid)
{
_requestorId = rid;
}
uint32_t
taskId() const
{
return _taskId;
}
void
taskId(uint32_t id) {
_taskId = id;
}
/** Accessor function for architecture-specific flags.*/
ArchFlagsType
getArchFlags() const
{
assert(hasPaddr() || hasVaddr());
return _flags & ARCH_BITS;
}
/** Accessor function to check if sc result is valid. */
bool
extraDataValid() const
{
return privateFlags.isSet(VALID_EXTRA_DATA);
}
/** Accessor function for store conditional return value.*/
uint64_t
getExtraData() const
{
assert(extraDataValid());
return _extraData;
}
/** Accessor function for store conditional return value.*/
void
setExtraData(uint64_t extraData)
{
_extraData = extraData;
privateFlags.set(VALID_EXTRA_DATA);
}
bool
hasContextId() const
{
return privateFlags.isSet(VALID_CONTEXT_ID);
}
/** Accessor function for context ID.*/
ContextID
contextId() const
{
assert(hasContextId());
return _contextId;
}
/* For full-system GPU simulation, mark whether this request targets
   system (host) memory rather than device (dGPU) memory. */
void setSystemReq(bool sysReq) { _systemReq = sysReq; }
bool systemReq() const { return _systemReq; }
bool
hasStreamId() const
{
return privateFlags.isSet(VALID_STREAM_ID);
}
uint32_t
streamId() const
{
assert(hasStreamId());
return _streamId;
}
bool
hasSubstreamId() const
{
return privateFlags.isSet(VALID_SUBSTREAM_ID);
}
uint32_t
substreamId() const
{
assert(hasSubstreamId());
return _substreamId;
}
void
setPC(Addr pc)
{
privateFlags.set(VALID_PC);
_pc = pc;
}
bool
hasPC() const
{
return privateFlags.isSet(VALID_PC);
}
/** Accessor function for pc.*/
Addr
getPC() const
{
assert(hasPC());
return _pc;
}
/**
* Increment/Get the depth at which this request is responded to.
* This currently happens when the request misses in any cache level.
*/
void incAccessDepth() const { depth++; }
int getAccessDepth() const { return depth; }
/**
* Set/Get the time taken for this request to be successfully translated.
*/
void setTranslateLatency() { translateDelta = curTick() - _time; }
Tick getTranslateLatency() const { return translateDelta; }
/**
* Set/Get the time taken to complete this request's access, not including
* the time to successfully translate the request.
*/
void setAccessLatency() { accessDelta = curTick() - _time - translateDelta; }
Tick getAccessLatency() const { return accessDelta; }
/**
* Accessor for the sequence number of instruction that creates the
* request.
*/
bool
hasInstSeqNum() const
{
return privateFlags.isSet(VALID_INST_SEQ_NUM);
}
InstSeqNum
getReqInstSeqNum() const
{
assert(hasInstSeqNum());
return _reqInstSeqNum;
}
void
setReqInstSeqNum(const InstSeqNum seq_num)
{
privateFlags.set(VALID_INST_SEQ_NUM);
_reqInstSeqNum = seq_num;
}
/** Accessor functions for flags. Note that these are for testing
only; setting flags should be done via setFlags(). */
bool isUncacheable() const { return _flags.isSet(UNCACHEABLE); }
bool isStrictlyOrdered() const { return _flags.isSet(STRICT_ORDER); }
bool isInstFetch() const { return _flags.isSet(INST_FETCH); }
bool
isPrefetch() const
{
return (_flags.isSet(PREFETCH | PF_EXCLUSIVE));
}
bool isPrefetchEx() const { return _flags.isSet(PF_EXCLUSIVE); }
bool isLLSC() const { return _flags.isSet(LLSC); }
bool isPriv() const { return _flags.isSet(PRIVILEGED); }
bool isLockedRMW() const { return _flags.isSet(LOCKED_RMW); }
bool isSwap() const { return _flags.isSet(MEM_SWAP | MEM_SWAP_COND); }
bool isCondSwap() const { return _flags.isSet(MEM_SWAP_COND); }
bool
isReadModifyWrite() const
{
return _flags.isSet(LOCKED_RMW | READ_MODIFY_WRITE);
}
bool isSecure() const { return _flags.isSet(SECURE); }
bool isPTWalk() const { return _flags.isSet(PT_WALK); }
bool isRelease() const { return _flags.isSet(RELEASE); }
bool isKernel() const { return _flags.isSet(KERNEL); }
bool isAtomicReturn() const { return _flags.isSet(ATOMIC_RETURN_OP); }
bool isAtomicNoReturn() const { return _flags.isSet(ATOMIC_NO_RETURN_OP); }
// hardware transactional memory
bool isHTMStart() const { return _flags.isSet(HTM_START); }
bool isHTMCommit() const { return _flags.isSet(HTM_COMMIT); }
bool isHTMCancel() const { return _flags.isSet(HTM_CANCEL); }
bool isHTMAbort() const { return _flags.isSet(HTM_ABORT); }
bool
isHTMCmd() const
{
return (isHTMStart() || isHTMCommit() ||
isHTMCancel() || isHTMAbort());
}
bool isTlbi() const { return _flags.isSet(TLBI); }
bool isTlbiSync() const { return _flags.isSet(TLBI_SYNC); }
bool isTlbiExtSync() const { return _flags.isSet(TLBI_EXT_SYNC); }
bool isTlbiExtSyncComp() const { return _flags.isSet(TLBI_EXT_SYNC_COMP); }
bool
isTlbiCmd() const
{
return (isTlbi() || isTlbiSync() ||
isTlbiExtSync() || isTlbiExtSyncComp());
}
bool isMemMgmt() const { return isTlbiCmd() || isHTMCmd(); }
bool
isAtomic() const
{
return _flags.isSet(ATOMIC_RETURN_OP) ||
_flags.isSet(ATOMIC_NO_RETURN_OP);
}
/**
* Accessor functions for the destination of a memory request. The
* destination flag can specify a point of reference for the
* operation (e.g. a cache block clean to the point of
* unification). At the moment the destination is only used by the
* cache maintenance operations.
*/
bool isToPOU() const { return _flags.isSet(DST_POU); }
bool isToPOC() const { return _flags.isSet(DST_POC); }
Flags getDest() const { return _flags & DST_BITS; }
bool isAcquire() const { return _cacheCoherenceFlags.isSet(ACQUIRE); }
/**
* Accessor functions for the cache bypass flags. The cache bypass
* can specify which levels in the hierarchy to bypass. If GLC_BIT
* is set, the requests are globally coherent and bypass TCP.
* If SLC_BIT is set, then the requests are system level coherent
* and bypass both TCP and TCC.
*/
bool isGLCSet() const { return _cacheCoherenceFlags.isSet(GLC_BIT); }
bool isSLCSet() const { return _cacheCoherenceFlags.isSet(SLC_BIT); }
/**
* Accessor functions for the memory space configuration flags used by
* GPU ISAs such as the Heterogeneous System Architecture (HSA). Note that
* setting extraFlags should be done via setCacheCoherenceFlags().
*/
bool isInvL1() const { return _cacheCoherenceFlags.isSet(INV_L1); }
bool
isGL2CacheFlush() const
{
return _cacheCoherenceFlags.isSet(FLUSH_L2);
}
/**
* Accessor functions to determine whether this request is part of
* a cache maintenance operation. At the moment three operations
* are supported:
* 1) A cache clean operation updates all copies of a memory
* location to the point of reference,
* 2) A cache invalidate operation invalidates all copies of the
* specified block in the memory above the point of reference,
* 3) A clean and invalidate operation is a combination of the two
* operations.
* @{ */
bool isCacheClean() const { return _flags.isSet(CLEAN); }
bool isCacheInvalidate() const { return _flags.isSet(INVALIDATE); }
bool isCacheMaintenance() const { return _flags.isSet(CLEAN|INVALIDATE); }
/** @} */
};
} // namespace gem5
#endif // __MEM_REQUEST_HH__