blob: d0afaf7a4a73b69d1d58d199f8813b40a56c4c89 [file] [log] [blame]
/*
* Copyright (c) 2021 Advanced Micro Devices, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __DEV_AMDGPU_SDMA_ENGINE_HH__
#define __DEV_AMDGPU_SDMA_ENGINE_HH__
#include "base/bitunion.hh"
#include "dev/amdgpu/amdgpu_device.hh"
#include "dev/amdgpu/sdma_packets.hh"
#include "dev/dma_virt_device.hh"
#include "params/SDMAEngine.hh"
namespace gem5
{
/**
* System DMA Engine class for AMD dGPU.
*/
class SDMAEngine : public DmaVirtDevice
{
enum SDMAType
{
SDMAGfx,
SDMAPage
};
class SDMAQueue
{
Addr _base;
Addr _rptr;
Addr _wptr;
Addr _size;
Addr _rptr_wb_addr = 0;
Addr _global_rptr = 0;
bool _valid;
bool _processing;
SDMAQueue *_parent;
SDMAQueue *_ib;
public:
SDMAQueue() : _rptr(0), _wptr(0), _valid(false), _processing(false),
_parent(nullptr), _ib(nullptr) {}
Addr base() { return _base; }
Addr rptr() { return _base + _rptr; }
Addr getRptr() { return _rptr; }
Addr wptr() { return _base + _wptr; }
Addr getWptr() { return _wptr; }
Addr size() { return _size; }
Addr rptrWbAddr() { return _rptr_wb_addr; }
Addr globalRptr() { return _global_rptr; }
bool valid() { return _valid; }
bool processing() { return _processing; }
SDMAQueue* parent() { return _parent; }
SDMAQueue* ib() { return _ib; }
void base(Addr value) { _base = value; }
void
incRptr(uint32_t value)
{
_rptr = (_rptr + value) % _size;
_global_rptr += value;
}
void
rptr(Addr value)
{
_rptr = value;
_global_rptr = value;
}
void
setWptr(Addr value)
{
_wptr = value % _size;
}
void wptr(Addr value) { _wptr = value; }
void size(Addr value) { _size = value; }
void rptrWbAddr(Addr value) { _rptr_wb_addr = value; }
void valid(bool v) { _valid = v; }
void processing(bool value) { _processing = value; }
void parent(SDMAQueue* q) { _parent = q; }
void ib(SDMAQueue* ib) { _ib = ib; }
};
/* SDMA Engine ID */
int id;
/**
* Each SDMAEngine processes four queues: paging, gfx, rlc0, and rlc1,
* where RLC stands for Run List Controller. Each one of these
* can have one indirect buffer associated at any particular time.
* The switching order between queues is supposed to be page -> gfx ->
* rlc0 -> page -> gfx -> rlc1, skipping empty queues.
*/
SDMAQueue gfx, page, gfxIb, pageIb;
SDMAQueue rlc0, rlc0Ib, rlc1, rlc1Ib;
/* Gfx ring buffer registers */
uint64_t gfxBase;
uint64_t gfxRptr;
uint64_t gfxDoorbell;
uint64_t gfxDoorbellOffset;
uint64_t gfxWptr;
/* Page ring buffer registers */
uint64_t pageBase;
uint64_t pageRptr;
uint64_t pageDoorbell;
uint64_t pageDoorbellOffset;
uint64_t pageWptr;
AMDGPUDevice *gpuDevice;
VegaISA::Walker *walker;
/* processRLC will select the correct queue for the doorbell */
std::array<Addr, 2> rlcInfo{};
void processRLC0(Addr wptrOffset);
void processRLC1(Addr wptrOffset);
public:
SDMAEngine(const SDMAEngineParams &p);
void setGPUDevice(AMDGPUDevice *gpu_device);
void setId(int _id) { id = _id; }
/**
* Returns the client id for the Interrupt Handler.
*/
int getIHClientId();
/**
* Methods for translation.
*/
Addr getGARTAddr(Addr addr) const;
TranslationGenPtr translate(Addr vaddr, Addr size) override;
/**
* Translate an address in an SDMA packet. Return the device address if
* address in the packet is on the device and 0 if the the address in the
* packet is on the host/system memory.
*/
Addr getDeviceAddress(Addr raw_addr);
/**
* Inherited methods.
*/
Tick write(PacketPtr pkt) override { return 0; }
Tick read(PacketPtr pkt) override { return 0; }
AddrRangeList getAddrRanges() const override;
void serialize(CheckpointOut &cp) const override;
void unserialize(CheckpointIn &cp) override;
/**
* Given a new write ptr offset, communicated to the GPU through a doorbell
* write, the SDMA engine processes the page, gfx, rlc0, or rlc1 queue.
*/
void processGfx(Addr wptrOffset);
void processPage(Addr wptrOffset);
void processRLC(Addr doorbellOffset, Addr wptrOffset);
/**
* This method checks read and write pointers and starts decoding
* packets if the read pointer is less than the write pointer.
* It also marks a queue a being currently processing, in case the
* doorbell is rung again, the newly enqueued packets will be decoded once
* the currently processing once are finished. This is achieved by calling
* decodeNext once an entire SDMA packet has been processed.
*/
void decodeNext(SDMAQueue *q);
/**
* Reads the first DW (32 bits) (i.e., header) of an SDMA packet, which
* encodes the opcode and sub-opcode of the packet. It also creates an
* SDMA packet object and calls the associated processing function.
*/
void decodeHeader(SDMAQueue *q, uint32_t data);
/**
* Methods that implement processing of SDMA packets
*/
void write(SDMAQueue *q, sdmaWrite *pkt);
void writeReadData(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer);
void writeDone(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer);
void copy(SDMAQueue *q, sdmaCopy *pkt);
void copyReadData(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer);
void copyDone(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer);
void indirectBuffer(SDMAQueue *q, sdmaIndirectBuffer *pkt);
void fence(SDMAQueue *q, sdmaFence *pkt);
void fenceDone(SDMAQueue *q, sdmaFence *pkt);
void trap(SDMAQueue *q, sdmaTrap *pkt);
void srbmWrite(SDMAQueue *q, sdmaSRBMWriteHeader *header,
sdmaSRBMWrite *pkt);
void pollRegMem(SDMAQueue *q, sdmaPollRegMemHeader *header,
sdmaPollRegMem *pkt);
void pollRegMemRead(SDMAQueue *q, sdmaPollRegMemHeader *header,
sdmaPollRegMem *pkt, uint32_t dma_buffer, int count);
bool pollRegMemFunc(uint32_t value, uint32_t reference, uint32_t func);
void ptePde(SDMAQueue *q, sdmaPtePde *pkt);
void ptePdeDone(SDMAQueue *q, sdmaPtePde *pkt, uint64_t *dmaBuffer);
void atomic(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt);
void atomicData(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt,
uint64_t *dmaBuffer);
void atomicDone(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt,
uint64_t *dmaBuffer);
/**
* Methods for getting the values of SDMA MMIO registers.
*/
uint64_t getGfxBase() { return gfxBase; }
uint64_t getGfxRptr() { return gfxRptr; }
uint64_t getGfxDoorbell() { return gfxDoorbell; }
uint64_t getGfxDoorbellOffset() { return gfxDoorbellOffset; }
uint64_t getGfxWptr() { return gfxWptr; }
uint64_t getPageBase() { return pageBase; }
uint64_t getPageRptr() { return pageRptr; }
uint64_t getPageDoorbell() { return pageDoorbell; }
uint64_t getPageDoorbellOffset() { return pageDoorbellOffset; }
uint64_t getPageWptr() { return pageWptr; }
/**
* Methods for setting the values of SDMA MMIO registers.
*/
void writeMMIO(PacketPtr pkt, Addr mmio_offset);
void setGfxBaseLo(uint32_t data);
void setGfxBaseHi(uint32_t data);
void setGfxRptrLo(uint32_t data);
void setGfxRptrHi(uint32_t data);
void setGfxDoorbellLo(uint32_t data);
void setGfxDoorbellHi(uint32_t data);
void setGfxDoorbellOffsetLo(uint32_t data);
void setGfxDoorbellOffsetHi(uint32_t data);
void setGfxSize(uint64_t data);
void setGfxWptrLo(uint32_t data);
void setGfxWptrHi(uint32_t data);
void setPageBaseLo(uint32_t data);
void setPageBaseHi(uint32_t data);
void setPageRptrLo(uint32_t data);
void setPageRptrHi(uint32_t data);
void setPageDoorbellLo(uint32_t data);
void setPageDoorbellHi(uint32_t data);
void setPageDoorbellOffsetLo(uint32_t data);
void setPageDoorbellOffsetHi(uint32_t data);
void setPageSize(uint64_t data);
void setPageWptrLo(uint32_t data);
void setPageWptrHi(uint32_t data);
/**
* Methods for RLC queues
*/
void registerRLCQueue(Addr doorbell, Addr rb_base, uint32_t size,
Addr rptr_wb_addr);
void unregisterRLCQueue(Addr doorbell);
void deallocateRLCQueues();
int cur_vmid = 0;
};
} // namespace gem5
#endif // __DEV_AMDGPU_SDMA_ENGINE_HH__