// blob: def40f4557331ac20a00c5cbeb77f684af911cf8 [file] [log] [blame]
/*
* Copyright (c) 2015-2018 Advanced Micro Devices, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/**
* @file
* The GPUComputeDriver implements an EmulatedDriver for an HSA AMD GPU
* agent. Other GPU devices, or other HSA agents, should not derive
* from this class. Instead device-specific implementations of an
* EmulatedDriver should be provided for each unique device.
*/
#ifndef __GPU_COMPUTE_GPU_COMPUTE_DRIVER_HH__
#define __GPU_COMPUTE_GPU_COMPUTE_DRIVER_HH__
#include <cassert>
#include <cstdint>
#include <set>
#include <unordered_map>
#include "base/addr_range_map.hh"
#include "base/types.hh"
#include "enums/GfxVersion.hh"
#include "mem/request.hh"
#include "sim/emul_driver.hh"
namespace gem5
{
// Forward declarations. This header refers to these types only through
// pointers, references, or a type alias, so the full definitions are not
// required here.
class GPUCommandProcessor;
struct GPUComputeDriverParams;
class PortProxy;
class ThreadContext;
class GPUComputeDriver final : public EmulatedDriver
{
public:
typedef GPUComputeDriverParams Params;
GPUComputeDriver(const Params &p);
int ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf) override;
int open(ThreadContext *tc, int mode, int flags) override;
Addr mmap(ThreadContext *tc, Addr start, uint64_t length,
int prot, int tgt_flags, int tgt_fd, off_t offset) override;
virtual void signalWakeupEvent(uint32_t event_id);
void sleepCPU(ThreadContext *tc, uint32_t milliSecTimeout);
/**
* Called by the compute units right before a request is issued to ruby.
* This uses our VMAs to correctly set the MTYPE on a per-request basis.
* In real hardware, this is actually done through PTE bits in GPUVM.
* Since we are running a single VM (x86 PT) system, the MTYPE bits aren't
* available. Adding GPUVM specific bits to x86 page tables probably
* isn't the best way to proceed. For now we just have the driver set
* these until we implement a proper dual PT system.
*/
void setMtype(RequestPtr req);
int
doorbellSize()
{
switch (gfxVersion) {
case GfxVersion::gfx801:
case GfxVersion::gfx803:
case GfxVersion::gfx902:
return 4;
case GfxVersion::gfx900:
// gfx900 supports large BAR, so it has a larger doorbell
return 8;
default:
fatal("Invalid GPU type\n");
}
return 4;
}
class DriverWakeupEvent : public Event
{
public:
DriverWakeupEvent(GPUComputeDriver *gpu_driver,
ThreadContext *thrd_cntxt)
: driver(gpu_driver), tc(thrd_cntxt) {}
void process() override;
const char *description() const override;
void scheduleWakeup(Tick wakeup_delay);
private:
GPUComputeDriver *driver;
ThreadContext *tc;
};
class EventTableEntry
{
public:
EventTableEntry() :
mailBoxPtr(0), tc(nullptr), threadWaiting(false), setEvent(false)
{}
// Mail box pointer for this address. Current implementation does not
// use this mailBoxPtr to notify events but directly calls
// signalWakeupEvent from dispatcher (GPU) to notifiy events. So,
// currently this mailBoxPtr is not used. But a future implementation
// may communicate to the driver using mailBoxPtr.
Addr mailBoxPtr;
// Thread context waiting on this even. We do not support multiple
// threads waiting on an event currently.
ThreadContext *tc;
// threadWaiting = true, if some thread context is waiting on this
// event. A thread context waiting on this event is put to sleep.
bool threadWaiting;
// setEvent = true, if this event is triggered but when this event
// triggered, no thread context was waiting on it. In the future, some
// thread context will try to wait on this event but since event has
// already happened, we will not allow that thread context to go to
// sleep. The above mentioned scneario can happen when the waiting
// thread and wakeup thread race on this event and the wakeup thread
// beat the waiting thread at the driver.
bool setEvent;
};
typedef class EventTableEntry ETEntry;
private:
/**
* GPU that is controlled by this driver.
*/
GPUCommandProcessor *device;
uint32_t queueId;
bool isdGPU;
GfxVersion gfxVersion;
int dGPUPoolID;
Addr eventPage;
uint32_t eventSlotIndex;
//Event table that keeps track of events. It is indexed with event ID.
std::unordered_map<uint32_t, ETEntry> ETable;
/**
* VMA structures for GPUVM memory.
*/
AddrRangeMap<Request::CacheCoherenceFlags, 1> gpuVmas;
/**
* Mtype bits {Cached, Read Write, Shared} for caches
*/
enum MtypeFlags
{
SHARED = 0,
READ_WRITE = 1,
CACHED = 2,
NUM_MTYPE_BITS
};
Request::CacheCoherenceFlags defaultMtype;
// TCEvents map keeps trak of the events that can wakeup this thread. When
// multiple events can wake up this thread, this data structure helps to
// reset all events when one of those events wake up this thread. the
// signal events that can wake up this thread are stored in signalEvents
// whereas the timer wakeup event is stored in timerEvent.
class EventList
{
public:
EventList() : driver(nullptr), timerEvent(nullptr, nullptr) {}
EventList(GPUComputeDriver *gpu_driver, ThreadContext *thrd_cntxt)
: driver(gpu_driver), timerEvent(gpu_driver, thrd_cntxt)
{ }
void clearEvents() {
assert(driver);
for (auto event : signalEvents) {
assert(event < driver->eventSlotIndex);
driver->ETable[event].tc = nullptr;
driver->ETable[event].threadWaiting = false;
}
signalEvents.clear();
if (timerEvent.scheduled()) {
driver->deschedule(timerEvent);
}
}
GPUComputeDriver *driver;
DriverWakeupEvent timerEvent;
// The set of events that can wake up the same thread.
std::set<uint32_t> signalEvents;
};
std::unordered_map<ThreadContext *, EventList> TCEvents;
/**
* Register a region of host memory as uncacheable from the perspective
* of the dGPU.
*/
void registerUncacheableMemory(Addr start, Addr length);
/**
* The aperture (APE) base/limit pairs are set
* statically at startup by the real KFD. AMD
* x86_64 CPUs only use the areas in the 64b
* address space where VA[63:47] == 0x1ffff or
* VA[63:47] = 0. These methods generate the APE
* base/limit pairs in exactly the same way as
* the real KFD does, which ensures these APEs do
* not fall into the CPU's address space
*
* see the macros in the KFD driver in the ROCm
* Linux kernel source:
*
* drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
*/
Addr gpuVmApeBase(int gpuNum) const;
Addr gpuVmApeLimit(Addr apeBase) const;
Addr scratchApeBase(int gpuNum) const;
Addr scratchApeBaseV9() const;
Addr scratchApeLimit(Addr apeBase) const;
Addr ldsApeBase(int gpuNum) const;
Addr ldsApeBaseV9() const;
Addr ldsApeLimit(Addr apeBase) const;
/**
* Allocate/deallocate GPUVM VMAs for tracking virtual address allocations
* and properties on DGPUs. For now, we use these to track MTYPE and to
* be able to select which pages to unmap when the user provides us with
* a handle during the free ioctl.
*/
void allocateGpuVma(Request::CacheCoherenceFlags mtype, Addr start,
Addr length);
Addr deallocateGpuVma(Addr start);
void allocateQueue(PortProxy &mem_proxy, Addr ioc_buf_addr);
};
} // namespace gem5
#endif // __GPU_COMPUTE_GPU_COMPUTE_DRIVER_HH__