// blob: ebdf9ae7a6a6cc48a22bd2e8fec30b1a9b53a6bd [file] [log] [blame]
/*
* Copyright (c) 2018 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/**
* @file
* The GPUCommandProcessor (CP) is responsible for accepting commands, in
* the form of HSA AQL packets, from the HSA packet processor (HSAPP). The CP
* works with several components, including the HSAPP and the dispatcher.
* When the HSAPP sends a ready task to the CP, the CP performs the necessary
* operations to extract relevant data structures from memory, such as the
* AQL queue descriptor and AQL packet, and initializes register state for the
* task's wavefronts.
*/
#ifndef __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__
#define __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__
#include "dev/hsa/hsa_device.hh"
#include "dev/hsa/hsa_signal.hh"
#include "gpu-compute/gpu_compute_driver.hh"
#include "gpu-compute/hsa_queue_entry.hh"
struct GPUCommandProcessorParams;
class GPUDispatcher;
class Shader;
/**
 * Device model of the GPU command processor (CP). It is an HSADevice, so it
 * implements the packet-submission and HSA-signal hooks declared by that
 * base class, and it hands tasks on for dispatch once the queue descriptor
 * they belong to has been read from memory (see the nested DMA callbacks).
 */
class GPUCommandProcessor : public HSADevice
{
  public:
    /** Parameter type consumed by the constructor. */
    using Params = GPUCommandProcessorParams;

    /** The CP can only be built from a parameter object. */
    GPUCommandProcessor() = delete;
    GPUCommandProcessor(const Params &p);

    /** Accessors for the Shader associated with this CP (defined out of line). */
    void setShader(Shader *shader);
    Shader* shader();

    /**
     * Command codes understood by the CP.
     * NOTE(review): presumably carried by agent dispatch packets -- confirm
     * against submitAgentDispatchPkt() in the implementation file.
     */
    enum AgentCmd {
      Nop = 0,
      Steal = 1
    };

    /**
     * Packet submission hooks inherited from HSADevice. Each receives a
     * pointer to the raw packet, the ID of the queue it was read from, and
     * the host address of the packet. Implementations live out of line.
     */
    void submitAgentDispatchPkt(void *raw_pkt, uint32_t queue_id,
                                Addr host_pkt_addr) override;
    void submitDispatchPkt(void *raw_pkt, uint32_t queue_id,
                           Addr host_pkt_addr) override;
    void submitVendorPkt(void *raw_pkt, uint32_t queue_id,
                         Addr host_pkt_addr) override;

    /** Attach the HSA driver this CP works with (defined out of line). */
    void attachDriver(HSADriver *driver) override;

    /**
     * Hand a task on for dispatch. Invoked by MQDDmaEvent once the MQD copy
     * for the task has been read.
     */
    void dispatchPkt(HSAQueueEntry *task);

    /**
     * Signal the event identified by event_id.
     * NOTE(review): defined out of line; presumably forwarded to the
     * attached driver -- confirm.
     */
    void signalWakeupEvent(uint32_t event_id);

    /** Timing accesses to the CP do no work and always return a Tick of 0. */
    Tick write(PacketPtr pkt) override { return 0; }
    Tick read(PacketPtr pkt) override { return 0; }

    /** Address ranges reported by this device (defined out of line). */
    AddrRangeList getAddrRanges() const override;

    /** Accessor for a System pointer (defined out of line). */
    System *system();

    /** HSA signal hooks inherited from HSADevice (defined out of line). */
    void updateHsaSignal(Addr signal_handle, uint64_t signal_value) override;
    uint64_t functionalReadHsaSignal(Addr signal_handle) override;

    /**
     * Address helpers for the fields of an amd_signal_t. Each returns
     * signal_handle plus the byte offset of the named field within
     * amd_signal_t. They read no object state, hence const.
     */
    Addr
    getHsaSignalValueAddr(Addr signal_handle) const
    {
        return signal_handle + offsetof(amd_signal_t, value);
    }

    Addr
    getHsaSignalMailboxAddr(Addr signal_handle) const
    {
        return signal_handle + offsetof(amd_signal_t, event_mailbox_ptr);
    }

    Addr
    getHsaSignalEventAddr(Addr signal_handle) const
    {
        return signal_handle + offsetof(amd_signal_t, event_id);
    }

  private:
    // Raw pointers default to nullptr so they never hold an indeterminate
    // value, whatever the out-of-line constructor chooses to initialize.
    Shader *_shader = nullptr;
    GPUDispatcher &dispatcher;
    HSADriver *driver = nullptr;

    /** Defined out of line; NOTE(review): presumably sets up ABI state. */
    void initABI(HSAQueueEntry *task);

    /**
     * Perform a DMA read of the read_dispatch_id_field_base_byte_offset
     * field, which follows directly after the read_dispatch_id (the read
     * pointer) in the amd_hsa_queue_t struct (aka memory queue descriptor
     * (MQD)), to find the base address of the MQD. The MQD is the runtime's
     * soft representation of a HW queue descriptor (HQD).
     *
     * Any fields below the read dispatch ID in the amd_hsa_queue_t should
     * not change according to the HSA standard, therefore we should be able
     * to get them based on their known relative position to the read
     * dispatch ID.
     */
    class ReadDispIdOffsetDmaEvent : public DmaCallback
    {
      public:
        /**
         * @param gpu_cmd_proc command processor used to issue the follow-up
         *                     MQD read
         * @param task         task whose MQD address and copy get filled in
         */
        ReadDispIdOffsetDmaEvent(GPUCommandProcessor &gpu_cmd_proc,
                                 HSAQueueEntry *task)
            : DmaCallback(), readDispIdOffset(0), gpuCmdProc(gpu_cmd_proc),
              _task(task)
        {
        }

        void
        process() override
        {
            /**
             * Now that the read pointer's offset from the base of the MQD
             * is known, we can calculate the address of the MQD itself by
             * subtracting that offset from the host address of the queue's
             * read pointer.
             */
            _task->hostAMDQueueAddr
                = gpuCmdProc.hsaPP->getQueueDesc(_task->queueId())
                    ->hostReadIndexPtr - readDispIdOffset;

            /**
             * DMA a copy of the MQD into the task. Some fields of
             * the MQD will be used to initialize register state.
             *
             * NOTE(review): the callback is allocated with new and never
             * freed here; assumed to be released by the DMA callback
             * machinery once the read completes -- confirm.
             */
            auto *mqdDmaEvent = new MQDDmaEvent(gpuCmdProc, _task);
            gpuCmdProc.dmaReadVirt(_task->hostAMDQueueAddr,
                                   sizeof(_amd_queue_t), mqdDmaEvent,
                                   &_task->amdQueue);
        }

        /**
         * Offset of the read pointer from the base of the MQD; starts at 0.
         * Public so code outside this class can fill it in.
         * NOTE(review): presumably the destination of the DMA read this
         * callback completes -- confirm in the implementation file.
         */
        uint32_t readDispIdOffset;

      private:
        GPUCommandProcessor &gpuCmdProc;
        HSAQueueEntry *_task;
    };

    /**
     * Perform a DMA read of the MQD that corresponds to a hardware
     * queue descriptor (HQD). We store a copy of the MQD in the
     * HSAQueueEntry object so we can send a copy of it along with
     * a dispatch packet, which is needed to initialize register
     * state.
     */
    class MQDDmaEvent : public DmaCallback
    {
      public:
        /**
         * @param gpu_cmd_proc command processor that will dispatch the task
         * @param task         task handed to dispatchPkt() on completion
         */
        MQDDmaEvent(GPUCommandProcessor &gpu_cmd_proc, HSAQueueEntry *task)
            : DmaCallback(), gpuCmdProc(gpu_cmd_proc), _task(task)
        {
        }

        /** Hand the task to the command processor for dispatch. */
        void
        process() override
        {
            gpuCmdProc.dispatchPkt(_task);
        }

      private:
        GPUCommandProcessor &gpuCmdProc;
        HSAQueueEntry *_task;
    };
};
#endif // __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__