/*
 * Copyright (c) 2018 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * @file
 * The GPUCommandProcessor (CP) is responsible for accepting commands, in
 * the form of HSA AQL packets, from the HSA packet processor (HSAPP). The CP
 * works with several components, including the HSAPP and the dispatcher.
 * When the HSAPP sends a ready task to the CP, it will perform the necessary
 * operations to extract relevant data structures from memory, such as the
 * AQL queue descriptor and AQL packet, and initializes register state for the
 * task's wavefronts.
 */

#ifndef __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__
#define __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__

#include "dev/hsa/hsa_device.hh"
#include "dev/hsa/hsa_signal.hh"
#include "gpu-compute/gpu_compute_driver.hh"
#include "gpu-compute/hsa_queue_entry.hh"

struct GPUCommandProcessorParams;
class GPUDispatcher;
class Shader;

class GPUCommandProcessor : public HSADevice
{
  public:
    typedef GPUCommandProcessorParams Params;

    GPUCommandProcessor() = delete;
    GPUCommandProcessor(const Params &p);

    void setShader(Shader *shader);
    Shader* shader();

    enum AgentCmd {
      Nop = 0,
      Steal = 1
    };

    void submitAgentDispatchPkt(void *raw_pkt, uint32_t queue_id,
                           Addr host_pkt_addr) override;
    void submitDispatchPkt(void *raw_pkt, uint32_t queue_id,
                           Addr host_pkt_addr) override;
    void submitVendorPkt(void *raw_pkt, uint32_t queue_id,
                         Addr host_pkt_addr) override;
    void attachDriver(HSADriver *driver) override;
    void dispatchPkt(HSAQueueEntry *task);
    void signalWakeupEvent(uint32_t event_id);

    Tick write(PacketPtr pkt) override { return 0; }
    Tick read(PacketPtr pkt) override { return 0; }
    AddrRangeList getAddrRanges() const override;
    System *system();

    void updateHsaSignal(Addr signal_handle, uint64_t signal_value) override;

    uint64_t functionalReadHsaSignal(Addr signal_handle) override;

    Addr getHsaSignalValueAddr(Addr signal_handle)
    {
        return signal_handle + offsetof(amd_signal_t, value);
    }

    Addr getHsaSignalMailboxAddr(Addr signal_handle)
    {
        return signal_handle + offsetof(amd_signal_t, event_mailbox_ptr);
    }

    Addr getHsaSignalEventAddr(Addr signal_handle)
    {
        return signal_handle + offsetof(amd_signal_t, event_id);
    }

  private:
    Shader *_shader;
    GPUDispatcher &dispatcher;
    HSADriver *driver;

    void initABI(HSAQueueEntry *task);

    /**
     * Perform a DMA read of the read_dispatch_id_field_base_byte_offset
     * field, which follows directly after the read_dispatch_id (the read
     * pointer) in the amd_hsa_queue_t struct (aka memory queue descriptor
     * (MQD)), to find the base address of the MQD. The MQD is the runtime's
     * soft representation of a HW queue descriptor (HQD).
     *
     * Any fields below the read dispatch ID in the amd_hsa_queue_t should
     * not change according to the HSA standard, therefore we should be able
     * to get them based on their known relative position to the read dispatch
     * ID.
     */
    class ReadDispIdOffsetDmaEvent : public DmaCallback
    {
      public:
        ReadDispIdOffsetDmaEvent(GPUCommandProcessor &gpu_cmd_proc,
                                 HSAQueueEntry *task)
            : DmaCallback(), readDispIdOffset(0), gpuCmdProc(gpu_cmd_proc),
              _task(task)
        {
        }

        void
        process() override
        {
            /**
             * Now that the read pointer's offset from the base of
             * the MQD is known, we can use that to calculate the
             * the address of the MQD itself, the dispatcher will
             * DMA that into the HSAQueueEntry when a kernel is
             * launched.
             */
            _task->hostAMDQueueAddr
                = gpuCmdProc.hsaPP->getQueueDesc(_task->queueId())
                    ->hostReadIndexPtr - readDispIdOffset;

            /**
             * DMA a copy of the MQD into the task. Some fields of
             * the MQD will be used to initialize register state.
             */
            auto *mqdDmaEvent = new MQDDmaEvent(gpuCmdProc, _task);
            gpuCmdProc.dmaReadVirt(_task->hostAMDQueueAddr,
                                   sizeof(_amd_queue_t), mqdDmaEvent,
                                   &_task->amdQueue);
        }

        uint32_t readDispIdOffset;

      private:
        GPUCommandProcessor &gpuCmdProc;
        HSAQueueEntry *_task;
    };

    /**
     * Perform a DMA read of the MQD that corresponds to a hardware
     * queue descriptor (HQD). We store a copy of the MQD in the
     * HSAQueueEntry object so we can send a copy of it along with
     * a dispatch packet, which is needed to initialize register
     * state.
     */
    class MQDDmaEvent : public DmaCallback
    {
      public:
        MQDDmaEvent(GPUCommandProcessor &gpu_cmd_proc, HSAQueueEntry *task)
            : DmaCallback(), gpuCmdProc(gpu_cmd_proc), _task(task)
        {
        }

        void
        process() override
        {
            gpuCmdProc.dispatchPkt(_task);
        }

      private:
        GPUCommandProcessor &gpuCmdProc;
        HSAQueueEntry *_task;
    };
};

#endif // __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__
