src/dev/dma_device.hh - public/gem5 - Git at Google

 /*
  * Copyright (c) 2012-2013, 2015, 2017, 2019 ARM Limited
  * All rights reserved.
  *
  * The license below extends only to copyright in the software and shall
  * not be construed as granting a license to any other intellectual
  * property including but not limited to intellectual property relating
  * to a hardware implementation of the functionality of the software
  * licensed hereunder.  You may use the software subject to the license
  * terms below provided that you ensure that this notice is replicated
  * unmodified and in its entirety in all distributions of the software,
  * modified or unmodified, in source code or in binary form.
  *
  * Copyright (c) 2004-2005 The Regents of The University of Michigan
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met: redistributions of source code must retain the above copyright
  * notice, this list of conditions and the following disclaimer;
  * redistributions in binary form must reproduce the above copyright
  * notice, this list of conditions and the following disclaimer in the
  * documentation and/or other materials provided with the distribution;
  * neither the name of the copyright holders nor the names of its
  * contributors may be used to endorse or promote products derived from
  * this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */

 #ifndef __DEV_DMA_DEVICE_HH__
 #define __DEV_DMA_DEVICE_HH__

 #include <deque>
 #include <memory>

 #include "base/addr_range_map.hh"
 #include "base/chunk_generator.hh"
 #include "base/circlebuf.hh"
 #include "dev/io_device.hh"
 #include "mem/backdoor.hh"
 #include "params/DmaDevice.hh"
 #include "sim/drain.hh"
 #include "sim/system.hh"

 class ClockedObject;

 class DmaPort : public RequestPort, public Drainable
 {
   private:
     AddrRangeMap<MemBackdoorPtr, 1> memBackdoors;

     /**
      * Take the first request on the transmit list and attempt to send a timing
      * packet from it. If it is successful, schedule the sending of the next
      * packet. Otherwise remember that we are waiting for a retry.
      */
     void trySendTimingReq();

     /**
      * For timing, attempt to send the first item on the transmit
      * list, and if it is successful and there are more packets
      * waiting, then schedule the sending of the next packet. For
      * atomic, simply send and process everything on the transmit
      * list.
      */
     void sendDma();

     struct DmaReqState : public Packet::SenderState
     {
         /** Event to call on the device when this transaction (all packets)
          * complete. */
         Event *completionEvent;

         /** Total number of bytes that this transaction involves. */
         const Addr totBytes;

         /** Number of bytes that have been acked for this transaction. */
         Addr numBytes = 0;

         /** Amount to delay completion of dma by */
         const Tick delay;

         /** Object to track what chunks of bytes to send at a time. */
         ChunkGenerator gen;

         /** Pointer to a buffer for the data. */
         uint8_t *const data = nullptr;

         /** The flags to use for requests. */
         const Request::Flags flags;

         /** The requestor ID to use for requests. */
         const RequestorID id;

         /** Stream IDs. */
         const uint32_t sid;
         const uint32_t ssid;

         /** Command for the request. */
         const Packet::Command cmd;

         DmaReqState(Packet::Command _cmd, Addr addr, Addr chunk_sz, Addr tb,
                     uint8_t *_data, Request::Flags _flags, RequestorID _id,
                     uint32_t _sid, uint32_t _ssid, Event *ce, Tick _delay)
             : completionEvent(ce), totBytes(tb), delay(_delay),
               gen(addr, tb, chunk_sz), data(_data), flags(_flags), id(_id),
               sid(_sid), ssid(_ssid), cmd(_cmd)
         {}

         PacketPtr createPacket();
     };

     /** Send the next packet from a DMA request in atomic mode. */
     bool sendAtomicReq(DmaReqState *state);
     /**
      * Send the next packet from a DMA request in atomic mode, and request
      * and/or use memory backdoors if possible.
      */
     bool sendAtomicBdReq(DmaReqState *state);

     /**
      * Handle a response packet by updating the corresponding DMA
      * request state to reflect the bytes received, and also update
      * the pending request counter. If the DMA request that this
      * packet is part of is complete, then signal the completion event
      * if present, potentially with a delay added to it.
      *
      * @param pkt Response packet to handler
      * @param delay Additional delay for scheduling the completion event
      */
     void handleRespPacket(PacketPtr pkt, Tick delay=0);
     void handleResp(DmaReqState *state, Addr addr, Addr size, Tick delay=0);

   public:
     /** The device that owns this port. */
     ClockedObject *const device;

     /** The system that device/port are in. This is used to select which mode
      * we are currently operating in. */
     System *const sys;

     /** Id for all requests */
     const RequestorID requestorId;

   protected:
     /** Use a deque as we never do any insertion or removal in the middle */
     std::deque<DmaReqState *> transmitList;

     /** Event used to schedule a future sending from the transmit list. */
     EventFunctionWrapper sendEvent;

     /** Number of outstanding packets the dma port has. */
     uint32_t pendingCount = 0;

     /** The packet (if any) waiting for a retry to send. */
     PacketPtr inRetry = nullptr;

     /** Default streamId */
     const uint32_t defaultSid;

     /** Default substreamId */
     const uint32_t defaultSSid;

     const int cacheLineSize;

   protected:

     bool recvTimingResp(PacketPtr pkt) override;
     void recvReqRetry() override;

   public:

     DmaPort(ClockedObject *dev, System *s, uint32_t sid=0, uint32_t ssid=0);

     void
     dmaAction(Packet::Command cmd, Addr addr, int size, Event *event,
               uint8_t *data, Tick delay, Request::Flags flag=0);

     void
     dmaAction(Packet::Command cmd, Addr addr, int size, Event *event,
               uint8_t *data, uint32_t sid, uint32_t ssid, Tick delay,
               Request::Flags flag=0);

     bool dmaPending() const { return pendingCount > 0; }

     DrainState drain() override;
 };

 class DmaDevice : public PioDevice
 {
    protected:
     DmaPort dmaPort;

   public:
     typedef DmaDeviceParams Params;
     DmaDevice(const Params &p);
     virtual ~DmaDevice() = default;

     void
     dmaWrite(Addr addr, int size, Event *event, uint8_t *data,
              uint32_t sid, uint32_t ssid, Tick delay=0)
     {
         dmaPort.dmaAction(MemCmd::WriteReq, addr, size, event, data,
                           sid, ssid, delay);
     }

     void
     dmaWrite(Addr addr, int size, Event *event, uint8_t *data, Tick delay=0)
     {
         dmaPort.dmaAction(MemCmd::WriteReq, addr, size, event, data, delay);
     }

     void
     dmaRead(Addr addr, int size, Event *event, uint8_t *data,
             uint32_t sid, uint32_t ssid, Tick delay=0)
     {
         dmaPort.dmaAction(MemCmd::ReadReq, addr, size, event, data,
                           sid, ssid, delay);
     }

     void
     dmaRead(Addr addr, int size, Event *event, uint8_t *data, Tick delay=0)
     {
         dmaPort.dmaAction(MemCmd::ReadReq, addr, size, event, data, delay);
     }

     bool dmaPending() const { return dmaPort.dmaPending(); }

     void init() override;

     unsigned int cacheBlockSize() const { return sys->cacheLineSize(); }

     Port &getPort(const std::string &if_name,
                   PortID idx=InvalidPortID) override;

 };

 /**
  * DMA callback class.
  *
  * Allows one to register for a callback event after a sequence of (potentially
  * non-contiguous) DMA transfers on a DmaPort completes.  Derived classes must
  * implement the process() method and use getChunkEvent() to allocate a
  * callback event for each participating DMA.
  */
 class DmaCallback : public Drainable
 {
   public:
     virtual const std::string name() const { return "DmaCallback"; }

     /**
      * DmaPort ensures that all oustanding DMA accesses have completed before
      * it finishes draining.  However, DmaChunkEvents scheduled with a delay
      * might still be sitting on the event queue.  Therefore, draining is not
      * complete until count is 0, which ensures that all outstanding
      * DmaChunkEvents associated with this DmaCallback have fired.
      */
     DrainState
     drain() override
     {
         return count ? DrainState::Draining : DrainState::Drained;
     }

   protected:
     int count = 0;

     virtual ~DmaCallback() = default;

     /**
      * Callback function invoked on completion of all chunks.
      */
     virtual void process() = 0;

   private:
     /**
      * Called by DMA engine completion event on each chunk completion.
      * Since the object may delete itself here, callers should not use
      * the object pointer after calling this function.
      */
     void
     chunkComplete()
     {
         if (--count == 0) {
             process();
             // Need to notify DrainManager that this object is finished
             // draining, even though it is immediately deleted.
             signalDrainDone();
             delete this;
         }
     }

   public:

     /**
      * Request a chunk event.  Chunks events should be provided to each DMA
      * request that wishes to participate in this DmaCallback.
      */
     Event *
     getChunkEvent()
     {
         ++count;
         return new EventFunctionWrapper([this]{ chunkComplete(); }, name(),
                                         true);
     }
 };

 /**
  * Buffered DMA engine helper class
  *
  * This class implements a simple DMA engine that feeds a FIFO
  * buffer. The size of the buffer, the maximum number of pending
  * requests and the maximum request size are all set when the engine
  * is instantiated.
  *
  * An <i>asynchronous</i> transfer of a <i>block</i> of data
  * (designated by a start address and a size) is started by calling
  * the startFill() method. The DMA engine will aggressively try to
  * keep the internal FIFO full. As soon as there is room in the FIFO
  * for more data <i>and</i> there are free request slots, a new fill
  * will be started.
  *
  * Data in the FIFO can be read back using the get() and tryGet()
  * methods. Both request a block of data from the FIFO. However, get()
  * panics if the block cannot be satisfied, while tryGet() simply
  * returns false. The latter call makes it possible to implement
  * custom buffer underrun handling.
  *
  * A simple use case would be something like this:
  * \code{.cpp}
  *     // Create a DMA engine with a 1KiB buffer. Issue up to 8 concurrent
  *     // uncacheable 64 byte (maximum) requests.
  *     DmaReadFifo *dma = new DmaReadFifo(port, 1024, 64, 8,
  *                                        Request::UNCACHEABLE);
  *
  *     // Start copying 4KiB data from 0xFF000000
  *     dma->startFill(0xFF000000, 0x1000);
  *
  *     // Some time later when there is data in the FIFO.
  *     uint8_t data[8];
  *     dma->get(data, sizeof(data))
  * \endcode
  *
  *
  * The DMA engine allows new blocks to be requested as soon as the
  * last request for a block has been sent (i.e., there is no need to
  * wait for pending requests to complete). This can be queried with
  * the atEndOfBlock() method and more advanced implementations may
  * override the onEndOfBlock() callback.
  */
 class DmaReadFifo : public Drainable, public Serializable
 {
   public:
     DmaReadFifo(DmaPort &port, size_t size,
                 unsigned max_req_size,
                 unsigned max_pending,
                 Request::Flags flags=0);

     ~DmaReadFifo();

   public: // Serializable
     void serialize(CheckpointOut &cp) const override;
     void unserialize(CheckpointIn &cp) override;

   public: // Drainable
     DrainState drain() override;

   public: // FIFO access
     /**
      * @{
      * @name FIFO access
      */
     /**
      * Try to read data from the FIFO.
      *
      * This method reads len bytes of data from the FIFO and stores
      * them in the memory location pointed to by dst. The method
      * fails, and no data is written to the buffer, if the FIFO
      * doesn't contain enough data to satisfy the request.
      *
      * @param dst Pointer to a destination buffer
      * @param len Amount of data to read.
      * @return true on success, false otherwise.
      */
     bool tryGet(uint8_t *dst, size_t len);

     template<typename T>
     bool
     tryGet(T &value)
     {
         return tryGet(static_cast<T *>(&value), sizeof(T));
     };

     /**
      * Read data from the FIFO and panic on failure.
      *
      * @see tryGet()
      *
      * @param dst Pointer to a destination buffer
      * @param len Amount of data to read.
      */
     void get(uint8_t *dst, size_t len);

     template<typename T>
     T
     get()
     {
         T value;
         get(static_cast<uint8_t *>(&value), sizeof(T));
         return value;
     };

     /** Get the amount of data stored in the FIFO */
     size_t size() const { return buffer.size(); }
     /** Flush the FIFO */
     void flush() { buffer.flush(); }

     /** @} */
   public: // FIFO fill control
     /**
      * @{
      * @name FIFO fill control
      */
     /**
      * Start filling the FIFO.
      *
      * @warn It's considered an error to call start on an active DMA
      * engine unless the last request from the active block has been
      * sent (i.e., atEndOfBlock() is true).
      *
      * @param start Physical address to copy from.
      * @param size Size of the block to copy.
      */
     void startFill(Addr start, size_t size);

     /**
      * Stop the DMA engine.
      *
      * Stop filling the FIFO and ignore incoming responses for pending
      * requests. The onEndOfBlock() callback will not be called after
      * this method has been invoked. However, once the last response
      * has been received, the onIdle() callback will still be called.
      */
     void stopFill();

     /**
      * Has the DMA engine sent out the last request for the active
      * block?
      */
     bool atEndOfBlock() const { return nextAddr == endAddr; }

     /**
      * Is the DMA engine active (i.e., are there still in-flight
      * accesses)?
      */
     bool
     isActive() const
     {
         return !(pendingRequests.empty() && atEndOfBlock());
     }

     /** @} */
   protected: // Callbacks
     /**
      * @{
      * @name Callbacks
      */
     /**
      * End of block callback
      *
      * This callback is called <i>once</i> after the last access in a
      * block has been sent. It is legal for a derived class to call
      * startFill() from this method to initiate a transfer.
      */
     virtual void onEndOfBlock() {};

     /**
      * Last response received callback
      *
      * This callback is called when the DMA engine becomes idle (i.e.,
      * there are no pending requests).
      *
      * It is possible for a DMA engine to reach the end of block and
      * become idle at the same tick. In such a case, the
      * onEndOfBlock() callback will be called first. This callback
      * will <i>NOT</i> be called if that callback initiates a new DMA transfer.
      */
     virtual void onIdle() {};

     /** @} */
   private: // Configuration
     /** Maximum request size in bytes */
     const Addr maxReqSize;
     /** Maximum FIFO size in bytes */
     const size_t fifoSize;
     /** Request flags */
     const Request::Flags reqFlags;

     DmaPort &port;

     const int cacheLineSize;

   private:
     class DmaDoneEvent : public Event
     {
       public:
         DmaDoneEvent(DmaReadFifo *_parent, size_t max_size);

         void kill();
         void cancel();
         bool canceled() const { return _canceled; }
         void reset(size_t size);
         void process();

         bool done() const { return _done; }
         size_t requestSize() const { return _requestSize; }
         const uint8_t *data() const { return _data.data(); }
         uint8_t *data() { return _data.data(); }

       private:
         DmaReadFifo *parent;
         bool _done = false;
         bool _canceled = false;
         size_t _requestSize;
         std::vector<uint8_t> _data;
     };

     typedef std::unique_ptr<DmaDoneEvent> DmaDoneEventUPtr;

     /**
      * DMA request done, handle incoming data and issue new
      * request.
      */
     void dmaDone();

     /** Handle pending requests that have been flagged as done. */
     void handlePending();

     /** Try to issue new DMA requests or bypass DMA requests*/
     void resumeFill();

     /** Try to issue new DMA requests during normal execution*/
     void resumeFillTiming();

     /** Try to bypass DMA requests in non-caching mode */
     void resumeFillBypass();

   private: // Internal state
     Fifo<uint8_t> buffer;

     Addr nextAddr = 0;
     Addr endAddr = 0;

     std::deque<DmaDoneEventUPtr> pendingRequests;
     std::deque<DmaDoneEventUPtr> freeRequests;
 };

 #endif // __DEV_DMA_DEVICE_HH__
	/*
	* Copyright (c) 2012-2013, 2015, 2017, 2019 ARM Limited
	* All rights reserved.
	*
	* The license below extends only to copyright in the software and shall
	* not be construed as granting a license to any other intellectual
	* property including but not limited to intellectual property relating
	* to a hardware implementation of the functionality of the software
	* licensed hereunder. You may use the software subject to the license
	* terms below provided that you ensure that this notice is replicated
	* unmodified and in its entirety in all distributions of the software,
	* modified or unmodified, in source code or in binary form.
	*
	* Copyright (c) 2004-2005 The Regents of The University of Michigan
	* All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions are
	* met: redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer;
	* redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution;
	* neither the name of the copyright holders nor the names of its
	* contributors may be used to endorse or promote products derived from
	* this software without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	*/

	#ifndef __DEV_DMA_DEVICE_HH__
	#define __DEV_DMA_DEVICE_HH__

	#include <deque>
	#include <memory>

	#include "base/addr_range_map.hh"
	#include "base/chunk_generator.hh"
	#include "base/circlebuf.hh"
	#include "dev/io_device.hh"
	#include "mem/backdoor.hh"
	#include "params/DmaDevice.hh"
	#include "sim/drain.hh"
	#include "sim/system.hh"

	class ClockedObject;

	class DmaPort : public RequestPort, public Drainable
	{
	private:
	AddrRangeMap<MemBackdoorPtr, 1> memBackdoors;

	/**
	* Take the first request on the transmit list and attempt to send a timing
	* packet from it. If it is successful, schedule the sending of the next
	* packet. Otherwise remember that we are waiting for a retry.
	*/
	void trySendTimingReq();

	/**
	* For timing, attempt to send the first item on the transmit
	* list, and if it is successful and there are more packets
	* waiting, then schedule the sending of the next packet. For
	* atomic, simply send and process everything on the transmit
	* list.
	*/
	void sendDma();

	struct DmaReqState : public Packet::SenderState
	{
	/** Event to call on the device when this transaction (all packets)
	* complete. */
	Event *completionEvent;

	/** Total number of bytes that this transaction involves. */
	const Addr totBytes;

	/** Number of bytes that have been acked for this transaction. */
	Addr numBytes = 0;

	/** Amount to delay completion of dma by */
	const Tick delay;

	/** Object to track what chunks of bytes to send at a time. */
	ChunkGenerator gen;

	/** Pointer to a buffer for the data. */
	uint8_t *const data = nullptr;

	/** The flags to use for requests. */
	const Request::Flags flags;

	/** The requestor ID to use for requests. */
	const RequestorID id;

	/** Stream IDs. */
	const uint32_t sid;
	const uint32_t ssid;

	/** Command for the request. */
	const Packet::Command cmd;

	DmaReqState(Packet::Command _cmd, Addr addr, Addr chunk_sz, Addr tb,
	uint8_t *_data, Request::Flags _flags, RequestorID _id,
	uint32_t _sid, uint32_t _ssid, Event *ce, Tick _delay)
	: completionEvent(ce), totBytes(tb), delay(_delay),
	gen(addr, tb, chunk_sz), data(_data), flags(_flags), id(_id),
	sid(_sid), ssid(_ssid), cmd(_cmd)
	{}

	PacketPtr createPacket();
	};

	/** Send the next packet from a DMA request in atomic mode. */
	bool sendAtomicReq(DmaReqState *state);
	/**
	* Send the next packet from a DMA request in atomic mode, and request
	* and/or use memory backdoors if possible.
	*/
	bool sendAtomicBdReq(DmaReqState *state);

	/**
	* Handle a response packet by updating the corresponding DMA
	* request state to reflect the bytes received, and also update
	* the pending request counter. If the DMA request that this
	* packet is part of is complete, then signal the completion event
	* if present, potentially with a delay added to it.
	*
	* @param pkt Response packet to handler
	* @param delay Additional delay for scheduling the completion event
	*/
	void handleRespPacket(PacketPtr pkt, Tick delay=0);
	void handleResp(DmaReqState *state, Addr addr, Addr size, Tick delay=0);

	public:
	/** The device that owns this port. */
	ClockedObject *const device;

	/** The system that device/port are in. This is used to select which mode
	* we are currently operating in. */
	System *const sys;

	/** Id for all requests */
	const RequestorID requestorId;

	protected:
	/** Use a deque as we never do any insertion or removal in the middle */
	std::deque<DmaReqState *> transmitList;

	/** Event used to schedule a future sending from the transmit list. */
	EventFunctionWrapper sendEvent;

	/** Number of outstanding packets the dma port has. */
	uint32_t pendingCount = 0;

	/** The packet (if any) waiting for a retry to send. */
	PacketPtr inRetry = nullptr;

	/** Default streamId */
	const uint32_t defaultSid;

	/** Default substreamId */
	const uint32_t defaultSSid;

	const int cacheLineSize;

	protected:

	bool recvTimingResp(PacketPtr pkt) override;
	void recvReqRetry() override;

	public:

	DmaPort(ClockedObject dev, System s, uint32_t sid=0, uint32_t ssid=0);

	void
	dmaAction(Packet::Command cmd, Addr addr, int size, Event *event,
	uint8_t *data, Tick delay, Request::Flags flag=0);

	void
	dmaAction(Packet::Command cmd, Addr addr, int size, Event *event,
	uint8_t *data, uint32_t sid, uint32_t ssid, Tick delay,
	Request::Flags flag=0);

	bool dmaPending() const { return pendingCount > 0; }

	DrainState drain() override;
	};

	class DmaDevice : public PioDevice
	{
	protected:
	DmaPort dmaPort;

	public:
	typedef DmaDeviceParams Params;
	DmaDevice(const Params &p);
	virtual ~DmaDevice() = default;

	void
	dmaWrite(Addr addr, int size, Event event, uint8_t data,
	uint32_t sid, uint32_t ssid, Tick delay=0)
	{
	dmaPort.dmaAction(MemCmd::WriteReq, addr, size, event, data,
	sid, ssid, delay);
	}

	void
	dmaWrite(Addr addr, int size, Event event, uint8_t data, Tick delay=0)
	{
	dmaPort.dmaAction(MemCmd::WriteReq, addr, size, event, data, delay);
	}

	void
	dmaRead(Addr addr, int size, Event event, uint8_t data,
	uint32_t sid, uint32_t ssid, Tick delay=0)
	{
	dmaPort.dmaAction(MemCmd::ReadReq, addr, size, event, data,
	sid, ssid, delay);
	}

	void
	dmaRead(Addr addr, int size, Event event, uint8_t data, Tick delay=0)
	{
	dmaPort.dmaAction(MemCmd::ReadReq, addr, size, event, data, delay);
	}

	bool dmaPending() const { return dmaPort.dmaPending(); }

	void init() override;

	unsigned int cacheBlockSize() const { return sys->cacheLineSize(); }

	Port &getPort(const std::string &if_name,
	PortID idx=InvalidPortID) override;

	};

	/**
	* DMA callback class.
	*
	* Allows one to register for a callback event after a sequence of (potentially
	* non-contiguous) DMA transfers on a DmaPort completes. Derived classes must
	* implement the process() method and use getChunkEvent() to allocate a
	* callback event for each participating DMA.
	*/
	class DmaCallback : public Drainable
	{
	public:
	virtual const std::string name() const { return "DmaCallback"; }

	/**
	* DmaPort ensures that all oustanding DMA accesses have completed before
	* it finishes draining. However, DmaChunkEvents scheduled with a delay
	* might still be sitting on the event queue. Therefore, draining is not
	* complete until count is 0, which ensures that all outstanding
	* DmaChunkEvents associated with this DmaCallback have fired.
	*/
	DrainState
	drain() override
	{
	return count ? DrainState::Draining : DrainState::Drained;
	}

	protected:
	int count = 0;

	virtual ~DmaCallback() = default;

	/**
	* Callback function invoked on completion of all chunks.
	*/
	virtual void process() = 0;

	private:
	/**
	* Called by DMA engine completion event on each chunk completion.
	* Since the object may delete itself here, callers should not use
	* the object pointer after calling this function.
	*/
	void
	chunkComplete()
	{
	if (--count == 0) {
	process();
	// Need to notify DrainManager that this object is finished
	// draining, even though it is immediately deleted.
	signalDrainDone();
	delete this;
	}
	}

	public:

	/**
	* Request a chunk event. Chunks events should be provided to each DMA
	* request that wishes to participate in this DmaCallback.
	*/
	Event *
	getChunkEvent()
	{
	++count;
	return new EventFunctionWrapper([this]{ chunkComplete(); }, name(),
	true);
	}
	};

	/**
	* Buffered DMA engine helper class
	*
	* This class implements a simple DMA engine that feeds a FIFO
	* buffer. The size of the buffer, the maximum number of pending
	* requests and the maximum request size are all set when the engine
	* is instantiated.
	*
	* An <i>asynchronous</i> transfer of a <i>block</i> of data
	* (designated by a start address and a size) is started by calling
	* the startFill() method. The DMA engine will aggressively try to
	* keep the internal FIFO full. As soon as there is room in the FIFO
	* for more data <i>and</i> there are free request slots, a new fill
	* will be started.
	*
	* Data in the FIFO can be read back using the get() and tryGet()
	* methods. Both request a block of data from the FIFO. However, get()
	* panics if the block cannot be satisfied, while tryGet() simply
	* returns false. The latter call makes it possible to implement
	* custom buffer underrun handling.
	*
	* A simple use case would be something like this:
	* \code{.cpp}
	* // Create a DMA engine with a 1KiB buffer. Issue up to 8 concurrent
	* // uncacheable 64 byte (maximum) requests.
	* DmaReadFifo *dma = new DmaReadFifo(port, 1024, 64, 8,
	* Request::UNCACHEABLE);
	*
	* // Start copying 4KiB data from 0xFF000000
	* dma->startFill(0xFF000000, 0x1000);
	*
	* // Some time later when there is data in the FIFO.
	* uint8_t data[8];
	* dma->get(data, sizeof(data))
	* \endcode
	*
	*
	* The DMA engine allows new blocks to be requested as soon as the
	* last request for a block has been sent (i.e., there is no need to
	* wait for pending requests to complete). This can be queried with
	* the atEndOfBlock() method and more advanced implementations may
	* override the onEndOfBlock() callback.
	*/
	class DmaReadFifo : public Drainable, public Serializable
	{
	public:
	DmaReadFifo(DmaPort &port, size_t size,
	unsigned max_req_size,
	unsigned max_pending,
	Request::Flags flags=0);

	~DmaReadFifo();

	public: // Serializable
	void serialize(CheckpointOut &cp) const override;
	void unserialize(CheckpointIn &cp) override;

	public: // Drainable
	DrainState drain() override;

	public: // FIFO access
	/**
	* @{
	* @name FIFO access
	*/
	/**
	* Try to read data from the FIFO.
	*
	* This method reads len bytes of data from the FIFO and stores
	* them in the memory location pointed to by dst. The method
	* fails, and no data is written to the buffer, if the FIFO
	* doesn't contain enough data to satisfy the request.
	*
	* @param dst Pointer to a destination buffer
	* @param len Amount of data to read.
	* @return true on success, false otherwise.
	*/
	bool tryGet(uint8_t *dst, size_t len);

	template<typename T>
	bool
	tryGet(T &value)
	{
	return tryGet(static_cast<T *>(&value), sizeof(T));
	};

	/**
	* Read data from the FIFO and panic on failure.
	*
	* @see tryGet()
	*
	* @param dst Pointer to a destination buffer
	* @param len Amount of data to read.
	*/
	void get(uint8_t *dst, size_t len);

	template<typename T>
	T
	get()
	{
	T value;
	get(static_cast<uint8_t *>(&value), sizeof(T));
	return value;
	};

	/** Get the amount of data stored in the FIFO */
	size_t size() const { return buffer.size(); }
	/** Flush the FIFO */
	void flush() { buffer.flush(); }

	/** @} */
	public: // FIFO fill control
	/**
	* @{
	* @name FIFO fill control
	*/
	/**
	* Start filling the FIFO.
	*
	* @warn It's considered an error to call start on an active DMA
	* engine unless the last request from the active block has been
	* sent (i.e., atEndOfBlock() is true).
	*
	* @param start Physical address to copy from.
	* @param size Size of the block to copy.
	*/
	void startFill(Addr start, size_t size);

	/**
	* Stop the DMA engine.
	*
	* Stop filling the FIFO and ignore incoming responses for pending
	* requests. The onEndOfBlock() callback will not be called after
	* this method has been invoked. However, once the last response
	* has been received, the onIdle() callback will still be called.
	*/
	void stopFill();

	/**
	* Has the DMA engine sent out the last request for the active
	* block?
	*/
	bool atEndOfBlock() const { return nextAddr == endAddr; }

	/**
	* Is the DMA engine active (i.e., are there still in-flight
	* accesses)?
	*/
	bool
	isActive() const
	{
	return !(pendingRequests.empty() && atEndOfBlock());
	}

	/** @} */
	protected: // Callbacks
	/**
	* @{
	* @name Callbacks
	*/
	/**
	* End of block callback
	*
	* This callback is called <i>once</i> after the last access in a
	* block has been sent. It is legal for a derived class to call
	* startFill() from this method to initiate a transfer.
	*/
	virtual void onEndOfBlock() {};

	/**
	* Last response received callback
	*
	* This callback is called when the DMA engine becomes idle (i.e.,
	* there are no pending requests).
	*
	* It is possible for a DMA engine to reach the end of block and
	* become idle at the same tick. In such a case, the
	* onEndOfBlock() callback will be called first. This callback
	* will <i>NOT</i> be called if that callback initiates a new DMA transfer.
	*/
	virtual void onIdle() {};

	/** @} */
	private: // Configuration
	/** Maximum request size in bytes */
	const Addr maxReqSize;
	/** Maximum FIFO size in bytes */
	const size_t fifoSize;
	/** Request flags */
	const Request::Flags reqFlags;

	DmaPort &port;

	const int cacheLineSize;

	private:
	class DmaDoneEvent : public Event
	{
	public:
	DmaDoneEvent(DmaReadFifo *_parent, size_t max_size);

	void kill();
	void cancel();
	bool canceled() const { return _canceled; }
	void reset(size_t size);
	void process();

	bool done() const { return _done; }
	size_t requestSize() const { return _requestSize; }
	const uint8_t *data() const { return _data.data(); }
	uint8_t *data() { return _data.data(); }

	private:
	DmaReadFifo *parent;
	bool _done = false;
	bool _canceled = false;
	size_t _requestSize;
	std::vector<uint8_t> _data;
	};

	typedef std::unique_ptr<DmaDoneEvent> DmaDoneEventUPtr;

	/**
	* DMA request done, handle incoming data and issue new
	* request.
	*/
	void dmaDone();

	/** Handle pending requests that have been flagged as done. */
	void handlePending();

	/** Try to issue new DMA requests or bypass DMA requests*/
	void resumeFill();

	/** Try to issue new DMA requests during normal execution*/
	void resumeFillTiming();

	/** Try to bypass DMA requests in non-caching mode */
	void resumeFillBypass();

	private: // Internal state
	Fifo<uint8_t> buffer;

	Addr nextAddr = 0;
	Addr endAddr = 0;

	std::deque<DmaDoneEventUPtr> pendingRequests;
	std::deque<DmaDoneEventUPtr> freeRequests;
	};

	#endif // __DEV_DMA_DEVICE_HH__