blob: e568549c1d8e283720c0d2128fc2ab060fad73fd [file] [log] [blame]
/*
* Copyright (c) 2015-2016 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* @file
* The interface class for dist gem5 simulations.
*
* dist-gem5 is an extension to gem5 to enable parallel simulation of a
* distributed system (e.g. simulation of a pool of machines
* connected by Ethernet links). A dist gem5 run consists of seperate gem5
* processes running in parallel. Each gem5 process executes
* the simulation of a component of the simulated distributed system.
* (An example component can be a dist-core board with an Ethernet NIC.)
* The DistIface class below provides services to transfer data and
* control messages among the gem5 processes. The main such services are
* as follows.
*
* 1. Send a data packet coming from a simulated Ethernet link. The packet
* will be transferred to (all) the target(s) gem5 processes. The send
* operation is always performed by the simulation thread, i.e. the gem5
* thread that is processing the event queue associated with the simulated
* Ethernet link.
*
* 2. Spawn a receiver thread to process messages coming in from the
* from other gem5 processes. Each simulated Ethernet link has its own
* associated receiver thread. The receiver thread saves the incoming packet
* and schedule an appropriate receive event in the event queue.
*
* 3. Schedule a global barrier event periodically to keep the gem5
* processes in sync.
* Periodic barrier event to keep peer gem5 processes in sync. The basic idea
* is that no gem5 process can go ahead further than the simulated link
* transmission delay to ensure that a corresponding receive event can always
* be scheduled for any message coming in from a peer gem5 process.
*
*
*
* This interface is an abstract class. It can work with various low level
* send/receive service implementations (e.g. TCP/IP, MPI,...). A TCP
* stream socket version is implemented in src/dev/net/tcp_iface.[hh,cc].
*/
#ifndef __DEV_DIST_IFACE_HH__
#define __DEV_DIST_IFACE_HH__
#include <array>
#include <mutex>
#include <queue>
#include <thread>
#include <utility>
#include "base/logging.hh"
#include "dev/net/dist_packet.hh"
#include "dev/net/etherpkt.hh"
#include "sim/core.hh"
#include "sim/drain.hh"
#include "sim/global_event.hh"
#include "sim/serialize.hh"
class EventManager;
class System;
class ThreadContext;
/**
* The interface class to talk to peer gem5 processes.
*/
class DistIface : public Drainable, public Serializable
{
public:
typedef DistHeaderPkt::Header Header;
protected:
typedef DistHeaderPkt::MsgType MsgType;
typedef DistHeaderPkt::ReqType ReqType;
private:
class SyncEvent;
/** @class Sync
* This class implements global sync operations among gem5 peer processes.
*
* @note This class is used as a singleton object (shared by all DistIface
* objects).
*/
class Sync : public Serializable
{
protected:
/**
* The lock to protect access to the Sync object.
*/
std::mutex lock;
/**
* Condition variable for the simulation thread to wait on
* until all receiver threads completes the current global
* synchronisation.
*/
std::condition_variable cv;
/**
* Number of receiver threads that not yet completed the current global
* synchronisation.
*/
unsigned waitNum;
/**
* Flag is set if exit is permitted upon sync completion
*/
bool doExit;
/**
* Flag is set if taking a ckpt is permitted upon sync completion
*/
bool doCkpt;
/**
* Flag is set if sync is to stop upon sync completion
*/
bool doStopSync;
/**
* The repeat value for the next periodic sync
*/
Tick nextRepeat;
/**
* Tick for the next periodic sync (if the event is not scheduled yet)
*/
Tick nextAt;
/**
* Flag is set if the sync is aborted (e.g. due to connection lost)
*/
bool isAbort;
friend class SyncEvent;
public:
/**
* Initialize periodic sync params.
*
* @param start Start tick for dist synchronisation
* @param repeat Frequency of dist synchronisation
*
*/
void init(Tick start, Tick repeat);
/**
* Core method to perform a full dist sync.
*
* @return true if the sync completes, false if it gets aborted
*/
virtual bool run(bool same_tick) = 0;
/**
* Callback when the receiver thread gets a sync ack message.
*
* @return false if the receiver thread needs to stop (e.g.
* simulation is to exit)
*/
virtual bool progress(Tick send_tick,
Tick next_repeat,
ReqType do_ckpt,
ReqType do_exit,
ReqType do_stop_sync) = 0;
/**
* Abort processing an on-going sync event (in case of an error, e.g.
* lost connection to a peer gem5)
*/
void abort();
virtual void requestCkpt(ReqType req) = 0;
virtual void requestExit(ReqType req) = 0;
virtual void requestStopSync(ReqType req) = 0;
void drainComplete();
virtual void serialize(CheckpointOut &cp) const override = 0;
virtual void unserialize(CheckpointIn &cp) override = 0;
};
class SyncNode: public Sync
{
private:
/**
* Exit requested
*/
ReqType needExit;
/**
* Ckpt requested
*/
ReqType needCkpt;
/**
* Sync stop requested
*/
ReqType needStopSync;
public:
SyncNode();
~SyncNode() {}
bool run(bool same_tick) override;
bool progress(Tick max_req_tick,
Tick next_repeat,
ReqType do_ckpt,
ReqType do_exit,
ReqType do_stop_sync) override;
void requestCkpt(ReqType req) override;
void requestExit(ReqType req) override;
void requestStopSync(ReqType req) override;
void serialize(CheckpointOut &cp) const override;
void unserialize(CheckpointIn &cp) override;
};
class SyncSwitch: public Sync
{
private:
/**
* Counter for recording exit requests
*/
unsigned numExitReq;
/**
* Counter for recording ckpt requests
*/
unsigned numCkptReq;
/**
* Counter for recording stop sync requests
*/
unsigned numStopSyncReq;
/**
* Number of connected simulated nodes
*/
unsigned numNodes;
public:
SyncSwitch(int num_nodes);
~SyncSwitch() {}
bool run(bool same_tick) override;
bool progress(Tick max_req_tick,
Tick next_repeat,
ReqType do_ckpt,
ReqType do_exit,
ReqType do_stop_sync) override;
void requestCkpt(ReqType) override {
panic("Switch requested checkpoint");
}
void requestExit(ReqType) override {
panic("Switch requested exit");
}
void requestStopSync(ReqType) override {
panic("Switch requested stop sync");
}
void serialize(CheckpointOut &cp) const override;
void unserialize(CheckpointIn &cp) override;
};
/**
* The global event to schedule periodic dist sync. It is used as a
* singleton object.
*
* The periodic synchronisation works as follows.
* 1. A SyncEvent is scheduled as a global event when startup() is
* called.
* 2. The process() method of the SyncEvent initiates a new barrier
* for each simulated Ethernet link.
* 3. Simulation thread(s) then waits until all receiver threads
* complete the ongoing barrier. The global sync event is done.
*/
class SyncEvent : public GlobalSyncEvent
{
private:
/**
* Flag to set when the system is draining
*/
bool _draining;
public:
/**
* Only the firstly instantiated DistIface object will
* call this constructor.
*/
SyncEvent() : GlobalSyncEvent(Sim_Exit_Pri, 0), _draining(false) {}
~SyncEvent() {}
/**
* Schedule the first periodic sync event.
*/
void start();
/**
* This is a global event so process() will only be called by
* exactly one simulation thread. (See further comments in the .cc
* file.)
*/
void process() override;
bool draining() const { return _draining; }
void draining(bool fl) { _draining = fl; }
};
/**
* Class to encapsulate information about data packets received.
* @note The main purpose of the class to take care of scheduling receive
* done events for the simulated network link and store incoming packets
* until they can be received by the simulated network link.
*/
class RecvScheduler : public Serializable
{
private:
/**
* Received packet descriptor. This information is used by the receive
* thread to schedule receive events and by the simulation thread to
* process those events.
*/
struct Desc : public Serializable
{
EthPacketPtr packet;
Tick sendTick;
Tick sendDelay;
Desc() : sendTick(0), sendDelay(0) {}
Desc(EthPacketPtr p, Tick s, Tick d) :
packet(p), sendTick(s), sendDelay(d) {}
Desc(const Desc &d) :
packet(d.packet), sendTick(d.sendTick), sendDelay(d.sendDelay) {}
void serialize(CheckpointOut &cp) const override;
void unserialize(CheckpointIn &cp) override;
};
/**
* The queue to store the receive descriptors.
*/
std::queue<Desc> descQueue;
/**
* The tick when the most recent receive event was processed.
*
* @note This information is necessary to simulate possible receiver
* link contention when calculating the receive tick for the next
* incoming data packet (see the calcReceiveTick() method)
*/
Tick prevRecvTick;
/**
* The receive done event for the simulated Ethernet link.
*
* @note This object is constructed by the simulated network link. We
* schedule this object for each incoming data packet.
*/
Event *recvDone;
/**
* The link delay in ticks for the simulated Ethernet link.
*
* @note This value is used for calculating the receive ticks for
* incoming data packets.
*/
Tick linkDelay;
/**
* The event manager associated with the simulated Ethernet link.
*
* @note It is used to access the event queue for scheduling receive
* done events for the link.
*/
EventManager *eventManager;
/**
* Calculate the tick to schedule the next receive done event.
*
* @param send_tick The tick the packet was sent.
* @param send_delay The simulated delay at the sender side.
* @param prev_recv_tick Tick when the last receive event was
* processed.
*
* @note This method tries to take into account possible receiver link
* contention and adjust receive tick for the incoming packets
* accordingly.
*/
Tick calcReceiveTick(Tick send_tick,
Tick send_delay,
Tick prev_recv_tick);
/**
* Flag to set if receive ticks for pending packets need to be
* recalculated due to changed link latencies at a resume
*/
bool ckptRestore;
public:
/**
* Scheduler for the incoming data packets.
*
* @param em The event manager associated with the simulated Ethernet
* link.
*/
RecvScheduler(EventManager *em) :
prevRecvTick(0), recvDone(nullptr), linkDelay(0),
eventManager(em), ckptRestore(false) {}
/**
* Initialize network link parameters.
*
* @note This method is called from the receiver thread (see
* recvThreadFunc()).
*/
void init(Event *recv_done, Tick link_delay);
/**
* Fetch the next packet that is to be received by the simulated network
* link.
*
* @note This method is called from the process() method of the receive
* done event associated with the network link.
*/
EthPacketPtr popPacket();
/**
* Push a newly arrived packet into the desc queue.
*/
void pushPacket(EthPacketPtr new_packet,
Tick send_tick,
Tick send_delay);
void serialize(CheckpointOut &cp) const override;
void unserialize(CheckpointIn &cp) override;
/**
* Adjust receive ticks for pending packets when restoring from a
* checkpoint
*
* @note Link speed and delay parameters may change at resume.
*/
void resumeRecvTicks();
};
/**
* Tick to schedule the first dist sync event.
* This is just as optimization : we do not need any dist sync
* event until the simulated NIC is brought up by the OS.
*/
Tick syncStart;
/**
* Frequency of dist sync events in ticks.
*/
Tick syncRepeat;
/**
* Receiver thread pointer.
* Each DistIface object must have exactly one receiver thread.
*/
std::thread *recvThread;
/**
* Meta information about data packets received.
*/
RecvScheduler recvScheduler;
/**
* Use pseudoOp to start synchronization.
*/
bool syncStartOnPseudoOp;
protected:
/**
* The rank of this process among the gem5 peers.
*/
unsigned rank;
/**
* The number of gem5 processes comprising this dist simulation.
*/
unsigned size;
/**
* Number of DistIface objects (i.e. dist links in this gem5 process)
*/
static unsigned distIfaceNum;
/**
* Unique id for the dist link
*/
unsigned distIfaceId;
bool isMaster;
private:
/**
* Number of receiver threads (in this gem5 process)
*/
static unsigned recvThreadsNum;
/**
* The singleton Sync object to perform dist synchronisation.
*/
static Sync *sync;
/**
* The singleton SyncEvent object to schedule periodic dist sync.
*/
static SyncEvent *syncEvent;
/**
* The very first DistIface object created becomes the master. We need
* a master to co-ordinate the global synchronisation.
*/
static DistIface *master;
/**
* System pointer used to wakeup sleeping threads when stopping sync.
*/
static System *sys;
/**
* Is this node a switch?
*/
static bool isSwitch;
private:
/**
* Send out a data packet to the remote end.
* @param header Meta info about the packet (which needs to be transferred
* to the destination alongside the packet).
* @param packet Pointer to the packet to send.
*/
virtual void sendPacket(const Header &header, const EthPacketPtr &packet) = 0;
/**
* Send out a control command to the remote end.
* @param header Meta info describing the command (e.g. sync request)
*/
virtual void sendCmd(const Header &header) = 0;
/**
* Receive a header (i.e. meta info describing a data packet or a control command)
* from the remote end.
* @param header The meta info structure to store the incoming header.
*/
virtual bool recvHeader(Header &header) = 0;
/**
* Receive a packet from the remote end.
* @param header Meta info about the incoming packet (obtanied by a previous
* call to the recvHedaer() method).
* @param Pointer to packet received.
*/
virtual void recvPacket(const Header &header, EthPacketPtr &packet) = 0;
/**
* Init hook for the underlaying transport
*/
virtual void initTransport() = 0;
/**
* spawn the receiver thread.
* @param recv_done The receive done event associated with the simulated
* Ethernet link.
* @param link_delay The link delay for the simulated Ethernet link.
*/
void spawnRecvThread(const Event *recv_done, Tick link_delay);
/**
* The function executed by a receiver thread.
*/
void recvThreadFunc(Event *recv_done, Tick link_delay);
public:
/**
* ctor
* @param dist_rank Rank of this gem5 process within the dist run
* @param sync_start Start tick for dist synchronisation
* @param sync_repeat Frequency for dist synchronisation
* @param em The event manager associated with the simulated Ethernet link
*/
DistIface(unsigned dist_rank,
unsigned dist_size,
Tick sync_start,
Tick sync_repeat,
EventManager *em,
bool use_pseudo_op,
bool is_switch,
int num_nodes);
virtual ~DistIface();
/**
* Send out an Ethernet packet.
* @param pkt The Ethernet packet to send.
* @param send_delay The delay in ticks for the send completion event.
*/
void packetOut(EthPacketPtr pkt, Tick send_delay);
/**
* Fetch the packet scheduled to be received next by the simulated
* network link.
*
* @note This method is called within the process() method of the link
* receive done event. It also schedules the next receive event if the
* receive queue is not empty.
*/
EthPacketPtr packetIn() { return recvScheduler.popPacket(); }
DrainState drain() override;
void drainResume() override;
void init(const Event *e, Tick link_delay);
void startup();
void serialize(CheckpointOut &cp) const override;
void unserialize(CheckpointIn &cp) override;
/**
* Initiate the exit from the simulation.
* @param delay Delay param from the m5 exit command. If Delay is zero
* then a collaborative exit is requested (i.e. all nodes have to call
* this method before the distributed simulation can exit). If Delay is
* not zero then exit is requested asap (and it will happen at the next
* sync tick).
* @return False if we are in distributed mode (i.e. exit can happen only
* at sync), True otherwise.
*/
static bool readyToExit(Tick delay);
/**
* Initiate taking a checkpoint
* @param delay Delay param from the m5 checkpoint command. If Delay is
* zero then a collaborative checkpoint is requested (i.e. all nodes have
* to call this method before the checkpoint can be taken). If Delay is
* not zero then a checkpoint is requested asap (and it will happen at the
* next sync tick).
* @return False if we are in dist mode (i.e. exit can happen only at
* sync), True otherwise.
*/
static bool readyToCkpt(Tick delay, Tick period);
/**
* Getter for the dist rank param.
*/
static uint64_t rankParam();
/**
* Getter for the dist size param.
*/
static uint64_t sizeParam();
/**
* Trigger the master to start/stop synchronization.
*/
static void toggleSync(ThreadContext *tc);
};
#endif