| /* |
| * Copyright (c) 2015 ARM Limited |
| * All rights reserved |
| * |
| * The license below extends only to copyright in the software and shall |
| * not be construed as granting a license to any other intellectual |
| * property including but not limited to intellectual property relating |
| * to a hardware implementation of the functionality of the software |
| * licensed hereunder. You may use the software subject to the license |
| * terms below provided that you ensure that this notice is replicated |
| * unmodified and in its entirety in all distributions of the software, |
| * modified or unmodified, in source code or in binary form. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are |
| * met: redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer; |
| * redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution; |
| * neither the name of the copyright holders nor the names of its |
| * contributors may be used to endorse or promote products derived from |
| * this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| /* @file |
| * TCP stream socket based interface class implementation for dist-gem5 runs. |
| */ |
| |
| #include "dev/net/tcp_iface.hh" |
| |
| #include <arpa/inet.h> |
| #include <netdb.h> |
| #include <netinet/tcp.h> |
| #include <sys/socket.h> |
| #include <sys/types.h> |
| #include <unistd.h> |
| |
| #include <cerrno> |
| #include <cstring> |
| #include <vector> |
| |
| #include "base/types.hh" |
| #include "debug/DistEthernet.hh" |
| #include "debug/DistEthernetCmd.hh" |
| #include "sim/sim_exit.hh" |
| |
| #if defined(__FreeBSD__) |
| #include <netinet/in.h> |
| |
| #endif |
| |
| // MSG_NOSIGNAL does not exists on OS X |
| #if defined(__APPLE__) || defined(__MACH__) |
| #ifndef MSG_NOSIGNAL |
| #define MSG_NOSIGNAL SO_NOSIGPIPE |
| #endif |
| #endif |
| |
| using namespace std; |
| |
| std::vector<std::pair<TCPIface::NodeInfo, int> > TCPIface::nodes; |
| vector<int> TCPIface::sockRegistry; |
| int TCPIface::fdStatic = -1; |
| bool TCPIface::anyListening = false; |
| |
| TCPIface::TCPIface(string server_name, unsigned server_port, |
| unsigned dist_rank, unsigned dist_size, |
| Tick sync_start, Tick sync_repeat, |
| EventManager *em, bool use_pseudo_op, bool is_switch, |
| int num_nodes) : |
| DistIface(dist_rank, dist_size, sync_start, sync_repeat, em, use_pseudo_op, |
| is_switch, num_nodes), serverName(server_name), |
| serverPort(server_port), isSwitch(is_switch), listening(false) |
| { |
| if (is_switch && isMaster) { |
| while (!listen(serverPort)) { |
| DPRINTF(DistEthernet, "TCPIface(listen): Can't bind port %d\n", |
| serverPort); |
| serverPort++; |
| } |
| inform("tcp_iface listening on port %d", serverPort); |
| // Now accept the first connection requests from each compute node and |
| // store the node info. The compute nodes will then wait for ack |
| // messages. Ack messages will be sent by initTransport() in the |
| // appropriate order to make sure that every compute node is always |
| // connected to the same switch port. |
| NodeInfo ni; |
| for (int i = 0; i < size; i++) { |
| accept(); |
| DPRINTF(DistEthernet, "First connection, waiting for link info\n"); |
| if (!recvTCP(sock, &ni, sizeof(ni))) |
| panic("Failed to receive link info"); |
| nodes.push_back(make_pair(ni, sock)); |
| } |
| } |
| } |
| |
| bool |
| TCPIface::listen(int port) |
| { |
| if (listening) |
| panic("Socket already listening!"); |
| |
| struct sockaddr_in sockaddr; |
| int ret; |
| |
| fdStatic = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); |
| panic_if(fdStatic < 0, "socket() failed: %s", strerror(errno)); |
| |
| sockaddr.sin_family = PF_INET; |
| sockaddr.sin_addr.s_addr = INADDR_ANY; |
| sockaddr.sin_port = htons(port); |
| // finally clear sin_zero |
| memset(&sockaddr.sin_zero, 0, sizeof(sockaddr.sin_zero)); |
| ret = ::bind(fdStatic, (struct sockaddr *)&sockaddr, sizeof (sockaddr)); |
| |
| if (ret != 0) { |
| if (ret == -1 && errno != EADDRINUSE) |
| panic("ListenSocket(listen): bind() failed!"); |
| return false; |
| } |
| |
| if (::listen(fdStatic, 24) == -1) { |
| if (errno != EADDRINUSE) |
| panic("ListenSocket(listen): listen() failed!"); |
| |
| return false; |
| } |
| |
| listening = true; |
| anyListening = true; |
| return true; |
| } |
| |
| void |
| TCPIface::establishConnection() |
| { |
| static unsigned cur_rank = 0; |
| static unsigned cur_id = 0; |
| NodeInfo ni; |
| |
| if (isSwitch) { |
| if (cur_id == 0) { // first connection accepted in the ctor already |
| auto const &iface0 = |
| find_if(nodes.begin(), nodes.end(), |
| [](const pair<NodeInfo, int> &cn) -> bool { |
| return cn.first.rank == cur_rank; |
| }); |
| assert(iface0 != nodes.end()); |
| assert(iface0->first.distIfaceId == 0); |
| sock = iface0->second; |
| ni = iface0->first; |
| } else { // additional connections from the same compute node |
| accept(); |
| DPRINTF(DistEthernet, "Next connection, waiting for link info\n"); |
| if (!recvTCP(sock, &ni, sizeof(ni))) |
| panic("Failed to receive link info"); |
| assert(ni.rank == cur_rank); |
| assert(ni.distIfaceId == cur_id); |
| } |
| inform("Link okay (iface:%d -> (node:%d, iface:%d))", |
| distIfaceId, ni.rank, ni.distIfaceId); |
| if (ni.distIfaceId < ni.distIfaceNum - 1) { |
| cur_id++; |
| } else { |
| cur_rank++; |
| cur_id = 0; |
| } |
| // send ack |
| ni.distIfaceId = distIfaceId; |
| ni.distIfaceNum = distIfaceNum; |
| sendTCP(sock, &ni, sizeof(ni)); |
| } else { // this is not a switch |
| connect(); |
| // send link info |
| ni.rank = rank; |
| ni.distIfaceId = distIfaceId; |
| ni.distIfaceNum = distIfaceNum; |
| sendTCP(sock, &ni, sizeof(ni)); |
| DPRINTF(DistEthernet, "Connected, waiting for ack (distIfaceId:%d\n", |
| distIfaceId); |
| if (!recvTCP(sock, &ni, sizeof(ni))) |
| panic("Failed to receive ack"); |
| assert(ni.rank == rank); |
| inform("Link okay (iface:%d -> switch iface:%d)", distIfaceId, |
| ni.distIfaceId); |
| } |
| sockRegistry.push_back(sock); |
| } |
| |
| void |
| TCPIface::accept() |
| { |
| struct sockaddr_in sockaddr; |
| socklen_t slen = sizeof (sockaddr); |
| sock = ::accept(fdStatic, (struct sockaddr *)&sockaddr, &slen); |
| if (sock != -1) { |
| int i = 1; |
| if (setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, (char *)&i, |
| sizeof(i)) < 0) |
| warn("ListenSocket(accept): setsockopt() TCP_NODELAY failed!"); |
| } |
| } |
| |
| void |
| TCPIface::connect() |
| { |
| struct addrinfo addr_hint, *addr_results; |
| int ret; |
| |
| string port_str = to_string(serverPort); |
| |
| sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); |
| panic_if(sock < 0, "socket() failed: %s", strerror(errno)); |
| |
| int fl = 1; |
| if (setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, (char *)&fl, sizeof(fl)) < 0) |
| warn("ConnectSocket(connect): setsockopt() TCP_NODELAY failed!"); |
| |
| bzero(&addr_hint, sizeof(addr_hint)); |
| addr_hint.ai_family = AF_INET; |
| addr_hint.ai_socktype = SOCK_STREAM; |
| addr_hint.ai_protocol = IPPROTO_TCP; |
| |
| ret = getaddrinfo(serverName.c_str(), port_str.c_str(), |
| &addr_hint, &addr_results); |
| panic_if(ret < 0, "getaddrinf() failed: %s", strerror(errno)); |
| |
| DPRINTF(DistEthernet, "Connecting to %s:%s\n", |
| serverName.c_str(), port_str.c_str()); |
| |
| ret = ::connect(sock, (struct sockaddr *)(addr_results->ai_addr), |
| addr_results->ai_addrlen); |
| panic_if(ret < 0, "connect() failed: %s", strerror(errno)); |
| |
| freeaddrinfo(addr_results); |
| } |
| |
| TCPIface::~TCPIface() |
| { |
| int M5_VAR_USED ret; |
| |
| ret = close(sock); |
| assert(ret == 0); |
| } |
| |
| void |
| TCPIface::sendTCP(int sock, const void *buf, unsigned length) |
| { |
| ssize_t ret; |
| |
| ret = ::send(sock, buf, length, MSG_NOSIGNAL); |
| if (ret < 0) { |
| if (errno == ECONNRESET || errno == EPIPE) { |
| exitSimLoop("Message server closed connection, simulation " |
| "is exiting"); |
| } else { |
| panic("send() failed: %s", strerror(errno)); |
| } |
| } |
| panic_if(ret != length, "send() failed"); |
| } |
| |
| bool |
| TCPIface::recvTCP(int sock, void *buf, unsigned length) |
| { |
| ssize_t ret; |
| |
| ret = ::recv(sock, buf, length, MSG_WAITALL ); |
| if (ret < 0) { |
| if (errno == ECONNRESET || errno == EPIPE) |
| inform("recv(): %s", strerror(errno)); |
| else if (ret < 0) |
| panic("recv() failed: %s", strerror(errno)); |
| } else if (ret == 0) { |
| inform("recv(): Connection closed"); |
| } else if (ret != length) |
| panic("recv() failed"); |
| |
| return (ret == length); |
| } |
| |
| void |
| TCPIface::sendPacket(const Header &header, const EthPacketPtr &packet) |
| { |
| sendTCP(sock, &header, sizeof(header)); |
| sendTCP(sock, packet->data, packet->length); |
| } |
| |
| void |
| TCPIface::sendCmd(const Header &header) |
| { |
| DPRINTF(DistEthernetCmd, "TCPIface::sendCmd() type: %d\n", |
| static_cast<int>(header.msgType)); |
| // Global commands (i.e. sync request) are always sent by the master |
| // DistIface. The transfer method is simply implemented as point-to-point |
| // messages for now |
| for (auto s: sockRegistry) |
| sendTCP(s, (void*)&header, sizeof(header)); |
| } |
| |
| bool |
| TCPIface::recvHeader(Header &header) |
| { |
| bool ret = recvTCP(sock, &header, sizeof(header)); |
| DPRINTF(DistEthernetCmd, "TCPIface::recvHeader() type: %d ret: %d\n", |
| static_cast<int>(header.msgType), ret); |
| return ret; |
| } |
| |
| void |
| TCPIface::recvPacket(const Header &header, EthPacketPtr &packet) |
| { |
| packet = make_shared<EthPacketData>(header.dataPacketLength); |
| bool ret = recvTCP(sock, packet->data, header.dataPacketLength); |
| panic_if(!ret, "Error while reading socket"); |
| packet->simLength = header.simLength; |
| packet->length = header.dataPacketLength; |
| } |
| |
| void |
| TCPIface::initTransport() |
| { |
| // We cannot setup the conections in the constructor because the number |
| // of dist interfaces (per process) is unknown until the (simobject) init |
| // phase. That information is necessary for global connection ordering. |
| establishConnection(); |
| } |