| /* |
| * Copyright (c) 2017-2020 Advanced Micro Devices, Inc. |
| * All rights reserved. |
| * |
| * For use for simulation and test purposes only |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright notice, |
| * this list of conditions and the following disclaimer. |
| * |
| * 2. Redistributions in binary form must reproduce the above copyright notice, |
| * this list of conditions and the following disclaimer in the documentation |
| * and/or other materials provided with the distribution. |
| * |
| * 3. Neither the name of the copyright holder nor the names of its |
| * contributors may be used to endorse or promote products derived from this |
| * software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
| * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| * POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include "cpu/testers/gpu_ruby_test/protocol_tester.hh" |
| |
| #include <algorithm> |
| #include <ctime> |
| #include <fstream> |
| #include <random> |
| |
| #include "cpu/testers/gpu_ruby_test/cpu_thread.hh" |
| #include "cpu/testers/gpu_ruby_test/gpu_thread.hh" |
| #include "cpu/testers/gpu_ruby_test/gpu_wavefront.hh" |
| #include "debug/ProtocolTest.hh" |
| #include "mem/request.hh" |
| #include "sim/sim_exit.hh" |
| #include "sim/system.hh" |
| |
| ProtocolTester::ProtocolTester(const Params &p) |
| : ClockedObject(p), |
| _requestorId(p.system->getRequestorId(this)), |
| numCpuPorts(p.port_cpu_ports_connection_count), |
| numVectorPorts(p.port_cu_vector_ports_connection_count), |
| numSqcPorts(p.port_cu_sqc_ports_connection_count), |
| numScalarPorts(p.port_cu_scalar_ports_connection_count), |
| numCusPerSqc(p.cus_per_sqc), |
| numCusPerScalar(p.cus_per_scalar), |
| numWfsPerCu(p.wavefronts_per_cu), |
| numWisPerWf(p.workitems_per_wavefront), |
| numAtomicLocs(p.num_atomic_locations), |
| numNormalLocsPerAtomic(p.num_normal_locs_per_atomic), |
| episodeLength(p.episode_length), |
| maxNumEpisodes(p.max_num_episodes), |
| debugTester(p.debug_tester), |
| cpuThreads(p.cpu_threads), |
| wfs(p.wavefronts) |
| { |
| int idx = 0; // global port index |
| |
| numCpus = numCpuPorts; // 1 cpu port per CPU |
| numCus = numVectorPorts; // 1 vector port per CU |
| |
| // create all physical cpu's data ports |
| for (int i = 0; i < numCpuPorts; ++i) { |
| DPRINTF(ProtocolTest, "Creating %s\n", |
| csprintf("%s-cpuPort%d", name(), i)); |
| cpuPorts.push_back(new SeqPort(csprintf("%s-cpuPort%d", name(), i), |
| this, i, idx)); |
| idx++; |
| } |
| |
| // create all physical gpu's data ports |
| for (int i = 0; i < numVectorPorts; ++i) { |
| DPRINTF(ProtocolTest, "Creating %s\n", |
| csprintf("%s-cuVectorPort%d", name(), i)); |
| cuVectorPorts.push_back(new SeqPort(csprintf("%s-cuVectorPort%d", |
| name(), i), |
| this, i, idx)); |
| idx++; |
| } |
| |
| for (int i = 0; i < numScalarPorts; ++i) { |
| DPRINTF(ProtocolTest, "Creating %s\n", |
| csprintf("%s-cuScalarPort%d", name(), i)); |
| cuScalarPorts.push_back(new SeqPort(csprintf("%s-cuScalarPort%d", |
| name(), i), |
| this, i, idx)); |
| idx++; |
| } |
| |
| for (int i = 0; i < numSqcPorts; ++i) { |
| DPRINTF(ProtocolTest, "Creating %s\n", |
| csprintf("%s-cuSqcPort%d", name(), i)); |
| cuSqcPorts.push_back(new SeqPort(csprintf("%s-cuSqcPort%d", |
| name(), i), |
| this, i, idx)); |
| idx++; |
| } |
| |
| // create an address manager |
| addrManager = new AddressManager(numAtomicLocs, |
| numNormalLocsPerAtomic); |
| nextEpisodeId = 0; |
| |
| if (!debugTester) |
| warn("Data race check is not enabled\n"); |
| |
| sentExitSignal = false; |
| |
| // set random seed number |
| if (p.random_seed != 0) { |
| srand(p.random_seed); |
| } else { |
| srand(time(NULL)); |
| } |
| |
| actionCount = 0; |
| |
| // create a new log file |
| logFile = simout.create(p.log_file); |
| assert(logFile); |
| |
| // print test configs |
| std::stringstream ss; |
| ss << "GPU Ruby test's configurations" << std::endl |
| << "\tNumber of CPUs: " << numCpus << std::endl |
| << "\tNumber of CUs: " << numCus << std::endl |
| << "\tNumber of wavefronts per CU: " << numWfsPerCu << std::endl |
| << "\tWavefront size: " << numWisPerWf << std::endl |
| << "\tNumber of atomic locations: " << numAtomicLocs << std::endl |
| << "\tNumber of non-atomic locations: " |
| << numNormalLocsPerAtomic * numAtomicLocs << std::endl |
| << "\tEpisode length: " << episodeLength << std::endl |
| << "\tTest length (max number of episodes): " << maxNumEpisodes |
| << std::endl |
| << "\tRandom seed: " << p.random_seed |
| << std::endl; |
| |
| ccprintf(*(logFile->stream()), "%s", ss.str()); |
| logFile->stream()->flush(); |
| } |
| |
| ProtocolTester::~ProtocolTester() |
| { |
| for (int i = 0; i < cpuPorts.size(); ++i) |
| delete cpuPorts[i]; |
| for (int i = 0; i < cuVectorPorts.size(); ++i) |
| delete cuVectorPorts[i]; |
| for (int i = 0; i < cuScalarPorts.size(); ++i) |
| delete cuScalarPorts[i]; |
| for (int i = 0; i < cuSqcPorts.size(); ++i) |
| delete cuSqcPorts[i]; |
| delete addrManager; |
| |
| // close the log file |
| simout.close(logFile); |
| } |
| |
| void |
| ProtocolTester::init() |
| { |
| DPRINTF(ProtocolTest, "Attach threads to ports\n"); |
| |
| // connect cpu threads to cpu's ports |
| for (int cpu_id = 0; cpu_id < numCpus; ++cpu_id) { |
| cpuThreads[cpu_id]->attachGpuThreadToPorts(this, |
| static_cast<SeqPort*>(cpuPorts[cpu_id])); |
| cpuThreads[cpu_id]->scheduleWakeup(); |
| cpuThreads[cpu_id]->scheduleDeadlockCheckEvent(); |
| } |
| |
| // connect gpu wavefronts to gpu's ports |
| int wfId = 0; |
| int vectorPortId = 0; |
| int sqcPortId = 0; |
| int scalarPortId = 0; |
| |
| for (int cu_id = 0; cu_id < numCus; ++cu_id) { |
| vectorPortId = cu_id; |
| sqcPortId = cu_id/numCusPerSqc; |
| scalarPortId = cu_id/numCusPerScalar; |
| |
| for (int i = 0; i < numWfsPerCu; ++i) { |
| wfId = cu_id * numWfsPerCu + i; |
| wfs[wfId]->attachGpuThreadToPorts(this, |
| static_cast<SeqPort*>(cuVectorPorts[vectorPortId]), |
| static_cast<SeqPort*>(cuSqcPorts[sqcPortId]), |
| static_cast<SeqPort*>(cuScalarPorts[scalarPortId])); |
| wfs[wfId]->scheduleWakeup(); |
| wfs[wfId]->scheduleDeadlockCheckEvent(); |
| } |
| } |
| } |
| |
| Port& |
| ProtocolTester::getPort(const std::string &if_name, PortID idx) |
| { |
| if (if_name != "cpu_ports" && if_name != "cu_vector_ports" && |
| if_name != "cu_sqc_ports" && if_name != "cu_scalar_ports") { |
| // pass along to super class |
| return ClockedObject::getPort(if_name, idx); |
| } else { |
| if (if_name == "cpu_ports") { |
| if (idx > numCpuPorts) |
| panic("ProtocolTester: unknown cpu port %d\n", idx); |
| return *cpuPorts[idx]; |
| } else if (if_name == "cu_vector_ports") { |
| if (idx > numVectorPorts) |
| panic("ProtocolTester: unknown cu vect port %d\n", idx); |
| return *cuVectorPorts[idx]; |
| } else if (if_name == "cu_sqc_ports") { |
| if (idx > numSqcPorts) |
| panic("ProtocolTester: unknown cu sqc port %d\n", idx); |
| return *cuSqcPorts[idx]; |
| } else { |
| assert(if_name == "cu_scalar_ports"); |
| if (idx > numScalarPorts) |
| panic("ProtocolTester: unknown cu scal port %d\n", idx); |
| return *cuScalarPorts[idx]; |
| } |
| } |
| |
| assert(false); |
| } |
| |
| bool |
| ProtocolTester::checkExit() |
| { |
| if (nextEpisodeId > maxNumEpisodes) { |
| if (!sentExitSignal) { |
| // all done |
| inform("Total completed episodes: %d\n", nextEpisodeId - 1); |
| exitSimLoop("GPU Ruby Tester: Passed!"); |
| sentExitSignal = true; |
| } |
| return true; |
| } |
| return false; |
| } |
| |
| bool |
| ProtocolTester::checkDRF(Location atomic_loc, |
| Location loc, bool isStore) const |
| { |
| if (debugTester) { |
| // go through all active episodes in all threads |
| for (const GpuThread* th : wfs) { |
| if (!th->checkDRF(atomic_loc, loc, isStore)) |
| return false; |
| } |
| |
| for (const GpuThread* th : cpuThreads) { |
| if (!th->checkDRF(atomic_loc, loc, isStore)) |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| void |
| ProtocolTester::dumpErrorLog(std::stringstream& ss) |
| { |
| if (!sentExitSignal) { |
| // go through all threads and dump their outstanding requests |
| for (auto t : cpuThreads) { |
| t->printAllOutstandingReqs(ss); |
| } |
| |
| for (auto t : wfs) { |
| t->printAllOutstandingReqs(ss); |
| } |
| |
| // dump error log into a file |
| assert(logFile); |
| ccprintf(*(logFile->stream()), "%s", ss.str()); |
| logFile->stream()->flush(); |
| |
| sentExitSignal = true; |
| // terminate the simulation |
| panic("GPU Ruby Tester: Failed!\n"); |
| } |
| } |
| |
| bool |
| ProtocolTester::SeqPort::recvTimingResp(PacketPtr pkt) |
| { |
| // get the requesting thread from the original sender state |
| ProtocolTester::SenderState* senderState = |
| safe_cast<ProtocolTester::SenderState*>(pkt->senderState); |
| GpuThread *th = senderState->th; |
| |
| th->hitCallback(pkt); |
| |
| return true; |
| } |
| |
| ProtocolTester* |
| ProtocolTesterParams::create() const |
| { |
| return new ProtocolTester(*this); |
| } |