Enable the Ruby tester for the GPU_VIPER protocol
Add an example script 'viper_ruby_test.py' to test VIPER,
and slightly change the flags in the packet/port interface.
Change-Id: Ic5fc551e294687bc4838671dd36fac92673f958b
diff --git a/configs/example/viper_ruby_test.py b/configs/example/viper_ruby_test.py
new file mode 100644
index 0000000..2a69ffd
--- /dev/null
+++ b/configs/example/viper_ruby_test.py
@@ -0,0 +1,359 @@
+#
+# Copyright (c) 2018 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# For use for simulation and test purposes only
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from this
+# software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Tuan Ta, Xianwei Zhang
+#
+
+import m5
+from m5.objects import *
+from m5.defines import buildEnv
+from m5.util import addToPath
+import os, optparse, sys
+
+addToPath('../')
+
+from common import Options
+from ruby import Ruby
+
+# Get paths we might need. It's expected this file is in m5/configs/example.
+config_path = os.path.dirname(os.path.abspath(__file__))
+config_root = os.path.dirname(config_path)
+m5_root = os.path.dirname(config_root)
+
+parser = optparse.OptionParser()
+Options.addNoISAOptions(parser)
+
+# GPU Ruby tester options
+parser.add_option("--cache-size", type="int", default=0,
+ help="Cache sizes to use. Small encourages races between \
+ requests and writebacks. Large stresses write-through \
+ and/or write-back GPU caches. Range [0..1]")
+parser.add_option("--system-size", type="int", default=0,
+ help="This option defines how many CUs, CPUs and cache \
+ components in the test system. Range[0..1]")
+parser.add_option("--address-range", type="int", default=0,
+ help="This option defines the number of atomic \
+ locations that affects the working set's size. \
+ A small number of atomic locations encourage more \
+ races among threads. The large option stresses cache \
+ resources. Range [0..1]")
+parser.add_option("--episode-length", type="int", default=0,
+ help="This option defines the number of LDs and \
+ STs in an episode. The small option encourages races \
+ between the start and end of an episode. The long \
+ option encourages races between LDs and STs in the \
+ same episode. Range [0..2]")
+parser.add_option("--test-length", type="int", default=1,
+ help="The number of episodes to be executed by each \
+ wavefront. This determines the maximum number, i.e., \
+ val X #WFs, of episodes to be executed in the test.")
+parser.add_option("--debug-tester", action='store_true',
+ help="This option will turn on DRF checker")
+parser.add_option("--random-seed", type="int", default=0,
+ help="Random seed number. Default value (i.e., 0) means \
+ using runtime-specific value")
+parser.add_option("--log-file", type="string", default="gpu-ruby-test.log")
+
+# GPU configurations
+parser.add_option("--wf-size", type="int", default=64, help="wavefront size")
+
+parser.add_option("-w", "--wavefronts-per-cu", type="int", default=1,
+ help="Number of wavefronts per cu")
+
+parser.add_option("--cu-per-sqc", type="int", default=4,
+ help="number of CUs sharing an SQC")
+
+parser.add_option("--cu-per-scalar-cache", type="int", default=4,
+ help="number of CUs sharing an scalar cache")
+
+parser.add_option("--cu-per-sa", type="int", default=4,
+ help="number of CUs per shader array \
+ This must be a multiple of options.cu-per-sqc and \
+ options.cu-per-scalar")
+#
+# Add the ruby specific and protocol specific options
+#
+Ruby.define_options(parser)
+
+execfile(os.path.join(config_root, "common", "Options.py"))
+
+(options, args) = parser.parse_args()
+
+#
+# Set the default cache size and associativity to be very small to encourage
+# races between requests and writebacks.
+#
+options.l1d_size="256B"
+options.l1i_size="256B"
+options.l2_size="512B"
+options.l3_size="1kB"
+options.l1d_assoc=2
+options.l1i_assoc=2
+options.l2_assoc=2
+options.l3_assoc=2
+
+#
+# Set up cache size - 2 options
+# 0: small cache
+# 1: large cache
+#
+if (options.cache_size == 0):
+ options.tcp_size="256B"
+ options.tcp_assoc=2
+ options.tcc_size="1kB"
+ options.tcc_assoc=2
+elif (options.cache_size == 1):
+ options.tcp_size="256kB"
+ options.tcp_assoc=16
+ options.tcc_size="1024kB"
+ options.tcc_assoc=16
+else:
+ print "Error: option cache_size '%s' not recognized" % options.cache_size
+ sys.exit(1)
+
+#
+# Set up system size - 2 options
+#
+if (options.system_size == 0):
+ # 1 CU, 1 CPU, 1 SQC, 1 Scalar
+ options.wf_size = 1
+ options.wavefronts_per_cu = 1
+ options.num_cpus = 1
+ options.cu_per_sqc = 1
+ options.cu_per_scalar_cache = 1
+ options.num_compute_units = 1
+elif (options.system_size == 1):
+ # 32 CUs, 4 CPUs, 8 SQCs, 8 Scalars
+ options.wf_size = 16
+ options.wavefronts_per_cu = 8
+ options.num_cpus = 4
+ options.cu_per_sqc = 4
+ options.cu_per_scalar_cache = 4
+ options.num_compute_units = 32
+else:
+ print "Error: option system size '%s' not recognized" \
+ % options.system_size
+ sys.exit(1)
+
+#
+# set address range - 2 options
+# level 0: small
+# level 1: large
+# each location corresponds to a 4-byte piece of data
+#
+options.mem_size = '1024MB'
+num_atomic_locs = 10
+num_regular_locs_per_atomic_loc = 10000
+if (options.address_range == 1):
+ num_atomic_locs = 100
+ num_regular_locs_per_atomic_loc = 100000
+elif (options.address_range != 0):
+ print "Error: option address_range '%s' not recognized" \
+ % options.address_range
+ sys.exit(1)
+
+#
+# set episode length (# of actions per episode) - 3 options
+# 0: 10 actions
+# 1: 100 actions
+# 2: 500 actions
+#
+eps_length = 10
+if (options.episode_length == 1):
+ eps_length = 100
+elif (options.episode_length == 2):
+ eps_length = 500
+elif (options.episode_length != 0):
+ print "Error: option episode_length '%s' not recognized" \
+ % options.episode_length
+ sys.exit(1)
+
+# set the Ruby's and tester's deadlock thresholds
+# the Ruby's deadlock detection is the primary check for deadlock.
+# the tester's deadlock threshold detection is a secondary check for deadlock
+# if there is a bug in RubyPort that causes a packet not to return to the
+# tester properly, the tester will throw a deadlock exception.
+# we set cache_deadlock_threshold < tester_deadlock_threshold to detect
+# deadlock caused by Ruby protocol first before one caused by the coalescer
+options.cache_deadlock_threshold = 100000000
+tester_deadlock_threshold = 1000000000
+
+# for now, we're testing only GPU protocol, so we set num_cpus to 0
+options.num_cpus = 0
+# number of CPUs and CUs
+n_CPUs = options.num_cpus
+n_CUs = options.num_compute_units
+# set test length, i.e., number of episodes per wavefront * #WFs
+# test length can be 1x#WFs, 10x#WFs, 100x#WFs, ...
+n_WFs = n_CUs * options.wavefronts_per_cu
+max_episodes = options.test_length * n_WFs
+# number of SQC and Scalar caches
+assert(n_CUs % options.cu_per_sqc == 0)
+n_SQCs = int(n_CUs/options.cu_per_sqc)
+options.num_sqc = n_SQCs
+assert(n_CUs % options.cu_per_scalar_cache == 0)
+n_Scalars = int(n_CUs/options.cu_per_scalar_cache)
+
+# for now, we only set CUs and SQCs
+# TODO: add scalars if necessary
+n_Scalars = 0
+options.num_scalar_cache = n_Scalars
+if n_Scalars == 0:
+ options.cu_per_scalar_cache = 0
+
+if args:
+ print "Error: script doesn't take any positional arguments"
+ sys.exit(1)
+
+#
+# Create GPU Ruby random tester
+#
+tester = ProtocolTester(cus_per_sqc = options.cu_per_sqc,
+ cus_per_scalar = options.cu_per_scalar_cache,
+ wavefronts_per_cu = options.wavefronts_per_cu,
+ workitems_per_wavefront = options.wf_size,
+ num_atomic_locations = num_atomic_locs,
+ num_normal_locs_per_atomic = \
+ num_regular_locs_per_atomic_loc,
+ max_num_episodes = max_episodes,
+ episode_length = eps_length,
+ debug_tester = options.debug_tester,
+ random_seed = options.random_seed,
+ log_file = options.log_file)
+
+#
+# Create the M5 system. Note that the memory object isn't actually
+# used by the tester, but is included to support
+# the M5 memory size == Ruby memory size checks
+#
+# The system doesn't have real CPUs or CUs.
+# It just has a tester that has physical ports to be connected to Ruby
+#
+system = System(cpu = tester,
+ mem_ranges = [AddrRange(options.mem_size)],
+ cache_line_size = options.cacheline_size,
+ mem_mode = 'timing')
+
+system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
+system.clk_domain = SrcClockDomain(clock = options.sys_clock,
+ voltage_domain = system.voltage_domain)
+
+options.num_cp = 0
+
+#
+# Create the Ruby system
+#
+Ruby.create_system(options, False, system)
+
+#
+# The tester is most effective when randomization is turned on and
+# artificial delay is randomly inserted on messages
+#
+system.ruby.randomization = True
+
+# assert that we got the right number of Ruby ports
+assert(len(system.ruby._cpu_ports) == n_CPUs + n_CUs + n_SQCs + n_Scalars)
+
+#
+# attach Ruby ports to the tester
+# in the order: cpu_sequencers,
+# vector_coalescers,
+# sqc_sequencers,
+# scalar_sequencers
+#
+print "Attaching ruby ports to the tester"
+i = 0
+for ruby_port in system.ruby._cpu_ports:
+ ruby_port.no_retry_on_stall = True
+ ruby_port.using_ruby_tester = False
+
+ if i < n_CPUs:
+ tester.cpu_ports = ruby_port.slave
+ elif i < (n_CPUs + n_CUs):
+ tester.cu_vector_ports = ruby_port.slave
+ elif i < (n_CPUs + n_CUs + n_SQCs):
+ tester.cu_sqc_ports = ruby_port.slave
+ else:
+ tester.cu_scalar_ports = ruby_port.slave
+
+ i += 1
+
+#
+# Create CPU threads
+#
+thread_clock = SrcClockDomain(clock = '1GHz',
+ voltage_domain = system.voltage_domain)
+
+cpu_threads = []
+print "Creating %i CpuThreads" % (n_CPUs)
+for cpu_idx in range(n_CPUs):
+ cpu_threads.append(CpuThread(thread_id = cpu_idx,
+ num_lanes = 1, # CPU thread is scalar
+ clk_domain = thread_clock,
+ deadlock_threshold = \
+ tester_deadlock_threshold))
+tester.cpu_threads = cpu_threads
+
+#
+# Create GPU wavefronts
+#
+wavefronts = []
+g_thread_idx = n_CPUs
+print "Creating %i WFs attached to %i CUs" % \
+ (n_CUs * tester.wavefronts_per_cu, n_CUs)
+for cu_idx in range(n_CUs):
+ for wf_idx in range(tester.wavefronts_per_cu):
+ wavefronts.append(GpuWavefront(thread_id = g_thread_idx,
+ cu_id = cu_idx,
+ num_lanes = options.wf_size,
+ clk_domain = thread_clock,
+ deadlock_threshold = \
+ tester_deadlock_threshold))
+ g_thread_idx += 1
+tester.wavefronts = wavefronts
+
+# -----------------------
+# run simulation
+# -----------------------
+
+root = Root( full_system = False, system = system )
+
+# Not much point in this being higher than the L1 latency
+m5.ticks.setGlobalFrequency('1ns')
+
+# instantiate configuration
+m5.instantiate()
+
+# simulate until program terminates
+exit_event = m5.simulate(options.abs_max_tick)
+
+print 'Exiting @ tick', m5.curTick(), 'because', exit_event.getCause()
diff --git a/configs/ruby/GPU_VIPER.py b/configs/ruby/GPU_VIPER.py
index e4ba180..7432a4b 100644
--- a/configs/ruby/GPU_VIPER.py
+++ b/configs/ruby/GPU_VIPER.py
@@ -311,24 +311,15 @@
self.probeToL3 = probe_to_l3
self.respToL3 = resp_to_l3
-class DirMem(RubyDirectoryMemory, CntrlBase):
- def create(self, options, ruby_system, system):
- self.version = self.versionCount()
-
- phys_mem_size = AddrRange(options.mem_size).size()
- mem_module_size = phys_mem_size / options.num_dirs
- dir_size = MemorySize('0B')
- dir_size.value = mem_module_size
- self.size = dir_size
-
class DirCntrl(Directory_Controller, CntrlBase):
- def create(self, options, ruby_system, system):
+ def create(self, options, dir_ranges, ruby_system, system):
self.version = self.versionCount()
self.response_latency = 30
- self.directory = DirMem()
- self.directory.create(options, ruby_system, system)
+ self.addr_ranges = dir_ranges
+
+ self.directory = RubyDirectoryMemory()
self.L3CacheMemory = L3Cache()
self.L3CacheMemory.create(options, ruby_system, system)
@@ -389,6 +380,8 @@
help = "tcp assoc")
parser.add_option("--noL1", action = "store_true", default = False,
help = "bypassL1")
+ parser.add_option("--buffers-size", type="int", default=128,
+ help="Size of MessageBuffers at the controller")
def create_system(options, full_system, system, dma_devices, ruby_system):
if buildEnv['PROTOCOL'] != 'GPU_VIPER':
@@ -427,10 +420,35 @@
mainCluster = Cluster(intBW=crossbar_bw)
else:
mainCluster = Cluster(intBW=8) # 16 GB/s
+
+ # See comment in configs/common/MemConfig.py for explanation of this value
+ xor_low_bit = 20
+
+ if options.numa_high_bit:
+ numa_bit = options.numa_high_bit
+ dir_bits = int(math.log(options.num_dirs, 2))
+ xor_high_bit = xor_low_bit + dir_bits - 1
+ else:
+ # if the numa_bit is not specified, set the directory bits as the
+ # lowest bits above the block offset bits, and the numa_bit as the
+ # highest of those directory bits
+ dir_bits = int(math.log(options.num_dirs, 2))
+ block_size_bits = int(math.log(options.cacheline_size, 2))
+ numa_bit = block_size_bits + dir_bits - 1
+ xor_high_bit = xor_low_bit + dir_bits - 1
for i in xrange(options.num_dirs):
+ dir_ranges = []
+ for r in system.mem_ranges:
+ #addr_range = m5.objects.AddrRange(r.size())
+ addr_range = m5.objects.AddrRange(r.start, size = r.size(),
+ intlvHighBit = numa_bit,
+ intlvBits = dir_bits,
+ intlvMatch = i,
+ xorHighBit = xor_high_bit)
+ dir_ranges.append(addr_range)
dir_cntrl = DirCntrl(noTCCdir = True, TCC_select_num_bits = TCC_bits)
- dir_cntrl.create(options, ruby_system, system)
+ dir_cntrl.create(options, dir_ranges, ruby_system, system)
dir_cntrl.number_of_TBEs = options.num_tbes
dir_cntrl.useL3OnWT = options.use_L3_on_WT
# the number_of_TBEs is inclusive of TBEs below
@@ -492,8 +510,9 @@
cp_cntrl.responseToCore = MessageBuffer()
cp_cntrl.responseToCore.slave = ruby_system.network.master
- cp_cntrl.mandatoryQueue = MessageBuffer()
cp_cntrl.triggerQueue = MessageBuffer(ordered = True)
+ cp_cntrl.mandatoryQueue = \
+ MessageBuffer(buffer_size=options.buffers_size)
cpuCluster.add(cp_cntrl)
@@ -537,7 +556,8 @@
tcp_cntrl.responseToTCP = MessageBuffer(ordered = True)
tcp_cntrl.responseToTCP.slave = ruby_system.network.master
- tcp_cntrl.mandatoryQueue = MessageBuffer()
+ tcp_cntrl.mandatoryQueue = \
+ MessageBuffer(buffer_size=options.buffers_size)
gpuCluster.add(tcp_cntrl)
@@ -562,7 +582,8 @@
sqc_cntrl.responseToSQC = MessageBuffer(ordered = True)
sqc_cntrl.responseToSQC.slave = ruby_system.network.master
- sqc_cntrl.mandatoryQueue = MessageBuffer()
+ sqc_cntrl.mandatoryQueue = \
+ MessageBuffer(buffer_size=options.buffers_size)
# SQC also in GPU cluster
gpuCluster.add(sqc_cntrl)
@@ -605,7 +626,8 @@
tcp_cntrl.responseToTCP = MessageBuffer(ordered = True)
tcp_cntrl.responseToTCP.slave = ruby_system.network.master
- tcp_cntrl.mandatoryQueue = MessageBuffer()
+ tcp_cntrl.mandatoryQueue = \
+ MessageBuffer(buffer_size=options.buffers_size)
gpuCluster.add(tcp_cntrl)
diff --git a/src/cpu/testers/gpu_ruby_test/AddressManager.cc b/src/cpu/testers/gpu_ruby_test/AddressManager.cc
new file mode 100644
index 0000000..6ef52fc
--- /dev/null
+++ b/src/cpu/testers/gpu_ruby_test/AddressManager.cc
@@ -0,0 +1,425 @@
+/*
+ * Copyright (c) 2017 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Tuan Ta
+ */
+
+#include "cpu/testers/gpu_ruby_test/AddressManager.hh"
+
+#include <algorithm>
+
+#include "base/intmath.hh"
+#include "base/random.hh"
+#include "base/trace.hh"
+
+const int AddressManager::INVALID_VALUE = -1;
+const int AddressManager::INVALID_LOCATION = -1;
+
+AddressManager::AddressManager(int n_atomic_locs, int n_normal_locs_per_atomic)
+ : numAtomicLocs(n_atomic_locs),
+ numLocsPerAtomic(n_normal_locs_per_atomic)
+{
+ assert(numAtomicLocs > 0 && numLocsPerAtomic > 0);
+ numNormalLocs = numAtomicLocs * numLocsPerAtomic;
+
+ // generate random address map
+ randAddressMap.resize(numAtomicLocs + numNormalLocs);
+ for (Location i = 0; i < numAtomicLocs + numNormalLocs; ++i) {
+ // all addresses are sizeof(Value) (i.e., 4-byte) aligned
+ randAddressMap[i] = (Addr)((i + 128) << floorLog2(sizeof(Value)));
+ }
+
+ // randomly shuffle randAddressMap
+ std::random_shuffle(randAddressMap.begin(), randAddressMap.end());
+
+ // initialize atomic locations
+ // first and last normal location per atomic location
+ Location first, last;
+ for (Location atomic_loc = 0; atomic_loc < numAtomicLocs; ++atomic_loc) {
+ first = numAtomicLocs + numLocsPerAtomic * atomic_loc;
+ last = first + numLocsPerAtomic - 1;
+ atomicStructs.push_back(new AtomicStruct(atomic_loc, first, last));
+ }
+
+ // initialize log table
+ for (Location loc = 0; loc < numAtomicLocs + numNormalLocs; ++loc) {
+ logTable.push_back(new LastWriter());
+ }
+}
+
+AddressManager::~AddressManager()
+{
+ for (AtomicStruct* atomic_struct : atomicStructs)
+ delete atomic_struct;
+ for (LastWriter* lw : logTable)
+ delete lw;
+}
+
+Addr
+AddressManager::getAddress(Location loc)
+{
+ assert(loc < numAtomicLocs + numNormalLocs && loc >= 0);
+ return randAddressMap[loc];
+}
+
+AddressManager::Location
+AddressManager::getAtomicLoc()
+{
+ Location ret_atomic_loc = random() % numAtomicLocs;
+ atomicStructs[ret_atomic_loc]->startLocSelection();
+ return ret_atomic_loc;
+}
+
+AddressManager::Location
+AddressManager::getLoadLoc(Location atomic_loc)
+{
+ assert(atomic_loc >= 0 && atomic_loc < numAtomicLocs);
+ return atomicStructs[atomic_loc]->getLoadLoc();
+}
+
+AddressManager::Location
+AddressManager::getStoreLoc(Location atomic_loc)
+{
+ assert(atomic_loc >= 0 && atomic_loc < numAtomicLocs);
+ return atomicStructs[atomic_loc]->getStoreLoc();
+}
+
+void
+AddressManager::finishLocSelection(Location atomic_loc)
+{
+ assert(atomic_loc >= 0 && atomic_loc < numAtomicLocs);
+ atomicStructs[atomic_loc]->endLocSelection();
+}
+
+void
+AddressManager::releaseLocation(Location atomic_loc, Location loc)
+{
+ assert(atomic_loc >= 0 && atomic_loc < numAtomicLocs);
+ atomicStructs[atomic_loc]->releaseLoc(loc);
+}
+
+std::string
+AddressManager::printLastWriter(Location loc) const
+{
+ return logTable[loc]->print();
+}
+
+// ------------------- AtomicStruct --------------------------
+AddressManager::AtomicStruct::AtomicStruct(Location atomic_loc,
+ Location loc_begin,
+ Location loc_end)
+{
+ // the location range must have at least 1 location
+ assert(loc_begin <= loc_end);
+
+ atomicLoc = atomic_loc;
+ arraySize = loc_end - loc_begin + 1;
+ locationBase = loc_begin;
+
+ // allocate an array of arraySize
+ locArray = new Location[arraySize];
+
+ // initialize locArray & locProps
+ Location loc;
+ for (int offset = 0; offset < arraySize; ++offset) {
+ loc = locationBase + offset;
+ locArray[offset] = loc;
+ locProps.push_back(LocProperty(offset, 0));
+ }
+
+ // region (1) and (3) are initially empty
+ firstMark = 0;
+ secondMark = arraySize;
+ // no request made at this location so far
+ requestCount = 0;
+}
+
+AddressManager::AtomicStruct::~AtomicStruct()
+{
+ delete[] locArray;
+}
+
+void
+AddressManager::AtomicStruct::startLocSelection()
+{
+ assert(firstMark >= 0);
+ assert(firstMark <= secondMark);
+ assert(secondMark <= arraySize);
+ // make sure loadStoreMap has been cleared
+ assert(loadStoreMap.empty());
+
+ // this atomic location is picked for Atomic_ACQ
+ // and Atomic_REL in an episode
+ requestCount += 2;
+ // add two expected values in expectedValues set
+ expectedValues.insert(requestCount - 1);
+ expectedValues.insert(requestCount - 2);
+}
+
+AddressManager::Location
+AddressManager::AtomicStruct::getLoadLoc()
+{
+ assert(firstMark >= 0);
+ assert(firstMark <= secondMark);
+ assert(secondMark <= arraySize);
+
+ if (firstMark == arraySize) {
+ // no location can be picked for a LD now; return an invalid location
+ return INVALID_LOCATION;
+ } else {
+ // we can pick any location btw
+ // locArray [firstMark : arraySize-1]
+ int range_size = arraySize - firstMark;
+ Location ret_loc = locArray[firstMark + random() % range_size];
+
+ // update loadStoreMap
+ LdStMap::iterator it = loadStoreMap.find(ret_loc);
+
+ if (it == loadStoreMap.end()) {
+ // insert a new entry to the map b/c the entry is not there yet
+ // to mark this location has been picked for a LD
+ loadStoreMap.insert(std::pair<Location, LdStBits>
+ (ret_loc, LdStBits(true,false)));
+ } else {
+ // otherwise, just update the LD bit
+ (it->second).first = true;
+ }
+
+ return ret_loc;
+ }
+}
+
+AddressManager::Location
+AddressManager::AtomicStruct::getStoreLoc()
+{
+ assert(firstMark >= 0);
+ assert(firstMark <= secondMark);
+ assert(secondMark <= arraySize);
+
+ if (firstMark == secondMark) {
+ // no location can be picked for a ST now, return an invalid location
+ return INVALID_LOCATION;
+ } else {
+ // we can pick any location btw [firstMark : secondMark-1]
+ int range_size = secondMark - firstMark;
+ Location ret_loc = locArray[firstMark + random() % range_size];
+
+ // update loadStoreMap
+ LdStMap::iterator it = loadStoreMap.find(ret_loc);
+
+ if (it == loadStoreMap.end()) {
+ // insert a new entry to the map b/c the entry is not there yet
+ // to mark this location has been picked for a ST
+ loadStoreMap.insert(std::pair<Location, LdStBits>
+ (ret_loc, LdStBits(false,true)));
+ } else {
+ // otherwise, just update the ST bit
+ (it->second).second = true;
+ }
+
+ return ret_loc;
+ }
+}
+
+// for each entry in loadStoreMap,
+// if <LD_bit, ST_bit> == <1,0>
+// - if the location is in (2), then move it to (3)
+// - if the location is in (3), no move
+// - otherwise, throw an error
+// if <LD_bit, ST_bit> == <0,1> or <1,1>
+// - move it from (2) to (1)
+void
+AddressManager::AtomicStruct::endLocSelection()
+{
+ assert(firstMark >= 0);
+ assert(firstMark <= secondMark);
+ assert(secondMark <= arraySize);
+
+ for (auto& it : loadStoreMap) {
+ Location loc = it.first;
+ LdStBits p = it.second;
+
+ assert(loc >= locationBase && loc < locationBase + arraySize);
+ LocProperty& loc_prop = locProps[loc - locationBase];
+
+ if (p.first && !p.second) {
+ // this location has been picked for LD(s) but not ST
+ // it must be in either region (2) or (3)
+ assert(inSecondRegion(loc_prop.first) ||
+ inThirdRegion(loc_prop.first));
+
+ if (inSecondRegion(loc_prop.first)) {
+ // there is no owner of this location yet
+ assert(loc_prop.second == 0);
+
+ // pick the last location in (2) to swap
+ Location swapped_loc = locArray[secondMark - 1];
+ LocProperty& swapped_loc_prop =
+ locProps[swapped_loc - locationBase];
+
+ // swap loc and swapped_loc
+ swap(loc_prop, swapped_loc_prop);
+
+ // then, expand (3)
+ secondMark--;
+ }
+
+ // increment the location's number of owners
+ loc_prop.second++;
+ } else if (p.second) {
+ // this location has been picked for ST(s) and/or LD(s)
+ // it must be in region (2)
+ assert(inSecondRegion(loc_prop.first) && loc_prop.second == 0);
+
+ // pick the first location in (2) to swap
+ Location swapped_loc = locArray[firstMark];
+ LocProperty& swapped_loc_prop =
+ locProps[swapped_loc - locationBase];
+
+ // swap loc and swapped_loc
+ swap(loc_prop, swapped_loc_prop);
+
+ // then, expand (1)
+ firstMark++;
+
+ // increment the location's number of owners
+ loc_prop.second++;
+ } else {
+ panic("Location in loadStoreMap but wasn't picked "
+ "in any action\n");
+ }
+ }
+
+ // clear loadStoreMap
+ loadStoreMap.clear();
+}
+
+void
+AddressManager::AtomicStruct::releaseLoc(Location loc)
+{
+ assert(loc >= locationBase && loc < locationBase + arraySize);
+
+ LocProperty& loc_prop = locProps[loc - locationBase];
+
+ if (inFirstRegion(loc_prop.first)) {
+ // this location must have exactly 1 owner
+ assert(loc_prop.second == 1);
+
+ // pick the last location in region 1 to swap
+ Location swapped_loc = locArray[firstMark - 1];
+ LocProperty& swapped_loc_prop = locProps[swapped_loc - locationBase];
+
+ // swap loc and swapped_loc
+ swap(loc_prop, swapped_loc_prop);
+
+ // then shrink (1)
+ firstMark--;
+
+ // reset the location's number of owners
+ loc_prop.second = 0;
+ } else if (inThirdRegion(loc_prop.first)) {
+ // this location must have at least 1 owner
+ assert(loc_prop.second >= 1);
+
+ if (loc_prop.second == 1) {
+ // pick the first location in region 3 to swap
+ Location swapped_loc = locArray[secondMark];
+ LocProperty& swapped_loc_prop =
+ locProps[swapped_loc - locationBase];
+
+ // swap loc and swapped_loc
+ swap(loc_prop, swapped_loc_prop);
+
+ // then shrink (3)
+ secondMark++;
+ }
+ // decrement the loc's number of owners
+ loc_prop.second--;
+ } else {
+ // someone else must have already reset this counter
+ assert(inSecondRegion(loc_prop.first) && loc_prop.second == 0);
+ }
+}
+
+bool
+AddressManager::AtomicStruct::isExpectedValue(Value val)
+{
+ ExpectedValueSet::iterator it = expectedValues.find(val);
+
+ if (it == expectedValues.end()) {
+ return false;
+ }
+
+ // erase this value b/c it's done
+ expectedValues.erase(it);
+
+ return true;
+}
+
+void
+AddressManager::AtomicStruct::swap(LocProperty& prop_1, LocProperty& prop_2)
+{
+ int new_idx_1 = prop_2.first;
+ int new_idx_2 = prop_1.first;
+
+ // swap the two locations in locArray
+ Location tmp = locArray[prop_1.first];
+ locArray[prop_1.first] = locArray[prop_2.first];
+ locArray[prop_2.first] = tmp;
+
+ // update their new indices
+ prop_1.first = new_idx_1;
+ prop_2.first = new_idx_2;
+}
+
+// ------------------ log table ---------------------
+void
+AddressManager::updateLogTable(Location loc, int thread_id, int episode_id,
+ Value new_value, Tick cur_tick, int cu_id)
+{
+ assert(loc >= 0 && loc < numAtomicLocs + numNormalLocs);
+ logTable[loc]->update(thread_id, cu_id, episode_id, new_value, cur_tick);
+}
+
+AddressManager::Value
+AddressManager::getLoggedValue(Location loc) const
+{
+ assert(loc >= 0 && loc < numAtomicLocs + numNormalLocs);
+ return logTable[loc]->getLastStoredValue();
+}
+
+bool
+AddressManager::validateAtomicResp(Location loc, Value ret_val)
+{
+ assert(loc >= 0 && loc < numAtomicLocs);
+ return atomicStructs[loc]->isExpectedValue(ret_val);
+}
diff --git a/src/cpu/testers/gpu_ruby_test/AddressManager.hh b/src/cpu/testers/gpu_ruby_test/AddressManager.hh
new file mode 100644
index 0000000..b7ccbf3
--- /dev/null
+++ b/src/cpu/testers/gpu_ruby_test/AddressManager.hh
@@ -0,0 +1,276 @@
+/*
+ * Copyright (c) 2017 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Tuan Ta
+ */
+
+#ifndef CPU_TESTERS_PROTOCOL_TESTER_ADDRESSMANAGER_HH_
+#define CPU_TESTERS_PROTOCOL_TESTER_ADDRESSMANAGER_HH_
+
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "base/types.hh"
+#include "sim/eventq.hh"
+
+/*
+ * --- AddressManager has 3 main tasks ---
+ * (1) generate DRF request sequences
+ * (2) maintain internal log table
+ * (3) validate return values against ones in the log table
+ *
+ * A location is an abstract index of a unique real address.
+ * It's used internally within the tester only.
+ * randAddressMap has the mapping between a location and its real address.
+ *
+ * A value is an integer that a location in real memory can store.
+ * For now, we assume a value is 4 bytes wide.
+ *
+ * The location range (randAddressMap) has two distinct parts:
+ * Atomic locations: in the 1st part of randAddressMap &
+ * Non-atomic locations (or just locations): in the 2nd part
+ */
+
+/*
+ * --- DRF request sequence generation ---
+ * Each lane of an episode starts selecting its location by calling:
+ * (1) getAtomicLoc
+ * (2) getLoadLoc/getStoreLoc
+ * (3) finishLocSelection
+ *
+ * Each lane of an episode completes its execution by calling:
+ * releaseLocation for all locations it selected
+ */
+
+/*
+ * --- Internal structures ---
+ * There are multiple atomic structures, each of which corresponds
+ * to an atomic location.
+ *
+ * Each atomic structure manages a distinct range of locations in locArray
+ * This array is partitioned into 3 parts that are used to select locations
+ * for LDs and STs. Here is the location selecting rule:
+ * | (1) | (2) | (3) |
+ * - all locations in (1) cannot be picked for any LD and ST action
+ * - all locations in (2) can be picked for either LD or ST action
+ * - all locations in (3) can be picked for LD action only
+ *
+ * We maintain the 3 parts by 2 indices firstMark and secondMark.
+ * As locations are moved between partitions, both indices are updated
+ * accordingly.
+ * [0 .. firstMark-1] part (1)
+ * [firstMark .. secondMark-1] part (2)
+ * [secondMark .. arraySize-1] part (3)
+ *
+ * Each location has its context/property. locProps maintains
+ * contexts/properties of all locations. Context/property includes
+ * - current index of a location in locArray
+ * - the number of owners who are currently using the location
+ *
+ * To guarantee DRF constraints, the following conditions must hold
+ * - all locations in (1) have exactly 1 owner
+ * - all locations in (2) have exactly 0 owners
+ * - all locations in (3) have at least 1 owner
+ * - A LD request can randomly pick any location in (2) & (3)
+ * - A ST request can randomly pick any location in (2)
+ *
+ * loadStoreMap maintains all locations already selected for LDs/STs so far
+ *
+ * When endLocSelection is called (i.e., we've picked all locations for an
+ * episode), we need to move each selected location to its right partition.
+ * if LD_bit == 1 && ST_bit == 0 (i.e., picked for LDs), then move the
+ * location to (3) -> future LDs can pick it.
+ * if LD_bit == 0 && ST_bit == 1, then move the location to (1) -> NO future
+ * action can pick it until this episode is done.
+ * if LD_bit == 1 && ST_bit == 1, then move the location to (1) -> NO future
+ * action can pick it until this episode is done.
+ * clear the loadStoreMap
+ */
+
+class AddressManager
+{
+ public:
+ AddressManager(int n_atomic_locs, int numNormalLocsPerAtomic);
+ ~AddressManager();
+
+ typedef int32_t Value; // what a memory location stores (32-bit integer)
+ typedef int32_t Location; // tester-internal index of a real address
+
+ // return the unique address mapped to a location
+ Addr getAddress(Location loc);
+ // return a unique atomic location & start picking locations
+ Location getAtomicLoc();
+ // return a random location for LD
+ Location getLoadLoc(Location atomic_loc);
+ // return a random location for ST
+ Location getStoreLoc(Location atomic_loc);
+ // finish picking locations
+ void finishLocSelection(Location atomic_loc);
+ // an episode is done: release one of the locations it picked
+ void releaseLocation(Location atomic_loc, Location loc);
+ // update a log table entry with a given set of values
+ void updateLogTable(Location loc, int threadId, int episodeId,
+ Value new_value, Tick curTick, int cuId = -1);
+ // return the value most recently logged for a location
+ Value getLoggedValue(Location loc) const;
+ // check an atomic return value; a matching expected value is consumed
+ bool validateAtomicResp(Location loc, Value ret_val);
+
+ std::string printLastWriter(Location loc) const;
+
+ static const int INVALID_VALUE;
+ static const int INVALID_LOCATION;
+
+ private:
+ class LastWriter // log-table entry: the latest write to one location
+ {
+ public:
+ LastWriter()
+ : threadId(-1), cuId(-1), episodeId(-1), value(0),
+ writeTick(0)
+ { }
+
+ const std::string print() const
+ {
+ return "(Thread ID " + std::to_string(threadId) +
+ ", CU ID " + std::to_string(cuId) +
+ ", Episode ID " + std::to_string(episodeId) +
+ ", Value " + std::to_string(value) +
+ ", Tick " + std::to_string(writeTick) +
+ ")";
+ }
+
+ void update(int _thread, int _cu, int _episode, Value _value,
+ Tick _tick)
+ {
+ threadId = _thread;
+ cuId = _cu;
+ episodeId = _episode;
+ value = _value;
+ writeTick = _tick;
+ }
+
+ Value getLastStoredValue() const { return value; }
+
+ private:
+ int threadId;
+ int cuId;
+ int episodeId;
+ Value value;
+ Tick writeTick;
+ };
+
+ class AtomicStruct // one atomic location plus the locations it manages
+ {
+ public:
+ AtomicStruct(Location atom_loc, Location loc_begin, Location loc_end);
+ ~AtomicStruct();
+
+ // functions picking locations for LD/ST/ATOMIC ops
+ void startLocSelection();
+ Location getLoadLoc();
+ Location getStoreLoc();
+ void endLocSelection();
+
+ // an episode completed its actions
+ // return locations to their correct positions
+ void releaseLoc(Location loc);
+ // is the value what we expect? (a matching value is removed)
+ bool isExpectedValue(Value val);
+
+ private:
+ Location atomicLoc;
+ Location locationBase; // locProps is indexed by (loc - locationBase)
+
+ // array storing all locations this structure is managing
+ Location* locArray;
+ int firstMark, secondMark; // boundaries (1)|(2) and (2)|(3) of locArray
+ int arraySize;
+
+ // location properties: first = index in locArray, second = #owners
+ typedef std::pair<int, int> LocProperty;
+ typedef std::vector<LocProperty> LocPropTable;
+ LocPropTable locProps;
+
+ // a temporary map of location and its LD/ST selection
+ typedef std::pair<bool, bool> LdStBits;
+ typedef std::unordered_map<Location, LdStBits> LdStMap;
+ LdStMap loadStoreMap;
+
+ // number of atomic requests at this location so far
+ int requestCount;
+ // a set of expected values
+ // when we request the first n atomic ops, we expect to receive n
+ // return values from [0 .. n-1]
+ typedef std::unordered_set<Value> ExpectedValueSet;
+ ExpectedValueSet expectedValues;
+
+ // swap two locations in locArray and update their recorded indices
+ void swap(LocProperty& prop_1, LocProperty& prop_2);
+
+ bool inFirstRegion(int idx) const // part (1): [0, firstMark)
+ {
+ return (idx >= 0 && idx < firstMark);
+ }
+ bool inSecondRegion(int idx) const // part (2): [firstMark, secondMark)
+ {
+ return (idx >= firstMark && idx < secondMark);
+ }
+ bool inThirdRegion(int idx) const // part (3): [secondMark, arraySize)
+ {
+ return (idx >= secondMark && idx < arraySize);
+ }
+ };
+
+ // number of atomic locations
+ int numAtomicLocs;
+ // number of normal/non-atomic locations per atomic structure
+ int numLocsPerAtomic;
+ // total number of non-atomic locations
+ int numNormalLocs;
+
+ // location - address mapping
+ typedef std::vector<Addr> AddressMap;
+ AddressMap randAddressMap;
+
+ // a list of atomic structures
+ typedef std::vector<AtomicStruct*> AtomicStructTable;
+ AtomicStructTable atomicStructs;
+
+ // internal log table
+ typedef std::vector<LastWriter*> LogTable;
+ LogTable logTable;
+};
+
+#endif /* CPU_TESTERS_PROTOCOL_TESTER_ADDRESSMANAGER_HH_ */
diff --git a/src/cpu/testers/gpu_ruby_test/CpuThread.cc b/src/cpu/testers/gpu_ruby_test/CpuThread.cc
new file mode 100644
index 0000000..0e814cf
--- /dev/null
+++ b/src/cpu/testers/gpu_ruby_test/CpuThread.cc
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2017 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Tuan Ta
+ */
+
+#include "cpu/testers/gpu_ruby_test/CpuThread.hh"
+
+#include "debug/ProtocolTest.hh"
+
+CpuThread::CpuThread(const Params *p)
+ :Thread(p)
+{
+ threadName = "CpuThread(Thread ID " + std::to_string(threadId) + ")";
+ threadEvent.setDesc("CpuThread tick");
+ assert(numLanes == 1); // a CPU thread must be configured with one lane
+}
+
+CpuThread::~CpuThread()
+{ // CpuThread declares no data members of its own, so nothing to do here
+}
+
+CpuThread*
+CpuThreadParams::create()
+{
+ return new CpuThread(this); // build the thread from this params object
+}
+
+void
+CpuThread::issueLoadOps()
+{
+ assert(curAction); // the action being issued must be set
+ assert(curAction->getType() == Episode::Action::Type::LOAD);
+ // no fence or atomic op may be outstanding here (LD/STs are not checked)
+ assert(pendingFenceCount == 0);
+ assert(pendingAtomicCount == 0);
+
+ fatal("CpuThread::issueLoadOps - not yet implemented");
+}
+
+void
+CpuThread::issueStoreOps()
+{
+ assert(curAction); // the action being issued must be set
+ assert(curAction->getType() == Episode::Action::Type::STORE);
+ // no fence or atomic op may be outstanding here (LD/STs are not checked)
+ assert(pendingFenceCount == 0);
+ assert(pendingAtomicCount == 0);
+
+ fatal("CpuThread::issueStoreOps - not yet implemented");
+}
+
+void
+CpuThread::issueAtomicOps()
+{
+ assert(curAction); // the action being issued must be set
+ assert(curAction->getType() == Episode::Action::Type::ATOMIC);
+ // no fence, LD/ST or atomic op may still be outstanding at this point
+ assert(pendingFenceCount == 0);
+ assert(pendingLdStCount == 0);
+ assert(pendingAtomicCount == 0);
+
+ fatal("CpuThread::issueAtomicOps - not yet implemented");
+}
+
+void
+CpuThread::issueAcquireOp()
+{
+ DPRINTF(ProtocolTest, "Issuing Acquire Op ...\n");
+
+ assert(curAction); // the action being issued must be set
+ assert(curAction->getType() == Episode::Action::Type::ACQUIRE);
+ // no fence, LD/ST or atomic op may still be outstanding at this point
+ assert(pendingFenceCount == 0);
+ assert(pendingLdStCount == 0);
+ assert(pendingAtomicCount == 0);
+
+ // no-op: Acquire does not apply to CPU threads, so nothing is issued
+}
+
+void
+CpuThread::issueReleaseOp()
+{
+ DPRINTF(ProtocolTest, "Issuing Release Op ...\n");
+
+ assert(curAction); // the action being issued must be set
+ assert(curAction->getType() == Episode::Action::Type::RELEASE);
+ // no fence, LD/ST or atomic op may still be outstanding at this point
+ assert(pendingFenceCount == 0);
+ assert(pendingLdStCount == 0);
+ assert(pendingAtomicCount == 0);
+
+ // no-op: Release does not apply to CPU threads, so nothing is issued
+}
+
+void
+CpuThread::hitCallback(PacketPtr pkt)
+{
+ fatal("CpuThread::hitCallback - not yet implemented"); // pkt is unused
+}
diff --git a/src/cpu/testers/gpu_ruby_test/CpuThread.hh b/src/cpu/testers/gpu_ruby_test/CpuThread.hh
new file mode 100644
index 0000000..42441af
--- /dev/null
+++ b/src/cpu/testers/gpu_ruby_test/CpuThread.hh
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2017 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Tuan Ta
+ */
+
+#ifndef CPU_TESTERS_PROTOCOL_TESTER_CPUTHREAD_HH_
+#define CPU_TESTERS_PROTOCOL_TESTER_CPUTHREAD_HH_
+
+#include "cpu/testers/gpu_ruby_test/Thread.hh"
+#include "params/CpuThread.hh"
+#include "sim/clocked_object.hh"
+
+class CpuThread : public Thread
+{
+ public:
+ typedef CpuThreadParams Params;
+ CpuThread(const Params *p);
+ virtual ~CpuThread();
+
+ typedef AddressManager::Location Location;
+ typedef AddressManager::Value Value;
+
+ void hitCallback(PacketPtr pkt); // not implemented yet: calls fatal()
+
+ protected:
+ void issueLoadOps(); // not implemented yet: calls fatal()
+ void issueStoreOps(); // not implemented yet: calls fatal()
+ void issueAtomicOps(); // not implemented yet: calls fatal()
+ void issueAcquireOp(); // checks state only; issues nothing
+ void issueReleaseOp(); // checks state only; issues nothing
+};
+
+#endif /* CPU_TESTERS_PROTOCOL_TESTER_CPUTHREAD_HH_ */
diff --git a/src/cpu/testers/gpu_ruby_test/CpuThread.py b/src/cpu/testers/gpu_ruby_test/CpuThread.py
new file mode 100644
index 0000000..a0f04e7
--- /dev/null
+++ b/src/cpu/testers/gpu_ruby_test/CpuThread.py
@@ -0,0 +1,43 @@
+#
+# Copyright (c) 2017 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# For use for simulation and test purposes only
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from this
+# software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Tuan Ta
+#
+
+from m5.params import *
+from m5.proxy import *
+
+from Thread import Thread
+
+class CpuThread(Thread):
+ type = 'CpuThread'  # matches the C++ class name declared in cxx_header
+ cxx_header = "cpu/testers/gpu_ruby_test/CpuThread.hh"  # C++ declaration
diff --git a/src/cpu/testers/gpu_ruby_test/Episode.cc b/src/cpu/testers/gpu_ruby_test/Episode.cc
new file mode 100644
index 0000000..fba21f5
--- /dev/null
+++ b/src/cpu/testers/gpu_ruby_test/Episode.cc
@@ -0,0 +1,323 @@
+/*
+ * Copyright (c) 2017 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Tuan Ta
+ */
+
+#include "cpu/testers/gpu_ruby_test/Episode.hh"
+
+#include <fstream>
+#include <unordered_set>
+
+#include "cpu/testers/gpu_ruby_test/ProtocolTester.hh"
+#include "cpu/testers/gpu_ruby_test/Thread.hh"
+
+Episode::Episode(ProtocolTester* _tester, Thread* _thread, int num_loads,
+ int num_stores)
+ : tester(_tester),
+ thread(_thread),
+ numLoads(num_loads),
+ numStores(num_stores),
+ nextActionIdx(0)
+{
+ assert(tester && thread);
+
+ episodeId = tester->getNextEpisodeID();
+ numLanes = thread->getNumLanes();
+ assert(numLanes > 0);
+
+ addrManager = tester->getAddressManager();
+ assert(addrManager);
+
+ atomicLocs.resize(numLanes, AddressManager::INVALID_LOCATION);
+ // build the action sequence and pick locations for every lane
+ initActions();
+ isActive = true; // stays true until completeEpisode() clears it
+
+ DPRINTFN("Episode %d\n", episodeId);
+}
+
+Episode::~Episode()
+{
+ for (Episode::Action* action : actions) { // allocated in initActions()
+ assert(action);
+ delete action;
+ }
+}
+
+const Episode::Action*
+Episode::peekCurAction() const
+{
+    // nullptr tells the caller that every action has already been popped
+    const bool has_action = nextActionIdx < actions.size();
+
+    return has_action ? actions[nextActionIdx] : nullptr;
+}
+
+void
+Episode::popAction()
+{
+ assert(nextActionIdx < actions.size()); // there must be an action left
+ nextActionIdx++; // the action is kept in the list; only the index moves
+}
+
+void
+Episode::initActions()
+{
+ // first, push an Atomic & then an Acquire action
+ actions.push_back(new Action(Action::Type::ATOMIC, numLanes));
+ actions.push_back(new Action(Action::Type::ACQUIRE, numLanes));
+
+ // second, push numLoads LD and numStores ST actions in random order
+ int num_loads = numLoads;
+ int num_stores = numStores;
+ while ((num_loads + num_stores) > 0) {
+ switch (random() % 2) {
+ case 0: // Load (this draw is a no-op once all loads are placed)
+ if (num_loads > 0) {
+ actions.push_back(new Action(Action::Type::LOAD,
+ numLanes));
+ num_loads--;
+ }
+ break;
+ case 1: // Store (this draw is a no-op once all stores are placed)
+ if (num_stores > 0) {
+ actions.push_back(new Action(Action::Type::STORE,
+ numLanes));
+ num_stores--;
+ }
+ break;
+ default:
+ assert(false);
+ }
+ }
+
+ // last, push a Release & then an Atomic action
+ actions.push_back(new Action(Action::Type::RELEASE, numLanes));
+ actions.push_back(new Action(Action::Type::ATOMIC, numLanes));
+
+ // for each lane, pick a location for every action in the list
+ Location normal_loc;
+
+ for (int lane = 0; lane < numLanes; ++lane) {
+ normal_loc = AddressManager::INVALID_LOCATION;
+
+ // first, we select atomic loc for this lane
+ // atomic loc for this lane should not have been picked yet
+ assert(atomicLocs[lane] == AddressManager::INVALID_LOCATION);
+ // randomly pick an atomic location
+ atomicLocs[lane] = addrManager->getAtomicLoc();
+ assert(atomicLocs[lane] >= 0);
+
+ // go through each action in this lane and set its location
+ for (Action* action : actions) {
+ assert(action);
+
+ switch (action->getType()) {
+ case Action::Type::ATOMIC:
+ action->setLocation(lane, atomicLocs[lane]);
+ break;
+ case Action::Type::LOAD:
+ // randomly pick a normal location (may be INVALID_LOCATION)
+ normal_loc = addrManager->
+ getLoadLoc(atomicLocs[lane]);
+ assert(normal_loc >= AddressManager::INVALID_LOCATION);
+
+ if (normal_loc != AddressManager::INVALID_LOCATION) {
+ // check DRF via the tester and against earlier lanes
+ if (!tester->checkDRF(atomicLocs[lane],
+ normal_loc, false) ||
+ !this->checkDRF(atomicLocs[lane], normal_loc,
+ false, lane)) {
+ panic("Thread %d - Data race detected. STOPPED!\n",
+ thread->getThreadId());
+ }
+ }
+
+ action->setLocation(lane, normal_loc);
+ break;
+ case Action::Type::STORE:
+ // randomly pick a normal location (may be INVALID_LOCATION)
+ normal_loc = addrManager->
+ getStoreLoc(atomicLocs[lane]);
+ assert(normal_loc >= AddressManager::INVALID_LOCATION);
+
+ if (normal_loc != AddressManager::INVALID_LOCATION) {
+ // check DRF via the tester and against earlier lanes
+ if (!tester->checkDRF(atomicLocs[lane],
+ normal_loc, true) ||
+ !this->checkDRF(atomicLocs[lane], normal_loc,
+ true, lane)) {
+ panic("Thread %d - Data race detected. STOPPED!\n",
+ thread->getThreadId());
+ }
+ }
+
+ action->setLocation(lane, normal_loc);
+ break;
+ case Action::Type::ACQUIRE:
+ case Action::Type::RELEASE:
+ // no op: fences keep their default INVALID_LOCATION
+ break;
+ default:
+ panic("Invalid action type\n");
+ }
+ }
+
+ addrManager->finishLocSelection(atomicLocs[lane]);
+ }
+}
+
+void
+Episode::completeEpisode()
+{
+    // release, lane by lane, every location this episode picked and used
+    for (int lane = 0; lane < numLanes; ++lane) {
+        Location atomic_loc = AddressManager::INVALID_LOCATION;
+        // distinct LD/ST locations used by this lane
+        std::unordered_set<Location> unique_loc_set;
+
+        for (Action* action : actions) {
+            assert(action);
+
+            if (action->isMemFenceAction())
+                continue;   // fences are skipped
+
+            if (!action->isAtomicAction()) {
+                // LD/ST: the lane's atomic location must be known by now
+                assert(atomic_loc >= 0);
+                const Location normal_loc = action->getLocation(lane);
+
+                if (normal_loc >= 0)
+                    unique_loc_set.insert(normal_loc);
+            } else if (atomic_loc == AddressManager::INVALID_LOCATION) {
+                atomic_loc = action->getLocation(lane);
+            } else {
+                // both atomic ops in the same lane must be
+                // at the same location
+                assert(atomic_loc == action->getLocation(lane));
+            }
+        }
+
+        // each unique loc can be released only once
+        for (Location loc : unique_loc_set)
+            addrManager->releaseLocation(atomic_loc, loc);
+    }
+
+    // this episode is no longer active
+    isActive = false;
+}
+
+bool
+Episode::checkDRF(Location atomic_loc, Location loc, bool isStore,
+                  int max_lane) const
+{
+    assert(atomic_loc != AddressManager::INVALID_LOCATION);
+    assert(loc != AddressManager::INVALID_LOCATION);
+    assert(max_lane <= numLanes);
+
+    for (int lane = 0; lane < max_lane; ++lane) {
+        if (atomic_loc != atomicLocs[lane])
+            continue;   // lane uses another atomic location: not compared
+        for (const Action* action : actions) {
+            if (action->isAtomicAction() || action->isMemFenceAction())
+                continue;   // only LD/ST actions are compared
+            if (isStore) {  // a ST conflicts with any LD/ST at loc
+                if (loc == action->getLocation(lane)) {
+                    warn("ST at location %d races against thread %d\n",
+                         loc, thread->getThreadId());
+                    return false;
+                }
+            } else if (action->getType() == Action::Type::STORE &&
+                       loc == action->getLocation(lane)) { // LD vs. ST
+                warn("LD at location %d races against thread %d\n",
+                     loc, thread->getThreadId());
+                return false;
+            }
+        }
+    }
+
+    return true;
+}
+
+// -------------------- Action class ----------------------------
+Episode::Action::Action(Type t, int num_lanes)
+    : type(t),
+      numLanes(num_lanes)
+{
+    assert(numLanes > 0);
+    // one slot per lane; every lane starts without a location
+    locations.assign(numLanes, AddressManager::INVALID_LOCATION);
+}
+
+void
+Episode::Action::setLocation(int lane, Location loc)
+{
+ assert(lane >= 0 && lane < numLanes); // lane must index into locations
+ locations[lane] = loc; // overwrites any location set earlier for the lane
+}
+
+AddressManager::Location
+Episode::Action::getLocation(int lane) const
+{
+ assert(lane >= 0 && lane < numLanes); // lane must index into locations
+ return locations[lane]; // INVALID_LOCATION if never set for this lane
+}
+
+bool
+Episode::Action::isAtomicAction() const
+{
+ return (type == Type::ATOMIC); // true for ATOMIC actions only
+}
+
+bool
+Episode::Action::isMemFenceAction() const
+{
+ return (type == Type::ACQUIRE || type == Type::RELEASE); // either fence
+}
+
+const std::string
+Episode::Action::printType() const
+{
+    // map the action type to its upper-case name
+    switch (type) {
+      case Type::ACQUIRE: return "ACQUIRE";
+      case Type::RELEASE: return "RELEASE";
+      case Type::ATOMIC:  return "ATOMIC";
+      case Type::LOAD:    return "LOAD";
+      case Type::STORE:   return "STORE";
+      default: break;
+    }
+
+    // only reached when type holds none of the enumerators above
+    panic("Invalid action type\n");
+}
diff --git a/src/cpu/testers/gpu_ruby_test/Episode.hh b/src/cpu/testers/gpu_ruby_test/Episode.hh
new file mode 100644
index 0000000..8edd803
--- /dev/null
+++ b/src/cpu/testers/gpu_ruby_test/Episode.hh
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2017 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Tuan Ta
+ */
+
+#ifndef CPU_TESTERS_PROTOCOL_TESTER_EPISODE_HH_
+#define CPU_TESTERS_PROTOCOL_TESTER_EPISODE_HH_
+
+#include <vector>
+
+#include "cpu/testers/gpu_ruby_test/AddressManager.hh"
+
+class ProtocolTester;
+class Thread;
+
+class Episode
+{
+ public:
+ typedef AddressManager::Location Location;
+ typedef AddressManager::Value Value;
+
+ class Action { // one step of an episode, holding one location per lane
+ public:
+ enum class Type {
+ ACQUIRE,
+ RELEASE,
+ ATOMIC,
+ LOAD,
+ STORE,
+ };
+
+ Action(Type t, int num_lanes);
+ ~Action() {}
+
+ Type getType() const { return type; }
+ void setLocation(int lane, Location loc);
+ Location getLocation(int lane) const;
+ bool isAtomicAction() const;
+ bool isMemFenceAction() const;
+ const std::string printType() const;
+
+ private:
+ Type type;
+ int numLanes;
+ typedef std::vector<Location> LocationList;
+ LocationList locations;
+ };
+
+ Episode(ProtocolTester* tester, Thread* thread, int num_loads,
+ int num_stores);
+ ~Episode();
+
+ // return episode id
+ int getEpisodeId() const { return episodeId; }
+ // return the action at the head of the action queue (nullptr if none)
+ const Action* peekCurAction() const;
+ // pop the action at the head of the action queue
+ void popAction();
+ // check if there is more action to be issued in this episode
+ bool hasMoreActions() const { return nextActionIdx < actions.size();}
+ // complete this episode: release all its locations & mark it inactive
+ void completeEpisode();
+ // check if this episode is executing
+ bool isEpsActive() const { return isActive; }
+ // would a LD/ST at loc race with this episode's lanes [0, max_lane)?
+ bool checkDRF(Location atomic_loc, Location loc, bool isStore,
+ int max_lane) const;
+
+ private:
+ // pointers to tester, thread and address manager structures
+ ProtocolTester *tester;
+ Thread *thread;
+ AddressManager *addrManager;
+
+ // a unique episode id
+ int episodeId;
+ // list of actions in this episode
+ typedef std::vector<Action*> ActionList;
+ ActionList actions;
+ // list of atomic locations picked for this episode (one per lane)
+ typedef std::vector<Location> AtomicLocationList;
+ AtomicLocationList atomicLocs;
+
+ // is a thread running this episode?
+ bool isActive;
+ // episode length = num_loads + num_stores
+ int numLoads;
+ int numStores;
+ // index of the next action in actions
+ int nextActionIdx;
+ // number of lanes in this thread
+ int numLanes;
+
+ // randomly generate actions in this episode
+ void initActions();
+};
+
+#endif /* CPU_TESTERS_PROTOCOL_TESTER_EPISODE_HH_ */
diff --git a/src/cpu/testers/gpu_ruby_test/GpuWavefront.cc b/src/cpu/testers/gpu_ruby_test/GpuWavefront.cc
new file mode 100644
index 0000000..82b0d3b
--- /dev/null
+++ b/src/cpu/testers/gpu_ruby_test/GpuWavefront.cc
@@ -0,0 +1,417 @@
+/*
+ * Copyright (c) 2017 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Tuan Ta
+ */
+
+#include "cpu/testers/gpu_ruby_test/GpuWavefront.hh"
+
+#include "debug/ProtocolTest.hh"
+
+// Build a wavefront thread from its parameters: remember the compute unit
+// it belongs to, compose a descriptive thread name and label its tick event.
+GpuWavefront::GpuWavefront(const Params *p)
+    : Thread(p), cuId(p->cu_id)
+{
+    // name combines the thread id and the compute unit id
+    const std::string wf_name =
+        "GpuWavefront(Thread ID = " + std::to_string(threadId) +
+        ", CU ID = " + std::to_string(cuId) + ")";
+    threadName = wf_name;
+
+    threadEvent.setDesc("GpuWavefront tick");
+}
+
+// Destructor: the body is empty, GpuWavefront releases nothing here.
+GpuWavefront::~GpuWavefront()
+{
+
+}
+
+// Factory hook: allocate the GpuWavefront described by this params object.
+GpuWavefront*
+GpuWavefrontParams::create()
+{
+    GpuWavefront *wavefront = new GpuWavefront(this);
+    return wavefront;
+}
+
+// Issue one timing read request per lane of the current LOAD action.
+// Lanes whose location is negative are skipped. Every issued load is
+// recorded in outstandingLoads and counted in pendingLdStCount.
+void
+GpuWavefront::issueLoadOps()
+{
+    assert(curAction);
+    assert(curAction->getType() == Episode::Action::Type::LOAD);
+    // no fence or atomic op may be outstanding when loads are issued
+    assert(pendingFenceCount == 0);
+    assert(pendingAtomicCount == 0);
+
+    // each load reads exactly one Value
+    const int load_size = sizeof(Value);
+
+    for (int lane = 0; lane < numLanes; ++lane) {
+        Location location = curAction->getLocation(lane);
+        assert(location >= AddressManager::INVALID_LOCATION);
+
+        // nothing to issue for a lane with a negative location
+        if (location < 0)
+            continue;
+
+        Addr address = addrManager->getAddress(location);
+        DPRINTF(ProtocolTest, "%s Episode %d: Issuing Load - Addr %s\n",
+                this->getName(), curEpisode->getEpisodeId(),
+                printAddress(address));
+
+        // for now, the address must be aligned with the load size
+        assert(address % load_size == 0);
+
+        Request *req = new Request(0,                  // asid
+                                   address,            // virtual addr
+                                   load_size,          // size in bytes
+                                   0,                  // flags
+                                   tester->masterId(), // port id
+                                   0,                  // pc
+                                   threadId,           // thread_id
+                                   0);
+        req->setPaddr(address);
+        req->setReqInstSeqNum(tester->getActionSeqNum());
+        // let the protocol-specific hook add its own flags
+        setExtraRequestFlags(req);
+
+        // the packet takes a dynamically allocated buffer for the read data
+        PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
+        uint8_t *read_buf = new uint8_t[load_size];
+        pkt->dataDynamic(read_buf);
+        pkt->senderState = new ProtocolTester::SenderState(this);
+
+        if (!port->sendTimingReq(pkt)) {
+            panic("Not expected failed sendTimingReq\n");
+        }
+
+        // remember this load until its response comes back
+        addOutstandingReqs(outstandingLoads, address, lane, location);
+
+        // one more outstanding ld_st request
+        pendingLdStCount++;
+    }
+}
+
+// Issue one timing write request per lane of the current STORE action.
+// Each store writes (logged value + 1) to its location. Lanes whose
+// location is negative are skipped. Every issued store is recorded in
+// outstandingStores and counted in pendingLdStCount.
+void
+GpuWavefront::issueStoreOps()
+{
+    assert(curAction);
+    assert(curAction->getType() == Episode::Action::Type::STORE);
+    // no fence or atomic op may be outstanding when stores are issued
+    assert(pendingFenceCount == 0);
+    assert(pendingAtomicCount == 0);
+
+    for (int lane = 0; lane < numLanes; ++lane) {
+        Location location = curAction->getLocation(lane);
+        assert(location >= AddressManager::INVALID_LOCATION);
+
+        // nothing to issue for a lane with a negative location
+        if (location < 0)
+            continue;
+
+        // the value to store is the currently logged value plus one
+        Value new_value = addrManager->getLoggedValue(location) + 1;
+
+        Addr address = addrManager->getAddress(location);
+        // must be aligned with store size
+        assert(address % sizeof(Value) == 0);
+
+        DPRINTF(ProtocolTest, "%s Episode %d: Issuing Store - Addr %s - "
+                "Value %d\n", this->getName(),
+                curEpisode->getEpisodeId(), printAddress(address),
+                new_value);
+
+        Request *req = new Request(0,                  // asid
+                                   address,            // virtual addr
+                                   sizeof(Value),      // size in bytes
+                                   0,                  // flags
+                                   tester->masterId(), // port id
+                                   0,                  // pc
+                                   threadId,           // thread_id
+                                   0);
+        req->setPaddr(address);
+        req->setReqInstSeqNum(tester->getActionSeqNum());
+        // let the protocol-specific hook add its own flags
+        setExtraRequestFlags(req);
+
+        PacketPtr pkt = new Packet(req, MemCmd::WriteReq);
+
+        // copy the new value byte by byte into the packet's data buffer
+        uint8_t *store_buf = new uint8_t[sizeof(Value)];
+        const uint8_t *value_bytes = (const uint8_t*)&new_value;
+        for (size_t b = 0; b < sizeof(Value); ++b) {
+            store_buf[b] = value_bytes[b];
+        }
+        pkt->dataDynamic(store_buf);
+        pkt->senderState = new ProtocolTester::SenderState(this);
+
+        if (!port->sendTimingReq(pkt)) {
+            panic("Not expecting a failed sendTimingReq\n");
+        }
+
+        // remember this store (and its value) until the response arrives
+        addOutstandingReqs(outstandingStores, address, lane, location,
+                           new_value);
+
+        // one more outstanding ld_st request
+        pendingLdStCount++;
+    }
+}
+
+// Issue one atomic increment (sent as a SwapReq) per lane of the current
+// ATOMIC action. Every lane must have a non-negative location. Each issued
+// atomic is recorded in outstandingAtomics and counted in
+// pendingAtomicCount.
+void
+GpuWavefront::issueAtomicOps()
+{
+    assert(curAction);
+    assert(curAction->getType() == Episode::Action::Type::ATOMIC);
+    // nothing at all may be outstanding when atomics are issued
+    assert(pendingFenceCount == 0);
+    assert(pendingLdStCount == 0);
+    assert(pendingAtomicCount == 0);
+
+    // we use atomic_inc in the tester
+    Request::Flags atomic_flags = Request::ATOMIC_RETURN_OP;
+
+    for (int lane = 0; lane < numLanes; ++lane) {
+        Location location = curAction->getLocation(lane);
+        assert(location >= 0);
+
+        Addr address = addrManager->getAddress(location);
+
+        DPRINTF(ProtocolTest, "%s Episode %d: Issuing Atomic_Inc - Addr %s\n",
+                this->getName(), curEpisode->getEpisodeId(),
+                printAddress(address));
+
+        // must be aligned with the size of a Value
+        assert(address % sizeof(Value) == 0);
+
+        Request *req = new Request(0,                  // asid
+                                   address,            // virtual addr
+                                   sizeof(Value),      // size in bytes
+                                   atomic_flags,       // flags
+                                   tester->masterId(), // port id
+                                   0,                  // pc
+                                   threadId,           // thread_id
+                                   new AtomicOpInc<Value>());
+        req->setPaddr(address);
+        req->setReqInstSeqNum(tester->getActionSeqNum());
+        // let the protocol-specific hook add its own flags
+        setExtraRequestFlags(req);
+
+        // the packet takes a dynamically allocated one-Value buffer
+        PacketPtr pkt = new Packet(req, MemCmd::SwapReq);
+        uint8_t *atomic_buf = new uint8_t[sizeof(Value)];
+        pkt->dataDynamic(atomic_buf);
+        pkt->senderState = new ProtocolTester::SenderState(this);
+
+        if (!port->sendTimingReq(pkt)) {
+            panic("Not expecting failed sendTimingReq\n");
+        }
+
+        // remember this atomic until its response comes back
+        addOutstandingReqs(outstandingAtomics, address, lane, location);
+
+        // one more outstanding atomic op
+        pendingAtomicCount++;
+    }
+}
+
+// Issue the acquire of the current ACQUIRE action as a zero-sized
+// MemSyncReq at address 0 and count it as an outstanding fence.
+void
+GpuWavefront::issueAcquireOp()
+{
+    DPRINTF(ProtocolTest, "%s Episode %d: Issuing Acquire\n", this->getName(),
+            curEpisode->getEpisodeId());
+
+    assert(curAction);
+    assert(curAction->getType() == Episode::Action::Type::ACQUIRE);
+    // nothing at all may be outstanding when the acquire is issued
+    assert(pendingFenceCount == 0);
+    assert(pendingLdStCount == 0);
+    assert(pendingAtomicCount == 0);
+
+    Request *fence_req = new Request(0,
+                                     0, // vaddr
+                                     0, // request size
+                                     0, // flags
+                                     tester->masterId(),
+                                     0,
+                                     threadId,
+                                     0);
+    fence_req->setPaddr(0);
+    fence_req->setReqInstSeqNum(tester->getActionSeqNum());
+    // let the protocol-specific hook add its own flags
+    setExtraRequestFlags(fence_req);
+
+    PacketPtr fence_pkt = new Packet(fence_req, MemCmd::MemSyncReq);
+    fence_pkt->senderState = new ProtocolTester::SenderState(this);
+
+    if (!port->sendTimingReq(fence_pkt)) {
+        panic("Not expecting failed sendTimingReq\n");
+    }
+
+    // one more outstanding fence request
+    pendingFenceCount++;
+}
+
+// Issue the release of the current RELEASE action as a zero-sized
+// MemSyncReq at address 0 and count it as an outstanding fence.
+void
+GpuWavefront::issueReleaseOp()
+{
+    DPRINTF(ProtocolTest, "%s Episode %d: Issuing Release\n", this->getName(),
+            curEpisode->getEpisodeId());
+
+    assert(curAction);
+    assert(curAction->getType() == Episode::Action::Type::RELEASE);
+    // nothing at all may be outstanding when the release is issued
+    assert(pendingFenceCount == 0);
+    assert(pendingLdStCount == 0);
+    assert(pendingAtomicCount == 0);
+
+    Request *fence_req = new Request(0,
+                                     0, // vaddr
+                                     0, // request size
+                                     0, // flags
+                                     tester->masterId(),
+                                     0,
+                                     threadId,
+                                     0);
+    fence_req->setPaddr(0);
+    fence_req->setReqInstSeqNum(tester->getActionSeqNum());
+    // let the protocol-specific hook add its own flags
+    setExtraRequestFlags(fence_req);
+
+    PacketPtr fence_pkt = new Packet(fence_req, MemCmd::MemSyncReq);
+    fence_pkt->senderState = new ProtocolTester::SenderState(this);
+
+    if (!port->sendTimingReq(fence_pkt)) {
+        panic("Not expecting failed sendTimingReq\n");
+    }
+
+    // one more outstanding fence request
+    pendingFenceCount++;
+}
+
+// Handle a response packet coming back from the memory system.
+//
+// Dispatches on the response command:
+//  - MemSyncResp: a pending fence completed
+//  - ReadResp:    validate the returned value of a pending load
+//  - WriteResp:   pop the pending store and update the log table; the
+//                 store is only counted as done on the later MessageResp
+//  - SwapResp:    validate a pending atomic and update the log table
+//  - MessageResp: write completion ack, the store is now done
+// Any other command panics. The packet is always deleted; its senderState
+// and request are deleted unless the response was a WriteResp. Finally the
+// last active cycle is recorded and a wakeup is scheduled if none is
+// already scheduled.
+void
+GpuWavefront::hitCallback(PacketPtr pkt)
+{
+ assert(pkt);
+ MemCmd resp_cmd = pkt->cmd;
+ Addr addr = pkt->getAddr();
+
+ DPRINTF(ProtocolTest, "%s Episode %d: hitCallback - Command %s - "
+ "Addr %s\n", this->getName(),
+ curEpisode->getEpisodeId(), resp_cmd.toString(),
+ printAddress(addr));
+
+ // whether the transaction is done after this hitCallback
+ bool isTransactionDone = true;
+
+ if (resp_cmd == MemCmd::MemSyncResp) {
+ // response to a pending fence
+ // no validation needed for fence responses
+ assert(pendingFenceCount > 0);
+ assert(pendingLdStCount == 0);
+ assert(pendingAtomicCount == 0);
+ pendingFenceCount--;
+ } else if (resp_cmd == MemCmd::ReadResp) {
+ // response to a pending read
+ assert(pendingLdStCount > 0);
+ assert(pendingAtomicCount == 0);
+ assert(outstandingLoads.count(addr) > 0);
+
+ // get return data
+ Value value = *(pkt->getPtr<Value>());
+ // look up which location/lane issued this load and check the value
+ OutstandingReq req = popOutstandingReq(outstandingLoads, addr);
+ validateLoadResp(req.origLoc, req.lane, value);
+
+ // this Read is done
+ pendingLdStCount--;
+ } else if (resp_cmd == MemCmd::WriteResp) {
+ // response to a pending write
+ assert(pendingLdStCount > 0);
+ assert(pendingAtomicCount == 0);
+
+ // no need to validate Write response
+ // just pop it from the outstanding req table so that subsequent
+ // requests dependent on this write can proceed
+ // note that we don't decrement pendingLdStCount here yet since
+ // the write is not yet completed in downstream memory. Instead, we
+ // decrement the counter when we receive the write completion ack
+ assert(outstandingStores.count(addr) > 0);
+ OutstandingReq req = popOutstandingReq(outstandingStores, addr);
+ assert(req.storedValue != AddressManager::INVALID_VALUE);
+
+ // update log table
+ addrManager->updateLogTable(req.origLoc, threadId,
+ curEpisode->getEpisodeId(),
+ req.storedValue,
+ curTick(),
+ cuId);
+
+ // the transaction is not done yet. Waiting for write completion ack
+ isTransactionDone = false;
+ } else if (resp_cmd == MemCmd::SwapResp) {
+ // response to a pending atomic
+ assert(pendingAtomicCount > 0);
+ assert(pendingLdStCount == 0);
+ assert(outstandingAtomics.count(addr) > 0);
+
+ // get return data
+ Value value = *(pkt->getPtr<Value>());
+
+ // validate atomic op return
+ OutstandingReq req = popOutstandingReq(outstandingAtomics, addr);
+ validateAtomicResp(req.origLoc, req.lane, value);
+
+ // update log table
+ addrManager->updateLogTable(req.origLoc, threadId,
+ curEpisode->getEpisodeId(), value,
+ curTick(),
+ cuId);
+
+ // this Atomic is done
+ pendingAtomicCount--;
+ } else if (resp_cmd == MemCmd::MessageResp) {
+ // write completion ACK
+ assert(pendingLdStCount > 0);
+ assert(pendingAtomicCount == 0);
+
+ // the Write is now done
+ pendingLdStCount--;
+ } else {
+ panic("Unsupported MemCmd response type");
+ }
+
+ if (isTransactionDone) {
+ // no need to keep senderState and request around
+ delete pkt->senderState;
+ delete pkt->req;
+ }
+ // NOTE(review): after a WriteResp the senderState and request are left
+ // alive; presumably the later write completion ack (MessageResp) packet
+ // carries them and they are freed then -- confirm against the sender.
+
+ // the packet itself is released for every response type
+ delete pkt;
+
+ // record the last active cycle to check for deadlock
+ lastActiveCycle = curCycle();
+
+ // we may be able to issue an action. Let's check
+ if (!threadEvent.scheduled()) {
+ scheduleWakeup();
+ }
+}
+
+// Hook called on every request right before it is wrapped in a packet.
+// This implementation leaves the request untouched; the method is declared
+// virtual in GpuWavefront.hh so a child class can override it.
+void
+GpuWavefront::setExtraRequestFlags(Request* req)
+{
+ // No extra request flag is set
+}
diff --git a/src/cpu/testers/gpu_ruby_test/GpuWavefront.hh b/src/cpu/testers/gpu_ruby_test/GpuWavefront.hh
new file mode 100644
index 0000000..96c9f37
--- /dev/null
+++ b/src/cpu/testers/gpu_ruby_test/GpuWavefront.hh
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2017 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Tuan Ta
+ */
+
+#ifndef CPU_TESTERS_PROTOCOL_TESTER_GPUWAVEFRONT_HH_
+#define CPU_TESTERS_PROTOCOL_TESTER_GPUWAVEFRONT_HH_
+
+#include "cpu/testers/gpu_ruby_test/Thread.hh"
+#include "params/GpuWavefront.hh"
+#include "sim/clocked_object.hh"
+
+// A tester Thread that models a GPU wavefront attached to a compute unit.
+// It issues load/store/atomic/acquire/release requests and handles their
+// responses in hitCallback().
+class GpuWavefront : public Thread
+{
+ public:
+ typedef GpuWavefrontParams Params;
+ GpuWavefront(const Params *p);
+ virtual ~GpuWavefront();
+
+ // shorthands for the AddressManager types used throughout the class
+ typedef AddressManager::Location Location;
+ typedef AddressManager::Value Value;
+
+ // handle a response packet for a request issued by this wavefront
+ virtual void hitCallback(PacketPtr pkt);
+
+ protected:
+ // issue the per-lane requests of the current load/store/atomic action
+ void issueLoadOps();
+ void issueStoreOps();
+ void issueAtomicOps();
+ // acquire and release ops are protocol-specific, so their issue functions
+ // may be redefined by a child class of GpuWavefront
+ virtual void issueAcquireOp();
+ virtual void issueReleaseOp();
+ // set extra request flags that are specific to a target protocol
+ virtual void setExtraRequestFlags(Request* req);
+
+ protected:
+ int cuId; // compute unit associated with this wavefront
+};
+
+#endif /* CPU_TESTERS_PROTOCOL_TESTER_GPUWAVEFRONT_HH_ */
diff --git a/src/cpu/testers/gpu_ruby_test/GpuWavefront.py b/src/cpu/testers/gpu_ruby_test/GpuWavefront.py
new file mode 100644
index 0000000..a55f7f0
--- /dev/null
+++ b/src/cpu/testers/gpu_ruby_test/GpuWavefront.py
@@ -0,0 +1,44 @@
+#
+# Copyright (c) 2017 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# For use for simulation and test purposes only
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from this
+# software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Tuan Ta
+#
+
+from m5.params import *
+from m5.proxy import *
+
+from Thread import Thread
+
+# SimObject description of the GpuWavefront tester thread; it extends the
+# tester's Thread SimObject with the ID of the compute unit it belongs to.
+class GpuWavefront(Thread):
+ type = 'GpuWavefront'
+ # C++ header declaring the GpuWavefront class
+ cxx_header = "cpu/testers/gpu_ruby_test/GpuWavefront.hh"
+ # read as p->cu_id by the C++ GpuWavefront constructor
+ cu_id = Param.Int("Compute Unit ID")
diff --git a/src/cpu/testers/gpu_ruby_test/ProtocolTester.cc b/src/cpu/testers/gpu_ruby_test/ProtocolTester.cc
new file mode 100644
index 0000000..5184df7
--- /dev/null
+++ b/src/cpu/testers/gpu_ruby_test/ProtocolTester.cc
@@ -0,0 +1,318 @@
+/*
+ * Copyright (c) 2017 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Tuan Ta
+ */
+
+#include "cpu/testers/gpu_ruby_test/ProtocolTester.hh"
+
+#include <algorithm>
+#include <ctime>
+#include <fstream>
+#include <random>
+
+#include "cpu/testers/gpu_ruby_test/CpuThread.hh"
+#include "cpu/testers/gpu_ruby_test/GpuWavefront.hh"
+#include "cpu/testers/gpu_ruby_test/Thread.hh"
+#include "debug/ProtocolTest.hh"
+#include "mem/request.hh"
+#include "sim/sim_exit.hh"
+#include "sim/system.hh"
+
+// Construct the tester from its parameters.
+//
+// Creates one SeqPort per connected CPU, CU vector, CU scalar and CU SQC
+// port (in that order, sharing one running global port index), creates the
+// address manager, seeds the C random number generator and writes the test
+// configuration to the log file.
+//
+// When p->random_seed is 0 the generator is seeded from the current time;
+// the seed that was actually used is written to the log so that a run can
+// be reproduced by passing that value as random_seed.
+ProtocolTester::ProtocolTester(const Params *p)
+      : MemObject(p),
+        _masterId(p->system->getMasterId(name())),
+        numCpuPorts(p->port_cpu_ports_connection_count),
+        numVectorPorts(p->port_cu_vector_ports_connection_count),
+        numSqcPorts(p->port_cu_sqc_ports_connection_count),
+        numScalarPorts(p->port_cu_scalar_ports_connection_count),
+        numCusPerSqc(p->cus_per_sqc),
+        numCusPerScalar(p->cus_per_scalar),
+        numWfsPerCu(p->wavefronts_per_cu),
+        numWisPerWf(p->workitems_per_wavefront),
+        numAtomicLocs(p->num_atomic_locations),
+        numNormalLocsPerAtomic(p->num_normal_locs_per_atomic),
+        episodeLength(p->episode_length),
+        maxNumEpisodes(p->max_num_episodes),
+        debugTester(p->debug_tester),
+        cpuThreads(p->cpu_threads),
+        wfs(p->wavefronts)
+{
+    int idx = 0;        // global port index
+
+    numCpus = numCpuPorts;     // 1 cpu port per CPU
+    numCus = numVectorPorts;   // 1 vector port per CU
+
+    // create all physical cpu's data ports
+    for (int i = 0; i < numCpuPorts; ++i) {
+        const std::string port_name = csprintf("%s-cpuPort%d", name(), i);
+        DPRINTF(ProtocolTest, "Creating %s\n", port_name);
+        cpuPorts.push_back(new SeqPort(port_name, this, i, idx));
+        idx++;
+    }
+
+    // create all physical gpu's data ports
+    for (int i = 0; i < numVectorPorts; ++i) {
+        const std::string port_name =
+            csprintf("%s-cuVectorPort%d", name(), i);
+        DPRINTF(ProtocolTest, "Creating %s\n", port_name);
+        cuVectorPorts.push_back(new SeqPort(port_name, this, i, idx));
+        idx++;
+    }
+
+    for (int i = 0; i < numScalarPorts; ++i) {
+        const std::string port_name =
+            csprintf("%s-cuScalarPort%d", name(), i);
+        DPRINTF(ProtocolTest, "Creating %s\n", port_name);
+        cuScalarPorts.push_back(new SeqPort(port_name, this, i, idx));
+        idx++;
+    }
+
+    for (int i = 0; i < numSqcPorts; ++i) {
+        const std::string port_name = csprintf("%s-cuSqcPort%d", name(), i);
+        DPRINTF(ProtocolTest, "Creating %s\n", port_name);
+        cuSqcPorts.push_back(new SeqPort(port_name, this, i, idx));
+        idx++;
+    }
+
+    // create an address manager
+    addrManager = new AddressManager(numAtomicLocs,
+                                     numNormalLocsPerAtomic);
+    nextEpisodeId = 0;
+
+    if (!debugTester)
+        warn("Data race check is not enabled\n");
+
+    sentExitSignal = false;
+
+    // set random seed number; a seed of 0 means "seed from the current
+    // time". Keep the value handed to srand() so it can be logged below.
+    const unsigned int seed = (p->random_seed != 0) ?
+        static_cast<unsigned int>(p->random_seed) :
+        static_cast<unsigned int>(time(NULL));
+    srand(seed);
+
+    actionCount = 0;
+
+    // create a new log file
+    logFile = simout.create(p->log_file);
+    assert(logFile);
+
+    // print test configs
+    std::stringstream ss;
+    ss << "GPU Ruby test's configurations" << std::endl
+       << "\tNumber of CPUs: " << numCpus << std::endl
+       << "\tNumber of CUs: " << numCus << std::endl
+       << "\tNumber of wavefronts per CU: " << numWfsPerCu << std::endl
+       << "\tWavefront size: " << numWisPerWf << std::endl
+       << "\tNumber of atomic locations: " << numAtomicLocs << std::endl
+       << "\tNumber of non-atomic locations: "
+       << numNormalLocsPerAtomic * numAtomicLocs << std::endl
+       << "\tEpisode length: " << episodeLength << std::endl
+       << "\tTest length (max number of episodes): " << maxNumEpisodes
+       << std::endl
+       // log the seed that was really used, not the raw parameter
+       << "\tRandom seed: " << seed
+       << std::endl;
+
+    ccprintf(*(logFile->stream()), "%s", ss.str());
+    logFile->stream()->flush();
+}
+
+// Release every port created by the constructor, the address manager and
+// the log file.
+ProtocolTester::~ProtocolTester()
+{
+    // ports are freed in the order cpu, vector, scalar, sqc
+    for (MasterPort *cpu_port : cpuPorts)
+        delete cpu_port;
+    for (MasterPort *vector_port : cuVectorPorts)
+        delete vector_port;
+    for (MasterPort *scalar_port : cuScalarPorts)
+        delete scalar_port;
+    for (MasterPort *sqc_port : cuSqcPorts)
+        delete sqc_port;
+
+    delete addrManager;
+
+    // close the log file
+    simout.close(logFile);
+}
+
+// Attach every CPU thread and GPU wavefront to its port(s), then schedule
+// each thread's wakeup and deadlock check events.
+void
+ProtocolTester::init()
+{
+    DPRINTF(ProtocolTest, "Attach threads to ports\n");
+
+    // CPU thread i drives CPU port i
+    for (int cpu = 0; cpu < numCpus; ++cpu) {
+        CpuThread *cpu_thread = cpuThreads[cpu];
+        cpu_thread->attachThreadToPorts(this,
+                                static_cast<SeqPort*>(cpuPorts[cpu]));
+        cpu_thread->scheduleWakeup();
+        cpu_thread->scheduleDeadlockCheckEvent();
+    }
+
+    // scalar port index; only updated when scalar ports are in use
+    int scalar_port = 0;
+
+    for (int cu = 0; cu < numCus; ++cu) {
+        // one vector port per CU; SQC and scalar ports are shared by
+        // groups of numCusPerSqc / numCusPerScalar CUs
+        const int vector_port = cu;
+        const int sqc_port = cu / numCusPerSqc;
+        // no scalar port if 'numCusPerScalar' is '0'
+        if (numCusPerScalar != 0)
+            scalar_port = cu / numCusPerScalar;
+
+        for (int i = 0; i < numWfsPerCu; ++i) {
+            // wavefronts are laid out CU by CU in 'wfs'
+            GpuWavefront *wf = wfs[cu * numWfsPerCu + i];
+
+            SeqPort *scalar = nullptr;
+            if (numCusPerScalar)
+                scalar = static_cast<SeqPort*>(cuScalarPorts[scalar_port]);
+
+            wf->attachThreadToPorts(this,
+                        static_cast<SeqPort*>(cuVectorPorts[vector_port]),
+                        static_cast<SeqPort*>(cuSqcPorts[sqc_port]),
+                        scalar);
+            wf->scheduleWakeup();
+            wf->scheduleDeadlockCheckEvent();
+        }
+    }
+}
+
+// Return the master port named 'if_name' at index 'idx'.
+//
+// The tester owns four vector ports: "cpu_ports", "cu_vector_ports",
+// "cu_sqc_ports" and "cu_scalar_ports". For those, 'idx' must lie in
+// [0, number of connected ports); anything else (including the default
+// InvalidPortID) panics instead of indexing outside the port vector.
+// Every other name is passed along to MemObject::getMasterPort().
+BaseMasterPort &
+ProtocolTester::getMasterPort(const std::string & if_name, PortID idx)
+{
+    if (if_name == "cpu_ports") {
+        // valid indices are 0 .. numCpuPorts - 1
+        if (idx < 0 || idx >= numCpuPorts)
+            panic("ProtocolTester: unknown cpu port %d\n", idx);
+        return *cpuPorts[idx];
+    } else if (if_name == "cu_vector_ports") {
+        if (idx < 0 || idx >= numVectorPorts)
+            panic("ProtocolTester: unknown cu vect port %d\n", idx);
+        return *cuVectorPorts[idx];
+    } else if (if_name == "cu_sqc_ports") {
+        if (idx < 0 || idx >= numSqcPorts)
+            panic("ProtocolTester: unknown cu sqc port %d\n", idx);
+        return *cuSqcPorts[idx];
+    } else if (if_name == "cu_scalar_ports") {
+        if (idx < 0 || idx >= numScalarPorts)
+            panic("ProtocolTester: unknown cu scal port %d\n", idx);
+        return *cuScalarPorts[idx];
+    }
+
+    // not one of the tester's own ports: pass along to super class
+    return MemObject::getMasterPort(if_name, idx);
+}
+
+// Return true once the next episode id exceeds the maximum number of
+// episodes. The first time that happens, report success and request the
+// end of the simulation loop.
+bool
+ProtocolTester::checkExit()
+{
+    // still within the episode budget: keep going
+    if (nextEpisodeId <= maxNumEpisodes)
+        return false;
+
+    // all done; only signal the exit once
+    if (!sentExitSignal) {
+        inform("Total completed episodes: %d\n", nextEpisodeId - 1);
+        inform("Protocol Test: Passed!\n");
+        exitSimLoop("ProtocolTester completed!");
+        sentExitSignal = true;
+    }
+
+    return true;
+}
+
+// Ask every wavefront and then every CPU thread to run its checkDRF() on
+// the given location. Returns false as soon as one thread's check fails,
+// true otherwise. When debugTester is off no check is run and the result
+// is always true.
+bool
+ProtocolTester::checkDRF(Location atomic_loc,
+                         Location loc, bool isStore) const
+{
+    // checks are only performed when the tester is being debugged
+    if (!debugTester)
+        return true;
+
+    // GPU wavefronts first ...
+    for (const Thread* wf : wfs) {
+        const bool ok = wf->checkDRF(atomic_loc, loc, isStore);
+        if (!ok)
+            return false;
+    }
+
+    // ... then CPU threads
+    for (const Thread* cpu_thread : cpuThreads) {
+        const bool ok = cpu_thread->checkDRF(atomic_loc, loc, isStore);
+        if (!ok)
+            return false;
+    }
+
+    return true;
+}
+
+// Append all threads' outstanding requests to 'ss', write 'ss' to the log
+// file and request the end of the simulation loop with a failure status.
+// Does nothing if an exit signal was already sent.
+void
+ProtocolTester::dumpErrorLog(std::stringstream& ss)
+{
+    // an exit was already requested: nothing more to report
+    if (sentExitSignal)
+        return;
+
+    // collect the outstanding requests of CPU threads, then wavefronts
+    for (auto cpu_thread : cpuThreads) {
+        cpu_thread->printAllOutstandingReqs(ss);
+    }
+
+    for (auto wf : wfs) {
+        wf->printAllOutstandingReqs(ss);
+    }
+
+    // dump error log into a file
+    assert(logFile);
+    ccprintf(*(logFile->stream()), "%s", ss.str());
+    logFile->stream()->flush();
+
+    // exit the sim loop
+    exitSimLoop("GPU Ruby Tester: Failed!", -1);
+    sentExitSignal = true;
+}
+
+// Forward a timing response to the thread stored in the packet's sender
+// state. Always returns true.
+bool
+ProtocolTester::SeqPort::recvTimingResp(PacketPtr pkt)
+{
+    // the sender state attached at issue time identifies the thread
+    ProtocolTester::SenderState *state =
+        safe_cast<ProtocolTester::SenderState*>(pkt->senderState);
+
+    state->th->hitCallback(pkt);
+
+    return true;
+}
+
+// Factory hook: allocate the ProtocolTester described by this params
+// object.
+ProtocolTester*
+ProtocolTesterParams::create()
+{
+    ProtocolTester *new_tester = new ProtocolTester(this);
+    return new_tester;
+}
diff --git a/src/cpu/testers/gpu_ruby_test/ProtocolTester.hh b/src/cpu/testers/gpu_ruby_test/ProtocolTester.hh
new file mode 100644
index 0000000..d6b356d
--- /dev/null
+++ b/src/cpu/testers/gpu_ruby_test/ProtocolTester.hh
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2017 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Tuan Ta
+ */
+
+#ifndef CPU_TESTERS_PROTOCOL_TESTER_PROTOCOLTESTER_HH_
+#define CPU_TESTERS_PROTOCOL_TESTER_PROTOCOLTESTER_HH_
+
+/*
+ * The tester includes the main ProtocolTester that manages all ports to the
+ * memory system.
+ * Threads are mapped to certain data port(s)
+ *
+ * Threads inject memory requests through their data ports.
+ * The tester receives and validates responses from the memory.
+ *
+ * Main components
+ * - AddressManager: generate DRF request streams &
+ * validate data response against an internal log_table
+ * - Episode: a sequence of requests
+ * - Thread: either GPU wavefront or CPU thread
+ */
+
+#include <iostream>
+#include <map>
+#include <string>
+#include <vector>
+
+#include "base/types.hh"
+#include "cpu/testers/gpu_ruby_test/AddressManager.hh"
+#include "mem/mem_object.hh"
+#include "mem/packet.hh"
+#include "mem/ruby/system/RubyPort.hh"
+#include "params/ProtocolTester.hh"
+
+class Thread;
+class CpuThread;
+class GpuWavefront;
+
+// Top-level object of the GPU Ruby tester. It owns the master ports towards
+// the memory system, the address manager, the lists of CPU threads and GPU
+// wavefronts, and the log file.
+class ProtocolTester : public MemObject
+{
+ public:
+ // master port through which threads send requests; responses are
+ // dispatched to the issuing thread by recvTimingResp()
+ class SeqPort : public MasterPort
+ {
+ public:
+ // note: '_index' is accepted but not used by this constructor
+ SeqPort(const std::string &_name, ProtocolTester *_tester, PortID _id,
+ PortID _index)
+ : MasterPort(_name, _tester, _id)
+ {}
+
+ protected:
+ virtual bool recvTimingResp(PacketPtr pkt);
+ // a retry is never expected by the tester, so receiving one panics
+ virtual void recvReqRetry()
+ { panic("%s does not expect a retry\n", name()); }
+ };
+
+ // sender state attached to every request packet; it records the
+ // (non-null) thread that issued the request
+ struct SenderState : public Packet::SenderState
+ {
+ Thread* th;
+ SenderState(Thread* _th)
+ {
+ assert(_th);
+ th = _th;
+ }
+
+ ~SenderState()
+ {}
+ };
+
+ public:
+ typedef ProtocolTesterParams Params;
+ ProtocolTester(const Params *p);
+ ~ProtocolTester();
+
+ // shorthands for the AddressManager types used by the tester
+ typedef AddressManager::Location Location;
+ typedef AddressManager::Value Value;
+
+ void init();
+ // return the master id of this tester
+ MasterID masterId() { return _masterId; };
+ virtual BaseMasterPort &getMasterPort(const std::string &if_name,
+ PortID idx = InvalidPortID);
+
+ int getEpisodeLength() const { return episodeLength; }
+ // return pointer to the address manager
+ AddressManager* getAddressManager() const { return addrManager; }
+ // return true if the tester should stop issuing new episodes
+ bool checkExit();
+ // verify if a location to be picked for LD/ST will satisfy
+ // data race free requirement
+ bool checkDRF(Location atomic_loc, Location loc, bool isStore) const;
+ // return the next episode id and increment it
+ int getNextEpisodeID() { return nextEpisodeId++; }
+ // return the current action sequence number and increment it
+ int getActionSeqNum() { return actionCount++; }
+
+ // dump error log into a file and exit the simulation
+ void dumpErrorLog(std::stringstream& ss);
+
+ private:
+ MasterID _masterId;
+
+ // list of parameters taken from python scripts
+ int numCpuPorts;
+ int numVectorPorts;
+ int numSqcPorts;
+ int numScalarPorts;
+ int numCusPerSqc;
+ int numCusPerScalar;
+ int numWfsPerCu;
+ int numWisPerWf;
+ // parameters controlling the address range that the tester can access
+ int numAtomicLocs;
+ int numNormalLocsPerAtomic;
+ // the number of actions in an episode (episodeLength +- random number)
+ int episodeLength;
+ // the maximum number of episodes to be completed by this tester
+ int maxNumEpisodes;
+ // are we debugging the tester
+ bool debugTester;
+
+ // all available master ports connected to Ruby
+ std::vector<MasterPort*> cpuPorts; // cpu data ports
+ std::vector<MasterPort*> cuVectorPorts; // ports to GPU vector cache
+ std::vector<MasterPort*> cuSqcPorts; // ports to GPU instruction cache
+ std::vector<MasterPort*> cuScalarPorts; // ports to GPU scalar cache
+ // all CPU and GPU threads
+ std::vector<CpuThread*> cpuThreads;
+ std::vector<GpuWavefront*> wfs;
+
+ // address manager that (1) generates DRF sequences of requests,
+ // (2) manages an internal log table and
+ // (3) validates response data
+ AddressManager* addrManager;
+
+ // number of CPUs and CUs
+ int numCpus;
+ int numCus;
+ // unique id of the next episode
+ int nextEpisodeId;
+
+ // global action count. Overflow is fine. It's used to uniquely identify
+ // per-wave & per-instruction memory requests in the coalescer
+ int actionCount;
+
+ // if an exit signal was already sent
+ bool sentExitSignal;
+
+ // log file that receives the test configuration and the error log
+ OutputStream* logFile;
+};
+
+#endif /* CPU_TESTERS_PROTOCOL_TESTER_PROTOCOLTESTER_HH_ */
diff --git a/src/cpu/testers/gpu_ruby_test/ProtocolTester.py b/src/cpu/testers/gpu_ruby_test/ProtocolTester.py
new file mode 100644
index 0000000..0ee6ca5
--- /dev/null
+++ b/src/cpu/testers/gpu_ruby_test/ProtocolTester.py
@@ -0,0 +1,68 @@
+#
+# Copyright (c) 2017 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# For use for simulation and test purposes only
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from this
+# software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Tuan Ta
+#
+
+from MemObject import MemObject
+from m5.params import *
+from m5.proxy import *
+
+class ProtocolTester(MemObject):
+ type = 'ProtocolTester'
+ cxx_header = "cpu/testers/gpu_ruby_test/ProtocolTester.hh"
+
+ cpu_ports = VectorMasterPort("Ports for CPUs")
+ cu_vector_ports = VectorMasterPort("Vector ports for GPUs")
+ cu_sqc_ports = VectorMasterPort("SQC ports for GPUs")
+ cu_scalar_ports = VectorMasterPort("Scalar ports for GPUs")
+
+ cus_per_sqc = Param.Int(4, "Number of CUs per SQC")
+ cus_per_scalar = Param.Int(4, "Number of CUs per scalar cache")
+
+ wavefronts_per_cu = Param.Int(1, "Number of wavefronts per CU")
+ workitems_per_wavefront = Param.Int(64, "Number of workitems per wf")
+
+ cpu_threads = VectorParam.CpuThread("All cpus")
+ wavefronts = VectorParam.GpuWavefront("All wavefronts")
+
+ num_atomic_locations = Param.Int(2, "Number of atomic locations")
+ num_normal_locs_per_atomic = Param.Int(1000, \
+ "Number of normal locations per atomic")
+
+ episode_length = Param.Int(10, "Number of actions per episode")
+ max_num_episodes = Param.Int(20, "Maximum number of episodes")
+ debug_tester = Param.Bool(False, "Are we debugging the tester?")
+ random_seed = Param.Int(0, "Random seed number. Default value (0) means \
+ using runtime-specific value.")
+ log_file = Param.String("Log file's name")
+ system = Param.System(Parent.any, "System we belong to")
diff --git a/src/cpu/testers/gpu_ruby_test/README b/src/cpu/testers/gpu_ruby_test/README
new file mode 100644
index 0000000..964adb9
--- /dev/null
+++ b/src/cpu/testers/gpu_ruby_test/README
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2017 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Tuan Ta
+ */
+
+This directory contains a tester for gem5 GPU protocols. Unlike the Ruby random
+tester, this tester does not rely on sequential consistency. Instead, it
+assumes that the tested protocols support release consistency.
+
+----- Theory Overview -----
+
+The GPU Ruby tester creates a system consisting of both CPU threads and GPU
+wavefronts. CPU threads are scalar, so there is one lane per CPU thread. A
+GPU wavefront may have multiple lanes. The number of lanes is initialized
+when a thread/wavefront is created.
+
+Each thread/wavefront executes a number of episodes. Each episode is a series
+of memory actions (i.e., atomic, load, store, acquire and release). In a
+wavefront, all lanes execute the same sequence of actions, but they may target
+different addresses. One can think of an episode as a critical section which
+is bounded by a lock acquire in the beginning and a lock release at the end. An
+episode consists of actions in the following order:
+
+1 - Atomic action
+2 - Acquire action
+3 - A number of load and store actions
+4 - Release action
+5 - Atomic action that targets the same address as (1) does
+
+There are two separate sets of addresses: atomic and non-atomic. Atomic
+actions target only atomic addresses. Load and store actions target only
+non-atomic addresses. Memory addresses are all 4-byte aligned in the tester.
+
+To test false sharing cases in which both atomic and non-atomic addresses are
+placed in the same cache line, we abstract out the concept of memory addresses
+from the tester's perspective by introducing the concept of location. Locations
+are numbered from 0 to N-1 (if there are N addresses). The first X locations
+[0..X-1] are atomic locations, and the rest are non-atomic locations.
+The 1-1 mapping between locations and addresses is randomly created when the
+tester is initialized.
+
+For each load and store action, the target location is selected so that there
+is no data race in the generated stream of memory requests at any time during
+the test. Since the Data-Race-Free model leaves the memory system's behavior
+undefined under data races, we exclude such scenarios from our protocol test.
+
+Once the location of a load/store action is determined, each thread/wavefront
+either loads the current value at the location or stores an incremental value
+to that location. The tester maintains a table tracking all last writers and
+their written values, so we know what value should be returned from a load and
+what value should be written next at a particular location. The value returned
+from a load must match the value written by the last writer.
+
+----- Directory Structure -----
+
+ProtocolTester.hh/cc -- This is the main tester class that orchestrates the
+ entire test.
+AddressManager.hh/cc -- This manages address space, randomly maps address to
+ location, generates locations for all episodes,
+ maintains per-location last writer and validates
+ values returned from load actions.
+Thread.hh/cc -- This is the abstract base class for CPU threads and GPU
+ wavefronts. It generates and executes a series of
+ episodes.
+CpuThread.hh/cc -- Thread class for CPU threads. Not fully implemented yet
+GpuWavefront.hh/cc -- Thread class for GPU wavefronts.
+Episode.hh/cc -- Class to encapsulate an episode, notably including
+ episode load/store structure and ordering.
diff --git a/src/cpu/testers/gpu_ruby_test/SConscript b/src/cpu/testers/gpu_ruby_test/SConscript
new file mode 100644
index 0000000..6e7a815
--- /dev/null
+++ b/src/cpu/testers/gpu_ruby_test/SConscript
@@ -0,0 +1,53 @@
+#
+# Copyright (c) 2017 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# For use for simulation and test purposes only
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from this
+# software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Tuan Ta
+#
+
+Import('*')
+# Build nothing from this directory when the PROTOCOL variable is 'None'.
+if env['PROTOCOL'] == 'None':
+    Return()
+
+SimObject('ProtocolTester.py')
+SimObject('Thread.py')
+SimObject('CpuThread.py')
+SimObject('GpuWavefront.py')
+
+Source('AddressManager.cc')
+Source('Episode.cc')
+Source('ProtocolTester.cc')
+Source('Thread.cc')
+Source('CpuThread.cc')
+Source('GpuWavefront.cc')
+# Debug flag; Thread.cc includes the generated "debug/ProtocolTest.hh".
+DebugFlag('ProtocolTest')
diff --git a/src/cpu/testers/gpu_ruby_test/Thread.cc b/src/cpu/testers/gpu_ruby_test/Thread.cc
new file mode 100644
index 0000000..b499c9b
--- /dev/null
+++ b/src/cpu/testers/gpu_ruby_test/Thread.cc
@@ -0,0 +1,442 @@
+/*
+ * Copyright (c) 2017 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Tuan Ta
+ */
+
+#include "cpu/testers/gpu_ruby_test/Thread.hh"
+
+#include <fstream>
+
+#include "debug/ProtocolTest.hh"
+
+// Build an idle thread: every pointer that attachThreadToPorts() fills in
+// later starts out null, and no episode or request is in flight yet.
+// The initializers below follow the member declaration order in Thread.hh.
+Thread::Thread(const Params* p)
+    : ClockedObject(p),
+      threadEvent(this, "Thread tick"),
+      deadlockCheckEvent(this),
+      threadId(p->thread_id),
+      numLanes(p->num_lanes),
+      tester(nullptr),           // set by attachThreadToPorts()
+      addrManager(nullptr),      // set by attachThreadToPorts()
+      port(nullptr),             // set by attachThreadToPorts()
+      scalarPort(nullptr),       // set by attachThreadToPorts()
+      sqcPort(nullptr),          // set by attachThreadToPorts()
+      curEpisode(nullptr),
+      curAction(nullptr),
+      pendingLdStCount(0),
+      pendingFenceCount(0),
+      pendingAtomicCount(0),
+      lastActiveCycle(Cycles(0)),
+      deadlockThreshold(p->deadlock_threshold)
+{
+}
+
+Thread::~Thread()
+{
+    for (Episode *episode : episodeHistory) {   // made by issueNewEpisode()
+        assert(episode != nullptr);
+        delete episode;
+    }
+}
+
+void
+Thread::wakeup()
+{
+    // Body of threadEvent (see ThreadEvent::process() in Thread.hh).
+    // According to the tester's design, this thread is woken up when
+    //   - hitCallback is called, or
+    //   - a new episode is created
+
+    // lazily create the very first episode of this thread
+    if (!curEpisode) {
+        issueNewEpisode();
+        assert(curEpisode);
+    }
+
+    if (isNextActionReady()) {
+        // isNextActionReady() has stored the pending action in curAction
+        assert(curAction != nullptr);
+        issueNextAction();
+        return;
+    }
+
+    // Nothing can be issued right now. The episode is complete only when
+    // it has no more actions and no request of any kind is outstanding;
+    // otherwise there is nothing left to do in this wakeup.
+    const bool has_pending_reqs = pendingLdStCount != 0 ||
+                                  pendingFenceCount != 0 ||
+                                  pendingAtomicCount != 0;
+    if (curEpisode->hasMoreActions() || has_pending_reqs) {
+        return;
+    }
+
+    curEpisode->completeEpisode();
+
+    // if it is time to stop the tester, no more event is scheduled for
+    // this thread
+    if (tester->checkExit()) {
+        return;
+    }
+
+    // start the next episode and process it in the next cycle
+    issueNewEpisode();
+    assert(curEpisode);
+
+    if (!threadEvent.scheduled()) {
+        scheduleWakeup();
+    }
+}
+
+void
+Thread::scheduleWakeup()
+{
+    assert(!threadEvent.scheduled());   // must not already be pending
+    schedule(threadEvent, nextCycle()); // threadEvent runs wakeup()
+}
+
+void
+Thread::scheduleDeadlockCheckEvent()
+{
+    // arms the first check only; checkDeadlock() re-schedules the event
+    assert(!deadlockCheckEvent.scheduled());
+    schedule(deadlockCheckEvent, nextCycle());
+}
+
+void
+Thread::attachThreadToPorts(ProtocolTester *_tester,
+                            ProtocolTester::SeqPort *_port,
+                            ProtocolTester::SeqPort *_sqcPort,
+                            ProtocolTester::SeqPort *_scalarPort)
+{
+    // parameter order matches Thread.hh: SQC port first, then scalar port
+    tester = _tester;
+    port = _port;
+    sqcPort = _sqcPort;         // defaults to nullptr in Thread.hh
+    scalarPort = _scalarPort;   // defaults to nullptr in Thread.hh
+    assert(tester && port);     // tester and main data port are mandatory
+    addrManager = tester->getAddressManager();
+    assert(addrManager);
+}
+
+void
+Thread::issueNewEpisode()
+{
+    // pick a random load count below the episode length; the remainder
+    // of the episode length becomes the store count
+    const int load_cnt = random() % tester->getEpisodeLength();
+    const int store_cnt = tester->getEpisodeLength() - load_cnt;
+    curEpisode = new Episode(tester, this, load_cnt, store_cnt);
+    episodeHistory.push_back(curEpisode);   // history keeps the newest last
+}
+
+bool
+Thread::isNextActionReady()
+{
+    // Decide whether the action at the head of the current episode may be
+    // issued now. As a side effect, curAction is pointed at that action
+    // whenever the episode still has actions left.
+    //
+    // Issue rules per action type:
+    //   ATOMIC     - no request of any kind may be outstanding
+    //   ACQUIRE    - no atomic request may be outstanding
+    //   RELEASE    - no load/store request may be outstanding
+    //   LOAD/STORE - no fence may be outstanding and no lane may target
+    //                an address with an outstanding load or store
+
+    // an exhausted episode has nothing to issue
+    if (!curEpisode->hasMoreActions()) {
+        return false;
+    }
+
+    // remember the head action; issueNextAction() dispatches on curAction
+    curAction = curEpisode->peekCurAction();
+
+    switch(curAction->getType()) {
+      case Episode::Action::Type::ATOMIC:
+        // an atomic action must wait for all previous requests
+        // to complete
+        return pendingLdStCount == 0 &&
+               pendingFenceCount == 0 &&
+               pendingAtomicCount == 0;
+
+      case Episode::Action::Type::ACQUIRE:
+        // we should not see any outstanding ld_st or fence here
+        assert(pendingLdStCount == 0 &&
+               pendingFenceCount == 0);
+
+        // an acquire action must wait for all previous atomic
+        // requests to complete
+        return pendingAtomicCount == 0;
+
+      case Episode::Action::Type::RELEASE:
+        // we should not see any outstanding atomic or fence here
+        assert(pendingAtomicCount == 0 &&
+               pendingFenceCount == 0);
+
+        // a release action must wait for all previous ld/st
+        // requests to complete
+        return pendingLdStCount == 0;
+
+      case Episode::Action::Type::LOAD:
+      case Episode::Action::Type::STORE:
+        // we should not see any outstanding atomic here
+        assert(pendingAtomicCount == 0);
+
+        // can't issue if there is a pending fence
+        if (pendingFenceCount > 0) {
+            return false;
+        }
+
+        // a Load or Store is ready only if none of its lanes overlaps
+        // with an outstanding request
+        for (int lane = 0; lane < numLanes; ++lane) {
+            const Location lane_loc = curAction->getLocation(lane);
+
+            // skip lanes whose location is INVALID_LOCATION
+            if (lane_loc == AddressManager::INVALID_LOCATION) {
+                continue;
+            }
+
+            const Addr lane_addr = addrManager->getAddress(lane_loc);
+
+            if (outstandingLoads.count(lane_addr) > 0 ||
+                outstandingStores.count(lane_addr) > 0) {
+                return false;
+            }
+
+            // this is not an atomic action, so the address should
+            // not be in outstandingAtomics list
+            assert(outstandingAtomics.count(lane_addr) == 0);
+        }
+
+        return true;
+
+      default:
+        panic("The tester got an invalid action\n");
+    }
+}
+
+void
+Thread::issueNextAction()
+{
+    // curAction was selected by isNextActionReady(). Forward it to the
+    // issue hook that matches its type; the hooks are pure virtual in
+    // Thread.hh and therefore provided by the child class.
+    const auto action_type = curAction->getType();
+
+    if (action_type == Episode::Action::Type::ATOMIC) {
+        issueAtomicOps();
+    } else if (action_type == Episode::Action::Type::ACQUIRE) {
+        issueAcquireOp();
+    } else if (action_type == Episode::Action::Type::RELEASE) {
+        issueReleaseOp();
+    } else if (action_type == Episode::Action::Type::LOAD) {
+        issueLoadOps();
+    } else if (action_type == Episode::Action::Type::STORE) {
+        issueStoreOps();
+    } else {
+        panic("The tester got an invalid action\n");
+    }
+
+    // the current action has been issued: pop it from the action list
+    // and record this cycle as the last activity seen by checkDeadlock()
+    curEpisode->popAction();
+    lastActiveCycle = curCycle();
+
+    // the next action may already be issuable, so wake this thread up
+    // again in the next cycle unless a wakeup is already pending
+    if (!threadEvent.scheduled()) {
+        scheduleWakeup();
+    }
+}
+
+void
+Thread::addOutstandingReqs(OutstandingReqTable& req_table, Addr address,
+                           int lane, Location loc, Value stored_val)
+{
+    // Append one in-flight request for `address` to req_table.
+    //   lane, loc   - issuing lane and the location it targets
+    //   stored_val  - value being written (callers may rely on the
+    //                 INVALID_VALUE default declared in Thread.hh)
+    // The request is stamped with the current cycle, which
+    // printOutstandingReqs() later uses to report its age.
+    //
+    // operator[] creates the per-address list on first use, so new and
+    // already-tracked addresses are handled by the same append.
+    OutstandingReqList& addr_reqs = req_table[address];
+    addr_reqs.emplace_back(lane, loc, stored_val, curCycle());
+}
+
+Thread::OutstandingReq
+Thread::popOutstandingReq(OutstandingReqTable& req_table, Addr addr)
+{
+    // Remove and return the most recently added request for addr.
+    // The caller must only pop addresses that currently have an entry
+    // in req_table; anything else trips the asserts below.
+
+    auto entry = req_table.find(addr);
+    assert(entry != req_table.end());
+
+    OutstandingReqList& addr_reqs = entry->second;
+    assert(!addr_reqs.empty());
+
+    // copy the request out before it is dropped from the list
+    OutstandingReq popped_req = addr_reqs.back();
+    addr_reqs.pop_back();
+
+    // Empty lists are never kept in the table: isNextActionReady() treats
+    // the mere presence of an address as "has outstanding requests".
+    if (addr_reqs.empty()) {
+        req_table.erase(entry);
+    }
+
+    return popped_req;
+}
+
+void
+Thread::validateAtomicResp(Location loc, int lane, Value ret_val)
+{
+    // the address manager decides whether ret_val is acceptable for loc
+    if (addrManager->validateAtomicResp(loc, ret_val)) {
+        return;
+    }
+
+    // unexpected value: collect the basic info about the failing request
+    std::stringstream report;
+    const Addr addr = addrManager->getAddress(loc);
+    report << threadName << ": Atomic Op returned unexpected value\n"
+           << "\tEpisode " << curEpisode->getEpisodeId() << "\n"
+           << "\tLane ID " << lane << "\n"
+           << "\tAddress " << printAddress(addr) << "\n"
+           << "\tAtomic Op's return value " << ret_val << "\n";
+
+    warn("%s\n", report.str());
+    // TODO add more detailed info
+
+    // dump all error info and exit the simulation
+    tester->dumpErrorLog(report);
+}
+
+void
+Thread::validateLoadResp(Location loc, int lane, Value ret_val)
+{
+    // a load must return the value the address manager has logged for loc
+    if (ret_val == addrManager->getLoggedValue(loc)) {
+        return;
+    }
+
+    // mismatch: collect the basic info about the failing request
+    std::stringstream report;
+    const Addr addr = addrManager->getAddress(loc);
+    report << threadName << ": Loaded value is not consistent with "
+           << "the last stored value\n"
+           << "\tThread " << threadId << "\n"
+           << "\tEpisode " << curEpisode->getEpisodeId() << "\n"
+           << "\tLane ID " << lane << "\n"
+           << "\tAddress " << printAddress(addr) << "\n"
+           << "\tLoaded value " << ret_val << "\n"
+           << "\tLast writer " << addrManager->printLastWriter(loc) << "\n";
+
+    warn("%s\n", report.str());
+    // TODO add more detailed info
+
+    // dump all error info and exit the simulation
+    tester->dumpErrorLog(report);
+}
+
+bool
+Thread::checkDRF(Location atomic_loc, Location loc, bool isStore) const
+{
+    // without an active episode this thread reports no DRF violation
+    if (!curEpisode || !curEpisode->isEpsActive()) {
+        return true;
+    }
+    // otherwise defer to the episode this thread is currently executing
+    return curEpisode->checkDRF(atomic_loc, loc, isStore, numLanes);
+}
+
+void
+Thread::checkDeadlock()
+{
+    // deadlock: lastActiveCycle is more than deadlockThreshold cycles old
+    if ((curCycle() - lastActiveCycle) > deadlockThreshold) {
+        std::stringstream report;
+        report << threadName << ": Deadlock detected\n"
+               << "\tLast active cycle: " << lastActiveCycle << "\n"
+               << "\tCurrent cycle: " << curCycle() << "\n"
+               << "\tDeadlock threshold: " << deadlockThreshold << "\n";
+
+        // print out basic info, then dump all error info and exit
+        warn("%s\n", report.str());
+        tester->dumpErrorLog(report);
+        return;
+    }
+
+    // not deadlocked: re-arm the check unless checkExit() reports true
+    if (!tester->checkExit()) {
+        assert(!deadlockCheckEvent.scheduled());
+        schedule(deadlockCheckEvent,
+                 deadlockThreshold * clockPeriod() + curTick());
+    }
+}
+
+void
+Thread::printOutstandingReqs(const OutstandingReqTable& table,
+                             std::stringstream& ss) const
+{
+    // one line per request: its address and the cycles since it was issued
+    const Cycles now = curCycle();
+    for (const auto& addr_entry : table) {
+        for (const OutstandingReq& pending : addr_entry.second) {
+            ss << "\t\t\tAddr " << printAddress(addr_entry.first)
+               << ": delta (curCycle - issueCycle) = "
+               << (now - pending.issueCycle) << std::endl;
+        }
+    }
+}
+
+void
+Thread::printAllOutstandingReqs(std::stringstream& ss) const
+{
+    // append this thread's outstanding requests to ss, one section per type
+    ss << "\t\tOutstanding Loads:\n";
+    printOutstandingReqs(outstandingLoads, ss);
+    ss << "\t\tOutstanding Stores:\n";
+    printOutstandingReqs(outstandingStores, ss);
+    ss << "\t\tOutstanding Atomics:\n";
+    printOutstandingReqs(outstandingAtomics, ss);
+    ss << "\t\tNumber of outstanding acquires & releases: "
+       << pendingFenceCount << std::endl;   // fences are only counted
+}
diff --git a/src/cpu/testers/gpu_ruby_test/Thread.hh b/src/cpu/testers/gpu_ruby_test/Thread.hh
new file mode 100644
index 0000000..6816bb8
--- /dev/null
+++ b/src/cpu/testers/gpu_ruby_test/Thread.hh
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2017 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Tuan Ta
+ */
+
+/*
+ * Thread issues requests to and receives responses from Ruby memory
+ */
+
+#ifndef CPU_TESTERS_PROTOCOL_TESTER_THREAD_HH_
+#define CPU_TESTERS_PROTOCOL_TESTER_THREAD_HH_
+
+#include "cpu/testers/gpu_ruby_test/AddressManager.hh"
+#include "cpu/testers/gpu_ruby_test/Episode.hh"
+#include "cpu/testers/gpu_ruby_test/ProtocolTester.hh"
+#include "gpu-compute/gpu_dyn_inst.hh"
+#include "sim/clocked_object.hh"
+
+class Thread : public ClockedObject
+{
+  public:
+    typedef ThreadParams Params;
+    Thread(const Params *p);
+    virtual ~Thread();
+
+    typedef AddressManager::Location Location;
+    typedef AddressManager::Value Value;
+
+    void wakeup();
+    void scheduleWakeup();
+    void checkDeadlock();
+    void scheduleDeadlockCheckEvent();
+
+    void attachThreadToPorts(ProtocolTester *_tester,
+                             ProtocolTester::SeqPort *_port,
+                             ProtocolTester::SeqPort *_sqcPort = nullptr,
+                             ProtocolTester::SeqPort *_scalarPort = nullptr);
+
+    const std::string& getName() const { return threadName; }
+
+    // must be implemented by a child class
+    virtual void hitCallback(PacketPtr pkt) = 0;
+
+    int getThreadId() const { return threadId; }
+    int getNumLanes() const { return numLanes; }
+    // check if the input location would satisfy DRF constraint
+    bool checkDRF(Location atomic_loc, Location loc, bool isStore) const;
+
+    void printAllOutstandingReqs(std::stringstream& ss) const;
+
+  protected:
+    // event whose process() runs wakeup() on the owning thread
+    class ThreadEvent : public Event
+    {
+      private:
+        Thread* thread;
+        std::string desc;
+
+      public:
+        ThreadEvent(Thread* _thread, std::string _description)
+            : Event(CPU_Tick_Pri), thread(_thread), desc(_description) {}
+        void setDesc(std::string _description) { desc = _description; }
+        void process() { thread->wakeup(); }
+        // const-qualified so that it overrides Event::name() const
+        const std::string name() const { return desc; }
+    };
+
+    ThreadEvent threadEvent;
+
+    class DeadlockCheckEvent : public Event
+    {
+      private:
+        Thread* thread;
+
+      public:
+        DeadlockCheckEvent(Thread* _thread)
+            : Event(CPU_Tick_Pri), thread(_thread) {}
+        void process() { thread->checkDeadlock(); }
+        const std::string name() const { return "Tester deadlock check"; }
+    };
+
+    DeadlockCheckEvent deadlockCheckEvent;
+
+    // bookkeeping for one issued request kept in an OutstandingReqTable
+    struct OutstandingReq
+    {
+        int lane;
+        Location origLoc;
+        Value storedValue;
+        Cycles issueCycle;
+
+        OutstandingReq(int _lane, Location _loc, Value _val, Cycles _cycle)
+            : lane(_lane), origLoc(_loc), storedValue(_val), issueCycle(_cycle)
+        {}
+
+        ~OutstandingReq() {}
+    };
+
+    // the unique global id of this thread
+    int threadId;
+    // width of this thread (1 for cpu thread & wf size for gpu wavefront)
+    int numLanes;
+    // thread name; not set by Thread itself (presumably by the child class)
+    std::string threadName;
+    // pointer to the main tester
+    ProtocolTester *tester;
+    // pointer to the address manager
+    AddressManager *addrManager;
+
+    ProtocolTester::SeqPort *port;       // main data port (GPU-vector data)
+    ProtocolTester::SeqPort *scalarPort; // nullptr for CPU
+    ProtocolTester::SeqPort *sqcPort;    // nullptr for CPU
+
+    // a list of issued episodes sorted by time
+    // the last episode in the list is the current episode
+    typedef std::vector<Episode*> EpisodeHistory;
+    EpisodeHistory episodeHistory;
+    // pointer to the current episode
+    Episode *curEpisode;
+    // pointer to the current action
+    const Episode::Action *curAction;
+
+    // number of outstanding requests that are waiting for their responses
+    int pendingLdStCount;
+    int pendingFenceCount;
+    int pendingAtomicCount;
+
+    // last cycle when there is an event in this thread
+    Cycles lastActiveCycle;
+    Cycles deadlockThreshold;   // idle limit used by checkDeadlock()
+
+    // a per-address list of outstanding requests
+    typedef std::vector<OutstandingReq> OutstandingReqList;
+    typedef std::unordered_map<Addr, OutstandingReqList> OutstandingReqTable;
+    OutstandingReqTable outstandingLoads;
+    OutstandingReqTable outstandingStores;
+    OutstandingReqTable outstandingAtomics;
+
+    void issueNewEpisode();
+    // check if the next action in the current episode satisfies all wait_cnt
+    // constraints and is ready to issue
+    bool isNextActionReady();
+    void issueNextAction();
+
+    // issue Ops to Ruby memory
+    // must be implemented by a child class
+    virtual void issueLoadOps() = 0;
+    virtual void issueStoreOps() = 0;
+    virtual void issueAtomicOps() = 0;
+    virtual void issueAcquireOp() = 0;
+    virtual void issueReleaseOp() = 0;
+
+    // add an outstanding request to its corresponding table
+    void addOutstandingReqs(OutstandingReqTable& req_table, Addr addr,
+                            int lane, Location loc,
+                            Value stored_val = AddressManager::INVALID_VALUE);
+
+    // pop an outstanding request from the input table
+    OutstandingReq popOutstandingReq(OutstandingReqTable& req_table,
+                                     Addr address);
+
+    // validate all atomic responses
+    void validateAtomicResp(Location loc, int lane, Value ret_val);
+    // validate all Load responses
+    void validateLoadResp(Location loc, int lane, Value ret_val);
+
+    void printOutstandingReqs(const OutstandingReqTable& table,
+                              std::stringstream& ss) const;
+};
+
+#endif /* CPU_TESTERS_PROTOCOL_TESTER_THREAD_HH_ */
diff --git a/src/cpu/testers/gpu_ruby_test/Thread.py b/src/cpu/testers/gpu_ruby_test/Thread.py
new file mode 100644
index 0000000..22d2b78
--- /dev/null
+++ b/src/cpu/testers/gpu_ruby_test/Thread.py
@@ -0,0 +1,46 @@
+#
+# Copyright (c) 2017 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# For use for simulation and test purposes only
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from this
+# software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Tuan Ta
+#
+
+from ClockedObject import ClockedObject
+from m5.params import *
+from m5.proxy import *
+
+class Thread(ClockedObject):    # base SimObject of the tester's threads
+    type = 'Thread'
+    abstract = True             # see CpuThread.py / GpuWavefront.py
+    cxx_header = "cpu/testers/gpu_ruby_test/Thread.hh"
+    thread_id = Param.Int("Unique Thread ID")
+    num_lanes = Param.Int("Number of lanes this thread has")
+    deadlock_threshold = Param.Cycles(1000000000, "Deadlock threshold")
diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc
index 87f29eb..7fd0167 100644
--- a/src/gpu-compute/compute_unit.cc
+++ b/src/gpu-compute/compute_unit.cc
@@ -878,7 +878,7 @@
cu_id, gpuDynInst->simdId, gpuDynInst->wfSlotId, tmp_vaddr);
}
} else {
- if (pkt->cmd == MemCmd::MemFenceReq) {
+ if (pkt->cmd == MemCmd::MemSyncReq) {
gpuDynInst->statusBitVector = VectorMask(0);
} else {
gpuDynInst->statusBitVector &= (~(1ll << index));
@@ -951,7 +951,7 @@
req->setFlags(Request::KERNEL);
}
- // for non-kernel MemFence operations, memorder flags are set depending
+ // for non-kernel MemSync operations, memorder flags are set depending
// on which type of request is currently being sent, so this
// should be set by the caller (e.g. if an inst has acq-rel
// semantics, it will send one acquire req an one release req)
@@ -961,7 +961,7 @@
assert(req->isAcquire() || req->isRelease());
// create packet
- PacketPtr pkt = new Packet(req, MemCmd::MemFenceReq);
+ PacketPtr pkt = new Packet(req, MemCmd::MemSyncReq);
// set packet's sender state
pkt->senderState =
@@ -988,7 +988,7 @@
Addr paddr = pkt->req->getPaddr();
- if (pkt->cmd != MemCmd::MemFenceResp) {
+ if (pkt->cmd != MemCmd::MemSyncResp) {
int index = gpuDynInst->memStatusVector[paddr].back();
DPRINTF(GPUMem, "Response for addr %#x, index %d\n",
diff --git a/src/mem/packet.cc b/src/mem/packet.cc
index ffda3d5..0058df3 100644
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@@ -178,12 +178,12 @@
/* IntReq -- for interrupts */
{ SET4(IsWrite, IsRequest, NeedsResponse, HasData),
MessageResp, "MessageReq" },
- /* IntResp -- for interrupts */
+ /* MessageResp -- for interrupts or memory ACKs */
{ SET2(IsWrite, IsResponse), InvalidCmd, "MessageResp" },
- /* MemFenceReq -- for synchronization requests */
- {SET2(IsRequest, NeedsResponse), MemFenceResp, "MemFenceReq"},
- /* MemFenceResp -- for synchronization responses */
- {SET1(IsResponse), InvalidCmd, "MemFenceResp"},
+ /* MemSyncReq -- for synchronization requests */
+ {SET2(IsRequest, NeedsResponse), MemSyncResp, "MemSyncReq"},
+ /* MemSyncResp -- for synchronization responses */
+ {SET1(IsResponse), InvalidCmd, "MemSyncResp"},
/* Cache Clean Request -- Update with the latest data all existing
copies of the block down to the point indicated by the
request */
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index 66625b3..28f4cfc 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -114,8 +114,8 @@
SwapResp,
MessageReq,
MessageResp,
- MemFenceReq,
- MemFenceResp,
+ MemSyncReq,
+ MemSyncResp,
CleanSharedReq,
CleanSharedResp,
CleanInvalidReq,
diff --git a/src/mem/protocol/GPU_VIPER-TCC.sm b/src/mem/protocol/GPU_VIPER-TCC.sm
index e21e980..42367d4 100644
--- a/src/mem/protocol/GPU_VIPER-TCC.sm
+++ b/src/mem/protocol/GPU_VIPER-TCC.sm
@@ -126,6 +126,7 @@
void wakeUpAllBuffers();
void wakeUpBuffers(Addr a);
+ MachineID mapAddressToMachine(Addr addr, MachineType mtype);
// FUNCTION DEFINITIONS
Tick clockEdge();
diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc
index 5977ce9..5162015 100644
--- a/src/mem/ruby/system/RubyPort.cc
+++ b/src/mem/ruby/system/RubyPort.cc
@@ -267,7 +267,7 @@
}
// Check for pio requests and directly send them to the dedicated
// pio port.
- if (pkt->cmd != MemCmd::MemFenceReq) {
+ if (pkt->cmd != MemCmd::MemSyncReq) {
if (!isPhysMemAddress(pkt->getAddr())) {
assert(ruby_port->memMasterPort.isConnected());
DPRINTF(RubyPort, "Request address %#x assumed to be a "
@@ -304,7 +304,7 @@
return true;
}
- if (pkt->cmd != MemCmd::MemFenceReq) {
+ if (pkt->cmd != MemCmd::MemSyncReq) {
DPRINTF(RubyPort,
"Request for address %#x did not issued because %s\n",
pkt->getAddr(), RequestStatus_to_string(requestStatus));
@@ -326,7 +326,7 @@
// Check for pio requests and directly send them to the dedicated
// pio port.
- if (pkt->cmd != MemCmd::MemFenceReq) {
+ if (pkt->cmd != MemCmd::MemSyncReq) {
if (!isPhysMemAddress(pkt->getAddr())) {
assert(ruby_port->memMasterPort.isConnected());
DPRINTF(RubyPort, "Request address %#x assumed to be a "
@@ -544,7 +544,7 @@
}
// Flush, acquire, release requests don't access physical memory
- if (pkt->isFlush() || pkt->cmd == MemCmd::MemFenceReq) {
+ if (pkt->isFlush() || pkt->cmd == MemCmd::MemSyncReq) {
accessPhysMem = false;
}