misc: Merge branch version update into develop
diff --git a/configs/common/MemConfig.py b/configs/common/MemConfig.py
index 9443520..b530145 100644
--- a/configs/common/MemConfig.py
+++ b/configs/common/MemConfig.py
@@ -40,7 +40,8 @@
from common import ObjectList
from common import HMC
-def create_mem_ctrl(cls, r, i, nbr_mem_ctrls, intlv_bits, intlv_size):
+def create_mem_ctrl(cls, r, i, nbr_mem_ctrls, intlv_bits, intlv_size,\
+ xor_low_bit):
"""
Helper function for creating a single memoy controller from the given
options. This function is invoked multiple times in config_mem function
@@ -55,7 +56,10 @@
# the details of the caches here, make an educated guess. 4 MByte
# 4-way associative with 64 byte cache lines is 6 offset bits and
# 14 index bits.
- xor_low_bit = 20
+ if (xor_low_bit):
+ xor_high_bit = xor_low_bit + intlv_bits - 1
+ else:
+ xor_high_bit = 0
# Create an instance so we can figure out the address
# mapping and row-buffer size
@@ -81,8 +85,7 @@
ctrl.range = m5.objects.AddrRange(r.start, size = r.size(),
intlvHighBit = \
intlv_low_bit + intlv_bits - 1,
- xorHighBit = \
- xor_low_bit + intlv_bits - 1,
+ xorHighBit = xor_high_bit,
intlvBits = intlv_bits,
intlvMatch = i)
return ctrl
@@ -110,6 +113,7 @@
opt_mem_ranks = getattr(options, "mem_ranks", None)
opt_dram_powerdown = getattr(options, "enable_dram_powerdown", None)
opt_mem_channels_intlv = getattr(options, "mem_channels_intlv", 128)
+ opt_xor_low_bit = getattr(options, "xor_low_bit", 0)
if opt_mem_type == "HMC_2500_1x32":
HMChost = HMC.config_hmc_host_ctrl(options, system)
@@ -163,7 +167,7 @@
for r in system.mem_ranges:
for i in range(nbr_mem_ctrls):
mem_ctrl = create_mem_ctrl(cls, r, i, nbr_mem_ctrls, intlv_bits,
- intlv_size)
+ intlv_size, opt_xor_low_bit)
# Set the number of ranks based on the command-line
# options if it was explicitly set
if issubclass(cls, m5.objects.DRAMCtrl) and opt_mem_ranks:
diff --git a/configs/ruby/MOESI_CMP_directory.py b/configs/ruby/MOESI_CMP_directory.py
index 8778b61..a78f73c 100644
--- a/configs/ruby/MOESI_CMP_directory.py
+++ b/configs/ruby/MOESI_CMP_directory.py
@@ -211,6 +211,7 @@
dir_cntrl.forwardFromDir.master = ruby_system.network.slave
dir_cntrl.requestToMemory = MessageBuffer()
dir_cntrl.responseFromMemory = MessageBuffer()
+ dir_cntrl.triggerQueue = MessageBuffer(ordered = True)
for i, dma_port in enumerate(dma_ports):
diff --git a/configs/ruby/Ruby.py b/configs/ruby/Ruby.py
index e69784f..9bceaa3 100644
--- a/configs/ruby/Ruby.py
+++ b/configs/ruby/Ruby.py
@@ -76,6 +76,15 @@
parser.add_option("--numa-high-bit", type="int", default=0,
help="high order address bit to use for numa mapping. " \
"0 = highest bit, not specified = lowest bit")
+ parser.add_option("--interleaving-bits", type="int", default=0,
+ help="number of bits to specify interleaving " \
+ "in directory, memory controllers and caches. "
+ "0 = not specified")
+ parser.add_option("--xor-low-bit", type="int", default=20,
+ help="hashing bit for channel selection. " \
+ "See MemConfig for explanation of the default "\
+ "parameter. If set to 0, xor_high_bit is also "\
+ "set to 0.")
parser.add_option("--recycle-latency", type="int", default=10,
help="Recycle latency for ruby controller input buffers")
@@ -86,7 +95,13 @@
Network.define_options(parser)
def setup_memory_controllers(system, ruby, dir_cntrls, options):
- ruby.block_size_bytes = options.cacheline_size
+ if (options.numa_high_bit):
+ block_size_bits = options.numa_high_bit + 1 - \
+ int(math.log(options.num_dirs, 2))
+ ruby.block_size_bytes = 2 ** (block_size_bits)
+ else:
+ ruby.block_size_bytes = options.cacheline_size
+
ruby.memory_size_bits = 48
index = 0
@@ -117,7 +132,7 @@
mem_type = ObjectList.mem_list.get(options.mem_type)
mem_ctrl = MemConfig.create_mem_ctrl(mem_type, r, index,
options.num_dirs, int(math.log(options.num_dirs, 2)),
- intlv_size)
+ intlv_size, options.xor_low_bit)
if options.access_backing_store:
mem_ctrl.kvm_map=False
diff --git a/src/arch/arm/freebsd/fs_workload.cc b/src/arch/arm/freebsd/fs_workload.cc
index 91a1d89..e3660d9 100644
--- a/src/arch/arm/freebsd/fs_workload.cc
+++ b/src/arch/arm/freebsd/fs_workload.cc
@@ -83,13 +83,12 @@
if (params()->early_kernel_symbols) {
kernelObj->loadGlobalSymbols(kernelSymtab, 0, 0, _loadAddrMask);
kernelObj->loadGlobalSymbols(
- Loader::debugSymbolTable, 0, 0, _loadAddrMask);
+ &Loader::debugSymbolTable, 0, 0, _loadAddrMask);
}
// Check if the kernel image has a symbol that tells us it supports
// device trees.
- Addr addr;
- fatal_if(!kernelSymtab->findAddress("fdt_get_range", addr),
+ fatal_if(kernelSymtab->find("fdt_get_range") == kernelSymtab->end(),
"Kernel must have fdt support.");
fatal_if(params()->dtb_filename == "", "dtb file is not specified.");
diff --git a/src/arch/arm/fs_workload.cc b/src/arch/arm/fs_workload.cc
index 09c7bb2..4c654b8 100644
--- a/src/arch/arm/fs_workload.cc
+++ b/src/arch/arm/fs_workload.cc
@@ -91,7 +91,7 @@
"Can't find a matching boot loader / kernel combination!");
if (bootldr)
- bootldr->loadGlobalSymbols(Loader::debugSymbolTable);
+ bootldr->loadGlobalSymbols(&Loader::debugSymbolTable);
}
void
diff --git a/src/arch/arm/insts/static_inst.cc b/src/arch/arm/insts/static_inst.cc
index b84aa81..9c686f6 100644
--- a/src/arch/arm/insts/static_inst.cc
+++ b/src/arch/arm/insts/static_inst.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010-2014, 2016-2019 ARM Limited
+ * Copyright (c) 2010-2014, 2016-2020 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -393,18 +393,19 @@
ArmStaticInst::printTarget(std::ostream &os, Addr target,
const Loader::SymbolTable *symtab) const
{
- Addr symbolAddr;
- std::string symbol;
-
- if (symtab && symtab->findNearestSymbol(target, symbol, symbolAddr)) {
- ccprintf(os, "<%s", symbol);
- if (symbolAddr != target)
- ccprintf(os, "+%d>", target - symbolAddr);
- else
- ccprintf(os, ">");
- } else {
- ccprintf(os, "%#x", target);
+ if (symtab) {
+ auto it = symtab->findNearest(target);
+ if (it != symtab->end()) {
+ ccprintf(os, "<%s", it->name);
+ Addr delta = target - it->address;
+ if (delta)
+ ccprintf(os, "+%d>", delta);
+ else
+ ccprintf(os, ">");
+ return;
+ }
}
+ ccprintf(os, "%#x", target);
}
void
@@ -477,13 +478,14 @@
const Addr addr,
const std::string &suffix) const
{
- Addr symbolAddr;
- std::string symbol;
- if (symtab && symtab->findNearestSymbol(addr, symbol, symbolAddr)) {
- ccprintf(os, "%s%s", prefix, symbol);
- if (symbolAddr != addr)
- ccprintf(os, "+%d", addr - symbolAddr);
- ccprintf(os, suffix);
+ if (symtab) {
+ auto it = symtab->findNearest(addr);
+ if (it != symtab->end()) {
+ ccprintf(os, "%s%s", prefix, it->name);
+ if (it->address != addr)
+ ccprintf(os, "+%d", addr - it->address);
+ ccprintf(os, suffix);
+ }
}
}
@@ -984,37 +986,41 @@
}
Fault
-ArmStaticInst::checkSveTrap(ThreadContext *tc, CPSR cpsr) const
+ArmStaticInst::checkSveEnabled(ThreadContext *tc, CPSR cpsr, CPACR cpacr) const
{
const ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el;
+ // Check if access disabled in CPACR_EL1
+ if (el <= EL1 && !ELIsInHost(tc, el)) {
+ if ((el == EL0 && cpacr.zen == 0x1) ||
+ (!(cpacr.zen & 0x1)))
+ return sveAccessTrap(EL1);
- if (ArmSystem::haveVirtualization(tc) && el <= EL2) {
- CPTR cptrEnCheck = tc->readMiscReg(MISCREG_CPTR_EL2);
- if (cptrEnCheck.tz)
- return sveAccessTrap(EL2);
+ if ((el == EL0 && cpacr.fpen == 0x1) ||
+ (!(cpacr.fpen & 0x1)))
+ return advSIMDFPAccessTrap64(EL1);
}
+ // Check if access disabled in CPTR_EL2
+ if (el <= EL2 && EL2Enabled(tc)) {
+ CPTR cptr_en_check = tc->readMiscReg(MISCREG_CPTR_EL2);
+ if (cptr_en_check.tz)
+ return sveAccessTrap(EL2);
+ if (cptr_en_check.tfp)
+ return advSIMDFPAccessTrap64(EL2);
+ }
+
+ // Check if access disabled in CPTR_EL3
if (ArmSystem::haveSecurity(tc)) {
- CPTR cptrEnCheck = tc->readMiscReg(MISCREG_CPTR_EL3);
- if (!cptrEnCheck.ez)
+ CPTR cptr_en_check = tc->readMiscReg(MISCREG_CPTR_EL3);
+ if (!cptr_en_check.ez)
return sveAccessTrap(EL3);
+ if (cptr_en_check.tfp)
+ return advSIMDFPAccessTrap64(EL3);
}
return NoFault;
}
-Fault
-ArmStaticInst::checkSveEnabled(ThreadContext *tc, CPSR cpsr, CPACR cpacr) const
-{
- const ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el;
- if ((el == EL0 && cpacr.zen != 0x3) ||
- (el == EL1 && !(cpacr.zen & 0x1)))
- return sveAccessTrap(EL1);
-
- return checkSveTrap(tc, cpsr);
-}
-
-
static uint8_t
getRestoredITBits(ThreadContext *tc, CPSR spsr)
{
diff --git a/src/arch/arm/insts/static_inst.hh b/src/arch/arm/insts/static_inst.hh
index ce6569a..bee3903 100644
--- a/src/arch/arm/insts/static_inst.hh
+++ b/src/arch/arm/insts/static_inst.hh
@@ -476,11 +476,6 @@
Fault sveAccessTrap(ExceptionLevel el) const;
/**
- * Check an SVE access against CPTR_EL2 and CPTR_EL3.
- */
- Fault checkSveTrap(ThreadContext *tc, CPSR cpsr) const;
-
- /**
* Check an SVE access against CPACR_EL1, CPTR_EL2, and CPTR_EL3.
*/
Fault checkSveEnabled(ThreadContext *tc, CPSR cpsr, CPACR cpacr) const;
diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc
index b3d6726..b18bbb0 100644
--- a/src/arch/arm/isa.cc
+++ b/src/arch/arm/isa.cc
@@ -787,12 +787,12 @@
if (upper > 0) {
miscRegs[lower] = bits(v, 31, 0);
miscRegs[upper] = bits(v, 63, 32);
- DPRINTF(MiscRegs, "Writing to misc reg %d (%d:%d) : %#x\n",
- misc_reg, lower, upper, v);
+ DPRINTF(MiscRegs, "Writing MiscReg %s (%d %d:%d) : %#x\n",
+ miscRegName[misc_reg], misc_reg, lower, upper, v);
} else {
miscRegs[lower] = v;
- DPRINTF(MiscRegs, "Writing to misc reg %d (%d) : %#x\n",
- misc_reg, lower, v);
+ DPRINTF(MiscRegs, "Writing MiscReg %s (%d %d) : %#x\n",
+ miscRegName[misc_reg], misc_reg, lower, v);
}
}
diff --git a/src/arch/arm/isa/formats/branch.isa b/src/arch/arm/isa/formats/branch.isa
index b7360fc..7c726ef 100644
--- a/src/arch/arm/isa/formats/branch.isa
+++ b/src/arch/arm/isa/formats/branch.isa
@@ -1,6 +1,6 @@
// -*- mode:c++ -*-
-// Copyright (c) 2010,2012-2013,2017-2018 ARM Limited
+// Copyright (c) 2010,2012-2013,2017-2018, 2020 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -187,8 +187,7 @@
case 0x4:
return new SevInst(machInst);
case 0x5:
- return new WarnUnimplemented(
- "sevl", machInst);
+ return new SevlInst(machInst);
}
break;
case 0x1:
diff --git a/src/arch/arm/isa/formats/data.isa b/src/arch/arm/isa/formats/data.isa
index a927f2b..b742951 100644
--- a/src/arch/arm/isa/formats/data.isa
+++ b/src/arch/arm/isa/formats/data.isa
@@ -1,4 +1,4 @@
-// Copyright (c) 2010,2017-2018 ARM Limited
+// Copyright (c) 2010,2017-2018, 2020 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -1136,8 +1136,7 @@
case 0x4:
return new SevInst(machInst);
case 0x5:
- return new WarnUnimplemented(
- "sevl", machInst);
+ return new SevlInst(machInst);
case 0x10:
return new WarnUnimplemented(
"esb", machInst);
@@ -1283,6 +1282,8 @@
return new WfiInst(machInst);
case 0x4:
return new SevInst(machInst);
+ case 0x5:
+ return new SevlInst(machInst);
default:
return new WarnUnimplemented("unallocated_hint", machInst);
}
diff --git a/src/arch/arm/isa/insts/ldr.isa b/src/arch/arm/isa/insts/ldr.isa
index dc1d650..d828fcf 100644
--- a/src/arch/arm/isa/insts/ldr.isa
+++ b/src/arch/arm/isa/insts/ldr.isa
@@ -1,6 +1,6 @@
// -*- mode:c++ -*-
-// Copyright (c) 2010-2011,2019 ARM Limited
+// Copyright (c) 2010-2011,2019-2020 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -182,6 +182,7 @@
self.instFlags.extend(["IsMemBarrier",
"IsWriteBarrier",
"IsReadBarrier"])
+ self.memFlags.append("Request::ACQUIRE")
# Disambiguate the class name for different flavors of loads
if self.flavor != "normal":
@@ -256,6 +257,7 @@
self.instFlags.extend(["IsMemBarrier",
"IsWriteBarrier",
"IsReadBarrier"])
+ self.memFlags.append("Request::ACQUIRE")
def emit(self):
# Address computation code
diff --git a/src/arch/arm/isa/insts/ldr64.isa b/src/arch/arm/isa/insts/ldr64.isa
index 4f12509..fc4f34f 100644
--- a/src/arch/arm/isa/insts/ldr64.isa
+++ b/src/arch/arm/isa/insts/ldr64.isa
@@ -1,6 +1,6 @@
// -*- mode:c++ -*-
-// Copyright (c) 2011-2014, 2017, 2019 ARM Limited
+// Copyright (c) 2011-2014, 2017, 2019-2020 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -94,6 +94,8 @@
self.instFlags.extend(["IsMemBarrier",
"IsWriteBarrier",
"IsReadBarrier"])
+ self.memFlags.append("Request::ACQUIRE")
+
if self.flavor in ("acex", "exclusive", "exp", "acexp"):
self.memFlags.append("Request::LLSC")
diff --git a/src/arch/arm/isa/insts/str.isa b/src/arch/arm/isa/insts/str.isa
index f542478..e99f6ad 100644
--- a/src/arch/arm/isa/insts/str.isa
+++ b/src/arch/arm/isa/insts/str.isa
@@ -1,6 +1,6 @@
// -*- mode:c++ -*-
-// Copyright (c) 2010-2011,2017,2019 ARM Limited
+// Copyright (c) 2010-2011,2017,2019-2020 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -190,6 +190,7 @@
self.instFlags.extend(["IsMemBarrier",
"IsWriteBarrier",
"IsReadBarrier"])
+ self.memFlags.append("Request::RELEASE")
# Disambiguate the class name for different flavors of stores
if self.flavor != "normal":
@@ -271,6 +272,7 @@
self.instFlags.extend(["IsMemBarrier",
"IsWriteBarrier",
"IsReadBarrier"])
+ self.memFlags.append("Request::RELEASE")
# Disambiguate the class name for different flavors of stores
if self.flavor != "normal":
diff --git a/src/arch/arm/isa/insts/str64.isa b/src/arch/arm/isa/insts/str64.isa
index 22d1456..7ad1cad 100644
--- a/src/arch/arm/isa/insts/str64.isa
+++ b/src/arch/arm/isa/insts/str64.isa
@@ -1,6 +1,6 @@
// -*- mode:c++ -*-
-// Copyright (c) 2011-2013,2017,2019 ARM Limited
+// Copyright (c) 2011-2013,2017,2019-2020 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -82,6 +82,8 @@
self.instFlags.extend(["IsMemBarrier",
"IsWriteBarrier",
"IsReadBarrier"])
+ self.memFlags.append("Request::RELEASE")
+
if self.flavor in ("relex", "exclusive", "exp", "relexp"):
self.instFlags.append("IsStoreConditional")
self.memFlags.append("Request::LLSC")
diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa
index aa4f194..9314ba9 100644
--- a/src/arch/arm/isa/insts/sve.isa
+++ b/src/arch/arm/isa/insts/sve.isa
@@ -1516,27 +1516,49 @@
# Generates definitions for SVE floating-point conversions (always
# unary, constructive, merging
def sveCvtInst(name, Name, opClass, types, op, direction=CvtDir.Narrow,
- decoder='Generic'):
+ decoder='Generic', signed=False):
global header_output, exec_output, decoders
+
+ if signed:
+ mask = "SElement msk = mask(sizeof(DElement)*8);"
+ assign_code = '''
+ int sign_bit = bits(destElem, sizeof(DElement)*8 -1);
+ AA64FpDest_x%(bigElemSuffix)s[i] =
+ sign_bit? (destElem|~msk): destElem;
+ ''' % {
+ 'bigElemSuffix': 's' if direction == CvtDir.Narrow else 'd'
+ }
+ else:
+ mask = "";
+ assign_code = '''
+ AA64FpDest_x%(bigElemSuffix)s[i] = destElem;
+ ''' % {
+ 'bigElemSuffix': 's' if direction == CvtDir.Narrow else 'd'
+ }
+
code = sveEnabledCheckCode + '''
unsigned eCount = ArmStaticInst::getCurSveVecLen<%(bigElemType)s>(
xc->tcBase());
+ %(mask)s
for (unsigned i = 0; i < eCount; i++) {
SElement srcElem1 = AA64FpOp1_x%(bigElemSuffix)s[i] &
mask(sizeof(SElement) * 8);
DElement destElem = 0;
if (GpOp_x%(bigElemSuffix)s[i]) {
%(op)s
- AA64FpDest_x%(bigElemSuffix)s[i] = destElem;
+ %(assign)s;
} else {
AA64FpDest_x%(bigElemSuffix)s[i] =
AA64FpDestMerge_x%(bigElemSuffix)s[i];
}
}
- ''' % {'op': op,
- 'bigElemType': 'SElement' if direction == CvtDir.Narrow
+ ''' % {'bigElemType': 'SElement' if direction == CvtDir.Narrow
else 'DElement',
- 'bigElemSuffix': 's' if direction == CvtDir.Narrow else 'd'}
+ 'op': op, 'mask': mask,
+ 'bigElemSuffix': 's' if direction == CvtDir.Narrow else 'd',
+ 'assign': assign_code
+ }
+
iop = InstObjParams(name, 'Sve' + Name, 'SveUnaryPredOp',
{'code': code, 'op_class': opClass}, [])
header_output += SveWideningUnaryPredOpDeclare.subst(iop)
@@ -1813,26 +1835,25 @@
xc->tcBase());
// Number of elements in a 128 bit segment
- constexpr unsigned ePerSegment = 128 / sizeof(Element);
+ constexpr unsigned ePerSegment = 16 / sizeof(Element);
- '''
-
- code += '''
+ ArmISA::VecRegContainer tmpC;
+ auto auxDest = tmpC.as<TPElem>();
for (unsigned i = 0; i < eCount; i++) {
- const auto segmentBase = i - i % ePerSegment;
- const auto segmentIdx = segmentBase + index;
+ const auto segmentBase = i - i %% ePerSegment;
+ const auto segmentIdx = segmentBase + index;
- const Element& srcElem1 = AA64FpOp1_x[i];
- const Element& srcElem2 = AA64FpOp2_x[segmentIdx];
- Element destElem = 0;
+ const Element& srcElem1 = AA64FpOp1_x[i];
+ const Element& srcElem2 = AA64FpOp2_x[segmentIdx];
+ Element destElem = 0;
- '''
-
- code += '''
- %(op)s
- AA64FpDest_x[i] = destElem;
+ %(op)s
+ auxDest[i] = destElem;
}
- ''' % {'op': op}
+
+ for (unsigned i = 0; i < eCount; i++) {
+ AA64FpDest_x[i] = auxDest[i];
+ }''' % {'op':op}
baseClass = 'SveBinIdxUnpredOp'
@@ -2045,8 +2066,10 @@
xc->tcBase());
// Number of elements in a 128 bit segment
- constexpr unsigned ePerSegment = 128 / sizeof(Element);
+ constexpr unsigned ePerSegment = 16 / sizeof(Element);
+ ArmISA::VecRegContainer tmpC;
+ auto auxDest = tmpC.as<TPElem>();
for (unsigned i = 0; i < eCount; i++) {
const auto segmentBase = i - i % ePerSegment;
const auto segmentIdx = segmentBase + index;
@@ -2055,10 +2078,13 @@
const Element& srcElem2 = AA64FpOp2_x[segmentIdx];
Element destElem = AA64FpDestMerge_x[i];
'''
-
code += '''
%(op)s
- AA64FpDest_x[i] = destElem;
+ auxDest[i] = destElem;
+ }
+
+ for (unsigned i = 0; i < eCount; i++) {
+ AA64FpDest_x[i] = auxDest[i];
}''' % {'op': op}
iop = InstObjParams(name, 'Sve' + Name, 'SveBinIdxUnpredOp',
@@ -2743,6 +2769,7 @@
code = sveEnabledCheckCode + '''
unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
xc->tcBase());
+
ArmISA::VecRegContainer tmpVecC;
auto auxDest = tmpVecC.as<Element>();
int firstelem = -1, lastelem = -2;
@@ -3001,6 +3028,9 @@
code += '''
uint32_t eltspersegment = 16 / (2 * sizeof(Element));'''
code += '''
+ ArmISA::VecRegContainer tmpC;
+ auto auxDest = tmpC.as<TPElem>();
+
for (int i = 0; i < eCount / 2; ++i) {'''
if predType == PredType.NONE:
code += '''
@@ -3044,9 +3074,14 @@
code += '''
}'''
code += '''
- AA64FpDest_x[2 * i] = addend_r;
- AA64FpDest_x[2 * i + 1] = addend_i;
- }'''
+ auxDest[2 * i] = addend_r;
+ auxDest[2 * i + 1] = addend_i;
+ }
+
+ for (unsigned i = 0; i < eCount; i++) {
+ AA64FpDest_x[i] = auxDest[i];
+ }
+ '''
iop = InstObjParams(name, 'Sve' + Name,
'SveComplexIdxOp' if predType == PredType.NONE
else 'SveComplexOp',
@@ -3596,7 +3631,7 @@
'uint32_t, uint32_t',
'uint64_t, uint32_t',
'uint64_t, uint64_t'),
- fcvtzsCode, CvtDir.Narrow)
+ fcvtzsCode, CvtDir.Narrow, signed=True)
sveCvtInst('fcvtzs', 'FcvtzsWiden', 'SimdCvtOp',
('uint16_t, uint32_t',
'uint16_t, uint64_t',
diff --git a/src/arch/arm/linux/fs_workload.cc b/src/arch/arm/linux/fs_workload.cc
index cc28193..7f0853f 100644
--- a/src/arch/arm/linux/fs_workload.cc
+++ b/src/arch/arm/linux/fs_workload.cc
@@ -78,15 +78,14 @@
if (params()->early_kernel_symbols) {
kernelObj->loadGlobalSymbols(kernelSymtab, 0, 0, _loadAddrMask);
kernelObj->loadGlobalSymbols(
- Loader::debugSymbolTable, 0, 0, _loadAddrMask);
+ &Loader::debugSymbolTable, 0, 0, _loadAddrMask);
}
// Setup boot data structure
- Addr addr;
// Check if the kernel image has a symbol that tells us it supports
// device trees.
bool kernel_has_fdt_support =
- kernelSymtab->findAddress("unflatten_device_tree", addr);
+ kernelSymtab->find("unflatten_device_tree") != kernelSymtab->end();
bool dtb_file_specified = params()->dtb_filename != "";
if (kernel_has_fdt_support && dtb_file_specified) {
diff --git a/src/arch/arm/linux/process.cc b/src/arch/arm/linux/process.cc
index b5b6553..4c679b3 100644
--- a/src/arch/arm/linux/process.cc
+++ b/src/arch/arm/linux/process.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010-2013, 2015 ARM Limited
+ * Copyright (c) 2010-2013, 2015, 2020 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -290,14 +290,14 @@
{ base + 151, "munlock" },
{ base + 152, "mlockall" },
{ base + 153, "munlockall" },
- { base + 154, "sched_setparam" },
- { base + 155, "sched_getparam" },
- { base + 156, "sched_setscheduler" },
- { base + 157, "sched_getscheduler" },
- { base + 158, "sched_yield" },
- { base + 159, "sched_get_priority_max" },
- { base + 160, "sched_get_priority_min" },
- { base + 161, "sched_rr_get_interval" },
+ { base + 154, "sched_setparam", ignoreWarnOnceFunc },
+ { base + 155, "sched_getparam", ignoreWarnOnceFunc },
+ { base + 156, "sched_setscheduler", ignoreWarnOnceFunc },
+ { base + 157, "sched_getscheduler", ignoreWarnOnceFunc },
+ { base + 158, "sched_yield", ignoreWarnOnceFunc },
+ { base + 159, "sched_get_priority_max", ignoreWarnOnceFunc },
+ { base + 160, "sched_get_priority_min", ignoreWarnOnceFunc },
+ { base + 161, "sched_rr_get_interval", ignoreWarnOnceFunc },
{ base + 162, "nanosleep", ignoreWarnOnceFunc },
{ base + 163, "mremap", mremapFunc<ArmLinux32> }, // ARM-specific
{ base + 164, "setresuid" },
@@ -375,7 +375,7 @@
{ base + 238, "tkill" },
{ base + 239, "sendfile64" },
{ base + 240, "futex", futexFunc<ArmLinux32> },
- { base + 241, "sched_setaffinity" },
+ { base + 241, "sched_setaffinity", ignoreWarnOnceFunc },
{ base + 242, "sched_getaffinity", ignoreFunc },
{ base + 243, "io_setup" },
{ base + 244, "io_destroy" },
@@ -476,7 +476,7 @@
{ base + 342, "tee" },
{ base + 343, "vmsplice" },
{ base + 344, "move_pages" },
- { base + 345, "getcpu" },
+ { base + 345, "getcpu", getcpuFunc },
{ base + 346, "epoll_pwait" },
{ base + 347, "sys_kexec_load" },
{ base + 348, "sys_utimensat" },
@@ -631,16 +631,16 @@
{ base + 115, "clock_nanosleep" },
{ base + 116, "syslog" },
{ base + 117, "ptrace" },
- { base + 118, "sched_setparam" },
- { base + 119, "sched_setscheduler" },
- { base + 120, "sched_getscheduler" },
- { base + 121, "sched_getparam" },
- { base + 122, "sched_setaffinity" },
+ { base + 118, "sched_setparam", ignoreWarnOnceFunc },
+ { base + 119, "sched_setscheduler", ignoreWarnOnceFunc },
+ { base + 120, "sched_getscheduler", ignoreWarnOnceFunc },
+ { base + 121, "sched_getparam", ignoreWarnOnceFunc },
+ { base + 122, "sched_setaffinity", ignoreWarnOnceFunc },
{ base + 123, "sched_getaffinity", ignoreFunc },
- { base + 124, "sched_yield" },
- { base + 125, "sched_get_priority_max" },
- { base + 126, "sched_get_priority_min" },
- { base + 127, "sched_rr_get_interval" },
+ { base + 124, "sched_yield", ignoreWarnOnceFunc },
+ { base + 125, "sched_get_priority_max", ignoreWarnOnceFunc },
+ { base + 126, "sched_get_priority_min", ignoreWarnOnceFunc },
+ { base + 127, "sched_rr_get_interval", ignoreWarnOnceFunc },
{ base + 128, "restart_syscall" },
{ base + 129, "kill", ignoreFunc },
{ base + 130, "tkill" },
@@ -681,7 +681,7 @@
{ base + 165, "getrusage", getrusageFunc<ArmLinux64> },
{ base + 166, "umask" },
{ base + 167, "prctl" },
- { base + 168, "getcpu" },
+ { base + 168, "getcpu", getcpuFunc },
{ base + 169, "gettimeofday", gettimeofdayFunc<ArmLinux64> },
{ base + 170, "settimeofday" },
{ base + 171, "adjtimex" },
diff --git a/src/arch/arm/semihosting.cc b/src/arch/arm/semihosting.cc
index a7a4d2a..ab21dda 100644
--- a/src/arch/arm/semihosting.cc
+++ b/src/arch/arm/semihosting.cc
@@ -696,18 +696,6 @@
namespace GuestABI
{
-// Ignore return values since those will be handled by semihosting.
-template <typename T>
-struct Result<SemiPseudoAbi32, T>
-{
- static void store(ThreadContext *tc, const T &ret) {}
-};
-template <typename T>
-struct Result<SemiPseudoAbi64, T>
-{
- static void store(ThreadContext *tc, const T &ret) {}
-};
-
// Handle arguments the same as for semihosting operations. Skipping the first
// slot is handled internally by the State type.
template <typename T>
diff --git a/src/arch/arm/stacktrace.cc b/src/arch/arm/stacktrace.cc
index d0e702c..535cddd 100644
--- a/src/arch/arm/stacktrace.cc
+++ b/src/arch/arm/stacktrace.cc
@@ -47,11 +47,10 @@
PortProxy &vp = tc->getVirtProxy();
const auto *symtab = tc->getSystemPtr()->workload->symtab(tc);
- Addr addr;
- if (!symtab->findAddress(name, addr))
- panic("thread info not compiled into kernel\n");
+ auto it = symtab->find(name);
+ panic_if(it == symtab->end(), "Thread info not compiled into kernel.");
- return vp.read<int32_t>(addr, GuestByteOrder);
+ return vp.read<int32_t>(it->address, GuestByteOrder);
}
ProcessInfo::ProcessInfo(ThreadContext *_tc) : tc(_tc)
diff --git a/src/arch/generic/linux/threadinfo.hh b/src/arch/generic/linux/threadinfo.hh
index 83e41b9..5e058cf 100644
--- a/src/arch/generic/linux/threadinfo.hh
+++ b/src/arch/generic/linux/threadinfo.hh
@@ -46,15 +46,16 @@
bool
get_data(const char *symbol, T &data)
{
- Addr addr = 0;
- if (!sys->workload->symtab(tc)->findAddress(symbol, addr)) {
+ auto *symtab = sys->workload->symtab(tc);
+ auto it = symtab->find(symbol);
+ if (it == symtab->end()) {
warn_once("Unable to find kernel symbol %s\n", symbol);
warn_once("Kernel not compiled with task_struct info; can't get "
"currently executing task/process/thread name/ids!\n");
return false;
}
- data = tc->getVirtProxy().read<T>(addr, TheISA::GuestByteOrder);
+ data = tc->getVirtProxy().read<T>(it->address, TheISA::GuestByteOrder);
return true;
}
diff --git a/src/arch/mips/isa/formats/branch.isa b/src/arch/mips/isa/formats/branch.isa
index 06662f0..4975a13 100644
--- a/src/arch/mips/isa/formats/branch.isa
+++ b/src/arch/mips/isa/formats/branch.isa
@@ -193,11 +193,11 @@
Addr target = pc + 4 + disp;
- std::string str;
- if (symtab && symtab->findSymbol(target, str))
- ss << str;
+ Loader::SymbolTable::const_iterator it;
+ if (symtab && (it = symtab->find(target)) != symtab->end())
+ ss << it->name;
else
- ccprintf(ss, "0x%x", target);
+ ccprintf(ss, "%#x", target);
return ss.str();
}
@@ -213,9 +213,9 @@
Addr npc = pc + 4;
ccprintf(ss,"0x%x",(npc & 0xF0000000) | disp);
} else if (_numSrcRegs == 0) {
- std::string str;
- if (symtab && symtab->findSymbol(disp, str))
- ss << str;
+ Loader::SymbolTable::const_iterator it;
+ if (symtab && (it = symtab->find(disp)) != symtab->end())
+ ss << it->name;
else
ccprintf(ss, "0x%x", disp);
} else if (_numSrcRegs == 1) {
diff --git a/src/arch/power/insts/branch.cc b/src/arch/power/insts/branch.cc
index 5fe0c4e..3511b6b 100644
--- a/src/arch/power/insts/branch.cc
+++ b/src/arch/power/insts/branch.cc
@@ -68,11 +68,11 @@
Addr target = pc + disp;
- std::string str;
- if (symtab && symtab->findSymbol(target, str))
- ss << str;
+ Loader::SymbolTable::const_iterator it;
+ if (symtab && (it = symtab->find(target)) != symtab->end())
+ ss << it->name;
else
- ccprintf(ss, "0x%x", target);
+ ccprintf(ss, "%#x", target);
return ss.str();
}
@@ -91,11 +91,11 @@
ccprintf(ss, "%-10s ", mnemonic);
- std::string str;
- if (symtab && symtab->findSymbol(targetAddr, str))
- ss << str;
+ Loader::SymbolTable::const_iterator it;
+ if (symtab && (it = symtab->find(targetAddr)) != symtab->end())
+ ss << it->name;
else
- ccprintf(ss, "0x%x", targetAddr);
+ ccprintf(ss, "%#x", targetAddr);
return ss.str();
}
@@ -118,11 +118,11 @@
Addr target = pc + disp;
- std::string str;
- if (symtab && symtab->findSymbol(target, str))
- ss << str;
+ Loader::SymbolTable::const_iterator it;
+ if (symtab && (it = symtab->find(target)) != symtab->end())
+ ss << it->name;
else
- ccprintf(ss, "0x%x", target);
+ ccprintf(ss, "%#x", target);
return ss.str();
}
@@ -143,11 +143,11 @@
ss << bo << ", " << bi << ", ";
- std::string str;
- if (symtab && symtab->findSymbol(targetAddr, str))
- ss << str;
+ Loader::SymbolTable::const_iterator it;
+ if (symtab && (it = symtab->find(targetAddr)) != symtab->end())
+ ss << it->name;
else
- ccprintf(ss, "0x%x", targetAddr);
+ ccprintf(ss, "%#x", targetAddr);
return ss.str();
}
diff --git a/src/arch/riscv/bare_metal/fs_workload.hh b/src/arch/riscv/bare_metal/fs_workload.hh
index 2e26ad1..a142d47 100644
--- a/src/arch/riscv/bare_metal/fs_workload.hh
+++ b/src/arch/riscv/bare_metal/fs_workload.hh
@@ -55,9 +55,9 @@
return bootloaderSymtab;
}
bool
- insertSymbol(Addr address, const std::string &symbol) override
+ insertSymbol(const Loader::Symbol &symbol) override
{
- return bootloaderSymtab->insert(address, symbol);
+ return bootloaderSymtab->insert(symbol);
}
};
diff --git a/src/arch/sparc/fs_workload.hh b/src/arch/sparc/fs_workload.hh
index 0323714..650ed37 100644
--- a/src/arch/sparc/fs_workload.hh
+++ b/src/arch/sparc/fs_workload.hh
@@ -61,9 +61,9 @@
}
bool
- insertSymbol(Addr address, const std::string &symbol) override
+ insertSymbol(const Loader::Symbol &symbol) override
{
- return defaultSymtab.insert(address, symbol);
+ return defaultSymtab.insert(symbol);
}
};
diff --git a/src/arch/sparc/insts/branch.cc b/src/arch/sparc/insts/branch.cc
index 8ffa241..52517e6 100644
--- a/src/arch/sparc/insts/branch.cc
+++ b/src/arch/sparc/insts/branch.cc
@@ -77,18 +77,17 @@
Addr pc, const Loader::SymbolTable *symtab) const
{
std::stringstream response;
- std::string symbol;
- Addr symbol_addr;
Addr target = disp + pc;
printMnemonic(response, mnemonic);
- ccprintf(response, "0x%x", target);
+ ccprintf(response, "%#x", target);
- if (symtab && symtab->findNearestSymbol(target, symbol, symbol_addr)) {
- ccprintf(response, " <%s", symbol);
- if (symbol_addr != target)
- ccprintf(response, "+%d>", target - symbol_addr);
+ Loader::SymbolTable::const_iterator it;
+ if (symtab && (it = symtab->findNearest(target)) != symtab->end()) {
+ ccprintf(response, " <%s", it->name);
+ if (it->address != target)
+ ccprintf(response, "+%d>", target - it->address);
else
ccprintf(response, ">");
}
diff --git a/src/arch/x86/faults.cc b/src/arch/x86/faults.cc
index 98bd107..0754da3 100644
--- a/src/arch/x86/faults.cc
+++ b/src/arch/x86/faults.cc
@@ -169,7 +169,7 @@
panic("Tried to %s unmapped address %#x.\nPC: %#x, Instr: %s",
modeStr, addr, tc->pcState().pc(),
inst->disassemble(tc->pcState().pc(),
- Loader::debugSymbolTable));
+ &Loader::debugSymbolTable));
}
}
}
diff --git a/src/arch/x86/linux/process.cc b/src/arch/x86/linux/process.cc
index 6b50dbf..2c594e7 100644
--- a/src/arch/x86/linux/process.cc
+++ b/src/arch/x86/linux/process.cc
@@ -566,7 +566,7 @@
{ 306, "syncfs" },
{ 307, "sendmmsg" },
{ 308, "setns" },
- { 309, "getcpu" },
+ { 309, "getcpu", getcpuFunc },
{ 310, "proess_vm_readv" },
{ 311, "proess_vm_writev" },
{ 312, "kcmp" },
@@ -914,7 +914,7 @@
{ 315, "tee" },
{ 316, "vmsplice" },
{ 317, "move_pages" },
- { 318, "getcpu" },
+ { 318, "getcpu", getcpuFunc },
{ 319, "epoll_pwait" },
{ 320, "utimensat" },
{ 321, "signalfd" },
diff --git a/src/arch/x86/stacktrace.cc b/src/arch/x86/stacktrace.cc
index a7c548e..0e5341c 100644
--- a/src/arch/x86/stacktrace.cc
+++ b/src/arch/x86/stacktrace.cc
@@ -47,11 +47,10 @@
PortProxy &vp = tc->getVirtProxy();
const auto *symtab = tc->getSystemPtr()->workload->symtab(tc);
- Addr addr;
- if (!symtab->findAddress(name, addr))
- panic("thread info not compiled into kernel\n");
+ auto it = symtab->find(name);
+ panic_if(it == symtab->end(), "Thread info not compiled into kernel.");
- return vp.read<int32_t>(addr, GuestByteOrder);
+ return vp.read<int32_t>(it->address, GuestByteOrder);
}
ProcessInfo::ProcessInfo(ThreadContext *_tc) : tc(_tc)
@@ -196,14 +195,15 @@
std::string symbol;
for (int i = 0, size = stack.size(); i < size; ++i) {
Addr addr = stack[size - i - 1];
+ Loader::SymbolTable::const_iterator it;
if (addr == user)
symbol = "user";
else if (addr == console)
symbol = "console";
else if (addr == unknown)
symbol = "unknown";
- else
- symtab->findSymbol(addr, symbol);
+ else if ((it = symtab->find(addr)) != symtab->end())
+ symbol = it->name;
DPRINTFN("%#x: %s\n", addr, symbol);
}
diff --git a/src/arch/x86/tlb.cc b/src/arch/x86/tlb.cc
index 8068423..4e9d4be 100644
--- a/src/arch/x86/tlb.cc
+++ b/src/arch/x86/tlb.cc
@@ -268,7 +268,7 @@
[func, mode](ThreadContext *tc, PacketPtr pkt) -> Cycles
{
uint64_t ret;
- PseudoInst::pseudoInst<X86PseudoInstABI>(tc, func, ret);
+ PseudoInst::pseudoInst<X86PseudoInstABI, true>(tc, func, ret);
if (mode == Read)
pkt->setLE(ret);
return Cycles(1);
diff --git a/src/base/cp_annotate.cc b/src/base/cp_annotate.cc
index c886e39..159e6e0 100644
--- a/src/base/cp_annotate.cc
+++ b/src/base/cp_annotate.cc
@@ -163,7 +163,7 @@
Addr junk;
char sm[50];
if (!TheISA::inUserMode(tc))
- Loader::debugSymbolTable->findNearestSymbol(
+ Loader::debugSymbolTable.findNearestSymbol(
tc->readIntReg(ReturnAddressReg), st, junk);
tc->getVirtProxy().readString(sm, sm_string, 50);
@@ -337,7 +337,7 @@
Addr sym_addr = 0;
if (!TheISA::inUserMode(tc)) {
- Loader::debugSymbolTable->findNearestSymbol(next_pc, sym, sym_addr);
+ Loader::debugSymbolTable.findNearestSymbol(next_pc, sym, sym_addr);
} else {
Linux::ThreadInfo ti(tc);
string app = ti.curTaskName();
@@ -390,7 +390,7 @@
std::string st;
Addr junk;
if (!TheISA::inUserMode(tc))
- Loader::debugSymbolTable->findNearestSymbol(
+ Loader::debugSymbolTable.findNearestSymbol(
tc->readIntReg(ReturnAddressReg), st, junk);
System *sys = tc->getSystemPtr();
StringWrap name(sys->name());
diff --git a/src/base/loader/elf_object.cc b/src/base/loader/elf_object.cc
index 8876a87..ceafc53 100644
--- a/src/base/loader/elf_object.cc
+++ b/src/base/loader/elf_object.cc
@@ -341,7 +341,23 @@
elf_strptr(elf, shdr.sh_link, sym.st_name);
if (sym_name && sym_name[0] != '$') {
Addr value = sym.st_value - base + offset;
- if (symtab->insert(value & mask, sym_name)) {
+ Loader::Symbol symbol;
+ symbol.address = value & mask;
+ symbol.name = sym_name;
+ switch (binding) {
+ case STB_GLOBAL:
+ symbol.binding = Loader::Symbol::Binding::Global;
+ break;
+ case STB_LOCAL:
+ symbol.binding = Loader::Symbol::Binding::Local;
+ break;
+ case STB_WEAK:
+ symbol.binding = Loader::Symbol::Binding::Weak;
+ break;
+ default:
+ panic("Unrecognized binding type");
+ }
+ if (symtab->insert(symbol)) {
DPRINTF(Loader, "Symbol: %-40s value %#x\n",
sym_name, value);
}
diff --git a/src/base/loader/symtab.cc b/src/base/loader/symtab.cc
index 9e0f1f6..eaada22 100644
--- a/src/base/loader/symtab.cc
+++ b/src/base/loader/symtab.cc
@@ -35,6 +35,7 @@
#include "base/logging.hh"
#include "base/str.hh"
+#include "base/trace.hh"
#include "base/types.hh"
#include "sim/serialize.hh"
@@ -43,31 +44,53 @@
namespace Loader
{
-SymbolTable *debugSymbolTable = NULL;
+SymbolTable debugSymbolTable;
void
SymbolTable::clear()
{
- addrTable.clear();
- symbolTable.clear();
+ addrMap.clear();
+ nameMap.clear();
+ symbols.clear();
}
bool
-SymbolTable::insert(Addr address, string symbol)
+SymbolTable::insert(const Symbol &symbol)
{
- if (symbol.empty())
+ if (symbol.name.empty())
return false;
- if (!symbolTable.insert(make_pair(symbol, address)).second)
+ int idx = symbols.size();
+
+ if (!nameMap.insert({ symbol.name, idx }).second)
return false;
// There can be multiple symbols for the same address, so always
// update the addrTable multimap when we see a new symbol name.
- addrTable.insert(make_pair(address, symbol));
+ addrMap.insert({ symbol.address, idx });
+
+ symbols.emplace_back(symbol);
return true;
}
+bool
+SymbolTable::insert(const SymbolTable &other)
+{
+ // Check if any symbol in other already exists in our table.
+ NameMap intersection;
+ std::set_intersection(other.nameMap.begin(), other.nameMap.end(),
+ nameMap.begin(), nameMap.end(),
+ std::inserter(intersection, intersection.begin()),
+ nameMap.value_comp());
+ if (!intersection.empty())
+ return false;
+
+ for (const Symbol &symbol: other)
+ insert(symbol);
+
+ return true;
+}
bool
SymbolTable::load(const string &filename)
@@ -92,51 +115,54 @@
if (address.empty())
return false;
- string symbol = buffer.substr(idx + 1);
- eat_white(symbol);
- if (symbol.empty())
+ string name = buffer.substr(idx + 1);
+ eat_white(name);
+ if (name.empty())
return false;
Addr addr;
if (!to_number(address, addr))
return false;
- if (!insert(addr, symbol))
+ if (!insert({ Symbol::Binding::Global, name, addr }))
return false;
}
file.close();
-
return true;
}
void
SymbolTable::serialize(const string &base, CheckpointOut &cp) const
{
- paramOut(cp, base + ".size", addrTable.size());
+ paramOut(cp, base + ".size", symbols.size());
int i = 0;
- ATable::const_iterator p, end = addrTable.end();
- for (p = addrTable.begin(); p != end; ++p) {
- paramOut(cp, csprintf("%s.addr_%d", base, i), p->first);
- paramOut(cp, csprintf("%s.symbol_%d", base, i), p->second);
- ++i;
+ for (auto &symbol: symbols) {
+ paramOut(cp, csprintf("%s.addr_%d", base, i), symbol.address);
+ paramOut(cp, csprintf("%s.symbol_%d", base, i), symbol.name);
+ paramOut(cp, csprintf("%s.binding_%d", base, i), (int)symbol.binding);
+ i++;
}
}
void
-SymbolTable::unserialize(const string &base, CheckpointIn &cp)
+SymbolTable::unserialize(const string &base, CheckpointIn &cp,
+ Symbol::Binding default_binding)
{
clear();
int size;
paramIn(cp, base + ".size", size);
for (int i = 0; i < size; ++i) {
- Addr addr;
- std::string symbol;
+ Addr address;
+ std::string name;
+ Symbol::Binding binding = default_binding;
- paramIn(cp, csprintf("%s.addr_%d", base, i), addr);
- paramIn(cp, csprintf("%s.symbol_%d", base, i), symbol);
- insert(addr, symbol);
+ paramIn(cp, csprintf("%s.addr_%d", base, i), address);
+ paramIn(cp, csprintf("%s.symbol_%d", base, i), name);
+ if (!optParamIn(cp, csprintf("%s.binding_%d", base, i), binding))
+ binding = default_binding;
+ insert({binding, name, address});
}
}
diff --git a/src/base/loader/symtab.hh b/src/base/loader/symtab.hh
index b09d854..1e99fec 100644
--- a/src/base/loader/symtab.hh
+++ b/src/base/loader/symtab.hh
@@ -29,143 +29,205 @@
#ifndef __SYMTAB_HH__
#define __SYMTAB_HH__
+#include <functional>
#include <iosfwd>
#include <map>
+#include <memory>
#include <string>
+#include <vector>
+#include "base/trace.hh"
#include "base/types.hh"
#include "sim/serialize.hh"
namespace Loader
{
+struct Symbol
+{
+ enum class Binding {
+ Global,
+ Local,
+ Weak
+ };
+
+ Binding binding;
+ std::string name;
+ Addr address;
+};
+
class SymbolTable
{
public:
- typedef std::multimap<Addr, std::string> ATable;
- typedef std::map<std::string, Addr> STable;
+ typedef std::shared_ptr<SymbolTable> SymbolTablePtr;
private:
- ATable addrTable;
- STable symbolTable;
+ typedef std::vector<Symbol> SymbolVector;
+ // Map addresses to an index into the symbol vector.
+ typedef std::multimap<Addr, int> AddrMap;
+ // Map a symbol name to an index into the symbol vector.
+ typedef std::map<std::string, int> NameMap;
- private:
+ SymbolVector symbols;
+ AddrMap addrMap;
+ NameMap nameMap;
+
bool
- upperBound(Addr addr, ATable::const_iterator &iter) const
+ upperBound(Addr addr, AddrMap::const_iterator &iter) const
{
// find first key *larger* than desired address
- iter = addrTable.upper_bound(addr);
+ iter = addrMap.upper_bound(addr);
// if very first key is larger, we're out of luck
- if (iter == addrTable.begin())
+ if (iter == addrMap.begin())
return false;
return true;
}
+ typedef std::function<void(SymbolTable &symtab,
+ const Symbol &symbol)> SymTabOp;
+ SymbolTablePtr
+ operate(SymTabOp op) const
+ {
+ SymbolTablePtr symtab(new SymbolTable);
+ for (const auto &symbol: symbols)
+ op(*symtab, symbol);
+ return symtab;
+ }
+
+ typedef std::function<bool(const Symbol &symbol)> SymTabFilter;
+ SymbolTablePtr
+ filter(SymTabFilter filter) const
+ {
+ SymTabOp apply_filter =
+ [filter](SymbolTable &symtab, const Symbol &symbol) {
+ if (filter(symbol)) {
+ symtab.insert(symbol);
+ }
+ };
+ return operate(apply_filter);
+ }
+
+ SymbolTablePtr
+ filterByBinding(Symbol::Binding binding) const
+ {
+ auto filt = [binding](const Symbol &symbol) {
+ return symbol.binding == binding;
+ };
+ return filter(filt);
+ }
+
public:
- SymbolTable() {}
- SymbolTable(const std::string &file) { load(file); }
- ~SymbolTable() {}
+ typedef SymbolVector::iterator iterator;
+ typedef SymbolVector::const_iterator const_iterator;
+
+ const_iterator begin() const { return symbols.begin(); }
+ const_iterator end() const { return symbols.end(); }
void clear();
- bool insert(Addr address, std::string symbol);
+ // Insert either a single symbol or the contents of an entire symbol table
+ // into this one.
+ bool insert(const Symbol &symbol);
+ bool insert(const SymbolTable &other);
bool load(const std::string &file);
+ bool empty() const { return symbols.empty(); }
- const ATable &getAddrTable() const { return addrTable; }
- const STable &getSymbolTable() const { return symbolTable; }
-
- public:
- void serialize(const std::string &base, CheckpointOut &cp) const;
- void unserialize(const std::string &base, CheckpointIn &cp);
-
- public:
- bool
- findSymbol(Addr address, std::string &symbol) const
+ SymbolTablePtr
+ offset(Addr by) const
{
- ATable::const_iterator i = addrTable.find(address);
- if (i == addrTable.end())
- return false;
+ SymTabOp op = [by](SymbolTable &symtab, const Symbol &symbol) {
+ Symbol sym = symbol;
+ sym.address += by;
+ symtab.insert(sym);
+ };
+ return operate(op);
+ }
+
+ SymbolTablePtr
+ mask(Addr m) const
+ {
+ SymTabOp op = [m](SymbolTable &symtab, const Symbol &symbol) {
+ Symbol sym = symbol;
+ sym.address &= m;
+ symtab.insert(sym);
+ };
+ return operate(op);
+ }
+
+ SymbolTablePtr
+ globals() const
+ {
+ return filterByBinding(Symbol::Binding::Global);
+ }
+
+ SymbolTablePtr
+ locals() const
+ {
+ return filterByBinding(Symbol::Binding::Local);
+ }
+
+ SymbolTablePtr
+ weaks() const
+ {
+ return filterByBinding(Symbol::Binding::Weak);
+ }
+
+ void serialize(const std::string &base, CheckpointOut &cp) const;
+ void unserialize(const std::string &base, CheckpointIn &cp,
+ Symbol::Binding default_binding=Symbol::Binding::Global);
+
+ const_iterator
+ find(Addr address) const
+ {
+ AddrMap::const_iterator i = addrMap.find(address);
+ if (i == addrMap.end())
+ return end();
// There are potentially multiple symbols that map to the same
// address. For simplicity, just return the first one.
- symbol = (*i).second;
- return true;
+ return symbols.begin() + i->second;
}
- bool
- findAddress(const std::string &symbol, Addr &address) const
+ const_iterator
+ find(const std::string &name) const
{
- STable::const_iterator i = symbolTable.find(symbol);
- if (i == symbolTable.end())
- return false;
+ NameMap::const_iterator i = nameMap.find(name);
+ if (i == nameMap.end())
+ return end();
- address = (*i).second;
- return true;
+ return symbols.begin() + i->second;
}
/// Find the nearest symbol equal to or less than the supplied
/// address (e.g., the label for the enclosing function).
/// @param addr The address to look up.
- /// @param symbol Return reference for symbol string.
- /// @param symaddr Return reference for symbol address.
/// @param nextaddr Address of following symbol (for
/// determining valid range of symbol).
- /// @retval True if a symbol was found.
- bool
- findNearestSymbol(Addr addr, std::string &symbol, Addr &symaddr,
- Addr &nextaddr) const
+ /// @retval A const_iterator which points to the symbol if found, or end.
+ const_iterator
+ findNearest(Addr addr, Addr &nextaddr) const
{
- ATable::const_iterator i;
+ AddrMap::const_iterator i = addrMap.end();
if (!upperBound(addr, i))
- return false;
+ return end();
nextaddr = i->first;
--i;
- symaddr = i->first;
- symbol = i->second;
- return true;
+ return symbols.begin() + i->second;
}
/// Overload for findNearestSymbol() for callers who don't care
/// about nextaddr.
- bool
- findNearestSymbol(Addr addr, std::string &symbol, Addr &symaddr) const
+ const_iterator
+ findNearest(Addr addr) const
{
- ATable::const_iterator i;
+ AddrMap::const_iterator i = addrMap.end();
if (!upperBound(addr, i))
- return false;
+ return end();
--i;
- symaddr = i->first;
- symbol = i->second;
- return true;
- }
-
-
- bool
- findNearestAddr(Addr addr, Addr &symaddr, Addr &nextaddr) const
- {
- ATable::const_iterator i;
- if (!upperBound(addr, i))
- return false;
-
- nextaddr = i->first;
- --i;
- symaddr = i->first;
- return true;
- }
-
- bool
- findNearestAddr(Addr addr, Addr &symaddr) const
- {
- ATable::const_iterator i;
- if (!upperBound(addr, i))
- return false;
-
- --i;
- symaddr = i->first;
- return true;
+ return symbols.begin() + i->second;
}
};
@@ -173,7 +235,7 @@
/// there should be one of these per System object for full system,
/// and per Process object for non-full-system, but so far one big
/// global one has worked well enough.
-extern SymbolTable *debugSymbolTable;
+extern SymbolTable debugSymbolTable;
} // namespace Loader
diff --git a/src/cpu/BaseCPU.py b/src/cpu/BaseCPU.py
index ab70d1d..e487cbb 100644
--- a/src/cpu/BaseCPU.py
+++ b/src/cpu/BaseCPU.py
@@ -149,7 +149,6 @@
"enable statistics pseudo instructions")
profile = Param.Latency('0ns', "trace the kernel stack")
- do_quiesce = Param.Bool(True, "enable quiesce instructions")
wait_for_remote_gdb = Param.Bool(False,
"Wait for a remote GDB connection");
diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index 3647482..dc3cbf0 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -751,21 +751,24 @@
void
BaseCPU::traceFunctionsInternal(Addr pc)
{
- if (!Loader::debugSymbolTable)
+ if (Loader::debugSymbolTable.empty())
return;
// if pc enters different function, print new function symbol and
// update saved range. Otherwise do nothing.
if (pc < currentFunctionStart || pc >= currentFunctionEnd) {
- string sym_str;
- bool found = Loader::debugSymbolTable->findNearestSymbol(
- pc, sym_str, currentFunctionStart, currentFunctionEnd);
+ auto it = Loader::debugSymbolTable.findNearest(
+ pc, currentFunctionEnd);
- if (!found) {
+ string sym_str;
+ if (it == Loader::debugSymbolTable.end()) {
// no symbol found: use addr as label
- sym_str = csprintf("0x%x", pc);
+ sym_str = csprintf("%#x", pc);
currentFunctionStart = pc;
currentFunctionEnd = pc + 1;
+ } else {
+ sym_str = it->name;
+ currentFunctionStart = it->address;
}
ccprintf(*functionTraceStream, " (%d)\n%d: %s",
diff --git a/src/cpu/exetrace.cc b/src/cpu/exetrace.cc
index 06154da..c9c8b68 100644
--- a/src/cpu/exetrace.cc
+++ b/src/cpu/exetrace.cc
@@ -76,34 +76,34 @@
if (Debug::ExecThread)
outs << "T" << thread->threadId() << " : ";
- std::string sym_str;
- Addr sym_addr;
Addr cur_pc = pc.instAddr();
- if (Loader::debugSymbolTable && Debug::ExecSymbol &&
- (!FullSystem || !inUserMode(thread)) &&
- Loader::debugSymbolTable->findNearestSymbol(
- cur_pc, sym_str, sym_addr)) {
- if (cur_pc != sym_addr)
- sym_str += csprintf("+%d",cur_pc - sym_addr);
- outs << "@" << sym_str;
+ Loader::SymbolTable::const_iterator it;
+ if (Debug::ExecSymbol && (!FullSystem || !inUserMode(thread)) &&
+ (it = Loader::debugSymbolTable.findNearest(cur_pc)) !=
+ Loader::debugSymbolTable.end()) {
+ Addr delta = cur_pc - it->address;
+ if (delta)
+ ccprintf(outs, "@%s+%d", it->name, delta);
+ else
+ ccprintf(outs, "@%s", it->name);
} else {
- outs << "0x" << hex << cur_pc;
+ ccprintf(outs, "%#x", cur_pc);
}
if (inst->isMicroop()) {
- outs << "." << setw(2) << dec << pc.microPC();
+ ccprintf(outs, ".%2d", pc.microPC());
} else {
- outs << " ";
+ ccprintf(outs, " ");
}
- outs << " : ";
+ ccprintf(outs, " : ");
//
// Print decoded instruction
//
outs << setw(26) << left;
- outs << inst->disassemble(cur_pc, Loader::debugSymbolTable);
+ outs << inst->disassemble(cur_pc, &Loader::debugSymbolTable);
if (ran) {
outs << " : ";
diff --git a/src/cpu/kvm/base.cc b/src/cpu/kvm/base.cc
index 38fea19..d44bb3d 100644
--- a/src/cpu/kvm/base.cc
+++ b/src/cpu/kvm/base.cc
@@ -223,7 +223,7 @@
void
BaseKvmCPU::finishMMIOPending()
{
- assert(_status = RunningMMIOPending);
+ assert(_status == RunningMMIOPending);
assert(!tickEvent.scheduled());
_status = RunningServiceCompletion;
diff --git a/src/cpu/minor/lsq.cc b/src/cpu/minor/lsq.cc
index e50d498..e4a9dc0 100644
--- a/src/cpu/minor/lsq.cc
+++ b/src/cpu/minor/lsq.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013-2014,2017-2018 ARM Limited
+ * Copyright (c) 2013-2014,2017-2018,2020 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -1029,10 +1029,11 @@
bool is_load = request->isLoad;
bool is_llsc = request->request->isLLSC();
+ bool is_release = request->request->isRelease();
bool is_swap = request->request->isSwap();
bool is_atomic = request->request->isAtomic();
bool bufferable = !(request->request->isStrictlyOrdered() ||
- is_llsc || is_swap || is_atomic);
+ is_llsc || is_swap || is_atomic || is_release);
if (is_load) {
if (numStoresInTransfers != 0) {
@@ -1050,6 +1051,15 @@
}
}
+ // Process store conditionals or store releases only after all previous
+ // stores have completed.
+ if (((!is_load && is_llsc) || is_release) &&
+ !storeBuffer.isDrained()) {
+ DPRINTF(MinorMem, "Memory access needs to wait for store buffer"
+ " to drain\n");
+ return;
+ }
+
/* Check if this is the head instruction (and so must be executable as
* its stream sequence number was checked above) for loads which must
* not be speculatively issued and stores which must be issued here */
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index f7fb3fe..7383c6f 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2010-2014, 2017-2019 ARM Limited
+ * Copyright (c) 2010-2014, 2017-2020 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -753,6 +753,21 @@
DynInstPtr inst = storeWBIt->instruction();
LSQRequest* req = storeWBIt->request();
+
+ // Process store conditionals or store releases only after all previous
+ // stores have completed.
+ if ((req->mainRequest()->isLLSC() ||
+ req->mainRequest()->isRelease()) &&
+ (storeWBIt.idx() != storeQueue.head())) {
+ DPRINTF(LSQUnit, "Store idx:%i PC:%s to Addr:%#x "
+ "[sn:%lli] is %s%s and not head of the queue\n",
+ storeWBIt.idx(), inst->pcState(),
+ req->request()->getPaddr(), inst->seqNum,
+ req->mainRequest()->isLLSC() ? "SC" : "",
+ req->mainRequest()->isRelease() ? "/Release" : "");
+ break;
+ }
+
storeWBIt->committed() = true;
assert(!inst->memData);
diff --git a/src/cpu/o3/mem_dep_unit.hh b/src/cpu/o3/mem_dep_unit.hh
index c4a3310..3d24b1f 100644
--- a/src/cpu/o3/mem_dep_unit.hh
+++ b/src/cpu/o3/mem_dep_unit.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012, 2014 ARM Limited
+ * Copyright (c) 2012, 2014, 2020 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -45,6 +45,7 @@
#include <memory>
#include <set>
#include <unordered_map>
+#include <unordered_set>
#include "base/statistics.hh"
#include "cpu/inst_seq.hh"
@@ -177,7 +178,7 @@
public:
/** Constructs a memory dependence entry. */
MemDepEntry(const DynInstPtr &new_inst)
- : inst(new_inst), regsReady(false), memDepReady(false),
+ : inst(new_inst), regsReady(false), memDeps(0),
completed(false), squashed(false)
{
#ifdef DEBUG
@@ -216,8 +217,8 @@
/** If the registers are ready or not. */
bool regsReady;
- /** If all memory dependencies have been satisfied. */
- bool memDepReady;
+ /** Number of memory dependencies that need to be satisfied. */
+ int memDeps;
/** If the instruction is completed. */
bool completed;
/** If the instruction is squashed. */
@@ -257,14 +258,20 @@
*/
MemDepPred depPred;
+ /** Sequence numbers of outstanding load barriers. */
+ std::unordered_set<InstSeqNum> loadBarrierSNs;
+
+ /** Sequence numbers of outstanding store barriers. */
+ std::unordered_set<InstSeqNum> storeBarrierSNs;
+
/** Is there an outstanding load barrier that loads must wait on. */
- bool loadBarrier;
- /** The sequence number of the load barrier. */
- InstSeqNum loadBarrierSN;
+ bool hasLoadBarrier() const { return !loadBarrierSNs.empty(); }
+
/** Is there an outstanding store barrier that loads must wait on. */
- bool storeBarrier;
- /** The sequence number of the store barrier. */
- InstSeqNum storeBarrierSN;
+ bool hasStoreBarrier() const { return !storeBarrierSNs.empty(); }
+
+ /** Inserts the sequence number of a barrier instruction into the list of
+ tracked barriers. */
void insertBarrierSN(const DynInstPtr &barr_inst);
/** Pointer to the IQ. */
InstructionQueue<Impl> *iqPtr;
diff --git a/src/cpu/o3/mem_dep_unit_impl.hh b/src/cpu/o3/mem_dep_unit_impl.hh
index c712965..d1eac29 100644
--- a/src/cpu/o3/mem_dep_unit_impl.hh
+++ b/src/cpu/o3/mem_dep_unit_impl.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012, 2014 ARM Limited
+ * Copyright (c) 2012, 2014, 2020 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -42,6 +42,7 @@
#define __CPU_O3_MEM_DEP_UNIT_IMPL_HH__
#include <map>
+#include <vector>
#include "cpu/o3/inst_queue.hh"
#include "cpu/o3/mem_dep_unit.hh"
@@ -50,8 +51,7 @@
template <class MemDepPred, class Impl>
MemDepUnit<MemDepPred, Impl>::MemDepUnit()
- : loadBarrier(false), loadBarrierSN(0), storeBarrier(false),
- storeBarrierSN(0), iqPtr(NULL)
+ : iqPtr(NULL)
{
}
@@ -60,8 +60,7 @@
: _name(params->name + ".memdepunit"),
depPred(params->store_set_clear_period, params->SSITSize,
params->LFSTSize),
- loadBarrier(false), loadBarrierSN(0), storeBarrier(false),
- storeBarrierSN(0), iqPtr(NULL)
+ iqPtr(NULL)
{
DPRINTF(MemDepUnit, "Creating MemDepUnit object.\n");
}
@@ -155,8 +154,8 @@
MemDepUnit<MemDepPred, Impl>::takeOverFrom()
{
// Be sure to reset all state.
- loadBarrier = storeBarrier = false;
- loadBarrierSN = storeBarrierSN = 0;
+ loadBarrierSNs.clear();
+ storeBarrierSNs.clear();
depPred.clear();
}
@@ -169,6 +168,29 @@
template <class MemDepPred, class Impl>
void
+MemDepUnit<MemDepPred, Impl>::insertBarrierSN(const DynInstPtr &barr_inst)
+{
+ InstSeqNum barr_sn = barr_inst->seqNum;
+ // Memory barriers block loads and stores, write barriers only stores.
+ if (barr_inst->isMemBarrier()) {
+ loadBarrierSNs.insert(barr_sn);
+ storeBarrierSNs.insert(barr_sn);
+ DPRINTF(MemDepUnit, "Inserted a memory barrier %s SN:%lli\n",
+ barr_inst->pcState(), barr_sn);
+ } else if (barr_inst->isWriteBarrier()) {
+ storeBarrierSNs.insert(barr_sn);
+ DPRINTF(MemDepUnit, "Inserted a write barrier %s SN:%lli\n",
+ barr_inst->pcState(), barr_sn);
+ }
+ if (loadBarrierSNs.size() || storeBarrierSNs.size()) {
+ DPRINTF(MemDepUnit, "Outstanding load barriers = %d; "
+ "store barriers = %d\n",
+ loadBarrierSNs.size(), storeBarrierSNs.size());
+ }
+}
+
+template <class MemDepPred, class Impl>
+void
MemDepUnit<MemDepPred, Impl>::insert(const DynInstPtr &inst)
{
ThreadID tid = inst->threadNumber;
@@ -188,39 +210,46 @@
// Check any barriers and the dependence predictor for any
// producing memrefs/stores.
- InstSeqNum producing_store;
- if ((inst->isLoad() || inst->isAtomic()) && loadBarrier) {
- DPRINTF(MemDepUnit, "Load barrier [sn:%lli] in flight\n",
- loadBarrierSN);
- producing_store = loadBarrierSN;
- } else if ((inst->isStore() || inst->isAtomic()) && storeBarrier) {
- DPRINTF(MemDepUnit, "Store barrier [sn:%lli] in flight\n",
- storeBarrierSN);
- producing_store = storeBarrierSN;
+ std::vector<InstSeqNum> producing_stores;
+ if ((inst->isLoad() || inst->isAtomic()) && hasLoadBarrier()) {
+ DPRINTF(MemDepUnit, "%d load barriers in flight\n",
+ loadBarrierSNs.size());
+ producing_stores.insert(std::end(producing_stores),
+ std::begin(loadBarrierSNs),
+ std::end(loadBarrierSNs));
+ } else if ((inst->isStore() || inst->isAtomic()) && hasStoreBarrier()) {
+ DPRINTF(MemDepUnit, "%d store barriers in flight\n",
+ storeBarrierSNs.size());
+ producing_stores.insert(std::end(producing_stores),
+ std::begin(storeBarrierSNs),
+ std::end(storeBarrierSNs));
} else {
- producing_store = depPred.checkInst(inst->instAddr());
+ InstSeqNum dep = depPred.checkInst(inst->instAddr());
+ if (dep != 0)
+ producing_stores.push_back(dep);
}
- MemDepEntryPtr store_entry = NULL;
+ std::vector<MemDepEntryPtr> store_entries;
// If there is a producing store, try to find the entry.
- if (producing_store != 0) {
- DPRINTF(MemDepUnit, "Searching for producer\n");
+ for (auto producing_store : producing_stores) {
+ DPRINTF(MemDepUnit, "Searching for producer [sn:%lli]\n",
+ producing_store);
MemDepHashIt hash_it = memDepHash.find(producing_store);
if (hash_it != memDepHash.end()) {
- store_entry = (*hash_it).second;
- DPRINTF(MemDepUnit, "Proucer found\n");
+ store_entries.push_back((*hash_it).second);
+ DPRINTF(MemDepUnit, "Producer found\n");
}
}
// If no store entry, then instruction can issue as soon as the registers
// are ready.
- if (!store_entry) {
+ if (store_entries.empty()) {
DPRINTF(MemDepUnit, "No dependency for inst PC "
"%s [sn:%lli].\n", inst->pcState(), inst->seqNum);
- inst_entry->memDepReady = true;
+ assert(inst_entry->memDeps == 0);
if (inst->readyToIssue()) {
inst_entry->regsReady = true;
@@ -229,8 +258,9 @@
}
} else {
// Otherwise make the instruction dependent on the store/barrier.
- DPRINTF(MemDepUnit, "Adding to dependency list; "
- "inst PC %s is dependent on [sn:%lli].\n",
+ DPRINTF(MemDepUnit, "Adding to dependency list\n");
+ for (auto M5_VAR_USED producing_store : producing_stores)
+ DPRINTF(MemDepUnit, "\tinst PC %s is dependent on [sn:%lli].\n",
inst->pcState(), producing_store);
if (inst->readyToIssue()) {
@@ -241,7 +271,10 @@
inst->clearCanIssue();
// Add this instruction to the list of dependents.
- store_entry->dependInsts.push_back(inst_entry);
+ for (auto store_entry : store_entries)
+ store_entry->dependInsts.push_back(inst_entry);
+
+ inst_entry->memDeps = store_entries.size();
if (inst->isLoad()) {
++conflictingLoads;
@@ -250,6 +283,9 @@
}
}
+ // A load-acquire or store-release may also act as a barrier, so track
+ // its sequence number as well.
+ insertBarrierSN(inst);
+
if (inst->isStore() || inst->isAtomic()) {
DPRINTF(MemDepUnit, "Inserting store/atomic PC %s [sn:%lli].\n",
inst->pcState(), inst->seqNum);
@@ -268,21 +304,7 @@
void
MemDepUnit<MemDepPred, Impl>::insertNonSpec(const DynInstPtr &inst)
{
- ThreadID tid = inst->threadNumber;
-
- MemDepEntryPtr inst_entry = std::make_shared<MemDepEntry>(inst);
-
- // Insert the MemDepEntry into the hash.
- memDepHash.insert(
- std::pair<InstSeqNum, MemDepEntryPtr>(inst->seqNum, inst_entry));
-#ifdef DEBUG
- MemDepEntry::memdep_insert++;
-#endif
-
- // Add the instruction to the list.
- instList[tid].push_back(inst);
-
- inst_entry->listIt = --(instList[tid].end());
+ insertBarrier(inst);
// Might want to turn this part into an inline function or something.
// It's shared between both insert functions.
@@ -304,28 +326,13 @@
void
MemDepUnit<MemDepPred, Impl>::insertBarrier(const DynInstPtr &barr_inst)
{
- InstSeqNum barr_sn = barr_inst->seqNum;
- // Memory barriers block loads and stores, write barriers only stores.
- if (barr_inst->isMemBarrier()) {
- loadBarrier = true;
- loadBarrierSN = barr_sn;
- storeBarrier = true;
- storeBarrierSN = barr_sn;
- DPRINTF(MemDepUnit, "Inserted a memory barrier %s SN:%lli\n",
- barr_inst->pcState(),barr_sn);
- } else if (barr_inst->isWriteBarrier()) {
- storeBarrier = true;
- storeBarrierSN = barr_sn;
- DPRINTF(MemDepUnit, "Inserted a write barrier\n");
- }
-
ThreadID tid = barr_inst->threadNumber;
MemDepEntryPtr inst_entry = std::make_shared<MemDepEntry>(barr_inst);
// Add the MemDepEntry to the hash.
memDepHash.insert(
- std::pair<InstSeqNum, MemDepEntryPtr>(barr_sn, inst_entry));
+ std::pair<InstSeqNum, MemDepEntryPtr>(barr_inst->seqNum, inst_entry));
#ifdef DEBUG
MemDepEntry::memdep_insert++;
#endif
@@ -334,6 +341,8 @@
instList[tid].push_back(barr_inst);
inst_entry->listIt = --(instList[tid].end());
+
+ insertBarrierSN(barr_inst);
}
template <class MemDepPred, class Impl>
@@ -348,7 +357,7 @@
inst_entry->regsReady = true;
- if (inst_entry->memDepReady) {
+ if (inst_entry->memDeps == 0) {
DPRINTF(MemDepUnit, "Instruction has its memory "
"dependencies resolved, adding it to the ready list.\n");
@@ -430,18 +439,19 @@
{
wakeDependents(inst);
completed(inst);
-
InstSeqNum barr_sn = inst->seqNum;
- DPRINTF(MemDepUnit, "barrier completed: %s SN:%lli\n", inst->pcState(),
- inst->seqNum);
if (inst->isMemBarrier()) {
- if (loadBarrierSN == barr_sn)
- loadBarrier = false;
- if (storeBarrierSN == barr_sn)
- storeBarrier = false;
+ assert(hasLoadBarrier());
+ assert(hasStoreBarrier());
+ loadBarrierSNs.erase(barr_sn);
+ storeBarrierSNs.erase(barr_sn);
+ DPRINTF(MemDepUnit, "Memory barrier completed: %s SN:%lli\n",
+ inst->pcState(), inst->seqNum);
} else if (inst->isWriteBarrier()) {
- if (storeBarrierSN == barr_sn)
- storeBarrier = false;
+ assert(hasStoreBarrier());
+ storeBarrierSNs.erase(barr_sn);
+ DPRINTF(MemDepUnit, "Write barrier completed: %s SN:%lli\n",
+ inst->pcState(), inst->seqNum);
}
}
@@ -469,10 +479,13 @@
"[sn:%lli].\n",
woken_inst->inst->seqNum);
- if (woken_inst->regsReady && !woken_inst->squashed) {
+ assert(woken_inst->memDeps > 0);
+ woken_inst->memDeps -= 1;
+
+ if ((woken_inst->memDeps == 0) &&
+ woken_inst->regsReady &&
+ !woken_inst->squashed) {
moveToReady(woken_inst);
- } else {
- woken_inst->memDepReady = true;
}
}
@@ -507,11 +520,9 @@
DPRINTF(MemDepUnit, "Squashing inst [sn:%lli]\n",
(*squash_it)->seqNum);
- if ((*squash_it)->seqNum == loadBarrierSN)
- loadBarrier = false;
+ loadBarrierSNs.erase((*squash_it)->seqNum);
- if ((*squash_it)->seqNum == storeBarrierSN)
- storeBarrier = false;
+ storeBarrierSNs.erase((*squash_it)->seqNum);
hash_it = memDepHash.find((*squash_it)->seqNum);
diff --git a/src/cpu/profile.cc b/src/cpu/profile.cc
index aa2cff8..fee0681 100644
--- a/src/cpu/profile.cc
+++ b/src/cpu/profile.cc
@@ -57,6 +57,7 @@
ccprintf(os, "\n");
+ Loader::SymbolTable::const_iterator it;
for (i = children.begin(); i != end; ++i) {
Addr addr = i->first;
string symbol;
@@ -66,7 +67,9 @@
symbol = "console";
else if (addr == 3)
symbol = "unknown";
- else if (!symtab->findSymbol(addr, symbol))
+ else if ((it = symtab->find(addr)) != symtab->end())
+ symbol = it->name;
+ else
panic("could not find symbol for address %#x\n", addr);
const ProfileNode *node = i->second;
@@ -127,13 +130,15 @@
Addr pc = i->first;
Counter count = i->second;
- std::string symbol;
- if (pc == 1)
+ Loader::SymbolTable::const_iterator it;
+ if (pc == 1) {
ccprintf(os, "user %d\n", count);
- else if (symtab->findSymbol(pc, symbol) && !symbol.empty())
- ccprintf(os, "%s %d\n", symbol, count);
- else
+ } else if ((it = symtab->find(pc)) != symtab->end() &&
+ !it->name.empty()) {
+ ccprintf(os, "%s %d\n", it->name, count);
+ } else {
ccprintf(os, "%#x %d\n", pc, count);
+ }
}
ccprintf(os, ">>>function data\n");
@@ -145,9 +150,9 @@
{
node->count++;
- Addr symaddr;
- if (symtab->findNearestAddr(pc, symaddr)) {
- pc_count[symaddr]++;
+ auto it = symtab->findNearest(pc);
+ if (it != symtab->end()) {
+ pc_count[it->address]++;
} else {
// record PC even if we don't have a symbol to avoid
// silently biasing the histogram
diff --git a/src/cpu/thread_context.cc b/src/cpu/thread_context.cc
index de6997a..9b93d75 100644
--- a/src/cpu/thread_context.cc
+++ b/src/cpu/thread_context.cc
@@ -130,9 +130,6 @@
void
ThreadContext::quiesce()
{
- if (!getCpuPtr()->params()->do_quiesce)
- return;
-
DPRINTF(Quiesce, "%s: quiesce()\n", getCpuPtr()->name());
suspend();
@@ -146,9 +143,6 @@
{
BaseCPU *cpu = getCpuPtr();
- if (!cpu->params()->do_quiesce)
- return;
-
EndQuiesceEvent *quiesceEvent = getQuiesceEvent();
cpu->reschedule(quiesceEvent, resume, true);
diff --git a/src/dev/net/etherswitch.cc b/src/dev/net/etherswitch.cc
index e7a60a1..972cf56 100644
--- a/src/dev/net/etherswitch.cc
+++ b/src/dev/net/etherswitch.cc
@@ -184,7 +184,7 @@
if (!sendPacket(outputFifo.front())) {
DPRINTF(Ethernet, "output port busy...retry later\n");
if (!txEvent.scheduled())
- parent->schedule(txEvent, curTick() + retryTime);
+ parent->schedule(txEvent, curTick() + SimClock::Int::ns);
} else {
DPRINTF(Ethernet, "packet sent: len=%d\n", outputFifo.front()->length);
outputFifo.pop();
diff --git a/src/dev/net/ethertap.cc b/src/dev/net/ethertap.cc
index 33bafb7..f4aba21 100644
--- a/src/dev/net/ethertap.cc
+++ b/src/dev/net/ethertap.cc
@@ -196,7 +196,7 @@
DPRINTF(Ethernet, "bus busy...buffer for retransmission\n");
packetBuffer.push(packet);
if (!txEvent.scheduled())
- schedule(txEvent, curTick() + retryTime);
+ schedule(txEvent, curTick() + SimClock::Int::ns);
} else if (dump) {
dump->dump(packet);
}
@@ -218,7 +218,7 @@
}
if (!packetBuffer.empty() && !txEvent.scheduled())
- schedule(txEvent, curTick() + retryTime);
+ schedule(txEvent, curTick() + SimClock::Int::ns);
}
diff --git a/src/dev/net/ns_gige.cc b/src/dev/net/ns_gige.cc
index 68c4497..71f449a 100644
--- a/src/dev/net/ns_gige.cc
+++ b/src/dev/net/ns_gige.cc
@@ -1395,7 +1395,7 @@
if (!txFifo.empty() && !txEvent.scheduled()) {
DPRINTF(Ethernet, "reschedule transmit\n");
- schedule(txEvent, curTick() + retryTime);
+ schedule(txEvent, curTick() + SimClock::Int::ns);
}
}
diff --git a/src/gpu-compute/GPU.py b/src/gpu-compute/GPU.py
index fee0254..7eaf65f 100644
--- a/src/gpu-compute/GPU.py
+++ b/src/gpu-compute/GPU.py
@@ -129,6 +129,8 @@
"memory pipeline's queues")
local_mem_queue_size = Param.Int(256, "Number of entries in the local "
"memory pipeline's queues")
+ max_cu_tokens = Param.Int(4, "Maximum number of tokens, i.e., the number"\
+ " of instructions that can be sent to coalescer")
ldsBus = Bridge() # the bridge between the CU and its LDS
ldsPort = MasterPort("The port that goes to the LDS")
localDataStore = Param.LdsState("the LDS for this CU")
diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc
index 59bc6a0..cd880d6 100644
--- a/src/gpu-compute/compute_unit.cc
+++ b/src/gpu-compute/compute_unit.cc
@@ -74,9 +74,9 @@
req_tick_latency(p->mem_req_latency * p->clk_domain->clockPeriod()),
resp_tick_latency(p->mem_resp_latency * p->clk_domain->clockPeriod()),
_masterId(p->system->getMasterId(this, "ComputeUnit")),
- lds(*p->localDataStore), _cacheLineSize(p->system->cacheLineSize()),
- globalSeqNum(0), wavefrontSize(p->wfSize),
- kernelLaunchInst(new KernelLaunchStaticInst())
+ lds(*p->localDataStore), gmTokenPort(name() + ".gmTokenPort", this),
+ _cacheLineSize(p->system->cacheLineSize()), globalSeqNum(0),
+ wavefrontSize(p->wfSize), kernelLaunchInst(new KernelLaunchStaticInst())
{
/**
* This check is necessary because std::bitset only provides conversion
@@ -139,6 +139,10 @@
memPort.resize(wfSize());
+ // Setup tokens for slave ports. The number of tokens in memSlaveTokens
+ // is the total token count for the entire vector port (i.e., this CU).
+ memPortTokens = new TokenManager(p->max_cu_tokens);
+
// resize the tlbPort vectorArray
int tlbPort_width = perLaneTLB ? wfSize() : 1;
tlbPort.resize(tlbPort_width);
@@ -612,6 +616,8 @@
vectorAluInstAvail.resize(numSIMDs, false);
shrMemInstAvail = 0;
glbMemInstAvail = 0;
+
+ gmTokenPort.setTokenManager(memPortTokens);
}
bool
diff --git a/src/gpu-compute/compute_unit.hh b/src/gpu-compute/compute_unit.hh
index a023cb2..49713e9 100644
--- a/src/gpu-compute/compute_unit.hh
+++ b/src/gpu-compute/compute_unit.hh
@@ -51,6 +51,7 @@
#include "gpu-compute/schedule_stage.hh"
#include "gpu-compute/scoreboard_check_stage.hh"
#include "mem/port.hh"
+#include "mem/token_port.hh"
#include "sim/clocked_object.hh"
static const int MAX_REGS_FOR_NON_VEC_MEM_INST = 1;
@@ -415,6 +416,26 @@
CUExitCallback *cuExitCallback;
+ class GMTokenPort : public TokenMasterPort
+ {
+ public:
+ GMTokenPort(const std::string& name, SimObject *owner,
+ PortID id = InvalidPortID)
+ : TokenMasterPort(name, owner, id)
+ { }
+ ~GMTokenPort() { }
+
+ protected:
+ bool recvTimingResp(PacketPtr) { return false; }
+ void recvReqRetry() { }
+ };
+
+ // Manager for the number of tokens available to this compute unit to
+ // send global memory request packets to the coalescer. This is only used
+ // between the global memory pipe and the TCP coalescer.
+ TokenManager *memPortTokens;
+ GMTokenPort gmTokenPort;
+
/** Data access Port **/
class DataPort : public MasterPort
{
@@ -677,6 +698,12 @@
return ldsPort;
}
+ TokenManager *
+ getTokenManager()
+ {
+ return memPortTokens;
+ }
+
/** The memory port for SIMD data accesses.
* Can be connected to PhysMem for Ruby for timing simulations
*/
@@ -712,6 +739,8 @@
}
ldsPort = new LDSPort(csprintf("%s-port", name()), this, idx);
return *ldsPort;
+ } else if (if_name == "gmTokenPort") {
+ return gmTokenPort;
} else {
panic("incorrect port name");
}
diff --git a/src/gpu-compute/global_memory_pipeline.cc b/src/gpu-compute/global_memory_pipeline.cc
index d8e6d47..64778f0 100644
--- a/src/gpu-compute/global_memory_pipeline.cc
+++ b/src/gpu-compute/global_memory_pipeline.cc
@@ -33,6 +33,7 @@
#include "gpu-compute/global_memory_pipeline.hh"
+#include "debug/GPUCoalescer.hh"
#include "debug/GPUMem.hh"
#include "debug/GPUReg.hh"
#include "gpu-compute/compute_unit.hh"
@@ -56,6 +57,25 @@
_name = computeUnit->name() + ".GlobalMemPipeline";
}
+bool
+GlobalMemPipeline::coalescerReady(GPUDynInstPtr mp) const
+{
+ // We require one token from the coalescer's uncoalesced table to
+ // proceed
+ int token_count = 1;
+
+ // Make sure the vector port has tokens. There is a single pool
+ // of tokens so only one port in the vector port needs to be checked.
+ // Lane 0 is chosen arbitrarily.
+ DPRINTF(GPUCoalescer, "Checking for %d tokens\n", token_count);
+ if (!mp->computeUnit()->getTokenManager()->haveTokens(token_count)) {
+ DPRINTF(GPUCoalescer, "Stalling inst because coalsr is busy!\n");
+ return false;
+ }
+
+ return true;
+}
+
void
GlobalMemPipeline::exec()
{
@@ -124,6 +144,14 @@
}
}
+ DPRINTF(GPUCoalescer, "initiateAcc for %s seqNum %d\n",
+ mp->disassemble(), mp->seqNum());
+ // Memfences do not return tokens and must always be issued, so we should
+ // not acquire one for them; doing so would deplete the token count until deadlock
+ if (!mp->isMemFence()) {
+ assert(mp->computeUnit()->getTokenManager()->haveTokens(1));
+ mp->computeUnit()->getTokenManager()->acquireTokens(1);
+ }
mp->initiateAcc(mp);
if (!outOfOrderDataDelivery && !mp->isMemFence()) {
diff --git a/src/gpu-compute/global_memory_pipeline.hh b/src/gpu-compute/global_memory_pipeline.hh
index 0bc8596..2f83185 100644
--- a/src/gpu-compute/global_memory_pipeline.hh
+++ b/src/gpu-compute/global_memory_pipeline.hh
@@ -121,6 +121,8 @@
loadVrfBankConflictCycles += num_cycles;
}
+ bool coalescerReady(GPUDynInstPtr mp) const;
+
private:
ComputeUnit *computeUnit;
std::string _name;
diff --git a/src/gpu-compute/wavefront.cc b/src/gpu-compute/wavefront.cc
index e70a874..46cce9c 100644
--- a/src/gpu-compute/wavefront.cc
+++ b/src/gpu-compute/wavefront.cc
@@ -434,6 +434,11 @@
return 0;
}
+ // Does the coalescer have space for our instruction?
+ if (!computeUnit->globalMemoryPipe.coalescerReady(ii)) {
+ return 0;
+ }
+
if (!computeUnit->globalMemoryPipe.
isGMReqFIFOWrRdy(rdGmReqsInPipe + wrGmReqsInPipe)) {
// Can we insert a new request to the Global Mem Request FIFO?
@@ -504,6 +509,12 @@
if (!locMemIssueRdy) {
return 0;
}
+
+ // Does the coalescer have space for our instruction?
+ if (!computeUnit->globalMemoryPipe.coalescerReady(ii)) {
+ return 0;
+ }
+
if (!computeUnit->globalMemoryPipe.
isGMReqFIFOWrRdy(rdGmReqsInPipe + wrGmReqsInPipe)) {
// Can we insert a new request to the Global Mem Request FIFO?
diff --git a/src/kern/linux/helpers.cc b/src/kern/linux/helpers.cc
index 9286ab0..80ff526 100644
--- a/src/kern/linux/helpers.cc
+++ b/src/kern/linux/helpers.cc
@@ -96,24 +96,25 @@
const auto *symtab = system->workload->symtab(tc);
PortProxy &proxy = tc->getVirtProxy();
- Addr addr_lb = 0, addr_lb_len = 0, addr_first = 0, addr_next = 0;
- const bool found_symbols =
- symtab->findAddress("__log_buf", addr_lb) &&
- symtab->findAddress("log_buf_len", addr_lb_len) &&
- symtab->findAddress("log_first_idx", addr_first) &&
- symtab->findAddress("log_next_idx", addr_next);
+ auto lb = symtab->find("__log_buf");
+ auto lb_len = symtab->find("log_buf_len");
+ auto first = symtab->find("log_first_idx");
+ auto next = symtab->find("log_next_idx");
- if (!found_symbols) {
+ auto end_it = symtab->end();
+
+ if (lb == end_it || lb_len == end_it ||
+ first == end_it || next == end_it) {
warn("Failed to find kernel dmesg symbols.\n");
return;
}
uint32_t log_buf_len =
- proxy.read<uint32_t>(addr_lb_len, TheISA::GuestByteOrder);
+ proxy.read<uint32_t>(lb_len->address, TheISA::GuestByteOrder);
uint32_t log_first_idx =
- proxy.read<uint32_t>(addr_first, TheISA::GuestByteOrder);
+ proxy.read<uint32_t>(first->address, TheISA::GuestByteOrder);
uint32_t log_next_idx =
- proxy.read<uint32_t>(addr_next, TheISA::GuestByteOrder);
+ proxy.read<uint32_t>(next->address, TheISA::GuestByteOrder);
if (log_first_idx >= log_buf_len || log_next_idx >= log_buf_len) {
warn("dmesg pointers/length corrupted\n");
@@ -129,7 +130,7 @@
warn("Unexpected dmesg buffer length\n");
return;
}
- proxy.readBlob(addr_lb + log_first_idx, log_buf.data(), length);
+ proxy.readBlob(lb->address + log_first_idx, log_buf.data(), length);
} else {
const int length_2 = log_buf_len - log_first_idx;
if (length_2 < 0 || length_2 + log_next_idx > log_buf.size()) {
@@ -137,8 +138,8 @@
return;
}
length = log_buf_len;
- proxy.readBlob(addr_lb + log_first_idx, log_buf.data(), length_2);
- proxy.readBlob(addr_lb, log_buf.data() + length_2, log_next_idx);
+ proxy.readBlob(lb->address + log_first_idx, log_buf.data(), length_2);
+ proxy.readBlob(lb->address, log_buf.data() + length_2, log_next_idx);
}
// Print dmesg buffer content
diff --git a/src/mem/abstract_mem.cc b/src/mem/abstract_mem.cc
index 9323f61..678527e 100644
--- a/src/mem/abstract_mem.cc
+++ b/src/mem/abstract_mem.cc
@@ -80,7 +80,7 @@
auto *object = Loader::createObjectFile(file, true);
fatal_if(!object, "%s: Could not load %s.", name(), file);
- panic_if(!object->loadGlobalSymbols(Loader::debugSymbolTable),
+ panic_if(!object->loadGlobalSymbols(&Loader::debugSymbolTable),
"%s: Could not load symbols from %s.", name(), file);
Loader::MemoryImage image = object->buildImage();
diff --git a/src/mem/cache/prefetch/pif.cc b/src/mem/cache/prefetch/pif.cc
index 6a2983b..c557bd2 100644
--- a/src/mem/cache/prefetch/pif.cc
+++ b/src/mem/cache/prefetch/pif.cc
@@ -75,12 +75,12 @@
Addr blk_distance = distanceFromTrigger(pc, log_blk_size);
bool hit = (pc > trigger) ?
- (succ.size() >= blk_distance) : (prec.size() >= blk_distance);
+ (succ.size() > blk_distance) : (prec.size() > blk_distance);
if (hit && update) {
if (pc > trigger) {
- succ[blk_distance - 1] = true;
+ succ[blk_distance] = true;
} else if (pc < trigger) {
- prec[blk_distance - 1] = true;
+ prec[blk_distance] = true;
}
}
return hit;
@@ -93,9 +93,9 @@
Addr blk_distance = distanceFromTrigger(target, log_blk_size);
bool hit = false;
if (target > trigger) {
- hit = blk_distance <= succ.size() && succ[blk_distance - 1];
+ hit = blk_distance < succ.size() && succ[blk_distance];
} else if (target < trigger) {
- hit = blk_distance <= prec.size() && succ[blk_distance - 1];
+ hit = blk_distance < prec.size() && prec[blk_distance];
} else {
hit = true;
}
@@ -134,6 +134,7 @@
// First access to the prefetcher
if (temporalCompactor.size() == 0) {
spatialCompactor = CompactorEntry(pc, precSize, succSize);
+ temporalCompactor.push_back(spatialCompactor);
} else {
// If the PC of the instruction retired is in the same spatial region
// than the last trigger address, update the bit vectors based on the
@@ -195,12 +196,16 @@
PIF::calculatePrefetch(const PrefetchInfo &pfi,
std::vector<AddrPriority> &addresses)
{
- const Addr addr = pfi.getAddr();
+ if (!pfi.hasPC()) {
+ return;
+ }
+
+ const Addr pc = pfi.getPC();
// First check if the access has been prefetched, this is done by
// comparing the access against the active Stream Address Buffers
for (auto &sabEntry : streamAddressBuffer) {
- if (sabEntry->hasAddress(addr, lBlkSize)) {
+ if (sabEntry->hasAddress(pc, lBlkSize)) {
sabEntry++;
sabEntry->getPredictedAddresses(lBlkSize, addresses);
// We are done
@@ -210,7 +215,7 @@
// Check if a valid entry in the 'index' table is found and allocate a new
// active prediction stream
- IndexEntry *idx_entry = index.findEntry(addr, /* unused */ false);
+ IndexEntry *idx_entry = index.findEntry(pc, /* unused */ false);
if (idx_entry != nullptr) {
index.accessEntry(idx_entry);
diff --git a/src/mem/ruby/common/Address.cc b/src/mem/ruby/common/Address.cc
index 40ce0fe..39de974 100644
--- a/src/mem/ruby/common/Address.cc
+++ b/src/mem/ruby/common/Address.cc
@@ -56,6 +56,12 @@
return mbits<Addr>(addr, 63, RubySystem::getBlockSizeBits());
}
+Addr
+makeLineAddress(Addr addr, int cacheLineBits)
+{
+ return maskLowOrderBits(addr, cacheLineBits);
+}
+
// returns the next stride address based on line address
Addr
makeNextStrideAddress(Addr addr, int stride)
diff --git a/src/mem/ruby/common/Address.hh b/src/mem/ruby/common/Address.hh
index 30682fa..e5e320f 100644
--- a/src/mem/ruby/common/Address.hh
+++ b/src/mem/ruby/common/Address.hh
@@ -40,6 +40,7 @@
Addr maskLowOrderBits(Addr addr, unsigned int number);
Addr getOffset(Addr addr);
Addr makeLineAddress(Addr addr);
+Addr makeLineAddress(Addr addr, int cacheLineBits);
Addr makeNextStrideAddress(Addr addr, int stride);
std::string printAddress(Addr addr);
diff --git a/src/mem/ruby/network/MessageBuffer.cc b/src/mem/ruby/network/MessageBuffer.cc
index f5562dc..3db8515 100644
--- a/src/mem/ruby/network/MessageBuffer.cc
+++ b/src/mem/ruby/network/MessageBuffer.cc
@@ -394,6 +394,42 @@
m_stall_count++;
}
+bool
+MessageBuffer::hasStalledMsg(Addr addr) const
+{
+ return (m_stall_msg_map.count(addr) != 0);
+}
+
+void
+MessageBuffer::deferEnqueueingMessage(Addr addr, MsgPtr message)
+{
+ DPRINTF(RubyQueue, "Deferring enqueueing message: %s, Address %#x\n",
+ *(message.get()), addr);
+ (m_deferred_msg_map[addr]).push_back(message);
+}
+
+void
+MessageBuffer::enqueueDeferredMessages(Addr addr, Tick curTime, Tick delay)
+{
+ assert(!isDeferredMsgMapEmpty(addr));
+ std::vector<MsgPtr>& msg_vec = m_deferred_msg_map[addr];
+ assert(msg_vec.size() > 0);
+
+ // enqueue all deferred messages associated with this address
+ for (MsgPtr m : msg_vec) {
+ enqueue(m, curTime, delay);
+ }
+
+ msg_vec.clear();
+ m_deferred_msg_map.erase(addr);
+}
+
+bool
+MessageBuffer::isDeferredMsgMapEmpty(Addr addr) const
+{
+ return m_deferred_msg_map.count(addr) == 0;
+}
+
void
MessageBuffer::print(ostream& out) const
{
diff --git a/src/mem/ruby/network/MessageBuffer.hh b/src/mem/ruby/network/MessageBuffer.hh
index 0e11529..8abf3bd 100644
--- a/src/mem/ruby/network/MessageBuffer.hh
+++ b/src/mem/ruby/network/MessageBuffer.hh
@@ -51,6 +51,7 @@
#include <functional>
#include <iostream>
#include <string>
+#include <unordered_map>
#include <vector>
#include "base/trace.hh"
@@ -73,6 +74,8 @@
void reanalyzeMessages(Addr addr, Tick current_time);
void reanalyzeAllMessages(Tick current_time);
void stallMessage(Addr addr, Tick current_time);
+ // return true if the stall map has a message of this address
+ bool hasStalledMsg(Addr addr) const;
// TRUE if head of queue timestamp <= SystemTime
bool isReady(Tick current_time) const;
@@ -113,6 +116,18 @@
void enqueue(MsgPtr message, Tick curTime, Tick delta);
+ // Defer enqueueing a message to a later cycle by putting it aside and not
+ // enqueueing it in this cycle
+ // The corresponding controller will need to explicitly enqueue the
+ // deferred message into the message buffer. Otherwise, the message will
+ // be lost.
+ void deferEnqueueingMessage(Addr addr, MsgPtr message);
+
+ // enqueue all previously deferred messages that are associated with the
+ // input address
+ void enqueueDeferredMessages(Addr addr, Tick curTime, Tick delay);
+ bool isDeferredMsgMapEmpty(Addr addr) const;
+
//! Updates the delay cycles of the message at the head of the queue,
//! removes it from the queue and returns its total delay.
Tick dequeue(Tick current_time, bool decrement_messages = true);
@@ -192,6 +207,14 @@
StallMsgMapType m_stall_msg_map;
/**
+ * A map from line addresses to corresponding vectors of messages that
+ * are deferred for enqueueing. Messages in this map are waiting to be
+ * enqueued into the message buffer.
+ */
+ typedef std::unordered_map<Addr, std::vector<MsgPtr>> DeferredMsgMapType;
+ DeferredMsgMapType m_deferred_msg_map;
+
+ /**
* Current size of the stall map.
* Track the number of messages held in stall map lists. This is used to
* ensure that if the buffer is finite-sized, it blocks further requests
diff --git a/src/mem/ruby/network/Network.cc b/src/mem/ruby/network/Network.cc
index 57834f2..982b57e 100644
--- a/src/mem/ruby/network/Network.cc
+++ b/src/mem/ruby/network/Network.cc
@@ -55,9 +55,35 @@
m_virtual_networks = p->number_of_virtual_networks;
m_control_msg_size = p->control_msg_size;
- // Total nodes/controllers in network
+ // Populate localNodeVersions with the version of each MachineType in
+ // this network. This will be used to compute a global to local ID.
+ // Do this by looking at the ext_node for each ext_link. There is one
+ // ext_node per ext_link and it points to an AbstractController.
+ // For RubySystems with one network global and local ID are the same.
+ std::unordered_map<MachineType, std::vector<NodeID>> localNodeVersions;
+ for (auto &it : params()->ext_links) {
+ AbstractController *cntrl = it->params()->ext_node;
+ localNodeVersions[cntrl->getType()].push_back(cntrl->getVersion());
+ }
+
+ // Compute a local ID for each MachineType using the same order as SLICC
+ NodeID local_node_id = 0;
+ for (int i = 0; i < MachineType_base_level(MachineType_NUM); ++i) {
+ MachineType mach = static_cast<MachineType>(i);
+ if (localNodeVersions.count(mach)) {
+ for (auto &ver : localNodeVersions.at(mach)) {
+ // Get the global ID Ruby will pass around
+ NodeID global_node_id = MachineType_base_number(mach) + ver;
+ globalToLocalMap.emplace(global_node_id, local_node_id);
+ ++local_node_id;
+ }
+ }
+ }
+
+ // Total nodes/controllers in network is equal to the local node count
// Must make sure this is called after the State Machine constructors
- m_nodes = MachineType_base_number(MachineType_NUM);
+ m_nodes = local_node_id;
+
assert(m_nodes != 0);
assert(m_virtual_networks != 0);
@@ -158,11 +184,11 @@
}
void
-Network::checkNetworkAllocation(NodeID id, bool ordered,
+Network::checkNetworkAllocation(NodeID local_id, bool ordered,
int network_num,
std::string vnet_type)
{
- fatal_if(id >= m_nodes, "Node ID is out of range");
+ fatal_if(local_id >= m_nodes, "Node ID is out of range");
fatal_if(network_num >= m_virtual_networks, "Network id is out of range");
if (ordered) {
@@ -174,25 +200,29 @@
void
-Network::setToNetQueue(NodeID id, bool ordered, int network_num,
+Network::setToNetQueue(NodeID global_id, bool ordered, int network_num,
std::string vnet_type, MessageBuffer *b)
{
- checkNetworkAllocation(id, ordered, network_num, vnet_type);
- while (m_toNetQueues[id].size() <= network_num) {
- m_toNetQueues[id].push_back(nullptr);
+ NodeID local_id = getLocalNodeID(global_id);
+ checkNetworkAllocation(local_id, ordered, network_num, vnet_type);
+
+ while (m_toNetQueues[local_id].size() <= network_num) {
+ m_toNetQueues[local_id].push_back(nullptr);
}
- m_toNetQueues[id][network_num] = b;
+ m_toNetQueues[local_id][network_num] = b;
}
void
-Network::setFromNetQueue(NodeID id, bool ordered, int network_num,
+Network::setFromNetQueue(NodeID global_id, bool ordered, int network_num,
std::string vnet_type, MessageBuffer *b)
{
- checkNetworkAllocation(id, ordered, network_num, vnet_type);
- while (m_fromNetQueues[id].size() <= network_num) {
- m_fromNetQueues[id].push_back(nullptr);
+ NodeID local_id = getLocalNodeID(global_id);
+ checkNetworkAllocation(local_id, ordered, network_num, vnet_type);
+
+ while (m_fromNetQueues[local_id].size() <= network_num) {
+ m_fromNetQueues[local_id].push_back(nullptr);
}
- m_fromNetQueues[id][network_num] = b;
+ m_fromNetQueues[local_id][network_num] = b;
}
NodeID
@@ -212,3 +242,10 @@
}
return MachineType_base_count(mtype);
}
+
+NodeID
+Network::getLocalNodeID(NodeID global_id) const
+{
+ assert(globalToLocalMap.count(global_id));
+ return globalToLocalMap.at(global_id);
+}
diff --git a/src/mem/ruby/network/Network.hh b/src/mem/ruby/network/Network.hh
index 606e670..bba0c5e 100644
--- a/src/mem/ruby/network/Network.hh
+++ b/src/mem/ruby/network/Network.hh
@@ -90,13 +90,13 @@
static uint32_t MessageSizeType_to_int(MessageSizeType size_type);
// returns the queue requested for the given component
- void setToNetQueue(NodeID id, bool ordered, int netNumber,
+ void setToNetQueue(NodeID global_id, bool ordered, int netNumber,
std::string vnet_type, MessageBuffer *b);
- virtual void setFromNetQueue(NodeID id, bool ordered, int netNumber,
+ virtual void setFromNetQueue(NodeID global_id, bool ordered, int netNumber,
std::string vnet_type, MessageBuffer *b);
- virtual void checkNetworkAllocation(NodeID id, bool ordered,
- int network_num, std::string vnet_type);
+ virtual void checkNetworkAllocation(NodeID local_id, bool ordered,
+ int network_num, std::string vnet_type);
virtual void makeExtOutLink(SwitchID src, NodeID dest, BasicLink* link,
const NetDest& routing_table_entry) = 0;
@@ -140,6 +140,8 @@
return RubyDummyPort::instance();
}
+ NodeID getLocalNodeID(NodeID global_id) const;
+
protected:
// Private copy constructor and assignment operator
Network(const Network& obj);
@@ -182,6 +184,10 @@
AddrRangeList ranges;
};
std::unordered_multimap<MachineType, AddrMapNode> addrMap;
+
+ // Global NodeID to local node map. If there are not multiple networks in
+ // the same RubySystem, this is a one-to-one mapping of global to local.
+ std::unordered_map<NodeID, NodeID> globalToLocalMap;
};
inline std::ostream&
diff --git a/src/mem/ruby/network/garnet2.0/GarnetNetwork.cc b/src/mem/ruby/network/garnet2.0/GarnetNetwork.cc
index 1eff921..a88302b 100644
--- a/src/mem/ruby/network/garnet2.0/GarnetNetwork.cc
+++ b/src/mem/ruby/network/garnet2.0/GarnetNetwork.cc
@@ -146,10 +146,11 @@
*/
void
-GarnetNetwork::makeExtInLink(NodeID src, SwitchID dest, BasicLink* link,
+GarnetNetwork::makeExtInLink(NodeID global_src, SwitchID dest, BasicLink* link,
const NetDest& routing_table_entry)
{
- assert(src < m_nodes);
+ NodeID local_src = getLocalNodeID(global_src);
+ assert(local_src < m_nodes);
GarnetExtLink* garnet_link = safe_cast<GarnetExtLink*>(link);
@@ -163,7 +164,7 @@
PortDirection dst_inport_dirn = "Local";
m_routers[dest]->addInPort(dst_inport_dirn, net_link, credit_link);
- m_nis[src]->addOutPort(net_link, credit_link, dest);
+ m_nis[local_src]->addOutPort(net_link, credit_link, dest);
}
/*
@@ -173,10 +174,12 @@
*/
void
-GarnetNetwork::makeExtOutLink(SwitchID src, NodeID dest, BasicLink* link,
- const NetDest& routing_table_entry)
+GarnetNetwork::makeExtOutLink(SwitchID src, NodeID global_dest,
+ BasicLink* link,
+ const NetDest& routing_table_entry)
{
- assert(dest < m_nodes);
+ NodeID local_dest = getLocalNodeID(global_dest);
+ assert(local_dest < m_nodes);
assert(src < m_routers.size());
assert(m_routers[src] != NULL);
@@ -194,7 +197,7 @@
m_routers[src]->addOutPort(src_outport_dirn, net_link,
routing_table_entry,
link->m_weight, credit_link);
- m_nis[dest]->addInPort(net_link, credit_link);
+ m_nis[local_dest]->addInPort(net_link, credit_link);
}
/*
@@ -233,9 +236,11 @@
// Get ID of router connected to a NI.
int
-GarnetNetwork::get_router_id(int ni)
+GarnetNetwork::get_router_id(int global_ni)
{
- return m_nis[ni]->get_router_id();
+ NodeID local_ni = getLocalNodeID(global_ni);
+
+ return m_nis[local_ni]->get_router_id();
}
void
@@ -415,6 +420,20 @@
}
void
+GarnetNetwork::resetStats()
+{
+ for (int i = 0; i < m_routers.size(); i++) {
+ m_routers[i]->resetStats();
+ }
+ for (int i = 0; i < m_networklinks.size(); i++) {
+ m_networklinks[i]->resetStats();
+ }
+ for (int i = 0; i < m_creditlinks.size(); i++) {
+ m_creditlinks[i]->resetStats();
+ }
+}
+
+void
GarnetNetwork::print(ostream& out) const
{
out << "[GarnetNetwork]";
diff --git a/src/mem/ruby/network/garnet2.0/GarnetNetwork.hh b/src/mem/ruby/network/garnet2.0/GarnetNetwork.hh
index 9acbeef..3821dd8 100644
--- a/src/mem/ruby/network/garnet2.0/GarnetNetwork.hh
+++ b/src/mem/ruby/network/garnet2.0/GarnetNetwork.hh
@@ -101,6 +101,7 @@
// Stats
void collateStats();
void regStats();
+ void resetStats();
void print(std::ostream& out) const;
// increment counters
diff --git a/src/mem/ruby/network/garnet2.0/Router.cc b/src/mem/ruby/network/garnet2.0/Router.cc
index 14c0e84..73b7dce 100644
--- a/src/mem/ruby/network/garnet2.0/Router.cc
+++ b/src/mem/ruby/network/garnet2.0/Router.cc
@@ -215,10 +215,8 @@
void
Router::resetStats()
{
- for (int j = 0; j < m_virtual_networks; j++) {
- for (int i = 0; i < m_input_unit.size(); i++) {
+ for (int i = 0; i < m_input_unit.size(); i++) {
m_input_unit[i]->resetStats();
- }
}
crossbarSwitch.resetStats();
diff --git a/src/mem/ruby/network/simple/SimpleNetwork.cc b/src/mem/ruby/network/simple/SimpleNetwork.cc
index 84817e4..d3b5515 100644
--- a/src/mem/ruby/network/simple/SimpleNetwork.cc
+++ b/src/mem/ruby/network/simple/SimpleNetwork.cc
@@ -83,27 +83,30 @@
// From a switch to an endpoint node
void
-SimpleNetwork::makeExtOutLink(SwitchID src, NodeID dest, BasicLink* link,
- const NetDest& routing_table_entry)
+SimpleNetwork::makeExtOutLink(SwitchID src, NodeID global_dest,
+ BasicLink* link,
+ const NetDest& routing_table_entry)
{
- assert(dest < m_nodes);
+ NodeID local_dest = getLocalNodeID(global_dest);
+ assert(local_dest < m_nodes);
assert(src < m_switches.size());
assert(m_switches[src] != NULL);
SimpleExtLink *simple_link = safe_cast<SimpleExtLink*>(link);
- m_switches[src]->addOutPort(m_fromNetQueues[dest], routing_table_entry,
- simple_link->m_latency,
+ m_switches[src]->addOutPort(m_fromNetQueues[local_dest],
+ routing_table_entry, simple_link->m_latency,
simple_link->m_bw_multiplier);
}
// From an endpoint node to a switch
void
-SimpleNetwork::makeExtInLink(NodeID src, SwitchID dest, BasicLink* link,
+SimpleNetwork::makeExtInLink(NodeID global_src, SwitchID dest, BasicLink* link,
const NetDest& routing_table_entry)
{
- assert(src < m_nodes);
- m_switches[dest]->addInPort(m_toNetQueues[src]);
+ NodeID local_src = getLocalNodeID(global_src);
+ assert(local_src < m_nodes);
+ m_switches[dest]->addInPort(m_toNetQueues[local_src]);
}
// From a switch to a switch
diff --git a/src/mem/ruby/protocol/MOESI_CMP_directory-L1cache.sm b/src/mem/ruby/protocol/MOESI_CMP_directory-L1cache.sm
index 5a31d28..15bbdd3 100644
--- a/src/mem/ruby/protocol/MOESI_CMP_directory-L1cache.sm
+++ b/src/mem/ruby/protocol/MOESI_CMP_directory-L1cache.sm
@@ -74,19 +74,20 @@
I, AccessPermission:Invalid, desc="Idle";
S, AccessPermission:Read_Only, desc="Shared";
O, AccessPermission:Read_Only, desc="Owned";
- M, AccessPermission:Read_Only, desc="Modified (dirty)";
- M_W, AccessPermission:Read_Only, desc="Modified (dirty)";
+ M, AccessPermission:Read_Write, desc="Modified (dirty)";
+ M_W, AccessPermission:Read_Write, desc="Modified (dirty)";
MM, AccessPermission:Read_Write, desc="Modified (dirty and locally modified)";
MM_W, AccessPermission:Read_Write, desc="Modified (dirty and locally modified)";
// Transient States
+ // Notice we still have a valid copy of the block in most states
IM, AccessPermission:Busy, "IM", desc="Issued GetX";
+ IS, AccessPermission:Busy, "IS", desc="Issued GetS";
SM, AccessPermission:Read_Only, "SM", desc="Issued GetX, we still have an old copy of the line";
OM, AccessPermission:Read_Only, "SM", desc="Issued GetX, received data";
- IS, AccessPermission:Busy, "IS", desc="Issued GetS";
- SI, AccessPermission:Busy, "OI", desc="Issued PutS, waiting for ack";
- OI, AccessPermission:Busy, "OI", desc="Issued PutO, waiting for ack";
- MI, AccessPermission:Busy, "MI", desc="Issued PutX, waiting for ack";
+ SI, AccessPermission:Read_Only, "OI", desc="Issued PutS, waiting for ack";
+ OI, AccessPermission:Read_Only, "OI", desc="Issued PutO, waiting for ack";
+ MI, AccessPermission:Read_Write, "MI", desc="Issued PutX, waiting for ack";
II, AccessPermission:Busy, "II", desc="Issued PutX/O, saw Fwd_GETS or Fwd_GETX, waiting for ack";
}
@@ -215,7 +216,6 @@
((cache_entry.CacheState != State:O) && (state == State:O)) ) {
cache_entry.CacheState := state;
- sequencer.checkCoherence(addr);
}
else {
cache_entry.CacheState := state;
@@ -226,13 +226,13 @@
AccessPermission getAccessPermission(Addr addr) {
TBE tbe := TBEs[addr];
if(is_valid(tbe)) {
- DPRINTF(RubySlicc, "%s\n", L1Cache_State_to_permission(tbe.TBEState));
+ DPRINTF(RubySlicc, "%s,%s\n", tbe.TBEState, L1Cache_State_to_permission(tbe.TBEState));
return L1Cache_State_to_permission(tbe.TBEState);
}
Entry cache_entry := getCacheEntry(addr);
if(is_valid(cache_entry)) {
- DPRINTF(RubySlicc, "%s\n", L1Cache_State_to_permission(cache_entry.CacheState));
+ DPRINTF(RubySlicc, "%s,%s\n", cache_entry.CacheState, L1Cache_State_to_permission(cache_entry.CacheState));
return L1Cache_State_to_permission(cache_entry.CacheState);
}
@@ -271,8 +271,10 @@
}
TBE tbe := TBEs[addr];
- num_functional_writes := num_functional_writes +
- testAndWrite(addr, tbe.DataBlk, pkt);
+ if (is_valid(tbe)){
+ num_functional_writes := num_functional_writes +
+ testAndWrite(addr, tbe.DataBlk, pkt);
+ }
return num_functional_writes;
}
diff --git a/src/mem/ruby/protocol/MOESI_CMP_directory-L2cache.sm b/src/mem/ruby/protocol/MOESI_CMP_directory-L2cache.sm
index 18e3b89..9894107 100644
--- a/src/mem/ruby/protocol/MOESI_CMP_directory-L2cache.sm
+++ b/src/mem/ruby/protocol/MOESI_CMP_directory-L2cache.sm
@@ -66,14 +66,13 @@
state_declaration(State, desc="L2 Cache states", default="L2Cache_State_I") {
// Stable states
- NP, AccessPermission:Invalid, desc="Not Present";
I, AccessPermission:Invalid, desc="Invalid";
- ILS, AccessPermission:Invalid, desc="Idle/NP, but local sharers exist";
- ILX, AccessPermission:Invalid, desc="Idle/NP, but local exclusive exists";
- ILO, AccessPermission:Invalid, desc="Idle/NP, but local owner exists";
- ILOX, AccessPermission:Invalid, desc="Idle/NP, but local owner exists and chip is exclusive";
- ILOS, AccessPermission:Invalid, desc="Idle/NP, but local owner exists and local sharers as well";
- ILOSX, AccessPermission:Invalid, desc="Idle/NP, but local owner exists, local sharers exist, chip is exclusive ";
+ ILS, AccessPermission:Invalid, desc="Not present, but local sharers exist";
+ ILX, AccessPermission:Invalid, desc="Not present, but local exclusive exists";
+ ILO, AccessPermission:Invalid, desc="Not present, but local owner exists";
+ ILOX, AccessPermission:Invalid, desc="Not present, but local owner exists and chip is exclusive";
+ ILOS, AccessPermission:Invalid, desc="Not present, but local owner exists and local sharers as well";
+ ILOSX, AccessPermission:Invalid, desc="Not present, but local owner exists, local sharers exist, chip is exclusive ";
S, AccessPermission:Read_Only, desc="Shared, no local sharers";
O, AccessPermission:Read_Only, desc="Owned, no local sharers";
OLS, AccessPermission:Read_Only, desc="Owned with local sharers";
@@ -86,23 +85,25 @@
IFGX, AccessPermission:Busy, desc="Blocked, forwarded global GETX to local owner/exclusive. No other on-chip invs needed";
IFGS, AccessPermission:Busy, desc="Blocked, forwarded global GETS to local owner";
ISFGS, AccessPermission:Busy, desc="Blocked, forwarded global GETS to local owner, local sharers exist";
- IFGXX, AccessPermission:Busy, desc="Blocked, forwarded global GETX to local owner but may need acks from other sharers";
- OLSF, AccessPermission:Busy, desc="Blocked, got Fwd_GETX with local sharers, waiting for local inv acks";
+ IFGXX, AccessPermission:Busy, desc="Blocked, forwarded global GETX to local owner, waiting for data and acks from other sharers";
+ IFGXXD, AccessPermission:Read_Only, desc="Blocked, was IFGXX and received data, still waiting for acks";
+ OLSF, AccessPermission:Read_Only, desc="Blocked, got Fwd_GETX with local sharers, waiting for local inv acks";
- // writebacks
+ // Writebacks
+ // Notice we still have a valid copy of the block in some states
ILOW, AccessPermission:Busy, desc="local WB request, was ILO";
ILOXW, AccessPermission:Busy, desc="local WB request, was ILOX";
ILOSW, AccessPermission:Busy, desc="local WB request, was ILOS";
ILOSXW, AccessPermission:Busy, desc="local WB request, was ILOSX";
- SLSW, AccessPermission:Busy, desc="local WB request, was SLS";
- OLSW, AccessPermission:Busy, desc="local WB request, was OLS";
ILSW, AccessPermission:Busy, desc="local WB request, was ILS";
IW, AccessPermission:Busy, desc="local WB request from only sharer, was ILS";
- OW, AccessPermission:Busy, desc="local WB request from only sharer, was OLS";
- SW, AccessPermission:Busy, desc="local WB request from only sharer, was SLS";
- OXW, AccessPermission:Busy, desc="local WB request from only sharer, was OLSX";
- OLSXW, AccessPermission:Busy, desc="local WB request from sharer, was OLSX";
ILXW, AccessPermission:Busy, desc="local WB request, was ILX";
+ SLSW, AccessPermission:Read_Only, desc="local WB request, was SLS";
+ OLSW, AccessPermission:Read_Only, desc="local WB request, was OLS";
+ OW, AccessPermission:Read_Only, desc="local WB request from only sharer, was OLS";
+ SW, AccessPermission:Read_Only, desc="local WB request from only sharer, was SLS";
+ OXW, AccessPermission:Read_Only, desc="local WB request from only sharer, was OLSX";
+ OLSXW, AccessPermission:Read_Only, desc="local WB request from sharer, was OLSX";
IFLS, AccessPermission:Busy, desc="Blocked, forwarded local GETS to _some_ local sharer";
IFLO, AccessPermission:Busy, desc="Blocked, forwarded local GETS to local owner";
@@ -111,29 +112,34 @@
IFLOSX, AccessPermission:Busy, desc="Blocked, forwarded local GETS to local owner w/ other sharers, chip is exclusive";
IFLXO, AccessPermission:Busy, desc="Blocked, forwarded local GETX to local owner with other sharers, chip is exclusive";
+ // Some states hold valid data while waiting for acks
IGS, AccessPermission:Busy, desc="Semi-blocked, issued local GETS to directory";
IGM, AccessPermission:Busy, desc="Blocked, issued local GETX to directory. Need global acks and data";
IGMLS, AccessPermission:Busy, desc="Blocked, issued local GETX to directory but may need to INV local sharers";
- IGMO, AccessPermission:Busy, desc="Blocked, have data for local GETX but need all acks";
+ IGMO, AccessPermission:Read_Only, desc="Blocked, have data for local GETX but need all acks";
+ IGMOU, AccessPermission:Busy, desc="Blocked, responded to GETX, waiting unblock";
IGMIO, AccessPermission:Busy, desc="Blocked, issued local GETX, local owner with possible local sharer, may need to INV";
OGMIO, AccessPermission:Busy, desc="Blocked, issued local GETX, was owner, may need to INV";
- IGMIOF, AccessPermission:Busy, desc="Blocked, issued local GETX, local owner, waiting for global acks, got Fwd_GETX";
+ IGMIOF, AccessPermission:Busy, desc="Blocked, issued local GETX, local owner, waiting for global acks, got Fwd_GETX";
+ IGMIOFD, AccessPermission:Read_Only, desc="Blocked, was IGMIOF but received data, still waiting acks";
IGMIOFS, AccessPermission:Busy, desc="Blocked, issued local GETX, local owner, waiting for global acks, got Fwd_GETS";
OGMIOF, AccessPermission:Busy, desc="Blocked, issued local GETX, was owner, waiting for global acks, got Fwd_GETX";
+ // Have valid data in some of these transient states
II, AccessPermission:Busy, desc="Blocked, handling invalidations";
MM, AccessPermission:Busy, desc="Blocked, was M satisfying local GETX";
SS, AccessPermission:Busy, desc="Blocked, was S satisfying local GETS";
OO, AccessPermission:Busy, desc="Blocked, was O satisfying local GETS";
- OLSS, AccessPermission:Busy, desc="Blocked, satisfying local GETS";
- OLSXS, AccessPermission:Busy, desc="Blocked, satisfying local GETS";
- SLSS, AccessPermission:Busy, desc="Blocked, satisfying local GETS";
+ OLSS, AccessPermission:Read_Only, desc="Blocked, satisfying local GETS";
+ OLSXS, AccessPermission:Read_Only, desc="Blocked, satisfying local GETS";
+ SLSS, AccessPermission:Read_Only, desc="Blocked, satisfying local GETS";
- OI, AccessPermission:Busy, desc="Blocked, doing writeback, was O";
- MI, AccessPermission:Busy, desc="Blocked, doing writeback, was M";
+ // Have valid data in most of these states
+ OI, AccessPermission:Read_Only, desc="Blocked, doing writeback, was O";
+ MI, AccessPermission:Read_Write, desc="Blocked, doing writeback, was M";
MII, AccessPermission:Busy, desc="Blocked, doing writeback, was M, got Fwd_GETX";
- OLSI, AccessPermission:Busy, desc="Blocked, doing writeback, was OLS";
- ILSI, AccessPermission:Busy, desc="Blocked, doing writeback, was OLS got Fwd_GETX";
+ OLSI, AccessPermission:Read_Only, desc="Blocked, doing writeback, was OLS";
+ ILSI, AccessPermission:Read_Only, desc="Blocked, doing writeback, was OLS got Fwd_GETX";
// DMA blocking states
ILOSD, AccessPermission:Busy, desc="Blocked, waiting for DMA ack";
@@ -324,11 +330,22 @@
void copyDirToCache(Entry cache_entry, Addr addr) {
assert(is_valid(cache_entry));
DirEntry dir_entry := getDirEntry(addr);
+ assert(is_valid(dir_entry));
cache_entry.Sharers := dir_entry.Sharers;
cache_entry.Owner := dir_entry.Owner;
cache_entry.OwnerValid := dir_entry.OwnerValid;
}
+ bool isDirEntryClean(DirEntry dir_entry) {
+ assert(is_valid(dir_entry));
+ return (dir_entry.Sharers.count() == 0) &&
+ (dir_entry.OwnerValid == false);
+ }
+
+ bool isCacheEntryClean(Entry cache_entry) {
+ return (cache_entry.Sharers.count() == 0) &&
+ (cache_entry.OwnerValid == false);
+ }
void recordLocalSharerInDir(Entry cache_entry, Addr addr, MachineID shar_id) {
if (is_valid(cache_entry)) {
@@ -478,7 +495,6 @@
}
State getState(TBE tbe, Entry cache_entry, Addr addr) {
-
if (is_valid(tbe)) {
return tbe.TBEState;
} else if (is_valid(cache_entry)) {
@@ -487,7 +503,7 @@
DirEntry dir_entry := getDirEntry(addr);
return dir_entry.DirState;
} else {
- return State:NP;
+ return State:I;
}
}
@@ -496,45 +512,39 @@
}
void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+ // Consistency checks
+
+ // Either in the cache, in the directory, or invalid
assert((localDirectory.isTagPresent(addr) && L2cache.isTagPresent(addr)) == false);
+ if (state == State:I) {
+ assert(L2cache.isTagPresent(addr) == false);
+ assert(is_valid(cache_entry) == false);
+ assert(localDirectory.isTagPresent(addr) == false);
+ } else if ( (state == State:M) ||
+ (state == State:O) ||
+ (state == State:S) ||
+ (state == State:OLS) ||
+ (state == State:OLSX) ||
+ (state == State:SLS)) {
+ assert(is_valid(cache_entry));
+ assert(L2cache.isTagPresent(addr));
+ } else if ( (state == State:ILS) ||
+ (state == State:ILX) ||
+ (state == State:ILO) ||
+ (state == State:ILOX) ||
+ (state == State:ILOS) ||
+ (state == State:ILOSX)) {
+ assert(localDirectory.isTagPresent(addr));
+ }
+
+ // Update state
if (is_valid(tbe)) {
tbe.TBEState := state;
}
- if (
- (state == State:M) ||
- (state == State:O) ||
- (state == State:S) ||
- (state == State:OLS) ||
- (state == State:SLS) ||
- (state == State:OLSX) ||
- (state == State:SLS)
- ) {
- assert(is_valid(cache_entry));
- }
- else if (
- (state == State:ILS) ||
- (state == State:ILX) ||
- (state == State:ILO) ||
- (state == State:ILOX) ||
- (state == State:ILOS) ||
- (state == State:ILOSX)
- ) {
- // assert(isCacheTagPresent(addr) == false);
- }
-
if (is_valid(cache_entry)) {
- if ( ((cache_entry.CacheState != State:M) && (state == State:M)) ||
- ((cache_entry.CacheState != State:S) && (state == State:S)) ||
- ((cache_entry.CacheState != State:O) && (state == State:O)) ) {
- cache_entry.CacheState := state;
- // disable Coherence Checker for now
- // sequencer.checkCoherence(addr);
- }
- else {
- cache_entry.CacheState := state;
- }
+ cache_entry.CacheState := state;
}
else if (localDirectory.isTagPresent(addr)) {
DirEntry dir_entry := getDirEntry(addr);
@@ -545,13 +555,13 @@
AccessPermission getAccessPermission(Addr addr) {
TBE tbe := TBEs[addr];
if(is_valid(tbe)) {
- DPRINTF(RubySlicc, "%s\n", L2Cache_State_to_permission(tbe.TBEState));
+ DPRINTF(RubySlicc, "%s,%s\n", tbe.TBEState, L2Cache_State_to_permission(tbe.TBEState));
return L2Cache_State_to_permission(tbe.TBEState);
}
Entry cache_entry := getCacheEntry(addr);
if(is_valid(cache_entry)) {
- DPRINTF(RubySlicc, "%s\n", L2Cache_State_to_permission(cache_entry.CacheState));
+ DPRINTF(RubySlicc, "%s,%s\n", cache_entry.CacheState, L2Cache_State_to_permission(cache_entry.CacheState));
return L2Cache_State_to_permission(cache_entry.CacheState);
}
@@ -567,10 +577,13 @@
void functionalRead(Addr addr, Packet *pkt) {
TBE tbe := TBEs[addr];
+ Entry cache_entry := getCacheEntry(addr);
if(is_valid(tbe)) {
testAndRead(addr, tbe.DataBlk, pkt);
+ } else if (is_valid(cache_entry)) {
+ testAndRead(addr, cache_entry.DataBlk, pkt);
} else {
- testAndRead(addr, getCacheEntry(addr).DataBlk, pkt);
+ error("Block not present!");
}
}
@@ -581,11 +594,14 @@
if(is_valid(tbe)) {
num_functional_writes := num_functional_writes +
testAndWrite(addr, tbe.DataBlk, pkt);
- return num_functional_writes;
}
- num_functional_writes := num_functional_writes +
- testAndWrite(addr, getCacheEntry(addr).DataBlk, pkt);
+ Entry cache_entry := getCacheEntry(addr);
+ if (is_valid(cache_entry)) {
+ num_functional_writes := num_functional_writes +
+ testAndWrite(addr, cache_entry.DataBlk, pkt);
+ }
+
return num_functional_writes;
}
@@ -1560,6 +1576,16 @@
localDirectory.deallocate(address);
}
+ action(checkCacheNoSharersNoOwner, "/ckcache", desc="Remove dir state") {
+ assert(is_valid(cache_entry));
+ assert(isCacheEntryClean(cache_entry));
+ }
+
+ action(removeFromDir, "/rmdir", desc="Remove dir state") {
+ assert(isDirEntryClean(getDirEntry(address)));
+ localDirectory.deallocate(address);
+ }
+
action(zz_recycleGlobalRequestQueue, "\zglb", desc="Send the head of the mandatory queue to the back of the queue.") {
peek(requestNetwork_in, RequestMsg) {
APPEND_TRANSITION_COMMENT(in_msg.Requestor);
@@ -1599,27 +1625,27 @@
// TRANSITIONS
//*****************************************************
- transition({II, IFGX, IFGS, ISFGS, IFGXX, IFLXO, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX, OLSXS, IGS, IGM, IGMLS, IGMO, IGMIO, OGMIO, IGMIOF, OGMIOF, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {L1_PUTO, L1_PUTS, L1_PUTS_only, L1_PUTX}) {
+ transition({II, IFGX, IFGS, ISFGS, IFGXX, IFGXXD, IFLXO, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX, OLSXS, IGS, IGM, IGMLS, IGMO, IGMOU, IGMIO, OGMIO, IGMIOF, IGMIOFD, OGMIOF, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {L1_PUTO, L1_PUTS, L1_PUTS_only, L1_PUTX}) {
st_stallAndWaitL1RequestQueue;
}
- transition({II, IFGX, IFGS, ISFGS, IFGXX, IFLXO, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX, OLSXS, IGS, IGM, IGMLS, IGMO, IGMIO, OGMIO, IGMIOF, OGMIOF, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {L1_GETX, L1_GETS}) {
+ transition({II, IFGX, IFGS, ISFGS, IFGXX, IFGXXD, IFLXO, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX, OLSXS, IGS, IGM, IGMLS, IGMO, IGMOU, IGMIO, OGMIO, IGMIOF, IGMIOFD, OGMIOF, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {L1_GETX, L1_GETS}) {
st_stallAndWaitL1RequestQueue;
}
- transition({IFGX, IFGS, ISFGS, IFGXX, IFLXO, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, ILXW, OW, SW, OXW, OLSXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, IGS, IGM, IGMLS, IGMO, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, L2_Replacement) {
+ transition({IFGX, IFGS, ISFGS, IFGXX, IFGXXD, IFLXO, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, ILXW, OW, SW, OXW, OLSXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, IGS, IGM, IGMLS, IGMO, IGMOU, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, L2_Replacement) {
zz_recycleL1RequestQueue;
}
- transition({IFGX, IFGS, ISFGS, IFGXX, IFLXO, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, IGS, IGM, MM, SS, OO, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {Fwd_GETX, Fwd_GETS, Fwd_DMA}) {
+ transition({IFGX, IFGS, ISFGS, IFGXX, IFGXXD, IFLXO, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, IGS, IGM, MM, SS, OO, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {Fwd_GETX, Fwd_GETS, Fwd_DMA}) {
zz_recycleGlobalRequestQueue;
}
- transition({OGMIO, IGMIO, IGMO}, Fwd_DMA) {
+ transition({OGMIO, IGMIO, IGMO, IGMOU}, Fwd_DMA) {
zz_recycleGlobalRequestQueue;
}
- transition({IFGX, IFGS, ISFGS, IFGXX, IFLXO, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, MM, SS, OO, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {Inv}) {
+ transition({IFGX, IFGS, ISFGS, IFGXX, IFGXXD, IFLXO, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, MM, SS, OO, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {Inv}) {
zz_recycleGlobalRequestQueue;
}
@@ -1628,7 +1654,7 @@
}
// must happened because we forwarded GETX to local exclusive trying to do wb
- transition({I, M, O, ILS, ILOX, OLS, OLSX, SLS, S}, L1_PUTX) {
+ transition({M, O, ILS, ILOX, OLS, OLSX, SLS, S}, L1_PUTX) {
ll_writebackNack;
o_popL1RequestQueue;
}
@@ -1661,16 +1687,6 @@
o_popL1RequestQueue;
}
- // must happened because we got Inv when L1 attempted PUTS
- transition(I, L1_PUTS) {
- ll_writebackNack;
- o_popL1RequestQueue;
- }
-
- transition(I, L1_PUTO) {
- ll_writebackNack;
- o_popL1RequestQueue;
- }
// FORWARDED REQUESTS
@@ -1784,6 +1800,7 @@
i_copyDataToTBE;
c_sendExclusiveDataFromTBEToFwdGETS;
gg_clearLocalSharers;
+ removeFromDir;
s_deallocateTBE;
n_popResponseQueue;
wa_wakeUpDependents;
@@ -1801,6 +1818,7 @@
i_copyDataToTBE;
c_sendDataFromTBEToFwdGETX;
gg_clearLocalSharers;
+ removeFromDir;
s_deallocateTBE;
n_popResponseQueue;
wa_wakeUpDependents;
@@ -1816,32 +1834,31 @@
}
- transition(IFGXX, IntAck) {
+ transition({IFGXX, IFGXXD}, IntAck) {
m_decrementNumberOfMessagesInt;
o_checkForIntCompletion;
n_popResponseQueue;
}
- transition(IFGXX, Data_Exclusive) {
+ transition(IFGXX, Data_Exclusive, IFGXXD) {
i_copyDataToTBE;
m_decrementNumberOfMessagesInt;
o_checkForIntCompletion;
n_popResponseQueue;
}
- transition(IFGXX, All_Acks, I) {
+ transition(IFGXXD, All_Acks, I) {
c_sendDataFromTBEToFwdGETX;
gg_clearLocalSharers;
+ removeFromDir;
s_deallocateTBE;
n_popTriggerQueue;
wa_wakeUpDependents;
}
-
- // transition({O, OX}, Fwd_GETX, I) {
transition(O, Fwd_GETX, I) {
dd_sendDataToFwdGETX;
- y_copyCacheStateToDir;
+ checkCacheNoSharersNoOwner;
rr_deallocateL2CacheBlock;
m_popRequestQueue;
}
@@ -1871,19 +1888,14 @@
transition(M, Fwd_GETX, I) {
dd_sendDataToFwdGETX;
+ checkCacheNoSharersNoOwner;
rr_deallocateL2CacheBlock;
m_popRequestQueue;
}
- // MAKE THIS THE SAME POLICY FOR NOW
-
- // transition(M, Fwd_GETS, O) {
- // dd_sendDataToFwdGETS;
- // m_popRequestQueue;
- // }
-
transition(M, Fwd_GETS, I) {
dd_sendExclusiveDataToFwdGETS;
+ checkCacheNoSharersNoOwner;
rr_deallocateL2CacheBlock;
m_popRequestQueue;
}
@@ -1898,6 +1910,9 @@
i_allocateTBE;
t_recordFwdXID;
ee_sendLocalInv;
+ gg_clearLocalSharers;
+ checkCacheNoSharersNoOwner;
+ rr_deallocateL2CacheBlock;
m_popRequestQueue;
}
@@ -1909,9 +1924,7 @@
transition(OLSF, All_Acks, I) {
c_sendDataFromTBEToFwdGETX;
- gg_clearLocalSharers;
s_deallocateTBE;
- rr_deallocateL2CacheBlock;
n_popTriggerQueue;
wa_wakeUpDependents;
}
@@ -1926,7 +1939,7 @@
m_popRequestQueue;
}
- transition({I,NP}, Inv) {
+ transition(I, Inv) {
i_allocateTBE;
t_recordFwdXID;
e_sendAck;
@@ -1941,6 +1954,7 @@
t_recordFwdXID;
ee_sendLocalInv;
gg_clearLocalSharers;
+ removeFromDir;
m_popRequestQueue;
}
@@ -1970,6 +1984,7 @@
t_recordFwdXID;
e_sendAck;
s_deallocateTBE;
+ checkCacheNoSharersNoOwner;
rr_deallocateL2CacheBlock;
m_popRequestQueue;
}
@@ -2115,12 +2130,12 @@
// LOCAL REQUESTS THAT MUST ISSUE
- transition(NP, {L1_PUTS, L1_PUTX, L1_PUTO}) {
+ transition(I, {L1_PUTS, L1_PUTX, L1_PUTO}) {
ll_writebackNack;
o_popL1RequestQueue;
}
- transition({NP, I}, L1_GETS, IGS) {
+ transition(I, L1_GETS, IGS) {
i_allocateTBE;
s_recordGetSL1ID;
a_issueGETS;
@@ -2128,7 +2143,7 @@
o_popL1RequestQueue;
}
- transition({NP, I}, L1_GETX, IGM) {
+ transition(I, L1_GETX, IGM) {
i_allocateTBE;
s_recordGetXL1ID;
a_issueGETX;
@@ -2176,7 +2191,6 @@
n_popTriggerQueue;
}
- // transition(IGMLS, ExtAck, IGMO) {
transition(IGMLS, ExtAck) {
m_decrementNumberOfMessagesExt;
o_checkForExtCompletion;
@@ -2283,26 +2297,26 @@
n_popTriggerQueue;
}
- transition(IGMIOF, IntAck) {
+ transition({IGMIOF, IGMIOFD}, IntAck) {
m_decrementNumberOfMessagesInt;
o_checkForIntCompletion;
n_popResponseQueue;
}
- transition(IGMIOF, Data_Exclusive) {
+ transition(IGMIOF, Data_Exclusive, IGMIOFD) {
i_copyDataToTBE;
m_decrementNumberOfMessagesInt;
o_checkForIntCompletion;
n_popResponseQueue;
}
- transition(IGMIOF, All_Acks, IGM) {
+ transition(IGMIOFD, All_Acks, IGM) {
gg_clearLocalSharers;
c_sendDataFromTBEToFwdGETX;
n_popTriggerQueue;
}
- transition(IGMIO, All_Acks, IGMO) {
+ transition(IGMIO, All_Acks, IGMOU) {
hh_countLocalSharersExceptL1GETXRequestorInTBE;
ee_issueLocalInvExceptL1RequestorInTBE;
k_forwardLocalGETXToLocalOwner;
@@ -2310,7 +2324,7 @@
n_popTriggerQueue;
}
- transition(OGMIO, All_Acks, IGMO) {
+ transition(OGMIO, All_Acks, IGMOU) {
ee_issueLocalInvExceptL1RequestorInTBE;
c_sendDataFromTBEToL1GETX;
n_popTriggerQueue;
@@ -2372,12 +2386,12 @@
wa_wakeUpDependents;
}
- transition(IGMO, All_Acks) {
+ transition(IGMO, All_Acks, IGMOU) {
c_sendDataFromTBEToL1GETX;
n_popTriggerQueue;
}
- transition(IGMO, Exclusive_Unblock, ILX) {
+ transition(IGMOU, Exclusive_Unblock, ILX) {
g_recordLocalExclusive;
f_sendExclusiveUnblock;
s_deallocateTBE;
@@ -2791,7 +2805,8 @@
// L2 WRITEBACKS
- transition({I, S}, L2_Replacement, I) {
+ transition(S, L2_Replacement, I) {
+ checkCacheNoSharersNoOwner;
rr_deallocateL2CacheBlock;
}
@@ -2885,12 +2900,14 @@
transition({MI, OI}, Writeback_Ack, I) {
qq_sendDataFromTBEToMemory;
+ removeFromDir;
s_deallocateTBE;
n_popResponseQueue;
wa_wakeUpDependents;
}
transition(MII, Writeback_Nack, I) {
+ removeFromDir;
s_deallocateTBE;
n_popResponseQueue;
wa_wakeUpDependents;
@@ -2910,6 +2927,7 @@
transition(MII, Writeback_Ack, I) {
f_sendUnblock;
+ removeFromDir;
s_deallocateTBE;
n_popResponseQueue;
wa_wakeUpDependents;
diff --git a/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm b/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm
index 7faa8e0..03010d5 100644
--- a/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm
+++ b/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm
@@ -56,30 +56,40 @@
MessageBuffer * requestToMemory;
MessageBuffer * responseFromMemory;
+
+ MessageBuffer * triggerQueue;
{
// STATES
state_declaration(State, desc="Directory states", default="Directory_State_I") {
// Base states
I, AccessPermission:Read_Write, desc="Invalid";
- S, AccessPermission:Read_Only, desc="Shared";
+ S, AccessPermission:Read_Write, desc="Shared";
O, AccessPermission:Maybe_Stale, desc="Owner";
M, AccessPermission:Maybe_Stale, desc="Modified";
- IS, AccessPermission:Busy, desc="Blocked, was in idle";
+ // Transient states
+ // The memory has valid data in some of these
+ IS_M, AccessPermission:Read_Write, desc="Blocked, was in I, waiting for mem";
+ IS, AccessPermission:Read_Write, desc="Blocked, was in I, data forwarded";
SS, AccessPermission:Read_Only, desc="Blocked, was in shared";
OO, AccessPermission:Busy, desc="Blocked, was in owned";
MO, AccessPermission:Busy, desc="Blocked, going to owner or maybe modified";
- MM, AccessPermission:Busy, desc="Blocked, going to modified";
+ MM_M, AccessPermission:Read_Only, desc="Blocked, fetching from memory, going to MM";
+ MM, AccessPermission:Busy, desc="Blocked, req or mem data forwarded, going to modified";
MI, AccessPermission:Busy, desc="Blocked on a writeback";
MIS, AccessPermission:Busy, desc="Blocked on a writeback, but don't remove from sharers when received";
OS, AccessPermission:Busy, desc="Blocked on a writeback";
OSS, AccessPermission:Busy, desc="Blocked on a writeback, but don't remove from sharers when received";
- XI_M, AccessPermission:Busy, desc="In a stable state, going to I, waiting for the memory controller";
- XI_U, AccessPermission:Busy, desc="In a stable state, going to I, waiting for an unblock";
- OI_D, AccessPermission:Busy, desc="In O, going to I, waiting for data";
+ // We have valid data in a TBE
+ WBI, AccessPermission:Read_Only, desc="Sent writeback, waiting for memory; will be I";
+ WBS, AccessPermission:Read_Only, desc="Sent writeback, waiting for memory; will be S";
+ XI_M, AccessPermission:Read_Only, desc="Blocked, going to I, waiting for the memory controller";
+ XI_M_U, AccessPermission:Read_Only, desc="Blocked, going to XI_U, waiting for the memory controller";
+ XI_U, AccessPermission:Read_Only, desc="Blocked, going to I, waiting for an unblock";
+ OI_D, AccessPermission:Busy, desc="In O, going to I, waiting for data";
OD, AccessPermission:Busy, desc="In O, waiting for dma ack from L2";
MD, AccessPermission:Busy, desc="In M, waiting for dma ack from L2";
}
@@ -96,12 +106,15 @@
Exclusive_Unblock, desc="The processor become the exclusive owner (E or M) of the line";
Clean_Writeback, desc="The final message as part of a PutX/PutS, no data";
Dirty_Writeback, desc="The final message as part of a PutX/PutS, contains data";
- Memory_Data, desc="Fetched data from memory arrives";
+ Memory_Data_DMA, desc="Fetched data from memory arrives; original requestor is DMA";
+ Memory_Data_Cache, desc="Fetched data from memory arrives; original requestor is Cache";
Memory_Ack, desc="Writeback Ack from memory arrives";
DMA_READ, desc="DMA Read";
- DMA_WRITE, desc="DMA Write";
+ DMA_WRITE_LINE, desc="DMA Write full line";
+ DMA_WRITE_PARTIAL, desc="DMA Write partial line";
DMA_ACK, desc="DMA Ack";
Data, desc="Data to directory";
+ All_Acks, desc="All pending acks, unblocks, etc have been received";
}
// TYPES
@@ -119,6 +132,8 @@
int Len, desc="Length of request";
DataBlock DataBlk, desc="DataBlk";
MachineID Requestor, desc="original requestor";
+ bool WaitingWBAck, desc="DataBlk WB request sent, but no ack from mem yet";
+ bool WaitingDMAAck, desc="DMA ack sent, waiting for unblock";
}
structure(TBETable, external = "yes") {
@@ -128,6 +143,8 @@
bool isPresent(Addr);
}
+ int blockSize, default="RubySystem::getBlockSizeBytes()";
+
// ** OBJECTS **
TBETable TBEs, template="<Directory_TBE>", constructor="m_number_of_TBEs";
@@ -138,79 +155,113 @@
Entry getDirectoryEntry(Addr addr), return_by_pointer="yes" {
Entry dir_entry := static_cast(Entry, "pointer", directory[addr]);
+ assert(is_valid(dir_entry));
+ return dir_entry;
+ }
- if (is_valid(dir_entry)) {
- return dir_entry;
- }
-
- dir_entry := static_cast(Entry, "pointer",
+ Entry allocateDirectoryEntry(Addr addr), return_by_pointer="yes" {
+ Entry dir_entry := static_cast(Entry, "pointer",
directory.allocate(addr, new Entry));
return dir_entry;
}
+ void deallocateDirectoryEntry(Addr addr) {
+ // Always going to transition from a valid state to I when deallocating
+ // Owner and sharers must be clear
+ assert(getDirectoryEntry(addr).DirectoryState != State:I);
+ assert(getDirectoryEntry(addr).Owner.count() == 0);
+ assert(getDirectoryEntry(addr).Sharers.count() == 0);
+
+ directory.deallocate(addr);
+ }
+
State getState(TBE tbe, Addr addr) {
- return getDirectoryEntry(addr).DirectoryState;
+ Entry dir_entry := static_cast(Entry, "pointer", directory[addr]);
+ if (is_valid(dir_entry)) {
+ return dir_entry.DirectoryState;
+ }
+ else {
+ return State:I;
+ }
}
void setState(TBE tbe, Addr addr, State state) {
if (directory.isPresent(addr)) {
- if (state == State:I) {
- assert(getDirectoryEntry(addr).Owner.count() == 0);
- assert(getDirectoryEntry(addr).Sharers.count() == 0);
- }
+ Entry dir_entry := static_cast(Entry, "pointer", directory[addr]);
- if (state == State:S) {
- assert(getDirectoryEntry(addr).Owner.count() == 0);
- }
+ if (is_valid(dir_entry)) {
- if (state == State:O) {
- assert(getDirectoryEntry(addr).Owner.count() == 1);
- assert(getDirectoryEntry(addr).Sharers.isSuperset(getDirectoryEntry(addr).Owner) == false);
- }
+ assert(state != State:I);
- if (state == State:M) {
- assert(getDirectoryEntry(addr).Owner.count() == 1);
- assert(getDirectoryEntry(addr).Sharers.count() == 0);
- }
+ if (state == State:S) {
+ assert(dir_entry.Owner.count() == 0);
+ }
- if ((state != State:SS) && (state != State:OO)) {
- assert(getDirectoryEntry(addr).WaitingUnblocks == 0);
- }
+ if (state == State:O) {
+ assert(dir_entry.Owner.count() == 1);
+ assert(dir_entry.Sharers.isSuperset(dir_entry.Owner) == false);
+ }
- if ( (getDirectoryEntry(addr).DirectoryState != State:I) && (state == State:I) ) {
- getDirectoryEntry(addr).DirectoryState := state;
- // disable coherence checker
- // sequencer.checkCoherence(addr);
- }
- else {
- getDirectoryEntry(addr).DirectoryState := state;
+ if (state == State:M) {
+ assert(dir_entry.Owner.count() == 1);
+ assert(dir_entry.Sharers.count() == 0);
+ }
+
+ if ((state != State:SS) && (state != State:OO)) {
+ assert(dir_entry.WaitingUnblocks == 0);
+ }
+
+ dir_entry.DirectoryState := state;
+
+ } else {
+ assert(state == State:I);
}
}
}
AccessPermission getAccessPermission(Addr addr) {
if (directory.isPresent(addr)) {
- DPRINTF(RubySlicc, "%s\n", Directory_State_to_permission(getDirectoryEntry(addr).DirectoryState));
- return Directory_State_to_permission(getDirectoryEntry(addr).DirectoryState);
+ Entry dir_entry := static_cast(Entry, "pointer", directory[addr]);
+ if (is_valid(dir_entry)) {
+ DPRINTF(RubySlicc, "%s,%s\n", dir_entry.DirectoryState, Directory_State_to_permission(dir_entry.DirectoryState));
+ return Directory_State_to_permission(dir_entry.DirectoryState);
+ } else {
+ DPRINTF(RubySlicc, "%s,%s\n", State:I, Directory_State_to_permission(State:I));
+ return Directory_State_to_permission(State:I);
+ }
}
-
DPRINTF(RubySlicc, "AccessPermission_NotPresent\n");
return AccessPermission:NotPresent;
}
void setAccessPermission(Addr addr, State state) {
if (directory.isPresent(addr)) {
- getDirectoryEntry(addr).changePermission(Directory_State_to_permission(state));
+ Entry dir_entry := static_cast(Entry, "pointer", directory[addr]);
+ if (is_valid(dir_entry)) {
+ dir_entry.changePermission(Directory_State_to_permission(state));
+ } else {
+ assert(state == State:I);
+ }
}
}
void functionalRead(Addr addr, Packet *pkt) {
- functionalMemoryRead(pkt);
+ TBE tbe := TBEs[addr];
+ if (is_valid(tbe) && tbe.WaitingWBAck) {
+ testAndRead(addr, tbe.DataBlk, pkt);
+ } else {
+ functionalMemoryRead(pkt);
+ }
}
int functionalWrite(Addr addr, Packet *pkt) {
int num_functional_writes := 0;
+ TBE tbe := TBEs[addr];
+ if (is_valid(tbe)) {
+ num_functional_writes := num_functional_writes +
+ testAndWrite(addr, tbe.DataBlk, pkt);
+ }
num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt);
return num_functional_writes;
}
@@ -240,8 +291,26 @@
out_port(responseNetwork_out, ResponseMsg, responseFromDir);
out_port(memQueue_out, MemoryMsg, requestToMemory);
+ // For inserting internal unblocks only
+ out_port(unblockNetwork_out_internal, ResponseMsg, responseToDir);
+
+ out_port(triggerQueue_out, TriggerMsg, triggerQueue);
+
// ** IN_PORTS **
+ // Trigger Queue
+ in_port(triggerQueue_in, TriggerMsg, triggerQueue, rank=3) {
+ if (triggerQueue_in.isReady(clockEdge())) {
+ peek(triggerQueue_in, TriggerMsg) {
+ if (in_msg.Type == TriggerType:ALL_ACKS) {
+ trigger(Event:All_Acks, in_msg.addr, TBEs[in_msg.addr]);
+ } else {
+ error("Unexpected message");
+ }
+ }
+ }
+ }
+
in_port(unblockNetwork_in, ResponseMsg, responseToDir, rank=2) {
if (unblockNetwork_in.isReady(clockEdge())) {
peek(unblockNetwork_in, ResponseMsg) {
@@ -292,8 +361,13 @@
trigger(Event:DMA_READ, makeLineAddress(in_msg.addr),
TBEs[makeLineAddress(in_msg.addr)]);
} else if (in_msg.Type == CoherenceRequestType:DMA_WRITE) {
- trigger(Event:DMA_WRITE, makeLineAddress(in_msg.addr),
+ if (in_msg.Len == blockSize) {
+ assert(makeLineAddress(in_msg.addr) == in_msg.addr);
+ trigger(Event:DMA_WRITE_LINE, in_msg.addr, TBEs[in_msg.addr]);
+ } else {
+ trigger(Event:DMA_WRITE_PARTIAL, makeLineAddress(in_msg.addr),
TBEs[makeLineAddress(in_msg.addr)]);
+ }
} else {
error("Invalid message");
}
@@ -306,7 +380,12 @@
if (memQueue_in.isReady(clockEdge())) {
peek(memQueue_in, MemoryMsg) {
if (in_msg.Type == MemoryRequestType:MEMORY_READ) {
- trigger(Event:Memory_Data, in_msg.addr, TBEs[in_msg.addr]);
+ if (machineIDToMachineType(in_msg.OriginalRequestorMachId) ==
+ MachineType:L2Cache) {
+ trigger(Event:Memory_Data_Cache, in_msg.addr, TBEs[in_msg.addr]);
+ } else {
+ trigger(Event:Memory_Data_DMA, in_msg.addr, TBEs[in_msg.addr]);
+ }
} else if (in_msg.Type == MemoryRequestType:MEMORY_WB) {
trigger(Event:Memory_Ack, in_msg.addr, TBEs[in_msg.addr]);
} else {
@@ -319,6 +398,14 @@
// Actions
+ action(allocDirEntry, "alloc", desc="Allocate directory entry") {
+ allocateDirectoryEntry(address);
+ }
+
+ action(deallocDirEntry, "dealloc", desc="Deallocate directory entry") {
+ deallocateDirectoryEntry(address);
+ }
+
action(a_sendWriteBackAck, "a", desc="Send writeback ack to requestor") {
peek(requestQueue_in, RequestMsg) {
enqueue(responseNetwork_out, ResponseMsg, directory_latency) {
@@ -345,6 +432,18 @@
}
}
+ action(clearDMA, "cD", desc="Clear DMA flag in TBE") {
+ assert(is_valid(tbe));
+ assert(tbe.WaitingDMAAck);
+ tbe.WaitingDMAAck := false;
+ }
+
+ action(clearWBAck, "cWb", desc="Clear WB ack flag in TBE") {
+ assert(is_valid(tbe));
+ assert(tbe.WaitingWBAck);
+ tbe.WaitingWBAck := false;
+ }
+
action(c_clearOwner, "c", desc="Clear the owner field") {
getDirectoryEntry(address).Owner.clear();
}
@@ -360,6 +459,9 @@
action(d_sendDataMsg, "d", desc="Send data to requestor") {
peek(memQueue_in, MemoryMsg) {
+ // Not using tbe here, but we must have allocated on a memory
+ // request
+ assert(is_valid(tbe));
enqueue(responseNetwork_out, ResponseMsg, 1) {
out_msg.addr := address;
out_msg.Sender := machineID;
@@ -367,7 +469,11 @@
out_msg.Destination.add(in_msg.OriginalRequestorMachId);
out_msg.DataBlk := in_msg.DataBlk;
out_msg.Dirty := false; // By definition, the block is now clean
- out_msg.Acks := in_msg.Acks;
+ if (getDirectoryEntry(in_msg.addr).Sharers.isElement(in_msg.OriginalRequestorMachId) == true) {
+ out_msg.Acks := (getDirectoryEntry(in_msg.addr).Sharers.count()) - 1;
+ } else {
+ out_msg.Acks := getDirectoryEntry(in_msg.addr).Sharers.count();
+ }
if (in_msg.ReadX) {
out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
} else {
@@ -378,16 +484,15 @@
}
}
- action(p_fwdDataToDMA, "\d", desc="Send data to requestor") {
- peek(requestQueue_in, RequestMsg) {
- enqueue(responseNetwork_out, ResponseMsg, 1) {
+ action(insertDMAUnblock, "idu", desc="insert dummy DMA unblock") {
+ peek(memQueue_in, MemoryMsg) {
+ enqueue(unblockNetwork_out_internal, ResponseMsg, 1) {
out_msg.addr := address;
- out_msg.Sender := machineID;
- out_msg.SenderMachine := MachineType:Directory;
- out_msg.Destination.add(in_msg.Requestor);
- out_msg.Dirty := false; // By definition, the block is now clean
- out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
- out_msg.MessageSize := MessageSizeType:Response_Data;
+ out_msg.Type := CoherenceResponseType:UNBLOCK;
+ out_msg.Destination.add(machineID);
+ out_msg.Sender := in_msg.OriginalRequestorMachId;
+ out_msg.SenderMachine := MachineType:DMA;
+ out_msg.MessageSize := MessageSizeType:Writeback_Control;
}
}
}
@@ -460,9 +565,26 @@
unblockNetwork_in.dequeue(clockEdge());
}
+ action(popTriggerQueue, "pt", desc="Pop trigger queue.") {
+ triggerQueue_in.dequeue(clockEdge());
+ }
+
+ action(checkForCompletion, "\o", desc="Check if we have received all the messages required for completion") {
+ assert(is_valid(tbe));
+ if ((tbe.WaitingDMAAck == false) &&
+ (tbe.WaitingWBAck == false)) {
+ enqueue(triggerQueue_out, TriggerMsg) {
+ out_msg.addr := address;
+ out_msg.Type := TriggerType:ALL_ACKS;
+ }
+ }
+ }
+
action(m_addUnlockerToSharers, "m", desc="Add the unlocker to the sharer list") {
peek(unblockNetwork_in, ResponseMsg) {
- getDirectoryEntry(address).Sharers.add(in_msg.Sender);
+ if (in_msg.SenderMachine == MachineType:L2Cache) {
+ getDirectoryEntry(address).Sharers.add(in_msg.Sender);
+ }
}
}
@@ -481,6 +603,7 @@
action(qf_queueMemoryFetchRequest, "qf", desc="Queue off-chip fetch request") {
peek(requestQueue_in, RequestMsg) {
+ assert(is_valid(tbe));
enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) {
out_msg.addr := address;
out_msg.Type := MemoryRequestType:MEMORY_READ;
@@ -493,31 +616,24 @@
action(qw_queueMemoryWBFromCacheRequest, "qw", desc="Queue off-chip writeback request") {
peek(requestQueue_in, RequestMsg) {
- if (is_valid(tbe)) {
- enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) {
- out_msg.addr := address;
- out_msg.Type := MemoryRequestType:MEMORY_WB;
- out_msg.Sender := tbe.Requestor;
- out_msg.MessageSize := MessageSizeType:Writeback_Data;
- out_msg.DataBlk := in_msg.DataBlk;
- out_msg.Len := 0;
- }
- } else {
- enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) {
- out_msg.addr := address;
- out_msg.Type := MemoryRequestType:MEMORY_WB;
- out_msg.Sender := in_msg.Requestor;
- out_msg.MessageSize := MessageSizeType:Writeback_Data;
- out_msg.DataBlk := in_msg.DataBlk;
- out_msg.Len := 0;
- }
+ assert(is_valid(tbe));
+ enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) {
+ out_msg.addr := address;
+ out_msg.Type := MemoryRequestType:MEMORY_WB;
+ out_msg.Sender := in_msg.Requestor;
+ out_msg.MessageSize := MessageSizeType:Writeback_Data;
+ out_msg.DataBlk := in_msg.DataBlk;
+ out_msg.Len := 0;
}
+ tbe.DataBlk := in_msg.DataBlk;
+ tbe.WaitingWBAck := true;
}
}
- action(qw_queueMemoryWBRequestFromMessageAndTBE, "qwmt",
- desc="Queue off-chip writeback request") {
+ action(qw_queueMemoryWBFromCacheResp, "qwcmt",
+ desc="Queue partial off-chip writeback request") {
peek(unblockNetwork_in, ResponseMsg) {
+ assert(is_valid(tbe));
DataBlock DataBlk := in_msg.DataBlk;
DataBlk.copyPartial(tbe.DataBlk, getOffset(tbe.PhysicalAddress),
tbe.Len);
@@ -529,11 +645,34 @@
out_msg.DataBlk := DataBlk;
out_msg.Len := 0;
}
+ tbe.DataBlk := DataBlk;
+ tbe.WaitingWBAck := true;
+ }
+ }
+
+ action(qw_queueMemoryWBFromMemResp, "qwmmt",
+ desc="Queue partial off-chip writeback request") {
+ peek(memQueue_in, MemoryMsg) {
+ assert(is_valid(tbe));
+ DataBlock DataBlk := in_msg.DataBlk;
+ DataBlk.copyPartial(tbe.DataBlk, getOffset(tbe.PhysicalAddress),
+ tbe.Len);
+ enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) {
+ out_msg.addr := address;
+ out_msg.Type := MemoryRequestType:MEMORY_WB;
+ out_msg.Sender := tbe.Requestor;
+ out_msg.MessageSize := MessageSizeType:Writeback_Data;
+ out_msg.DataBlk := DataBlk;
+ out_msg.Len := 0;
+ }
+ tbe.DataBlk := DataBlk;
+ tbe.WaitingWBAck := true;
}
}
action(qw_queueMemoryWBFromDMARequest, "/qw", desc="Queue off-chip writeback request") {
peek(requestQueue_in, RequestMsg) {
+ assert(is_valid(tbe));
enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) {
out_msg.addr := address;
out_msg.Type := MemoryRequestType:MEMORY_WB;
@@ -542,6 +681,8 @@
out_msg.DataBlk := in_msg.DataBlk;
out_msg.Len := 0;
}
+ tbe.DataBlk := in_msg.DataBlk;
+ tbe.WaitingWBAck := true;
}
}
@@ -549,115 +690,166 @@
requestQueue_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
}
- action(a_sendDMAAck, "\a", desc="Send DMA Ack that write completed, along with Inv Ack count") {
+ action(a_sendDMAAckFromReq, "\a", desc="Send DMA Ack that write completed, along with Inv Ack count") {
peek(requestQueue_in, RequestMsg) {
enqueue(responseNetwork_out, ResponseMsg, 1) {
- out_msg.addr := address;
- out_msg.Sender := machineID;
- out_msg.SenderMachine := MachineType:Directory;
- out_msg.Destination.add(in_msg.Requestor);
- out_msg.DataBlk := in_msg.DataBlk;
- out_msg.Acks := getDirectoryEntry(address).Sharers.count(); // for dma requests
- out_msg.Type := CoherenceResponseType:DMA_ACK;
- out_msg.MessageSize := MessageSizeType:Writeback_Control;
+ assert(is_valid(tbe));
+ out_msg.addr := address;
+ out_msg.Sender := machineID;
+ out_msg.SenderMachine := MachineType:Directory;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.Acks := getDirectoryEntry(address).Sharers.count(); // for dma requests
+ out_msg.Type := CoherenceResponseType:DMA_ACK;
+ out_msg.MessageSize := MessageSizeType:Writeback_Control;
+ tbe.WaitingDMAAck := true;
}
}
}
- action(a_sendDMAAck2, "\aa", desc="Send DMA Ack that write completed, along with Inv Ack count") {
- peek(unblockNetwork_in, ResponseMsg) {
- enqueue(responseNetwork_out, ResponseMsg, 1) {
+ action(a_sendDMAAckFromTBE, "\aa", desc="Send DMA Ack that write completed, along with Inv Ack count") {
+ enqueue(responseNetwork_out, ResponseMsg, 1) {
+ assert(is_valid(tbe));
out_msg.addr := address;
out_msg.Sender := machineID;
out_msg.SenderMachine := MachineType:Directory;
- if (is_valid(tbe)) {
- out_msg.Destination.add(tbe.Requestor);
- }
- out_msg.DataBlk := in_msg.DataBlk;
+ out_msg.Destination.add(tbe.Requestor);
out_msg.Acks := getDirectoryEntry(address).Sharers.count(); // for dma requests
out_msg.Type := CoherenceResponseType:DMA_ACK;
out_msg.MessageSize := MessageSizeType:Writeback_Control;
- }
+ tbe.WaitingDMAAck := true;
}
}
action(v_allocateTBE, "v", desc="Allocate TBE entry") {
+ check_allocate(TBEs);
peek (requestQueue_in, RequestMsg) {
+ assert(is_valid(tbe) == false);
TBEs.allocate(address);
set_tbe(TBEs[address]);
tbe.PhysicalAddress := in_msg.addr;
tbe.Len := in_msg.Len;
tbe.DataBlk := in_msg.DataBlk;
tbe.Requestor := in_msg.Requestor;
+ tbe.WaitingWBAck := false;
+ tbe.WaitingDMAAck := false;
}
}
action(w_deallocateTBE, "w", desc="Deallocate TBE entry") {
+ assert(is_valid(tbe));
+ assert(tbe.WaitingWBAck == false);
+ assert(tbe.WaitingDMAAck == false);
TBEs.deallocate(address);
unset_tbe();
}
// TRANSITIONS
- transition(I, GETX, MM) {
+ transition(I, GETX, MM_M) {
+ allocDirEntry;
+ v_allocateTBE;
qf_queueMemoryFetchRequest;
i_popIncomingRequestQueue;
}
transition(I, DMA_READ, XI_M) {
+ allocDirEntry;
+ v_allocateTBE;
qf_queueMemoryFetchRequest;
i_popIncomingRequestQueue;
}
- transition(I, DMA_WRITE, XI_U) {
+ transition(I, DMA_WRITE_LINE, XI_U) {
+ allocDirEntry;
+ v_allocateTBE;
qw_queueMemoryWBFromDMARequest;
- a_sendDMAAck; // ack count may be zero
+ a_sendDMAAckFromReq; // ack count may be zero
i_popIncomingRequestQueue;
}
- transition(XI_M, Memory_Data, I) {
- d_sendDataMsg; // ack count may be zero
+ transition(I, DMA_WRITE_PARTIAL, XI_M_U) {
+ allocDirEntry;
+ v_allocateTBE;
+ qf_queueMemoryFetchRequest;
+ i_popIncomingRequestQueue;
+ }
+
+ transition(XI_M_U, Memory_Data_DMA, XI_U) {
+ qw_queueMemoryWBFromMemResp;
+ a_sendDMAAckFromTBE; // ack count may be zero
q_popMemQueue;
}
- transition(XI_U, Exclusive_Unblock, I) {
+ transition(XI_M, Memory_Data_DMA, I) {
+ d_sendDataMsg; // ack count may be zero
+ deallocDirEntry;
+ w_deallocateTBE;
+ q_popMemQueue;
+ }
+
+ transition(XI_U, Exclusive_Unblock, XI_U) {
cc_clearSharers;
c_clearOwner;
+ clearDMA;
+ checkForCompletion;
j_popIncomingUnblockQueue;
}
- transition(S, GETX, MM) {
+ transition(XI_U, Memory_Ack, XI_U) {
+ clearWBAck;
+ checkForCompletion;
+ q_popMemQueue;
+ }
+
+ transition(XI_U, All_Acks, I) {
+ deallocDirEntry;
+ w_deallocateTBE;
+ popTriggerQueue;
+ }
+
+ transition(S, GETX, MM_M) {
+ v_allocateTBE;
qf_queueMemoryFetchRequest;
g_sendInvalidations;
i_popIncomingRequestQueue;
}
- transition(S, DMA_READ) {
- //qf_queueMemoryFetchRequest;
- p_fwdDataToDMA;
- //g_sendInvalidations; // the DMA will collect the invalidations then send an Unblock Exclusive
- i_popIncomingRequestQueue;
- }
-
- transition(S, DMA_WRITE, XI_U) {
+ transition(S, DMA_WRITE_LINE, XI_U) {
+ v_allocateTBE;
qw_queueMemoryWBFromDMARequest;
- a_sendDMAAck; // ack count may be zero
+ a_sendDMAAckFromReq; // ack count may be zero
g_sendInvalidations; // the DMA will collect invalidations
i_popIncomingRequestQueue;
}
- transition(I, GETS, IS) {
+ transition(S, DMA_WRITE_PARTIAL, XI_M_U) {
+ v_allocateTBE;
+ qf_queueMemoryFetchRequest;
+ g_sendInvalidations;
+ i_popIncomingRequestQueue;
+ }
+
+ transition(I, GETS, IS_M) {
+ allocDirEntry;
+ v_allocateTBE;
qf_queueMemoryFetchRequest;
i_popIncomingRequestQueue;
}
- transition({S, SS}, GETS, SS) {
+ transition(S, {GETS, DMA_READ}, SS) {
+ v_allocateTBE;
qf_queueMemoryFetchRequest;
n_incrementOutstanding;
i_popIncomingRequestQueue;
}
- transition({I, S}, PUTO) {
+ transition(SS, {GETS, DMA_READ}) {
+ qf_queueMemoryFetchRequest;
+ n_incrementOutstanding;
+ i_popIncomingRequestQueue;
+ }
+
+ transition({I, S}, {PUTO, PUTO_SHARERS}) {
b_sendWriteBackNack;
i_popIncomingRequestQueue;
}
@@ -675,7 +867,6 @@
transition(O, DMA_READ, OD) {
f_forwardRequest; // this will cause the data to go to DMA directly
- //g_sendInvalidations; // this will cause acks to be sent to the DMA
i_popIncomingRequestQueue;
}
@@ -683,7 +874,7 @@
j_popIncomingUnblockQueue;
}
- transition({O,M}, DMA_WRITE, OI_D) {
+ transition({O,M}, {DMA_WRITE_LINE, DMA_WRITE_PARTIAL}, OI_D) {
f_forwardRequestDirIsRequestor; // need the modified data before we can proceed
g_sendInvalidations; // these go to the DMA Controller
v_allocateTBE;
@@ -691,9 +882,8 @@
}
transition(OI_D, Data, XI_U) {
- qw_queueMemoryWBRequestFromMessageAndTBE;
- a_sendDMAAck2; // ack count may be zero
- w_deallocateTBE;
+ qw_queueMemoryWBFromCacheResp;
+ a_sendDMAAckFromTBE; // ack count may be zero
j_popIncomingUnblockQueue;
}
@@ -719,22 +909,26 @@
}
transition(M, GETS, MO) {
+ v_allocateTBE;
f_forwardRequest;
i_popIncomingRequestQueue;
}
transition(M, PUTX, MI) {
+ v_allocateTBE;
a_sendWriteBackAck;
i_popIncomingRequestQueue;
}
// happens if M->O transition happens on-chip
transition(M, PUTO, MI) {
+ v_allocateTBE;
a_sendWriteBackAck;
i_popIncomingRequestQueue;
}
transition(M, PUTO_SHARERS, MIS) {
+ v_allocateTBE;
a_sendWriteBackAck;
i_popIncomingRequestQueue;
}
@@ -750,35 +944,39 @@
}
- transition({MM, MO, MI, MIS, OS, OSS, XI_M, XI_U, OI_D, OD, MD}, {GETS, GETX, PUTO, PUTO_SHARERS, PUTX, DMA_READ, DMA_WRITE}) {
+ transition({MM_M, MM, MO, MI, MIS, OS, OSS, WBI, WBS, XI_M, XI_M_U, XI_U, OI_D, OD, MD}, {GETS, GETX, PUTO, PUTO_SHARERS, PUTX, DMA_READ, DMA_WRITE_LINE, DMA_WRITE_PARTIAL}) {
zz_recycleRequest;
}
transition({MM, MO}, Exclusive_Unblock, M) {
+ w_deallocateTBE;
cc_clearSharers;
e_ownerIsUnblocker;
j_popIncomingUnblockQueue;
}
transition(MO, Unblock, O) {
+ w_deallocateTBE;
m_addUnlockerToSharers;
j_popIncomingUnblockQueue;
}
- transition({IS, SS, OO}, {GETX, PUTO, PUTO_SHARERS, PUTX, DMA_READ, DMA_WRITE}) {
+ transition({IS, IS_M, SS, OO}, {GETX, PUTO, PUTO_SHARERS, PUTX, DMA_WRITE_LINE,DMA_WRITE_PARTIAL}) {
zz_recycleRequest;
}
- transition(IS, GETS) {
+ transition({IS, IS_M}, {GETS, DMA_READ}) {
zz_recycleRequest;
}
transition(IS, Unblock, S) {
+ w_deallocateTBE;
m_addUnlockerToSharers;
j_popIncomingUnblockQueue;
}
transition(IS, Exclusive_Unblock, M) {
+ w_deallocateTBE;
cc_clearSharers;
e_ownerIsUnblocker;
j_popIncomingUnblockQueue;
@@ -791,6 +989,7 @@
}
transition(SS, Last_Unblock, S) {
+ w_deallocateTBE;
m_addUnlockerToSharers;
o_decrementOutstanding;
j_popIncomingUnblockQueue;
@@ -808,14 +1007,21 @@
j_popIncomingUnblockQueue;
}
- transition(MI, Dirty_Writeback, I) {
+ transition(MI, Dirty_Writeback, WBI) {
c_clearOwner;
cc_clearSharers;
qw_queueMemoryWBFromCacheRequest;
i_popIncomingRequestQueue;
}
- transition(MIS, Dirty_Writeback, S) {
+ transition(WBI, Memory_Ack, I) {
+ clearWBAck;
+ w_deallocateTBE;
+ deallocDirEntry;
+ q_popMemQueue;
+ }
+
+ transition(MIS, Dirty_Writeback, WBS) {
c_moveOwnerToSharer;
qw_queueMemoryWBFromCacheRequest;
i_popIncomingRequestQueue;
@@ -823,21 +1029,30 @@
transition(MIS, Clean_Writeback, S) {
c_moveOwnerToSharer;
+ w_deallocateTBE;
i_popIncomingRequestQueue;
}
- transition(OS, Dirty_Writeback, S) {
+ transition(OS, Dirty_Writeback, WBS) {
c_clearOwner;
+ v_allocateTBE;
qw_queueMemoryWBFromCacheRequest;
i_popIncomingRequestQueue;
}
- transition(OSS, Dirty_Writeback, S) {
+ transition(OSS, Dirty_Writeback, WBS) {
c_moveOwnerToSharer;
+ v_allocateTBE;
qw_queueMemoryWBFromCacheRequest;
i_popIncomingRequestQueue;
}
+ transition(WBS, Memory_Ack, S) {
+ clearWBAck;
+ w_deallocateTBE;
+ q_popMemQueue;
+ }
+
transition(OSS, Clean_Writeback, S) {
c_moveOwnerToSharer;
i_popIncomingRequestQueue;
@@ -846,6 +1061,8 @@
transition(MI, Clean_Writeback, I) {
c_clearOwner;
cc_clearSharers;
+ w_deallocateTBE;
+ deallocDirEntry;
i_popIncomingRequestQueue;
}
@@ -854,21 +1071,24 @@
i_popIncomingRequestQueue;
}
- transition({MI, MIS}, Unblock, M) {
- j_popIncomingUnblockQueue;
- }
-
- transition({OS, OSS}, Unblock, O) {
- j_popIncomingUnblockQueue;
- }
-
- transition({I, S, O, M, IS, SS, OO, MO, MM, MI, MIS, OS, OSS}, Memory_Data) {
+ transition({S, SS}, Memory_Data_Cache) {
d_sendDataMsg;
q_popMemQueue;
}
- transition({I, S, O, M, IS, SS, OO, MO, MM, MI, MIS, OS, OSS, XI_U, XI_M}, Memory_Ack) {
- //a_sendAck;
+ transition(IS_M, Memory_Data_Cache, IS) {
+ d_sendDataMsg;
+ q_popMemQueue;
+ }
+
+ transition(MM_M, Memory_Data_Cache, MM) {
+ d_sendDataMsg;
+ q_popMemQueue;
+ }
+
+ transition(SS, Memory_Data_DMA) {
+ d_sendDataMsg;
+ insertDMAUnblock; // DMA will not send unblocks in response to reads
q_popMemQueue;
}
diff --git a/src/mem/ruby/protocol/MOESI_CMP_directory-dma.sm b/src/mem/ruby/protocol/MOESI_CMP_directory-dma.sm
index a3a9f63..c2eb593 100644
--- a/src/mem/ruby/protocol/MOESI_CMP_directory-dma.sm
+++ b/src/mem/ruby/protocol/MOESI_CMP_directory-dma.sm
@@ -100,6 +100,7 @@
}
AccessPermission getAccessPermission(Addr addr) {
+ DPRINTF(RubySlicc, "AccessPermission_NotPresent\n");
return AccessPermission:NotPresent;
}
@@ -192,7 +193,7 @@
out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
out_msg.Requestor := machineID;
out_msg.RequestorMachine := MachineType:DMA;
- out_msg.MessageSize := MessageSizeType:Writeback_Control;
+ out_msg.MessageSize := MessageSizeType:Data;
}
}
}
@@ -254,6 +255,7 @@
}
action(v_allocateTBE, "v", desc="Allocate TBE entry") {
+ check_allocate(TBEs);
TBEs.allocate(address);
set_tbe(TBEs[address]);
}
diff --git a/src/mem/ruby/protocol/MOESI_CMP_directory-msg.sm b/src/mem/ruby/protocol/MOESI_CMP_directory-msg.sm
index 7dc5822..2dd34e4 100644
--- a/src/mem/ruby/protocol/MOESI_CMP_directory-msg.sm
+++ b/src/mem/ruby/protocol/MOESI_CMP_directory-msg.sm
@@ -109,9 +109,7 @@
bool functionalRead(Packet *pkt) {
// Read only those messages that contain the data
- if (Type == CoherenceRequestType:DMA_READ ||
- Type == CoherenceRequestType:DMA_WRITE ||
- Type == CoherenceRequestType:WRITEBACK_CLEAN_DATA ||
+ if (Type == CoherenceRequestType:WRITEBACK_CLEAN_DATA ||
Type == CoherenceRequestType:WRITEBACK_DIRTY_DATA) {
return testAndRead(addr, DataBlk, pkt);
}
diff --git a/src/mem/ruby/protocol/RubySlicc_Types.sm b/src/mem/ruby/protocol/RubySlicc_Types.sm
index e3a136f..b59cf97 100644
--- a/src/mem/ruby/protocol/RubySlicc_Types.sm
+++ b/src/mem/ruby/protocol/RubySlicc_Types.sm
@@ -49,9 +49,13 @@
//
external_type(MessageBuffer, buffer="yes", inport="yes", outport="yes");
-external_type(OutPort, primitive="yes");
external_type(Scalar, primitive="yes");
+structure(OutPort, external = "yes", primitive="yes") {
+ void enqueueDeferredMessages(Addr addr, Tick curTime, Tick delay);
+ bool isDeferredMsgMapEmpty(Addr addr);
+}
+
structure(InPort, external = "yes", primitive="yes") {
bool isReady(Tick current_time);
Tick dequeue(Tick current_time);
@@ -59,6 +63,7 @@
bool isEmpty();
bool isStallMapEmpty();
int getStallMapSize();
+ bool hasStalledMsg(Addr addr);
}
external_type(NodeID, default="0", primitive="yes");
@@ -128,7 +133,6 @@
void writeCallbackScFail(Addr, DataBlock);
bool llscCheckMonitor(Addr);
- void checkCoherence(Addr);
void evictionCallback(Addr);
void recordRequestType(SequencerRequestType);
bool checkResourceAvailable(CacheResourceType, Addr);
@@ -147,7 +151,6 @@
Cycles, Cycles, Cycles);
void writeCallback(Addr, MachineType, DataBlock,
Cycles, Cycles, Cycles, bool);
- void checkCoherence(Addr);
void evictionCallback(Addr);
void recordCPReadCallBack(MachineID, MachineID);
void recordCPWriteCallBack(MachineID, MachineID);
@@ -168,7 +171,6 @@
Cycles, Cycles, Cycles, bool);
void invCallback(Addr);
void wbCallback(Addr);
- void checkCoherence(Addr);
void evictionCallback(Addr);
}
@@ -196,6 +198,7 @@
structure (DirectoryMemory, external = "yes") {
AbstractCacheEntry allocate(Addr, AbstractCacheEntry);
AbstractCacheEntry lookup(Addr);
+ void deallocate(Addr);
bool isPresent(Addr);
void invalidateBlock(Addr);
void recordRequestType(DirectoryRequestType);
diff --git a/src/mem/ruby/slicc_interface/AbstractController.cc b/src/mem/ruby/slicc_interface/AbstractController.cc
index 59611ae..b729d26 100644
--- a/src/mem/ruby/slicc_interface/AbstractController.cc
+++ b/src/mem/ruby/slicc_interface/AbstractController.cc
@@ -56,7 +56,7 @@
m_transitions_per_cycle(p->transitions_per_cycle),
m_buffer_size(p->buffer_size), m_recycle_latency(p->recycle_latency),
m_mandatory_queue_latency(p->mandatory_queue_latency),
- memoryPort(csprintf("%s.memory", name()), this, ""),
+ memoryPort(csprintf("%s.memory", name()), this),
addrRanges(p->addr_ranges.begin(), p->addr_ranges.end())
{
if (m_version == 0) {
@@ -250,12 +250,15 @@
// to make more progress. Make sure it wakes up
scheduleEvent(Cycles(1));
recvTimingResp(pkt);
- } else {
+ } else if (memoryPort.sendTimingReq(pkt)) {
mem_queue->dequeue(clockEdge());
- memoryPort.schedTimingReq(pkt, clockEdge());
// Since the queue was popped the controller may be able
// to make more progress. Make sure it wakes up
scheduleEvent(Cycles(1));
+ } else {
+ scheduleEvent(Cycles(1));
+ delete pkt;
+ delete s;
}
return true;
@@ -306,11 +309,6 @@
{
int num_functional_writes = 0;
- // Check the buffer from the controller to the memory.
- if (memoryPort.trySatisfyFunctional(pkt)) {
- num_functional_writes++;
- }
-
// Update memory itself.
memoryPort.sendFunctional(pkt);
return num_functional_writes + 1;
@@ -369,12 +367,15 @@
return true;
}
+void
+AbstractController::MemoryPort::recvReqRetry()
+{
+ controller->serviceMemoryQueue();
+}
+
AbstractController::MemoryPort::MemoryPort(const std::string &_name,
AbstractController *_controller,
- const std::string &_label)
- : QueuedMasterPort(_name, _controller, reqQueue, snoopRespQueue),
- reqQueue(*_controller, *this, _label),
- snoopRespQueue(*_controller, *this, false, _label),
- controller(_controller)
+ PortID id)
+ : MasterPort(_name, _controller, id), controller(_controller)
{
}
diff --git a/src/mem/ruby/slicc_interface/AbstractController.hh b/src/mem/ruby/slicc_interface/AbstractController.hh
index 15aff12..1577cfa 100644
--- a/src/mem/ruby/slicc_interface/AbstractController.hh
+++ b/src/mem/ruby/slicc_interface/AbstractController.hh
@@ -228,26 +228,25 @@
/**
* Port that forwards requests and receives responses from the
- * memory controller. It has a queue of packets not yet sent.
+ * memory controller.
*/
- class MemoryPort : public QueuedMasterPort
+ class MemoryPort : public MasterPort
{
private:
- // Packet queues used to store outgoing requests and snoop responses.
- ReqPacketQueue reqQueue;
- SnoopRespPacketQueue snoopRespQueue;
-
// Controller that operates this port.
AbstractController *controller;
public:
MemoryPort(const std::string &_name, AbstractController *_controller,
- const std::string &_label);
+ PortID id = InvalidPortID);
+ protected:
// Function for receiving a timing response from the peer port.
// Currently the pkt is handed to the coherence controller
// associated with this port.
bool recvTimingResp(PacketPtr pkt);
+
+ void recvReqRetry();
};
/* Master port to the memory controller. */
diff --git a/src/mem/ruby/structures/DirectoryMemory.cc b/src/mem/ruby/structures/DirectoryMemory.cc
index e2ee0fc..c6e3ccf 100644
--- a/src/mem/ruby/structures/DirectoryMemory.cc
+++ b/src/mem/ruby/structures/DirectoryMemory.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited
+ * Copyright (c) 2017,2019 ARM Limited
* All rights reserved.
*
* The license below extends only to copyright in the software and shall
@@ -127,6 +127,7 @@
idx = mapAddressToLocalIdx(address);
assert(idx < m_num_entries);
+ assert(m_entries[idx] == NULL);
entry->changePermission(AccessPermission_Read_Only);
m_entries[idx] = entry;
@@ -134,6 +135,20 @@
}
void
+DirectoryMemory::deallocate(Addr address)
+{
+ assert(isPresent(address));
+ uint64_t idx;
+ DPRINTF(RubyCache, "Removing entry for address: %#x\n", address);
+
+ idx = mapAddressToLocalIdx(address);
+ assert(idx < m_num_entries);
+ assert(m_entries[idx] != NULL);
+ delete m_entries[idx];
+ m_entries[idx] = NULL;
+}
+
+void
DirectoryMemory::print(ostream& out) const
{
}
diff --git a/src/mem/ruby/structures/DirectoryMemory.hh b/src/mem/ruby/structures/DirectoryMemory.hh
index f879b29..3dd0e95 100644
--- a/src/mem/ruby/structures/DirectoryMemory.hh
+++ b/src/mem/ruby/structures/DirectoryMemory.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited
+ * Copyright (c) 2017,2019 ARM Limited
* All rights reserved.
*
* The license below extends only to copyright in the software and shall
@@ -79,6 +79,9 @@
AbstractCacheEntry *lookup(Addr address);
AbstractCacheEntry *allocate(Addr address, AbstractCacheEntry* new_entry);
+ // Explicitly free up this address
+ void deallocate(Addr address);
+
void print(std::ostream& out) const;
void recordRequestType(DirectoryRequestType requestType);
diff --git a/src/mem/ruby/structures/PerfectCacheMemory.hh b/src/mem/ruby/structures/PerfectCacheMemory.hh
index 363e3e8..9898995 100644
--- a/src/mem/ruby/structures/PerfectCacheMemory.hh
+++ b/src/mem/ruby/structures/PerfectCacheMemory.hh
@@ -1,4 +1,16 @@
/*
+ * Copyright (c) 2019 ARM Limited
+ * All rights reserved.
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
* All rights reserved.
*
@@ -138,7 +150,8 @@
inline void
PerfectCacheMemory<ENTRY>::deallocate(Addr address)
{
- m_map.erase(makeLineAddress(address));
+ auto num_erased M5_VAR_USED = m_map.erase(makeLineAddress(address));
+ assert(num_erased == 1);
}
// Returns with the physical address of the conflicting cache line
diff --git a/src/mem/ruby/system/GPUCoalescer.cc b/src/mem/ruby/system/GPUCoalescer.cc
index 93275cb..0153b4c 100644
--- a/src/mem/ruby/system/GPUCoalescer.cc
+++ b/src/mem/ruby/system/GPUCoalescer.cc
@@ -113,11 +113,95 @@
return accessSegment;
}
+UncoalescedTable::UncoalescedTable(GPUCoalescer *gc)
+ : coalescer(gc)
+{
+}
+
+void
+UncoalescedTable::insertPacket(PacketPtr pkt)
+{
+ uint64_t seqNum = pkt->req->getReqInstSeqNum();
+
+ instMap[seqNum].push_back(pkt);
+ DPRINTF(GPUCoalescer, "Adding 0x%X seqNum %d to map. (map %d vec %d)\n",
+ pkt->getAddr(), seqNum, instMap.size(), instMap[seqNum].size());
+}
+
+bool
+UncoalescedTable::packetAvailable()
+{
+ return !instMap.empty();
+}
+
+PerInstPackets*
+UncoalescedTable::getInstPackets(int offset)
+{
+ if (offset >= instMap.size()) {
+ return nullptr;
+ }
+
+ auto instMapIter = instMap.begin();
+ std::advance(instMapIter, offset);
+
+ return &(instMapIter->second);
+}
+
+void
+UncoalescedTable::updateResources()
+{
+ for (auto iter = instMap.begin(); iter != instMap.end(); ) {
+ if (iter->second.empty()) {
+ instMap.erase(iter++);
+ coalescer->getGMTokenPort().sendTokens(1);
+ } else {
+ ++iter;
+ }
+ }
+}
+
+void
+UncoalescedTable::printRequestTable(std::stringstream& ss)
+{
+ ss << "UncoalescedTable contains " << instMap.size()
+ << " address entries." << std::endl;
+ for (auto& inst : instMap) {
+ ss << "Addr 0x" << std::hex << inst.first << std::dec
+ << " with " << inst.second.size() << " packets"
+ << std::endl;
+ }
+}
+
+void
+UncoalescedTable::checkDeadlock(Tick threshold)
+{
+ Tick current_time = curTick();
+
+ for (auto &it : instMap) {
+ for (auto &pkt : it.second) {
+ if (current_time - pkt->req->time() > threshold) {
+ std::stringstream ss;
+ printRequestTable(ss);
+
+ panic("Possible Deadlock detected. Aborting!\n"
+ "version: %d request.paddr: 0x%x uncoalescedTable: %d "
+ "current time: %u issue_time: %d difference: %d\n"
+ "Request Tables:\n\n%s", coalescer->getId(),
+ pkt->getAddr(), instMap.size(), current_time,
+ pkt->req->time(), current_time - pkt->req->time(),
+ ss.str());
+ }
+ }
+ }
+}
+
GPUCoalescer::GPUCoalescer(const Params *p)
: RubyPort(p),
issueEvent([this]{ completeIssue(); }, "Issue coalesced request",
false, Event::Progress_Event_Pri),
- deadlockCheckEvent([this]{ wakeup(); }, "GPUCoalescer deadlock check")
+ uncoalescedTable(this),
+ deadlockCheckEvent([this]{ wakeup(); }, "GPUCoalescer deadlock check"),
+ gmTokenPort(name() + ".gmTokenPort", this)
{
m_store_waiting_on_load_cycles = 0;
m_store_waiting_on_store_cycles = 0;
@@ -126,8 +210,9 @@
m_outstanding_count = 0;
+ coalescingWindow = p->max_coalesces_per_cycle;
+
m_max_outstanding_requests = 0;
- m_deadlock_threshold = 0;
m_instCache_ptr = nullptr;
m_dataCache_ptr = nullptr;
@@ -149,52 +234,46 @@
{
}
+Port &
+GPUCoalescer::getPort(const std::string &if_name, PortID idx)
+{
+ if (if_name == "gmTokenPort") {
+ return gmTokenPort;
+ }
+
+ // delgate to RubyPort otherwise
+ return RubyPort::getPort(if_name, idx);
+}
+
void
GPUCoalescer::wakeup()
{
- // Check for deadlock of any of the requests
Cycles current_time = curCycle();
+ for (auto& requestList : coalescedTable) {
+ for (auto& req : requestList.second) {
+ if (current_time - req->getIssueTime() > m_deadlock_threshold) {
+ std::stringstream ss;
+ printRequestTable(ss);
+ ss << "Outstanding requests: " << m_outstanding_count
+ << std::endl;
- // Check across all outstanding requests
- int total_outstanding = 0;
-
- RequestTable::iterator read = m_readRequestTable.begin();
- RequestTable::iterator read_end = m_readRequestTable.end();
- for (; read != read_end; ++read) {
- GPUCoalescerRequest* request = read->second;
- if (current_time - request->issue_time < m_deadlock_threshold)
- continue;
-
- panic("Possible Deadlock detected. Aborting!\n"
- "version: %d request.paddr: 0x%x m_readRequestTable: %d "
- "current time: %u issue_time: %d difference: %d\n", m_version,
- request->pkt->getAddr(), m_readRequestTable.size(),
- current_time * clockPeriod(), request->issue_time * clockPeriod(),
- (current_time - request->issue_time)*clockPeriod());
+ panic("Possible Deadlock detected. Aborting!\n"
+ "version: %d request.paddr: 0x%x coalescedTable: %d "
+ "current time: %u issue_time: %d difference: %d\n"
+ "Request Tables:\n %s", m_version,
+ req->getFirstPkt()->getAddr(),
+ coalescedTable.size(), cyclesToTicks(current_time),
+ cyclesToTicks(req->getIssueTime()),
+ cyclesToTicks(current_time - req->getIssueTime()),
+ ss.str());
+ }
+ }
}
- RequestTable::iterator write = m_writeRequestTable.begin();
- RequestTable::iterator write_end = m_writeRequestTable.end();
- for (; write != write_end; ++write) {
- GPUCoalescerRequest* request = write->second;
- if (current_time - request->issue_time < m_deadlock_threshold)
- continue;
-
- panic("Possible Deadlock detected. Aborting!\n"
- "version: %d request.paddr: 0x%x m_writeRequestTable: %d "
- "current time: %u issue_time: %d difference: %d\n", m_version,
- request->pkt->getAddr(), m_writeRequestTable.size(),
- current_time * clockPeriod(), request->issue_time * clockPeriod(),
- (current_time - request->issue_time) * clockPeriod());
- }
-
- total_outstanding += m_writeRequestTable.size();
- total_outstanding += m_readRequestTable.size();
-
- assert(m_outstanding_count == total_outstanding);
+ Tick tick_threshold = cyclesToTicks(m_deadlock_threshold);
+ uncoalescedTable.checkDeadlock(tick_threshold);
if (m_outstanding_count > 0) {
- // If there are still outstanding requests, keep checking
schedule(deadlockCheckEvent,
m_deadlock_threshold * clockPeriod() +
curTick());
@@ -202,6 +281,26 @@
}
void
+GPUCoalescer::printRequestTable(std::stringstream& ss)
+{
+ uncoalescedTable.printRequestTable(ss);
+
+ ss << "CoalescedTable contains " << coalescedTable.size()
+ << " address entries." << std::endl;
+ for (auto& requestList : coalescedTable) {
+ ss << "Addr 0x" << std::hex << requestList.first << std::dec
+ << ": type-";
+ for (auto& request : requestList.second) {
+ ss << RubyRequestType_to_string(request->getRubyType())
+ << " pkts-" << request->getPackets().size()
+ << " issued-" << request->getIssueTime() << " seqNum-"
+ << request->getSeqNum() << "; ";
+ }
+ ss << std::endl;
+ }
+}
+
+void
GPUCoalescer::resetStats()
{
m_latencyHist.reset();
@@ -229,65 +328,6 @@
{
}
-RequestStatus
-GPUCoalescer::getRequestStatus(PacketPtr pkt, RubyRequestType request_type)
-{
- Addr line_addr = makeLineAddress(pkt->getAddr());
-
- if (!m_mandatory_q_ptr->areNSlotsAvailable(1, clockEdge())) {
- return RequestStatus_BufferFull;
- }
-
- if (m_controller->isBlocked(line_addr) &&
- request_type != RubyRequestType_Locked_RMW_Write) {
- return RequestStatus_Aliased;
- }
-
- if ((request_type == RubyRequestType_ST) ||
- (request_type == RubyRequestType_ATOMIC) ||
- (request_type == RubyRequestType_ATOMIC_RETURN) ||
- (request_type == RubyRequestType_ATOMIC_NO_RETURN) ||
- (request_type == RubyRequestType_RMW_Read) ||
- (request_type == RubyRequestType_RMW_Write) ||
- (request_type == RubyRequestType_Load_Linked) ||
- (request_type == RubyRequestType_Store_Conditional) ||
- (request_type == RubyRequestType_Locked_RMW_Read) ||
- (request_type == RubyRequestType_Locked_RMW_Write) ||
- (request_type == RubyRequestType_FLUSH)) {
-
- // Check if there is any outstanding read request for the same
- // cache line.
- if (m_readRequestTable.count(line_addr) > 0) {
- m_store_waiting_on_load_cycles++;
- return RequestStatus_Aliased;
- }
-
- if (m_writeRequestTable.count(line_addr) > 0) {
- // There is an outstanding write request for the cache line
- m_store_waiting_on_store_cycles++;
- return RequestStatus_Aliased;
- }
- } else {
- // Check if there is any outstanding write request for the same
- // cache line.
- if (m_writeRequestTable.count(line_addr) > 0) {
- m_load_waiting_on_store_cycles++;
- return RequestStatus_Aliased;
- }
-
- if (m_readRequestTable.count(line_addr) > 0) {
- // There is an outstanding read request for the cache line
- m_load_waiting_on_load_cycles++;
- return RequestStatus_Aliased;
- }
- }
-
- return RequestStatus_Ready;
-
-}
-
-
-
// sets the kernelEndList
void
GPUCoalescer::insertKernel(int wavefront_id, PacketPtr pkt)
@@ -303,153 +343,6 @@
kernelEndList.size());
}
-
-// Insert the request on the correct request table. Return true if
-// the entry was already present.
-bool
-GPUCoalescer::insertRequest(PacketPtr pkt, RubyRequestType request_type)
-{
- assert(getRequestStatus(pkt, request_type) == RequestStatus_Ready ||
- pkt->req->isLockedRMW() ||
- !m_mandatory_q_ptr->areNSlotsAvailable(1, clockEdge()));
-
- int total_outstanding M5_VAR_USED =
- m_writeRequestTable.size() + m_readRequestTable.size();
-
- assert(m_outstanding_count == total_outstanding);
-
- // See if we should schedule a deadlock check
- if (!deadlockCheckEvent.scheduled()) {
- schedule(deadlockCheckEvent, m_deadlock_threshold + curTick());
- }
-
- Addr line_addr = makeLineAddress(pkt->getAddr());
- if ((request_type == RubyRequestType_ST) ||
- (request_type == RubyRequestType_ATOMIC) ||
- (request_type == RubyRequestType_ATOMIC_RETURN) ||
- (request_type == RubyRequestType_ATOMIC_NO_RETURN) ||
- (request_type == RubyRequestType_RMW_Read) ||
- (request_type == RubyRequestType_RMW_Write) ||
- (request_type == RubyRequestType_Load_Linked) ||
- (request_type == RubyRequestType_Store_Conditional) ||
- (request_type == RubyRequestType_Locked_RMW_Read) ||
- (request_type == RubyRequestType_Locked_RMW_Write) ||
- (request_type == RubyRequestType_FLUSH)) {
-
- pair<RequestTable::iterator, bool> r =
- m_writeRequestTable.insert(RequestTable::value_type(line_addr,
- (GPUCoalescerRequest*) NULL));
- if (r.second) {
- RequestTable::iterator i = r.first;
- i->second = new GPUCoalescerRequest(pkt, request_type,
- curCycle());
- DPRINTF(GPUCoalescer,
- "Inserting write request for paddr %#x for type %d\n",
- pkt->req->getPaddr(), i->second->m_type);
- m_outstanding_count++;
- } else {
- return true;
- }
- } else {
- pair<RequestTable::iterator, bool> r =
- m_readRequestTable.insert(RequestTable::value_type(line_addr,
- (GPUCoalescerRequest*) NULL));
-
- if (r.second) {
- RequestTable::iterator i = r.first;
- i->second = new GPUCoalescerRequest(pkt, request_type,
- curCycle());
- DPRINTF(GPUCoalescer,
- "Inserting read request for paddr %#x for type %d\n",
- pkt->req->getPaddr(), i->second->m_type);
- m_outstanding_count++;
- } else {
- return true;
- }
- }
-
- m_outstandReqHist.sample(m_outstanding_count);
-
- total_outstanding = m_writeRequestTable.size() + m_readRequestTable.size();
- assert(m_outstanding_count == total_outstanding);
-
- return false;
-}
-
-void
-GPUCoalescer::markRemoved()
-{
- m_outstanding_count--;
- assert(m_outstanding_count ==
- m_writeRequestTable.size() + m_readRequestTable.size());
-}
-
-void
-GPUCoalescer::removeRequest(GPUCoalescerRequest* srequest)
-{
- assert(m_outstanding_count ==
- m_writeRequestTable.size() + m_readRequestTable.size());
-
- Addr line_addr = makeLineAddress(srequest->pkt->getAddr());
- if ((srequest->m_type == RubyRequestType_ST) ||
- (srequest->m_type == RubyRequestType_RMW_Read) ||
- (srequest->m_type == RubyRequestType_RMW_Write) ||
- (srequest->m_type == RubyRequestType_Load_Linked) ||
- (srequest->m_type == RubyRequestType_Store_Conditional) ||
- (srequest->m_type == RubyRequestType_Locked_RMW_Read) ||
- (srequest->m_type == RubyRequestType_Locked_RMW_Write)) {
- m_writeRequestTable.erase(line_addr);
- } else {
- m_readRequestTable.erase(line_addr);
- }
-
- markRemoved();
-}
-
-bool
-GPUCoalescer::handleLlsc(Addr address, GPUCoalescerRequest* request)
-{
- //
- // The success flag indicates whether the LLSC operation was successful.
- // LL ops will always succeed, but SC may fail if the cache line is no
- // longer locked.
- //
- bool success = true;
- if (request->m_type == RubyRequestType_Store_Conditional) {
- if (!m_dataCache_ptr->isLocked(address, m_version)) {
- //
- // For failed SC requests, indicate the failure to the cpu by
- // setting the extra data to zero.
- //
- request->pkt->req->setExtraData(0);
- success = false;
- } else {
- //
- // For successful SC requests, indicate the success to the cpu by
- // setting the extra data to one.
- //
- request->pkt->req->setExtraData(1);
- }
- //
- // Independent of success, all SC operations must clear the lock
- //
- m_dataCache_ptr->clearLocked(address);
- } else if (request->m_type == RubyRequestType_Load_Linked) {
- //
- // Note: To fully follow Alpha LLSC semantics, should the LL clear any
- // previously locked cache lines?
- //
- m_dataCache_ptr->setLocked(address, m_version);
- } else if ((m_dataCache_ptr->isTagPresent(address)) &&
- (m_dataCache_ptr->isLocked(address, m_version))) {
- //
- // Normal writes should clear the locked address
- //
- m_dataCache_ptr->clearLocked(address);
- }
- return success;
-}
-
void
GPUCoalescer::writeCallback(Addr address, DataBlock& data)
{
@@ -487,49 +380,22 @@
bool isRegion)
{
assert(address == makeLineAddress(address));
+ assert(coalescedTable.count(address));
- DPRINTF(GPUCoalescer, "write callback for address %#x\n", address);
- assert(m_writeRequestTable.count(makeLineAddress(address)));
+ auto crequest = coalescedTable.at(address).front();
- RequestTable::iterator i = m_writeRequestTable.find(address);
- assert(i != m_writeRequestTable.end());
- GPUCoalescerRequest* request = i->second;
+ hitCallback(crequest, mach, data, true, crequest->getIssueTime(),
+ forwardRequestTime, firstResponseTime, isRegion);
- m_writeRequestTable.erase(i);
- markRemoved();
+ delete crequest;
+ coalescedTable.at(address).pop_front();
- assert((request->m_type == RubyRequestType_ST) ||
- (request->m_type == RubyRequestType_ATOMIC) ||
- (request->m_type == RubyRequestType_ATOMIC_RETURN) ||
- (request->m_type == RubyRequestType_ATOMIC_NO_RETURN) ||
- (request->m_type == RubyRequestType_RMW_Read) ||
- (request->m_type == RubyRequestType_RMW_Write) ||
- (request->m_type == RubyRequestType_Load_Linked) ||
- (request->m_type == RubyRequestType_Store_Conditional) ||
- (request->m_type == RubyRequestType_Locked_RMW_Read) ||
- (request->m_type == RubyRequestType_Locked_RMW_Write) ||
- (request->m_type == RubyRequestType_FLUSH));
-
-
- //
- // For Alpha, properly handle LL, SC, and write requests with respect to
- // locked cache blocks.
- //
- // Not valid for Garnet_standalone protocl
- //
- bool success = true;
- if (!m_runningGarnetStandalone)
- success = handleLlsc(address, request);
-
- if (request->m_type == RubyRequestType_Locked_RMW_Read) {
- m_controller->blockOnQueue(address, m_mandatory_q_ptr);
- } else if (request->m_type == RubyRequestType_Locked_RMW_Write) {
- m_controller->unblock(address);
+ if (coalescedTable.at(address).empty()) {
+ coalescedTable.erase(address);
+ } else {
+ auto nextRequest = coalescedTable.at(address).front();
+ issueRequest(nextRequest);
}
-
- hitCallback(request, mach, data, success,
- request->issue_time, forwardRequestTime, firstResponseTime,
- isRegion);
}
void
@@ -570,26 +436,37 @@
bool isRegion)
{
assert(address == makeLineAddress(address));
- assert(m_readRequestTable.count(makeLineAddress(address)));
+ assert(coalescedTable.count(address));
- DPRINTF(GPUCoalescer, "read callback for address %#x\n", address);
- RequestTable::iterator i = m_readRequestTable.find(address);
- assert(i != m_readRequestTable.end());
- GPUCoalescerRequest* request = i->second;
+ auto crequest = coalescedTable.at(address).front();
+ fatal_if(crequest->getRubyType() != RubyRequestType_LD,
+ "readCallback received non-read type response\n");
- m_readRequestTable.erase(i);
- markRemoved();
+ // Iterate over the coalesced requests to respond to as many loads as
+ // possible until another request type is seen. Models MSHR for TCP.
+ while (crequest->getRubyType() == RubyRequestType_LD) {
+ hitCallback(crequest, mach, data, true, crequest->getIssueTime(),
+ forwardRequestTime, firstResponseTime, isRegion);
- assert((request->m_type == RubyRequestType_LD) ||
- (request->m_type == RubyRequestType_IFETCH));
+ delete crequest;
+ coalescedTable.at(address).pop_front();
+ if (coalescedTable.at(address).empty()) {
+ break;
+ }
- hitCallback(request, mach, data, true,
- request->issue_time, forwardRequestTime, firstResponseTime,
- isRegion);
+ crequest = coalescedTable.at(address).front();
+ }
+
+ if (coalescedTable.at(address).empty()) {
+ coalescedTable.erase(address);
+ } else {
+ auto nextRequest = coalescedTable.at(address).front();
+ issueRequest(nextRequest);
+ }
}
void
-GPUCoalescer::hitCallback(GPUCoalescerRequest* srequest,
+GPUCoalescer::hitCallback(CoalescedRequest* crequest,
MachineType mach,
DataBlock& data,
bool success,
@@ -598,22 +475,15 @@
Cycles firstResponseTime,
bool isRegion)
{
- PacketPtr pkt = srequest->pkt;
+ PacketPtr pkt = crequest->getFirstPkt();
Addr request_address = pkt->getAddr();
Addr request_line_address = makeLineAddress(request_address);
- RubyRequestType type = srequest->m_type;
+ RubyRequestType type = crequest->getRubyType();
- // Set this cache entry to the most recently used
- if (type == RubyRequestType_IFETCH) {
- if (m_instCache_ptr->isTagPresent(request_line_address))
- m_instCache_ptr->setMRU(request_line_address);
- } else {
- if (m_dataCache_ptr->isTagPresent(request_line_address))
- m_dataCache_ptr->setMRU(request_line_address);
- }
+ DPRINTF(GPUCoalescer, "Got hitCallback for 0x%X\n", request_line_address);
- recordMissLatency(srequest, mach,
+ recordMissLatency(crequest, mach,
initialRequestTime,
forwardRequestTime,
firstResponseTime,
@@ -621,13 +491,11 @@
// update the data
//
// MUST AD DOING THIS FOR EACH REQUEST IN COALESCER
- int len = reqCoalescer[request_line_address].size();
- std::vector<PacketPtr> mylist;
- for (int i = 0; i < len; ++i) {
- PacketPtr pkt = reqCoalescer[request_line_address][i].pkt;
- assert(type == reqCoalescer[request_line_address][i].primaryType);
+ std::vector<PacketPtr> pktList = crequest->getPackets();
+ DPRINTF(GPUCoalescer, "Responding to %d packets for addr 0x%X\n",
+ pktList.size(), request_line_address);
+ for (auto& pkt : pktList) {
request_address = pkt->getAddr();
- request_line_address = makeLineAddress(pkt->getAddr());
if (pkt->getPtr<uint8_t>()) {
if ((type == RubyRequestType_LD) ||
(type == RubyRequestType_ATOMIC) ||
@@ -658,36 +526,56 @@
RubyPort::SenderState *requestSenderState =
safe_cast<RubyPort::SenderState*>(pkt->senderState);
RubyTester::SenderState* testerSenderState =
- safe_cast<RubyTester::SenderState*>(requestSenderState->predecessor);
+ safe_cast<RubyTester::SenderState*>
+ (requestSenderState->predecessor);
testerSenderState->subBlock.mergeFrom(data);
}
-
- mylist.push_back(pkt);
}
- delete srequest;
- reqCoalescer.erase(request_line_address);
- assert(!reqCoalescer.count(request_line_address));
- completeHitCallback(mylist, len);
+ m_outstanding_count--;
+ assert(m_outstanding_count >= 0);
+
+ completeHitCallback(pktList);
}
bool
GPUCoalescer::empty() const
{
- return m_writeRequestTable.empty() && m_readRequestTable.empty();
+ return coalescedTable.empty();
}
-// Analyzes the packet to see if this request can be coalesced.
-// If request can be coalesced, this request is added to the reqCoalescer table
-// and makeRequest returns RequestStatus_Issued;
-// If this is the first request to a cacheline, request is added to both
-// newRequests queue and to the reqCoalescer table; makeRequest
-// returns RequestStatus_Issued.
-// If there is a pending request to this cacheline and this request
-// can't be coalesced, RequestStatus_Aliased is returned and
-// the packet needs to be reissued.
+RubyRequestType
+GPUCoalescer::getRequestType(PacketPtr pkt)
+{
+ RubyRequestType req_type = RubyRequestType_NULL;
+
+ // These types are not supported or not used in GPU caches.
+ assert(!pkt->req->isLLSC());
+ assert(!pkt->req->isLockedRMW());
+ assert(!pkt->req->isInstFetch());
+ assert(!pkt->isFlush());
+
+ if (pkt->req->isAtomicReturn()) {
+ req_type = RubyRequestType_ATOMIC_RETURN;
+ } else if (pkt->req->isAtomicNoReturn()) {
+ req_type = RubyRequestType_ATOMIC_NO_RETURN;
+ } else if (pkt->isRead()) {
+ req_type = RubyRequestType_LD;
+ } else if (pkt->isWrite()) {
+ req_type = RubyRequestType_ST;
+ } else {
+ // Acquire and release packets will have been issued by
+ // makeRequest, so we do not need to check for it here.
+ panic("Unsupported ruby packet type\n");
+ }
+
+ return req_type;
+}
+
+// Places an uncoalesced packet in uncoalescedTable. If the packet is a
+// special type (MemFence, scoping, etc), it is issued immediately.
RequestStatus
GPUCoalescer::makeRequest(PacketPtr pkt)
{
@@ -719,147 +607,37 @@
}
}
- // If number of outstanding requests greater than the max allowed,
- // return RequestStatus_BufferFull. This logic can be extended to
- // support proper backpressure.
- if (m_outstanding_count >= m_max_outstanding_requests) {
- return RequestStatus_BufferFull;
- }
-
- RubyRequestType primary_type = RubyRequestType_NULL;
- RubyRequestType secondary_type = RubyRequestType_NULL;
-
- if (pkt->isLLSC()) {
- //
- // Alpha LL/SC instructions need to be handled carefully by the cache
- // coherence protocol to ensure they follow the proper semantics. In
- // particular, by identifying the operations as atomic, the protocol
- // should understand that migratory sharing optimizations should not
- // be performed (i.e. a load between the LL and SC should not steal
- // away exclusive permission).
- //
- if (pkt->isWrite()) {
- primary_type = RubyRequestType_Store_Conditional;
- } else {
- assert(pkt->isRead());
- primary_type = RubyRequestType_Load_Linked;
- }
- secondary_type = RubyRequestType_ATOMIC;
- } else if (pkt->req->isLockedRMW()) {
- //
- // x86 locked instructions are translated to store cache coherence
- // requests because these requests should always be treated as read
- // exclusive operations and should leverage any migratory sharing
- // optimization built into the protocol.
- //
- if (pkt->isWrite()) {
- primary_type = RubyRequestType_Locked_RMW_Write;
- } else {
- assert(pkt->isRead());
- primary_type = RubyRequestType_Locked_RMW_Read;
- }
- secondary_type = RubyRequestType_ST;
- } else if (pkt->isAtomicOp()) {
- //
- // GPU Atomic Operation
- //
- primary_type = RubyRequestType_ATOMIC;
- secondary_type = RubyRequestType_ATOMIC;
- } else {
- if (pkt->isRead()) {
- if (pkt->req->isInstFetch()) {
- primary_type = secondary_type = RubyRequestType_IFETCH;
- } else {
-#if THE_ISA == X86_ISA
- uint32_t flags = pkt->req->getFlags();
- bool storeCheck = flags &
- (TheISA::StoreCheck << TheISA::FlagShift);
-#else
- bool storeCheck = false;
-#endif // X86_ISA
- if (storeCheck) {
- primary_type = RubyRequestType_RMW_Read;
- secondary_type = RubyRequestType_ST;
- } else {
- primary_type = secondary_type = RubyRequestType_LD;
- }
+ if (!pkt->isLLSC() && !pkt->req->isLockedRMW() && !pkt->isAtomicOp() &&
+ !pkt->isRead() && !pkt->isWrite() && !pkt->isFlush() &&
+ (pkt->req->isRelease() || pkt->req->isAcquire())) {
+ if (assumingRfOCoherence) {
+ // If we reached here, this request must be a memFence
+ // and the protocol implements RfO, the coalescer can
+ // assume sequentially consistency and schedule the callback
+ // immediately.
+ // Currently the code implements fence callbacks
+ // by reusing the mechanism for kernel completions.
+ // This should be fixed.
+ int wf_id = 0;
+ if (pkt->req->hasContextId()) {
+ wf_id = pkt->req->contextId();
}
- } else if (pkt->isWrite()) {
- //
- // Note: M5 packets do not differentiate ST from RMW_Write
- //
- primary_type = secondary_type = RubyRequestType_ST;
- } else if (pkt->isFlush()) {
- primary_type = secondary_type = RubyRequestType_FLUSH;
- } else if (pkt->req->isRelease() || pkt->req->isAcquire()) {
- if (assumingRfOCoherence) {
- // If we reached here, this request must be a memFence
- // and the protocol implements RfO, the coalescer can
- // assume sequentially consistency and schedule the callback
- // immediately.
- // Currently the code implements fence callbacks
- // by reusing the mechanism for kernel completions.
- // This should be fixed.
- int wf_id = 0;
- if (pkt->req->hasContextId()) {
- wf_id = pkt->req->contextId();
- }
- insertKernel(wf_id, pkt);
- newKernelEnds.push_back(wf_id);
- if (!issueEvent.scheduled()) {
- schedule(issueEvent, curTick());
- }
- return RequestStatus_Issued;
- } else {
- // If not RfO, return issued here and let the child coalescer
- // take care of it.
- return RequestStatus_Issued;
+ insertKernel(wf_id, pkt);
+ newKernelEnds.push_back(wf_id);
+ if (!issueEvent.scheduled()) {
+ schedule(issueEvent, curTick());
}
+ return RequestStatus_Issued;
} else {
- panic("Unsupported ruby packet type\n");
+ // If not RfO, return issued here and let the child coalescer
+ // take care of it.
+ return RequestStatus_Issued;
}
}
- // Check if there is any pending request to this cache line from
- // previous cycles.
- // If there is a pending request, return aliased. Since coalescing
- // across time is not permitted, aliased requests are not coalesced.
- // If a request for this address has already been issued, we must block
- RequestStatus status = getRequestStatus(pkt, primary_type);
- if (status != RequestStatus_Ready)
- return status;
+ uncoalescedTable.insertPacket(pkt);
+ DPRINTF(GPUCoalescer, "UC insertPacket 0x%X\n", pkt->getAddr());
- Addr line_addr = makeLineAddress(pkt->getAddr());
-
- // Check if this request can be coalesced with previous
- // requests from this cycle.
- if (!reqCoalescer.count(line_addr)) {
- // This is the first access to this cache line.
- // A new request to the memory subsystem has to be
- // made in the next cycle for this cache line, so
- // add this line addr to the "newRequests" queue
- newRequests.push_back(line_addr);
-
- // There was a request to this cache line in this cycle,
- // let us see if we can coalesce this request with the previous
- // requests from this cycle
- } else if (primary_type !=
- reqCoalescer[line_addr][0].primaryType) {
- // can't coalesce loads, stores and atomics!
- return RequestStatus_Aliased;
- } else if (pkt->req->isLockedRMW() ||
- reqCoalescer[line_addr][0].pkt->req->isLockedRMW()) {
- // can't coalesce locked accesses, but can coalesce atomics!
- return RequestStatus_Aliased;
- } else if (pkt->req->hasContextId() && pkt->req->isRelease() &&
- pkt->req->contextId() !=
- reqCoalescer[line_addr][0].pkt->req->contextId()) {
- // can't coalesce releases from different wavefronts
- return RequestStatus_Aliased;
- }
-
- // in addition to the packet, we need to save both request types
- reqCoalescer[line_addr].emplace_back(pkt, primary_type, secondary_type);
if (!issueEvent.scheduled())
schedule(issueEvent, curTick());
// TODO: issue hardware prefetches here
@@ -867,8 +645,9 @@
}
void
-GPUCoalescer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type)
+GPUCoalescer::issueRequest(CoalescedRequest* crequest)
{
+ PacketPtr pkt = crequest->getFirstPkt();
int proc_id = -1;
if (pkt != NULL && pkt->req->hasContextId()) {
@@ -901,9 +680,9 @@
uint32_t blockSize = RubySystem::getBlockSizeBytes();
std::vector<bool> accessMask(blockSize,false);
std::vector< std::pair<int,AtomicOpFunctor*> > atomicOps;
- uint32_t tableSize = reqCoalescer[line_addr].size();
+ uint32_t tableSize = crequest->getPackets().size();
for (int i = 0; i < tableSize; i++) {
- PacketPtr tmpPkt = reqCoalescer[line_addr][i].pkt;
+ PacketPtr tmpPkt = crequest->getPackets()[i];
uint32_t tmpOffset = (tmpPkt->getAddr()) - line_addr;
uint32_t tmpSize = tmpPkt->getSize();
if (tmpPkt->isAtomicOp()) {
@@ -922,7 +701,7 @@
if (pkt->isAtomicOp()) {
msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
pkt->getPtr<uint8_t>(),
- pkt->getSize(), pc, secondary_type,
+ pkt->getSize(), pc, crequest->getRubyType(),
RubyAccessMode_Supervisor, pkt,
PrefetchBit_No, proc_id, 100,
blockSize, accessMask,
@@ -931,7 +710,7 @@
} else {
msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
pkt->getPtr<uint8_t>(),
- pkt->getSize(), pc, secondary_type,
+ pkt->getSize(), pc, crequest->getRubyType(),
RubyAccessMode_Supervisor, pkt,
PrefetchBit_No, proc_id, 100,
blockSize, accessMask,
@@ -941,15 +720,21 @@
DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %s\n",
curTick(), m_version, "Coal", "Begin", "", "",
printAddress(msg->getPhysicalAddress()),
- RubyRequestType_to_string(secondary_type));
+ RubyRequestType_to_string(crequest->getRubyType()));
- fatal_if(secondary_type == RubyRequestType_IFETCH,
+ fatal_if(crequest->getRubyType() == RubyRequestType_IFETCH,
"there should not be any I-Fetch requests in the GPU Coalescer");
Tick latency = cyclesToTicks(
- m_controller->mandatoryQueueLatency(secondary_type));
+ m_controller->mandatoryQueueLatency(crequest->getRubyType()));
assert(latency > 0);
+ if (!deadlockCheckEvent.scheduled()) {
+ schedule(deadlockCheckEvent,
+ m_deadlock_threshold * clockPeriod() +
+ curTick());
+ }
+
assert(m_mandatory_q_ptr);
m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
}
@@ -971,18 +756,9 @@
{
out << "[GPUCoalescer: " << m_version
<< ", outstanding requests: " << m_outstanding_count
- << ", read request table: " << m_readRequestTable
- << ", write request table: " << m_writeRequestTable
<< "]";
}
-// this can be called from setState whenever coherence permissions are
-// upgraded when invoked, coherence violations will be checked for the
-// given block
-void
-GPUCoalescer::checkCoherence(Addr addr)
-{
-}
void
GPUCoalescer::recordRequestType(SequencerRequestType requestType) {
@@ -990,40 +766,96 @@
SequencerRequestType_to_string(requestType));
}
+bool
+GPUCoalescer::coalescePacket(PacketPtr pkt)
+{
+ uint64_t seqNum = pkt->req->getReqInstSeqNum();
+ Addr line_addr = makeLineAddress(pkt->getAddr());
+
+ // If the packet has the same line address as a request already in the
+ // coalescedTable and has the same sequence number, it can be coalesced.
+ if (coalescedTable.count(line_addr)) {
+ // Search for a previous coalesced request with the same seqNum.
+ auto& creqQueue = coalescedTable.at(line_addr);
+ auto citer = std::find_if(creqQueue.begin(), creqQueue.end(),
+ [&](CoalescedRequest* c) { return c->getSeqNum() == seqNum; }
+ );
+ if (citer != creqQueue.end()) {
+ (*citer)->insertPacket(pkt);
+ return true;
+ }
+ }
+
+ if (m_outstanding_count < m_max_outstanding_requests) {
+ // This is an "aliased" or new request. Create a RubyRequest and
+ // append it to the list of "targets" in the coalescing table.
+ DPRINTF(GPUCoalescer, "Creating new or aliased request for 0x%X\n",
+ line_addr);
+
+ CoalescedRequest *creq = new CoalescedRequest(seqNum);
+ creq->insertPacket(pkt);
+ creq->setRubyType(getRequestType(pkt));
+ creq->setIssueTime(curCycle());
+
+ if (!coalescedTable.count(line_addr)) {
+ // If there is no outstanding request for this line address,
+ // create a new coalesced request and issue it immediately.
+ auto reqList = std::deque<CoalescedRequest*> { creq };
+ coalescedTable.insert(std::make_pair(line_addr, reqList));
+
+ DPRINTF(GPUCoalescer, "Issued req type %s seqNum %d\n",
+ RubyRequestType_to_string(creq->getRubyType()), seqNum);
+ issueRequest(creq);
+ } else {
+ // The request is for a line address that is already outstanding
+ // but for a different instruction. Add it as a new request to be
+ // issued when the current outstanding request is completed.
+ coalescedTable.at(line_addr).push_back(creq);
+ DPRINTF(GPUCoalescer, "found address 0x%X with new seqNum %d\n",
+ line_addr, seqNum);
+ }
+
+ // In both cases, requests are added to the coalescing table and will
+ // be counted as outstanding requests.
+ m_outstanding_count++;
+
+ return true;
+ }
+
+ // The maximum number of outstanding requests have been issued.
+ return false;
+}
void
GPUCoalescer::completeIssue()
{
- // newRequests has the cacheline addresses of all the
- // requests which need to be issued to the memory subsystem
- // in this cycle
- int len = newRequests.size();
- DPRINTF(GPUCoalescer, "Completing issue for %d new requests.\n", len);
- for (int i = 0; i < len; ++i) {
- // Get the requests from reqCoalescer table. Get only the
- // first request for each cacheline, the remaining requests
- // can be coalesced with the first request. So, only
- // one request is issued per cacheline.
- RequestDesc info = reqCoalescer[newRequests[i]][0];
- PacketPtr pkt = info.pkt;
- DPRINTF(GPUCoalescer, "Completing for newReq %d: paddr %#x\n",
- i, pkt->req->getPaddr());
- // Insert this request to the read/writeRequestTables. These tables
- // are used to track aliased requests in makeRequest subroutine
- bool found = insertRequest(pkt, info.primaryType);
+ // Iterate over the maximum number of instructions we can coalesce
+ // per cycle (coalescingWindow).
+ for (int instIdx = 0; instIdx < coalescingWindow; ++instIdx) {
+ PerInstPackets *pktList =
+ uncoalescedTable.getInstPackets(instIdx);
- if (found) {
- panic("GPUCoalescer::makeRequest should never be called if the "
- "request is already outstanding\n");
+ // getInstPackets will return nullptr if no instruction
+ // exists at the current offset.
+ if (!pktList) {
+ break;
+ } else {
+ // Since we have a pointer to the list of packets in the inst,
+ // erase them from the list if coalescing is successful and
+ // leave them in the list otherwise. This aggressively attempts
+ // to coalesce as many packets as possible from the current inst.
+ pktList->remove_if(
+ [&](PacketPtr pkt) { return coalescePacket(pkt); }
+ );
}
-
- // Issue request to ruby subsystem
- issueRequest(pkt, info.secondaryType);
}
- newRequests.clear();
+
+ // Clean up any instructions in the uncoalesced table that have had
+ // all of their packets coalesced and return a token for that column.
+ uncoalescedTable.updateResources();
// have Kernel End releases been issued this cycle
- len = newKernelEnds.size();
+ int len = newKernelEnds.size();
for (int i = 0; i < len; i++) {
kernelCallback(newKernelEnds[i]);
}
@@ -1052,71 +884,27 @@
const DataBlock& data)
{
assert(address == makeLineAddress(address));
+ assert(coalescedTable.count(address));
- DPRINTF(GPUCoalescer, "atomic callback for address %#x\n", address);
- assert(m_writeRequestTable.count(makeLineAddress(address)));
+ auto crequest = coalescedTable.at(address).front();
- RequestTable::iterator i = m_writeRequestTable.find(address);
- assert(i != m_writeRequestTable.end());
- GPUCoalescerRequest* srequest = i->second;
+ fatal_if((crequest->getRubyType() != RubyRequestType_ATOMIC &&
+ crequest->getRubyType() != RubyRequestType_ATOMIC_RETURN &&
+ crequest->getRubyType() != RubyRequestType_ATOMIC_NO_RETURN),
+ "atomicCallback saw non-atomic type response\n");
- m_writeRequestTable.erase(i);
- markRemoved();
+ hitCallback(crequest, mach, (DataBlock&)data, true,
+ crequest->getIssueTime(), Cycles(0), Cycles(0), false);
- assert((srequest->m_type == RubyRequestType_ATOMIC) ||
- (srequest->m_type == RubyRequestType_ATOMIC_RETURN) ||
- (srequest->m_type == RubyRequestType_ATOMIC_NO_RETURN));
+ delete crequest;
+ coalescedTable.at(address).pop_front();
-
- // Atomics don't write to cache, so there is no MRU update...
-
- recordMissLatency(srequest, mach,
- srequest->issue_time, Cycles(0), Cycles(0), true, false);
-
- PacketPtr pkt = srequest->pkt;
- Addr request_address = pkt->getAddr();
- Addr request_line_address = makeLineAddress(pkt->getAddr());
-
- int len = reqCoalescer[request_line_address].size();
- std::vector<PacketPtr> mylist;
- for (int i = 0; i < len; ++i) {
- PacketPtr pkt = reqCoalescer[request_line_address][i].pkt;
- assert(srequest->m_type ==
- reqCoalescer[request_line_address][i].primaryType);
- request_address = (pkt->getAddr());
- request_line_address = makeLineAddress(request_address);
- if (pkt->getPtr<uint8_t>() &&
- srequest->m_type != RubyRequestType_ATOMIC_NO_RETURN) {
- /* atomics are done in memory, and return the data *before* the atomic op... */
- pkt->setData(
- data.getData(getOffset(request_address), pkt->getSize()));
- } else {
- DPRINTF(MemoryAccess,
- "WARNING. Data not transfered from Ruby to M5 for type " \
- "%s\n",
- RubyRequestType_to_string(srequest->m_type));
- }
-
- // If using the RubyTester, update the RubyTester sender state's
- // subBlock with the recieved data. The tester will later access
- // this state.
- // Note: RubyPort will access it's sender state before the
- // RubyTester.
- if (m_usingRubyTester) {
- RubyPort::SenderState *requestSenderState =
- safe_cast<RubyPort::SenderState*>(pkt->senderState);
- RubyTester::SenderState* testerSenderState =
- safe_cast<RubyTester::SenderState*>(requestSenderState->predecessor);
- testerSenderState->subBlock.mergeFrom(data);
- }
-
- mylist.push_back(pkt);
+ if (coalescedTable.at(address).empty()) {
+ coalescedTable.erase(address);
+ } else {
+ auto nextRequest = coalescedTable.at(address).front();
+ issueRequest(nextRequest);
}
- delete srequest;
- reqCoalescer.erase(request_line_address);
- assert(!reqCoalescer.count(request_line_address));
-
- completeHitCallback(mylist, len);
}
void
@@ -1148,42 +936,42 @@
}
void
-GPUCoalescer::completeHitCallback(std::vector<PacketPtr> & mylist, int len)
+GPUCoalescer::completeHitCallback(std::vector<PacketPtr> & mylist)
{
- for (int i = 0; i < len; ++i) {
+ for (auto& pkt : mylist) {
RubyPort::SenderState *ss =
- safe_cast<RubyPort::SenderState *>(mylist[i]->senderState);
+ safe_cast<RubyPort::SenderState *>(pkt->senderState);
MemSlavePort *port = ss->port;
assert(port != NULL);
- mylist[i]->senderState = ss->predecessor;
+ pkt->senderState = ss->predecessor;
delete ss;
- port->hitCallback(mylist[i]);
+ port->hitCallback(pkt);
trySendRetries();
}
+ // We schedule an event in the same tick as hitCallback (similar to
+ // makeRequest) rather than calling completeIssue directly to reduce
+ // function calls to complete issue. This can only happen if the max
+ // outstanding requests is less than the number of slots in the
+ // uncoalesced table and makeRequest is not called again.
+ if (uncoalescedTable.packetAvailable() && !issueEvent.scheduled()) {
+ schedule(issueEvent, curTick());
+ }
+
testDrainComplete();
}
-PacketPtr
-GPUCoalescer::mapAddrToPkt(Addr address)
-{
- RequestTable::iterator i = m_readRequestTable.find(address);
- assert(i != m_readRequestTable.end());
- GPUCoalescerRequest* request = i->second;
- return request->pkt;
-}
-
void
-GPUCoalescer::recordMissLatency(GPUCoalescerRequest* srequest,
+GPUCoalescer::recordMissLatency(CoalescedRequest* crequest,
MachineType mach,
Cycles initialRequestTime,
Cycles forwardRequestTime,
Cycles firstResponseTime,
bool success, bool isRegion)
{
- RubyRequestType type = srequest->m_type;
- Cycles issued_time = srequest->issue_time;
+ RubyRequestType type = crequest->getRubyType();
+ Cycles issued_time = crequest->getIssueTime();
Cycles completion_time = curCycle();
assert(completion_time >= issued_time);
Cycles total_lat = completion_time - issued_time;
@@ -1249,7 +1037,7 @@
DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %d cycles\n",
curTick(), m_version, "Coal",
success ? "Done" : "SC_Failed", "", "",
- printAddress(srequest->pkt->getAddr()), total_lat);
+ printAddress(crequest->getFirstPkt()->getAddr()), total_lat);
}
void
diff --git a/src/mem/ruby/system/GPUCoalescer.hh b/src/mem/ruby/system/GPUCoalescer.hh
index 1321173..56a2079 100644
--- a/src/mem/ruby/system/GPUCoalescer.hh
+++ b/src/mem/ruby/system/GPUCoalescer.hh
@@ -48,6 +48,7 @@
#include "mem/ruby/protocol/RubyRequestType.hh"
#include "mem/ruby/protocol/SequencerRequestType.hh"
#include "mem/ruby/system/Sequencer.hh"
+#include "mem/token_port.hh"
class DataBlock;
class CacheMsg;
@@ -59,47 +60,99 @@
HSAScope reqScopeToHSAScope(const RequestPtr &req);
HSASegment reqSegmentToHSASegment(const RequestPtr &req);
-struct GPUCoalescerRequest
-{
- PacketPtr pkt;
- RubyRequestType m_type;
- Cycles issue_time;
+// List of packets that belong to a specific instruction.
+typedef std::list<PacketPtr> PerInstPackets;
- GPUCoalescerRequest(PacketPtr _pkt, RubyRequestType _m_type,
- Cycles _issue_time)
- : pkt(_pkt), m_type(_m_type), issue_time(_issue_time)
- {}
-};
-
-class RequestDesc
+class UncoalescedTable
{
public:
- RequestDesc(PacketPtr pkt, RubyRequestType p_type, RubyRequestType s_type)
- : pkt(pkt), primaryType(p_type), secondaryType(s_type)
- {
- }
+ UncoalescedTable(GPUCoalescer *gc);
+ ~UncoalescedTable() {}
- RequestDesc() : pkt(nullptr), primaryType(RubyRequestType_NULL),
- secondaryType(RubyRequestType_NULL)
- {
- }
+ void insertPacket(PacketPtr pkt);
+ bool packetAvailable();
+ void printRequestTable(std::stringstream& ss);
- PacketPtr pkt;
- RubyRequestType primaryType;
- RubyRequestType secondaryType;
+ // Returns a pointer to the list of packets corresponding to an
+ // instruction in the instruction map or nullptr if there are no
+ // instructions at the offset.
+ PerInstPackets* getInstPackets(int offset);
+ void updateResources();
+
+ // Check if a packet hasn't been removed from instMap in too long.
+ // Panics if a deadlock is detected and returns nothing otherwise.
+ void checkDeadlock(Tick threshold);
+
+ private:
+ GPUCoalescer *coalescer;
+
+ // Maps an instruction's unique sequence number to a queue of packets
+ // which need responses. This data structure assumes the sequence number
+ // is monotonically increasing (which is true for CU class) in order to
+ // issue packets in age order.
+ std::map<uint64_t, PerInstPackets> instMap;
};
-std::ostream& operator<<(std::ostream& out, const GPUCoalescerRequest& obj);
+class CoalescedRequest
+{
+ public:
+ CoalescedRequest(uint64_t _seqNum)
+ : seqNum(_seqNum), issueTime(Cycles(0)),
+ rubyType(RubyRequestType_NULL)
+ {}
+ ~CoalescedRequest() {}
+
+ void insertPacket(PacketPtr pkt) { pkts.push_back(pkt); }
+ void setSeqNum(uint64_t _seqNum) { seqNum = _seqNum; }
+ void setIssueTime(Cycles _issueTime) { issueTime = _issueTime; }
+ void setRubyType(RubyRequestType type) { rubyType = type; }
+
+ uint64_t getSeqNum() const { return seqNum; }
+ PacketPtr getFirstPkt() const { return pkts[0]; }
+ Cycles getIssueTime() const { return issueTime; }
+ RubyRequestType getRubyType() const { return rubyType; }
+ std::vector<PacketPtr>& getPackets() { return pkts; }
+
+ private:
+ uint64_t seqNum;
+ Cycles issueTime;
+ RubyRequestType rubyType;
+ std::vector<PacketPtr> pkts;
+};
class GPUCoalescer : public RubyPort
{
public:
+ class GMTokenPort : public TokenSlavePort
+ {
+ public:
+ GMTokenPort(const std::string& name, ClockedObject *owner,
+ PortID id = InvalidPortID)
+ : TokenSlavePort(name, owner, id)
+ { }
+ ~GMTokenPort() { }
+
+ protected:
+ Tick recvAtomic(PacketPtr) { return Tick(0); }
+ void recvFunctional(PacketPtr) { }
+ bool recvTimingReq(PacketPtr) { return false; }
+ AddrRangeList getAddrRanges() const
+ {
+ AddrRangeList ranges;
+ return ranges;
+ }
+ };
+
typedef RubyGPUCoalescerParams Params;
GPUCoalescer(const Params *);
~GPUCoalescer();
+ Port &getPort(const std::string &if_name,
+ PortID idx = InvalidPortID) override;
+
// Public Methods
void wakeup(); // Used only for deadlock detection
+ void printRequestTable(std::stringstream& ss);
void printProgress(std::ostream& out) const;
void resetStats() override;
@@ -176,15 +229,14 @@
bool empty() const;
void print(std::ostream& out) const;
- void checkCoherence(Addr address);
- void markRemoved();
- void removeRequest(GPUCoalescerRequest* request);
void evictionCallback(Addr address);
void completeIssue();
void insertKernel(int wavefront_id, PacketPtr pkt);
+ GMTokenPort& getGMTokenPort() { return gmTokenPort; }
+
void recordRequestType(SequencerRequestType requestType);
Stats::Histogram& getOutstandReqHist() { return m_outstandReqHist; }
@@ -225,11 +277,11 @@
Addr pc, RubyAccessMode access_mode,
int size, DataBlock*& data_ptr);
// Alternate implementations in VIPER Coalescer
- virtual void issueRequest(PacketPtr pkt, RubyRequestType type);
+ virtual void issueRequest(CoalescedRequest* crequest);
void kernelCallback(int wavfront_id);
- void hitCallback(GPUCoalescerRequest* request,
+ void hitCallback(CoalescedRequest* crequest,
MachineType mach,
DataBlock& data,
bool success,
@@ -237,21 +289,23 @@
Cycles forwardRequestTime,
Cycles firstResponseTime,
bool isRegion);
- void recordMissLatency(GPUCoalescerRequest* request,
+ void recordMissLatency(CoalescedRequest* crequest,
MachineType mach,
Cycles initialRequestTime,
Cycles forwardRequestTime,
Cycles firstResponseTime,
bool success, bool isRegion);
- void completeHitCallback(std::vector<PacketPtr> & mylist, int len);
- PacketPtr mapAddrToPkt(Addr address);
+ void completeHitCallback(std::vector<PacketPtr> & mylist);
- RequestStatus getRequestStatus(PacketPtr pkt,
- RubyRequestType request_type);
- bool insertRequest(PacketPtr pkt, RubyRequestType request_type);
+ virtual RubyRequestType getRequestType(PacketPtr pkt);
- bool handleLlsc(Addr address, GPUCoalescerRequest* request);
+ // Attempt to remove a packet from the uncoalescedTable and coalesce
+ // with a previous request from the same instruction. If there is no
+ // previous instruction and the max number of outstanding requests has
+ // not been reached, a new coalesced request is created and added to the
+ // "target" list of the coalescedTable.
+ bool coalescePacket(PacketPtr pkt);
EventFunctionWrapper issueEvent;
@@ -259,22 +313,27 @@
// Changed to protected to enable inheritance by VIPER Coalescer
protected:
int m_max_outstanding_requests;
- int m_deadlock_threshold;
+ Cycles m_deadlock_threshold;
CacheMemory* m_dataCache_ptr;
CacheMemory* m_instCache_ptr;
- // We need to track both the primary and secondary request types.
- // The secondary request type comprises a subset of RubyRequestTypes that
- // are understood by the L1 Controller. A primary request type can be any
- // RubyRequestType.
- typedef std::unordered_map<Addr, std::vector<RequestDesc>> CoalescingTable;
- CoalescingTable reqCoalescer;
- std::vector<Addr> newRequests;
+ // coalescingWindow is the maximum number of instructions that are
+ // allowed to be coalesced in a single cycle.
+ int coalescingWindow;
- typedef std::unordered_map<Addr, GPUCoalescerRequest*> RequestTable;
- RequestTable m_writeRequestTable;
- RequestTable m_readRequestTable;
+ // The uncoalescedTable contains several "columns" which hold memory
+ // request packets for an instruction. The maximum size is the number of
+ // columns * the wavefront size.
+ UncoalescedTable uncoalescedTable;
+
+ // An MSHR-like struct for holding coalesced requests. The requests in
+ // this table may or may not be outstanding in the memory hierarchy. The
+ // maximum size is equal to the maximum outstanding requests for a CU
+ // (typically the number of blocks in TCP). If there are duplicates of
+ // an address, they are serviced in age order.
+ std::map<Addr, std::deque<CoalescedRequest*>> coalescedTable;
+
// Global outstanding request count, across all request tables
int m_outstanding_count;
bool m_deadlock_check_scheduled;
@@ -335,7 +394,12 @@
std::vector<Stats::Histogram *> m_ForwardToFirstResponseDelayHist;
std::vector<Stats::Histogram *> m_FirstResponseToCompletionDelayHist;
-private:
+ private:
+ // Token port is used to send/receive tokens to/from GPU's global memory
+ // pipeline across the port boundary. There is one per <wave size> data
+ // ports in the CU.
+ GMTokenPort gmTokenPort;
+
// Private copy constructor and assignment operator
GPUCoalescer(const GPUCoalescer& obj);
GPUCoalescer& operator=(const GPUCoalescer& obj);
diff --git a/src/mem/ruby/system/GPUCoalescer.py b/src/mem/ruby/system/GPUCoalescer.py
index c02fb75..0335981 100644
--- a/src/mem/ruby/system/GPUCoalescer.py
+++ b/src/mem/ruby/system/GPUCoalescer.py
@@ -42,6 +42,8 @@
# max_outstanding_requests = (wave front slots) x (wave front size)
max_outstanding_requests = Param.Int(40*64,
"max requests (incl. prefetches) outstanding")
+ max_coalesces_per_cycle = Param.Int(1, "max instructions that can be " \
+ "coalesced in a single cycle")
assume_rfo = Param.Bool(True, "assume protocol implementes Read for "
"Ownership coherence");
diff --git a/src/mem/ruby/system/RubySystem.cc b/src/mem/ruby/system/RubySystem.cc
index c28f880..9fa4736 100644
--- a/src/mem/ruby/system/RubySystem.cc
+++ b/src/mem/ruby/system/RubySystem.cc
@@ -94,7 +94,7 @@
void
RubySystem::registerNetwork(Network* network_ptr)
{
- m_network = network_ptr;
+ m_networks.emplace_back(network_ptr);
}
void
@@ -108,7 +108,6 @@
RubySystem::~RubySystem()
{
- delete m_network;
delete m_profiler;
}
@@ -407,6 +406,9 @@
RubySystem::resetStats()
{
m_start_cycle = curCycle();
+ for (auto& network : m_networks) {
+ network->resetStats();
+ }
}
bool
@@ -511,8 +513,10 @@
DPRINTF(RubySystem, "Network functionalRead lookup "
"(num_maybe_stale=%d, num_busy = %d)\n",
num_maybe_stale, num_busy);
- if (m_network->functionalRead(pkt))
- return true;
+ for (auto& network : m_networks) {
+ if (network->functionalRead(pkt))
+ return true;
+ }
}
return false;
@@ -557,7 +561,9 @@
}
}
- num_functional_writes += m_network->functionalWrite(pkt);
+ for (auto& network : m_networks) {
+ num_functional_writes += network->functionalWrite(pkt);
+ }
DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes);
return true;
diff --git a/src/mem/ruby/system/RubySystem.hh b/src/mem/ruby/system/RubySystem.hh
index 5d10991..2407072 100644
--- a/src/mem/ruby/system/RubySystem.hh
+++ b/src/mem/ruby/system/RubySystem.hh
@@ -130,7 +130,8 @@
SimpleMemory *m_phys_mem;
const bool m_access_backing_store;
- Network* m_network;
+ //std::vector<Network *> m_networks;
+ std::vector<std::unique_ptr<Network>> m_networks;
std::vector<AbstractController *> m_abs_cntrl_vec;
Cycles m_start_cycle;
diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc
index de7941a..aa134f4 100644
--- a/src/mem/ruby/system/Sequencer.cc
+++ b/src/mem/ruby/system/Sequencer.cc
@@ -738,14 +738,6 @@
<< "]";
}
-// this can be called from setState whenever coherence permissions are
-// upgraded when invoked, coherence violations will be checked for the
-// given block
-void
-Sequencer::checkCoherence(Addr addr)
-{
-}
-
void
Sequencer::recordRequestType(SequencerRequestType requestType) {
DPRINTF(RubyStats, "Recorded statistic: %s\n",
diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh
index bb93607..ebca568 100644
--- a/src/mem/ruby/system/Sequencer.hh
+++ b/src/mem/ruby/system/Sequencer.hh
@@ -124,7 +124,6 @@
{ deschedule(deadlockCheckEvent); }
void print(std::ostream& out) const;
- void checkCoherence(Addr address);
void markRemoved();
void evictionCallback(Addr address);
diff --git a/src/mem/ruby/system/VIPERCoalescer.cc b/src/mem/ruby/system/VIPERCoalescer.cc
index feb13c5..d8977ac 100644
--- a/src/mem/ruby/system/VIPERCoalescer.cc
+++ b/src/mem/ruby/system/VIPERCoalescer.cc
@@ -76,15 +76,8 @@
{
}
-// Analyzes the packet to see if this request can be coalesced.
-// If request can be coalesced, this request is added to the reqCoalescer table
-// and makeRequest returns RequestStatus_Issued;
-// If this is the first request to a cacheline, request is added to both
-// newRequests queue and to the reqCoalescer table; makeRequest
-// returns RequestStatus_Issued.
-// If there is a pending request to this cacheline and this request
-// can't be coalesced, RequestStatus_Aliased is returned and
-// the packet needs to be reissued.
+// Places an uncoalesced packet in uncoalescedTable. If the packet is a
+// special type (MemFence, scoping, etc), it is issued immediately.
RequestStatus
VIPERCoalescer::makeRequest(PacketPtr pkt)
{
@@ -109,7 +102,6 @@
return RequestStatus_Issued;
}
-// return RequestStatus_Aliased;
} else if (pkt->req->isKernel() && pkt->req->isRelease()) {
// Flush Dirty Data on Kernel End
// isKernel + isRelease
@@ -123,13 +115,10 @@
}
return RequestStatus_Issued;
}
- RequestStatus requestStatus = GPUCoalescer::makeRequest(pkt);
- if (requestStatus!=RequestStatus_Issued) {
- // Request not isssued
- // enqueue Retry
- DPRINTF(GPUCoalescer, "Request not issued by GPUCoaleser\n");
- return requestStatus;
- } else if (pkt->req->isKernel() && pkt->req->isAcquire()) {
+
+ GPUCoalescer::makeRequest(pkt);
+
+ if (pkt->req->isKernel() && pkt->req->isAcquire()) {
// Invalidate clean Data on Kernel Begin
// isKernel + isAcquire
invL1();
diff --git a/src/mem/slicc/ast/DeferEnqueueingStatementAST.py b/src/mem/slicc/ast/DeferEnqueueingStatementAST.py
new file mode 100644
index 0000000..40b9a4c
--- /dev/null
+++ b/src/mem/slicc/ast/DeferEnqueueingStatementAST.py
@@ -0,0 +1,82 @@
+#
+# Copyright (c) 2017 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# For use for simulation and test purposes only
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from this
+# software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Tuan Ta
+#
+
+from slicc.ast.StatementAST import StatementAST
+from slicc.symbols import Var
+
+class DeferEnqueueingStatementAST(StatementAST):
+ def __init__(self, slicc, queue_name, type_ast, statements):
+ super(DeferEnqueueingStatementAST, self).__init__(slicc)
+
+ self.queue_name = queue_name
+ self.type_ast = type_ast
+ self.statements = statements
+
+ def __repr__(self):
+ return "[DeferEnqueueingStatementAst: %s %s %s]" % \
+ (self.queue_name, self.type_ast.ident, self.statements)
+
+ def generate(self, code, return_type):
+ code("{")
+ code.indent()
+ self.symtab.pushFrame()
+
+ msg_type = self.type_ast.type
+
+ # Add new local var to symbol table
+ v = Var(self.symtab, "out_msg", self.location, msg_type, "*out_msg",
+ self.pairs)
+ self.symtab.newSymbol(v)
+
+ # Declare message
+ code("std::shared_ptr<${{msg_type.c_ident}}> out_msg = "\
+ "std::make_shared<${{msg_type.c_ident}}>(clockEdge());")
+
+ # The other statements
+ t = self.statements.generate(code, None)
+ self.queue_name.assertType("OutPort")
+
+ code("(${{self.queue_name.var.code}}).deferEnqueueingMessage(addr, "\
+ "out_msg);")
+
+ # End scope
+ self.symtab.popFrame()
+ code.dedent()
+ code("}")
+
+ def findResources(self, resources):
+ var = self.queue_name.var
+ res_count = int(resources.get(var, 0))
+ resources[var] = str(res_count + 1)
diff --git a/src/mem/slicc/ast/__init__.py b/src/mem/slicc/ast/__init__.py
index e3169e8..c410104 100644
--- a/src/mem/slicc/ast/__init__.py
+++ b/src/mem/slicc/ast/__init__.py
@@ -33,6 +33,7 @@
from slicc.ast.CheckNextCycleAST import *
from slicc.ast.DeclAST import *
from slicc.ast.DeclListAST import *
+from slicc.ast.DeferEnqueueingStatementAST import *
from slicc.ast.EnqueueStatementAST import *
from slicc.ast.EnumDeclAST import *
from slicc.ast.EnumExprAST import *
diff --git a/src/mem/slicc/parser.py b/src/mem/slicc/parser.py
index 846a74f..643eec6 100644
--- a/src/mem/slicc/parser.py
+++ b/src/mem/slicc/parser.py
@@ -120,6 +120,7 @@
'void' : 'VOID',
'new' : 'NEW',
'OOD' : 'OOD',
+ 'defer_enqueueing' : 'DEFER_ENQUEUEING',
}
literals = ':[]{}(),='
@@ -583,6 +584,10 @@
"statement : ENQUEUE '(' var ',' type ',' expr ')' statements"
p[0] = ast.EnqueueStatementAST(self, p[3], p[5], p[7], p[9])
+ def p_statement__defer_enqueueing(self, p):
+ "statement : DEFER_ENQUEUEING '(' var ',' type ')' statements"
+ p[0] = ast.DeferEnqueueingStatementAST(self, p[3], p[5], p[7])
+
def p_statement__stall_and_wait(self, p):
"statement : STALL_AND_WAIT '(' var ',' var ')' SEMI"
p[0] = ast.StallAndWaitStatementAST(self, p[3], p[5])
diff --git a/src/python/m5/simulate.py b/src/python/m5/simulate.py
index 3317ae8..698dfbc 100644
--- a/src/python/m5/simulate.py
+++ b/src/python/m5/simulate.py
@@ -107,8 +107,9 @@
except ImportError:
pass
- do_dot(root, options.outdir, options.dot_config)
- do_ruby_dot(root, options.outdir, options.dot_config)
+ if options.dot_config:
+ do_dot(root, options.outdir, options.dot_config)
+ do_ruby_dot(root, options.outdir, options.dot_config)
# Initialize the global statistics
stats.initSimStats()
diff --git a/src/sim/core.hh b/src/sim/core.hh
index 7a7e911..48b7096 100644
--- a/src/sim/core.hh
+++ b/src/sim/core.hh
@@ -43,8 +43,6 @@
/// The universal simulation clock.
inline Tick curTick() { return _curEventQueue->getCurTick(); }
-const Tick retryTime = 1000;
-
/// These are variables that are set based on the simulator frequency
///@{
namespace SimClock {
diff --git a/src/sim/guest_abi/definition.hh b/src/sim/guest_abi/definition.hh
index becdb3c..4928b93 100644
--- a/src/sim/guest_abi/definition.hh
+++ b/src/sim/guest_abi/definition.hh
@@ -57,27 +57,28 @@
template <typename ABI, typename Ret, typename Enabled=void>
struct Result
{
- private:
/*
* Store result "ret" into the state accessible through tc. Optionally
* accept "state" in case it holds some signature wide information.
*
* Note that the declaration below is only to document the expected
- * signature and is private so it won't be used by accident.
+ * signature and is commented out so it won't be used by accident.
* Specializations of this Result class should define their own version
- * of this method which actually does something and is public.
+ * of this method which actually does something.
+ *
+ * static void store(ThreadContext *tc, const Ret &ret);
+ * static void store(ThreadContext *tc, const Ret &ret,
+ * typename ABI::State &state);
*/
- static void store(ThreadContext *tc, const Ret &ret);
- static void store(ThreadContext *tc, const Ret &ret,
- typename ABI::State &state);
/*
* Prepare for a result of type Ret. This might mean, for instance,
* allocating an argument register for a result pointer.
*
* This method can be excluded if no preparation is necessary.
+ *
+ * static void prepare(ThreadContext *tc, typename ABI::State &state);
*/
- static void prepare(ThreadContext *tc, typename ABI::State &state);
};
/*
@@ -98,16 +99,18 @@
*
* Like Result::store above, the declaration below is only to document
* the expected method signature.
+ *
+ * static Arg get(ThreadContext *tc, typename ABI::State &state);
*/
- static Arg get(ThreadContext *tc, typename ABI::State &state);
/*
* Prepare for an argument of type Arg. This might mean, for instance,
* allocating an argument register for a result pointer.
*
* This method can be excluded if no preparation is necessary.
+ *
+ * static void allocate(ThreadContext *tc, typename ABI::State &state);
*/
- static void allocate(ThreadContext *tc, typename ABI::State &state);
};
} // namespace GuestABI
diff --git a/src/sim/kernel_workload.cc b/src/sim/kernel_workload.cc
index 415ff96..74f9fc7 100644
--- a/src/sim/kernel_workload.cc
+++ b/src/sim/kernel_workload.cc
@@ -35,9 +35,6 @@
_loadAddrMask(p.load_addr_mask), _loadAddrOffset(p.load_addr_offset),
kernelSymtab(new Loader::SymbolTable), commandLine(p.command_line)
{
- if (!Loader::debugSymbolTable)
- Loader::debugSymbolTable = new Loader::SymbolTable;
-
if (params().object_file == "") {
inform("No kernel set for full system simulation. "
"Assuming you know what you're doing.");
@@ -70,10 +67,10 @@
fatal_if(!kernelObj->loadLocalSymbols(kernelSymtab),
"Could not load kernel local symbols.");
- fatal_if(!kernelObj->loadGlobalSymbols(Loader::debugSymbolTable),
+ fatal_if(!kernelObj->loadGlobalSymbols(&Loader::debugSymbolTable),
"Could not load kernel symbols.");
- fatal_if(!kernelObj->loadLocalSymbols(Loader::debugSymbolTable),
+ fatal_if(!kernelObj->loadLocalSymbols(&Loader::debugSymbolTable),
"Could not load kernel local symbols.");
}
diff --git a/src/sim/kernel_workload.hh b/src/sim/kernel_workload.hh
index b88051a..34406eb 100644
--- a/src/sim/kernel_workload.hh
+++ b/src/sim/kernel_workload.hh
@@ -98,9 +98,9 @@
}
bool
- insertSymbol(Addr address, const std::string &symbol) override
+ insertSymbol(const Loader::Symbol &symbol) override
{
- return kernelSymtab->insert(address, symbol);
+ return kernelSymtab->insert(symbol);
}
void initState() override;
diff --git a/src/sim/process.cc b/src/sim/process.cc
index a55362d..9a88163 100644
--- a/src/sim/process.cc
+++ b/src/sim/process.cc
@@ -155,13 +155,11 @@
image = objFile->buildImage();
- if (!::Loader::debugSymbolTable) {
- ::Loader::debugSymbolTable = new ::Loader::SymbolTable();
- if (!objFile->loadGlobalSymbols(::Loader::debugSymbolTable) ||
- !objFile->loadLocalSymbols(::Loader::debugSymbolTable) ||
- !objFile->loadWeakSymbols(::Loader::debugSymbolTable)) {
- delete ::Loader::debugSymbolTable;
- ::Loader::debugSymbolTable = nullptr;
+ if (::Loader::debugSymbolTable.empty()) {
+ if (!objFile->loadGlobalSymbols(&::Loader::debugSymbolTable) ||
+ !objFile->loadLocalSymbols(&::Loader::debugSymbolTable) ||
+ !objFile->loadWeakSymbols(&::Loader::debugSymbolTable)) {
+ ::Loader::debugSymbolTable.clear();
}
}
}
diff --git a/src/sim/pseudo_inst.cc b/src/sim/pseudo_inst.cc
index 203afc0..2d87b05 100644
--- a/src/sim/pseudo_inst.cc
+++ b/src/sim/pseudo_inst.cc
@@ -242,8 +242,10 @@
if (!to_number(address, addr))
continue;
- if (!tc->getSystemPtr()->workload->insertSymbol(addr, symbol))
+ if (!tc->getSystemPtr()->workload->insertSymbol(
+ { Loader::Symbol::Binding::Global, symbol, addr })) {
continue;
+ }
DPRINTF(Loader, "Loaded symbol: %s @ %#llx\n", symbol, addr);
@@ -264,8 +266,10 @@
DPRINTF(Loader, "Loaded symbol: %s @ %#llx\n", symbol, addr);
- tc->getSystemPtr()->workload->insertSymbol(addr, symbol);
- Loader::debugSymbolTable->insert(addr, symbol);
+ tc->getSystemPtr()->workload->insertSymbol(
+ { Loader::Symbol::Binding::Global, symbol, addr });
+ Loader::debugSymbolTable.insert(
+ { Loader::Symbol::Binding::Global, symbol, addr });
}
uint64_t
diff --git a/src/sim/pseudo_inst.hh b/src/sim/pseudo_inst.hh
index 6a63812..982d6c8 100644
--- a/src/sim/pseudo_inst.hh
+++ b/src/sim/pseudo_inst.hh
@@ -59,16 +59,6 @@
namespace GuestABI
{
-template <typename T>
-struct Result<PseudoInstABI, T>
-{
- static void
- store(ThreadContext *tc, const T &ret)
- {
- // Don't do anything with the pseudo inst results by default.
- }
-};
-
template <>
struct Argument<PseudoInstABI, uint64_t>
{
@@ -134,9 +124,9 @@
* @return Whether the pseudo instruction was recognized/handled.
*/
-template <typename ABI>
+template <typename ABI, bool store_ret>
bool
-pseudoInst(ThreadContext *tc, uint8_t func, uint64_t &result)
+pseudoInstWork(ThreadContext *tc, uint8_t func, uint64_t &result)
{
DPRINTF(PseudoInst, "PseudoInst::pseudoInst(%i)\n", func);
@@ -160,11 +150,11 @@
return true;
case M5OP_QUIESCE_TIME:
- result = invokeSimcall<ABI>(tc, quiesceTime);
+ result = invokeSimcall<ABI, store_ret>(tc, quiesceTime);
return true;
case M5OP_RPNS:
- result = invokeSimcall<ABI>(tc, rpns);
+ result = invokeSimcall<ABI, store_ret>(tc, rpns);
return true;
case M5OP_WAKE_CPU:
@@ -180,7 +170,7 @@
return true;
case M5OP_INIT_PARAM:
- result = invokeSimcall<ABI>(tc, initParam);
+ result = invokeSimcall<ABI, store_ret>(tc, initParam);
return true;
case M5OP_LOAD_SYMBOL:
@@ -204,11 +194,11 @@
return true;
case M5OP_WRITE_FILE:
- result = invokeSimcall<ABI>(tc, writefile);
+ result = invokeSimcall<ABI, store_ret>(tc, writefile);
return true;
case M5OP_READ_FILE:
- result = invokeSimcall<ABI>(tc, readfile);
+ result = invokeSimcall<ABI, store_ret>(tc, readfile);
return true;
case M5OP_DEBUG_BREAK:
@@ -262,6 +252,21 @@
}
}
+template <typename ABI, bool store_ret=false>
+bool
+pseudoInst(ThreadContext *tc, uint8_t func, uint64_t &result)
+{
+ return pseudoInstWork<ABI, store_ret>(tc, func, result);
+}
+
+template <typename ABI, bool store_ret=true>
+bool
+pseudoInst(ThreadContext *tc, uint8_t func)
+{
+ uint64_t result;
+ return pseudoInstWork<ABI, store_ret>(tc, func, result);
+}
+
} // namespace PseudoInst
#endif // __SIM_PSEUDO_INST_HH__
diff --git a/src/sim/syscall_emul.cc b/src/sim/syscall_emul.cc
index bffedfd..6d39823 100644
--- a/src/sim/syscall_emul.cc
+++ b/src/sim/syscall_emul.cc
@@ -1633,3 +1633,26 @@
return (status == -1) ? -errno : status;
}
+SyscallReturn
+getcpuFunc(SyscallDesc *desc, ThreadContext *tc,
+ Addr cpu_ptr, Addr node_ptr, Addr tcache_ptr)
+{
+ bool error = false;
+
+ // unsigned is the same size (4) on all Linux supported ISAs.
+ if (cpu_ptr != 0) {
+ TypedBufferArg<uint32_t> result(cpu_ptr);
+ *result = htog(tc->contextId(),
+ tc->getSystemPtr()->getGuestByteOrder());
+ error |= !result.copyOut(tc->getVirtProxy());
+ }
+
+ // Set a fixed NUMA node 0.
+ if (node_ptr != 0) {
+ TypedBufferArg<uint32_t> result(node_ptr);
+ *result = 0;
+ error |= !result.copyOut(tc->getVirtProxy());
+ }
+
+ return error ? -EFAULT : 0;
+}
diff --git a/src/sim/syscall_emul.hh b/src/sim/syscall_emul.hh
index 290c48e..5b966cd 100644
--- a/src/sim/syscall_emul.hh
+++ b/src/sim/syscall_emul.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012-2013, 2015, 2019 ARM Limited
+ * Copyright (c) 2012-2013, 2015, 2019-2020 ARM Limited
* Copyright (c) 2015 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -354,6 +354,9 @@
int tgt_fd, int level, int optname,
Addr valPtr, socklen_t len);
+SyscallReturn getcpuFunc(SyscallDesc *desc, ThreadContext *tc,
+ Addr cpu_ptr, Addr node_ptr, Addr tcache_ptr);
+
// Target getsockname() handler.
SyscallReturn getsocknameFunc(SyscallDesc *desc, ThreadContext *tc,
int tgt_fd, Addr addrPtr, Addr lenPtr);
@@ -674,7 +677,8 @@
tgt->f_frsize = htog(host->f_frsize, bo);
#endif
#if defined(__linux__)
- memcpy(&tgt->f_spare, &host->f_spare, sizeof(host->f_spare));
+ memcpy(&tgt->f_spare, &host->f_spare,
+ std::min(sizeof(host->f_spare), sizeof(tgt->f_spare)));
#else
/*
* The fields are different sizes per OS. Don't bother with
@@ -1718,7 +1722,7 @@
ffdp->getFileName());
if (lib) {
- lib->loadAllSymbols(Loader::debugSymbolTable,
+ lib->loadAllSymbols(&Loader::debugSymbolTable,
lib->buildImage().minAddr(), start);
}
}
diff --git a/src/sim/ticked_object.cc b/src/sim/ticked_object.cc
index c6d1f98..7af439c 100644
--- a/src/sim/ticked_object.cc
+++ b/src/sim/ticked_object.cc
@@ -44,7 +44,7 @@
Stats::Scalar *imported_num_cycles,
Event::Priority priority) :
object(object_),
- event([this]{ processClockEvent(); }, name(), false, priority),
+ event([this]{ processClockEvent(); }, object_.name(), false, priority),
running(false),
lastStopped(0),
/* Allocate numCycles if an external stat wasn't passed in */
diff --git a/src/sim/workload.hh b/src/sim/workload.hh
index ca2dffb..e24aa74 100644
--- a/src/sim/workload.hh
+++ b/src/sim/workload.hh
@@ -50,7 +50,7 @@
virtual Loader::Arch getArch() const = 0;
virtual const Loader::SymbolTable *symtab(ThreadContext *tc) = 0;
- virtual bool insertSymbol(Addr address, const std::string &symbol) = 0;
+ virtual bool insertSymbol(const Loader::Symbol &symbol) = 0;
/** @{ */
/**
@@ -70,14 +70,12 @@
addFuncEvent(const Loader::SymbolTable *symtab, const char *lbl,
const std::string &desc, Args... args)
{
- Addr addr M5_VAR_USED = 0; // initialize only to avoid compiler warning
+ auto it = symtab->find(lbl);
+ if (it == symtab->end())
+ return nullptr;
- if (symtab->findAddress(lbl, addr)) {
- return new T(system, desc, fixFuncEventAddr(addr),
- std::forward<Args>(args)...);
- }
-
- return nullptr;
+ return new T(system, desc, fixFuncEventAddr(it->address),
+ std::forward<Args>(args)...);
}
template <class T>
diff --git a/src/systemc/dt/int/SConscript b/src/systemc/dt/int/SConscript
index 7b97d5f..26bb6ae 100644
--- a/src/systemc/dt/int/SConscript
+++ b/src/systemc/dt/int/SConscript
@@ -25,13 +25,22 @@
Import('*')
+from m5.util import compareVersions
+
if env['USE_SYSTEMC']:
+ if main['GCC'] and compareVersions(main['GCC_VERSION'], '10.1') >= 0:
+ disable_false_positives = {
+ "CCFLAGS": [ "-Wno-array-bounds",
+ "-Wno-stringop-overflow" ]
+ }
+ else:
+ disable_false_positives = {}
Source('messages.cc')
Source('sc_int_base.cc')
Source('sc_int_mask.cc')
Source('sc_length_param.cc')
Source('sc_nbexterns.cc')
Source('sc_nbutils.cc')
- Source('sc_signed.cc')
+ Source('sc_signed.cc', append=disable_false_positives)
Source('sc_uint_base.cc')
- Source('sc_unsigned.cc')
+ Source('sc_unsigned.cc', append=disable_false_positives)
diff --git a/src/unittest/nmtest.cc b/src/unittest/nmtest.cc
index f444a90..3601fa8 100644
--- a/src/unittest/nmtest.cc
+++ b/src/unittest/nmtest.cc
@@ -52,27 +52,24 @@
obj->loadLocalSymbols(&symtab);
if (argc == 2) {
- Loader::SymbolTable::ATable::const_iterator i =
- symtab.getAddrTable().begin();
- Loader::SymbolTable::ATable::const_iterator end =
- symtab.getAddrTable().end();
- while (i != end) {
- cprintf("%#x %s\n", i->first, i->second);
- ++i;
- }
+ for (const Loader::Symbol &symbol: symtab)
+ cprintf("%#x %s\n", symbol.address, symbol.name);
} else {
string symbol = argv[2];
Addr address;
if (symbol[0] == '0' && symbol[1] == 'x') {
+ Loader::SymbolTable::const_iterator it;
if (to_number(symbol, address) &&
- symtab.findSymbol(address, symbol))
- cprintf("address = %#x, symbol = %s\n", address, symbol);
- else
+ (it = symtab.find(address)) != symtab.end()) {
+ cprintf("address = %#x, symbol = %s\n", address, it->name);
+ } else {
cprintf("address = %#x was not found\n", address);
+ }
} else {
- if (symtab.findAddress(symbol, address))
- cprintf("symbol = %s address = %#x\n", symbol, address);
+ auto it = symtab.find(symbol);
+ if (it != symtab.end())
+ cprintf("symbol = %s address = %#x\n", symbol, it->address);
else
cprintf("symbol = %s was not found\n", symbol);
}
diff --git a/src/unittest/symtest.cc b/src/unittest/symtest.cc
index fd006b4..369e1a4 100644
--- a/src/unittest/symtest.cc
+++ b/src/unittest/symtest.cc
@@ -60,19 +60,21 @@
Addr address;
if (!to_number(symbol, address)) {
- if (!symtab.findAddress(symbol, address)) {
+ auto it = symtab.find(symbol);
+ if (it == symtab.end()) {
cout << "could not find symbol: " << symbol << endl;
exit(1);
}
- cout << symbol << " -> " << "0x" << hex << address << endl;
+ cout << symbol << " -> " << "0x" << hex << it->address << endl;
} else {
- if (!symtab.findSymbol(address, symbol)) {
+ auto it = symtab.find(address);
+ if (it == symtab.end()) {
cout << "could not find address: " << address << endl;
exit(1);
}
- cout << "0x" << hex << address << " -> " << symbol<< endl;
+ cout << "0x" << hex << address << " -> " << it->name << endl;
}
return 0;
diff --git a/tests/gem5/memory/test.py b/tests/gem5/memory/test.py
index bf87a27..beed084 100644
--- a/tests/gem5/memory/test.py
+++ b/tests/gem5/memory/test.py
@@ -71,6 +71,8 @@
null_tests = [
('garnet_synth_traffic', ['--sim-cycles', '5000000']),
('memcheck', ['--maxtick', '2000000000', '--prefetchers']),
+ ('ruby_mem_test', ['--abs-max-tick', '20000000',
+ '--functional', '10']),
('ruby_random_test', ['--maxloads', '5000']),
('ruby_direct_test', ['--requests', '50000']),
]
diff --git a/util/dockerfiles/gcn-gpu/Dockerfile b/util/dockerfiles/gcn-gpu/Dockerfile
new file mode 100644
index 0000000..485a406
--- /dev/null
+++ b/util/dockerfiles/gcn-gpu/Dockerfile
@@ -0,0 +1,132 @@
+FROM ubuntu:16.04
+
+# Should be minimal needed packages
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ findutils \
+ file \
+ libunwind8 \
+ libunwind-dev \
+ pkg-config \
+ build-essential \
+ gcc-multilib \
+ g++-multilib \
+ git \
+ ca-certificates \
+ m4 \
+ scons \
+ zlib1g \
+ zlib1g-dev \
+ libprotobuf-dev \
+ protobuf-compiler \
+ libprotoc-dev \
+ libgoogle-perftools-dev \
+ python-dev \
+ python \
+ python-yaml \
+ wget \
+ libpci3 \
+ libelf1 \
+ libelf-dev \
+ cmake \
+ openssl \
+ libssl-dev \
+ libboost-filesystem-dev \
+ libboost-system-dev \
+ libboost-dev
+
+ARG gem5_dist=http://dist.gem5.org/dist/current
+
+# Install ROCm 1.6 binaries
+RUN wget -qO- ${gem5_dist}/apt_1.6.2.tar.bz2 \
+ | tar -xjv \
+ && cd apt_1.6.2/pool/main/ \
+ && dpkg -i h/hsakmt-roct-dev/* \
+ && dpkg -i h/hsa-ext-rocr-dev/* \
+ && dpkg -i h/hsa-rocr-dev/* \
+ && dpkg -i r/rocm-utils/* \
+ && dpkg -i h/hcc/* \
+ && dpkg -i r/rocm-opencl/* \
+ && dpkg -i r/rocm-opencl-dev/*
+
+# Get ROCm libraries we need to compile from source (and ROCm-profiler)
+RUN git clone --single-branch https://github.com/ROCm-Developer-Tools/HIP/ && \
+ git clone --single-branch https://github.com/ROCmSoftwarePlatform/hipBLAS/ && \
+ git clone --single-branch https://github.com/ROCmSoftwarePlatform/rocBLAS/ && \
+ git clone --single-branch https://github.com/ROCmSoftwarePlatform/MIOpenGEMM/ && \
+ git clone --single-branch https://github.com/ROCmSoftwarePlatform/MIOpen/ && \
+ git clone --single-branch https://github.com/RadeonOpenCompute/rocm-cmake/ && \
+ git clone --single-branch https://github.com/rocmarchive/ROCm-Profiler.git
+
+# Apply patches to various repos
+RUN mkdir -p /patch && cd /patch && \
+ wget ${gem5_dist}/rocm_patches/hipBLAS.patch && \
+ wget ${gem5_dist}/rocm_patches/hip.patch && \
+ wget ${gem5_dist}/rocm_patches/miopen.patch && \
+ wget ${gem5_dist}/rocm_patches/rocBLAS.patch
+
+RUN git -C /HIP/ checkout 0e3d824e && git -C /HIP/ apply /patch/hip.patch && \
+ git -C /hipBLAS/ checkout ee57787e && git -C /hipBLAS/ apply /patch/hipBLAS.patch && \
+ git -C /rocBLAS/ checkout cbff4b4e && git -C /rocBLAS/ apply /patch/rocBLAS.patch && \
+ git -C /MIOpenGEMM/ checkout 9547fb9e && \
+ git -C /MIOpen/ checkout a9949e30 && git -C /MIOpen/ apply /patch/miopen.patch
+
+ENV ROCM_PATH /opt/rocm
+ENV HCC_HOME ${ROCM_PATH}/hcc
+ENV HSA_PATH ${ROCM_PATH}/hsa
+ENV HIP_PATH ${ROCM_PATH}/hip
+ENV HIP_PLATFORM hcc
+ENV PATH ${ROCM_PATH}/bin:${HCC_HOME}/bin:${HSA_PATH}/bin:${HIP_PATH}/bin:${PATH}
+ENV HCC_AMDGPU_TARGET gfx801
+
+# Create build dirs for machine learning ROCm installs
+RUN mkdir -p /HIP/build && \
+ mkdir -p /rocBLAS/build && \
+ mkdir -p /hipBLAS/build && \
+ mkdir -p /rocm-cmake/build && \
+ mkdir -p /MIOpenGEMM/build && \
+ mkdir -p /MIOpen/build
+
+# Do the builds, empty build dir to trim image size
+WORKDIR /HIP/build
+RUN cmake .. && make -j$(nproc) && make install && rm -rf *
+
+WORKDIR /rocBLAS/build
+RUN CXX=/opt/rocm/bin/hcc cmake -DCMAKE_CXX_FLAGS="--amdgpu-target=gfx801" .. && \
+ make -j$(nproc) && make install && rm -rf *
+
+WORKDIR /hipBLAS/build
+RUN CXX=/opt/rocm/bin/hcc cmake -DCMAKE_CXX_FLAGS="--amdgpu-target=gfx801" .. && \
+ make -j$(nproc) && make install && rm -rf *
+
+WORKDIR /rocm-cmake/build
+RUN cmake .. && cmake --build . --target install && rm -rf *
+
+WORKDIR /MIOpenGEMM/build
+RUN cmake .. && make miopengemm && make install && rm -rf *
+
+# Should link this in as a volume if at all possible
+RUN mkdir -p /.cache/miopen && chmod 777 /.cache/miopen
+
+WORKDIR /MIOpen/build
+RUN CXX=/opt/rocm/hcc/bin/hcc cmake \
+ -DCMAKE_BUILD_TYPE=Debug \
+ -DCMAKE_INSTALL_PREFIX=/opt/rocm \
+ -DMIOPEN_BACKEND=HIP \
+ -DCMAKE_PREFIX_PATH="/opt/rocm/hip;/opt/rocm/hcc;/opt/rocm/rocdl;/opt/rocm/miopengemm;/opt/rocm/hsa" \
+ -DMIOPEN_CACHE_DIR=/.cache/miopen \
+ -DMIOPEN_AMDGCN_ASSEMBLER_PATH=/opt/rocm/opencl/bin \
+ -DCMAKE_CXX_FLAGS="-isystem /usr/include/x86_64-linux-gnu" .. && \
+ make -j$(nproc) && make install && rm -rf *
+
+# Create performance DB for gfx801. May need personal dbs still
+WORKDIR /opt/rocm/miopen/share/miopen/db
+RUN ln -s gfx803_64.cd.pdb.txt gfx801_8.cd.pdb.txt && \
+ ln -s gfx803_64.cd.pdb.txt gfx801_16.cd.pdb.txt && \
+ ln -s gfx803_64.cd.pdb.txt gfx801_32.cd.pdb.txt && \
+ ln -s gfx803_64.cd.pdb.txt gfx801_64.cd.pdb.txt
+
+# Install profiler from .deb file, works for 1.6.2
+WORKDIR /ROCm-Profiler
+RUN dpkg -i package/rocm-profiler_4.0.6036_amd64.deb
+
+WORKDIR /
diff --git a/util/dockerfiles/gcn-gpu/README.md b/util/dockerfiles/gcn-gpu/README.md
new file mode 100644
index 0000000..0764cad
--- /dev/null
+++ b/util/dockerfiles/gcn-gpu/README.md
@@ -0,0 +1,27 @@
+## gcn3-gpu dockerfile
+This dockerfile contains all the dependencies necessary to run GPU applications in gem5 using the gcn3 APU model
+
+### Building the image
+```
+docker build -t <image_name> .
+```
+
+### Building gem5 using the image
+The following command assumes the gem5 directory is a subdirectory of your current directory
+```
+docker run --rm -v $PWD/gem5:/gem5 -w /gem5 <image_name> scons -sQ -j$(nproc) build/GCN3_X86/gem5.opt
+```
+
+### Test gem5 using a prebuilt application
+```
+wget http://dist.gem5.org/dist/current/test-progs/hip_sample_bins/MatrixTranspose
+docker run --rm -v $PWD/MatrixTranspose:/MatrixTranspose -v $PWD/public_gem5:/gem5 -w /gem5 \
+ <image_name> build/GCN3_X86/gem5.opt configs/example/apu_se.py -n2 --benchmark-root=/ -cMatrixTranspose
+```
+
+### Notes
+* When using the `-v` flag, the path to the input file/directory needs to be the absolute path; symlinks don't work
+* Currently linking in an AFS volume is not supported, as it uses ACLs instead of owner/group IDs
+
+### ToDo
+* Add square to gem5-resources github, add directions for building and running an application
diff --git a/util/gem5img.py b/util/gem5img.py
index 607f034..51a5487 100755
--- a/util/gem5img.py
+++ b/util/gem5img.py
@@ -135,11 +135,17 @@
exit(returncode)
lines = out.splitlines()
# Make sure the first few lines of the output look like what we expect.
- assert(lines[0][0] == '#')
- assert(lines[1] == 'unit: sectors')
- assert(lines[2] == '')
- # This line has information about the first partition.
- chunks = lines[3].split()
+ assert(lines[0][0] == '#' or lines[0].startswith('label:'))
+ assert(lines[1] == 'unit: sectors' or lines[1].startswith('label-id:'))
+ assert(lines[2] == '' or lines[2].startswith('device:'))
+ if lines[0][0] == '#' :
+        # Parsing an 'old style' dump output
+ # Line 4 has information about the first partition.
+ chunks = lines[3].split()
+ else :
+        # Parsing a 'new style' dump output
+ # Line 6 has information about the first partition.
+ chunks = lines[5].split()
# The fourth chunk is the offset of the partition in sectors followed by
# a comma. We drop the comma and convert that to an integer.
sectors = string.atoi(chunks[3][:-1])
@@ -282,12 +288,11 @@
[('file', 'Name of the image file.')])
def partition(dev, cylinders, heads, sectors):
- # Use fdisk to partition the device
- comStr = '0,\n;\n;\n;\n'
- return runPriv([findProg('sfdisk'), '--no-reread', '-D', \
- '-C', "%d" % cylinders, \
- '-H', "%d" % heads, \
- '-S', "%d" % sectors, \
+ # Use sfdisk to partition the device
+ # The specified options are intended to work with both new and old
+ # versions of sfdisk (see https://askubuntu.com/a/819614)
+ comStr = ';'
+ return runPriv([findProg('sfdisk'), '--no-reread', '-u', 'S', '-L', \
str(dev)], inputVal=comStr)
def partitionComFunc(options, args):
diff --git a/util/m5/src/SConscript b/util/m5/src/SConscript
index c2a3ede..1bf781c 100644
--- a/util/m5/src/SConscript
+++ b/util/m5/src/SConscript
@@ -29,6 +29,7 @@
# Raw source files.
m5_mmap = 'm5_mmap.c'
+args = 'args.c'
m5 = 'm5.c'
jni = 'jni_gem5Op.c'
lua = 'lua_gem5Op.c'
@@ -63,7 +64,8 @@
# The m5 stand alone command line utility.
#
ct_support = list([ File('%s_call_type.c' % ct.name) for ct in call_types ])
-m5_bin = static_env.Program('out/m5', ct_support + [ m5, m5_mmap, libm5 ])
+m5_bin = static_env.Program('out/m5',
+ ct_support + [ args, m5, m5_mmap, libm5 ])
# The shared version of the m5 op call sights, used by mutliple targets below.
diff --git a/util/m5/src/args.c b/util/m5/src/args.c
new file mode 100644
index 0000000..f1896c8
--- /dev/null
+++ b/util/m5/src/args.c
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2011, 2017 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <inttypes.h>
+#include <stdlib.h>
+#include <string.h>
+
+int
+parse_int_args(int argc, char *argv[], uint64_t ints[], int len)
+{
+ if (argc > len)
+ return 0;
+
+// On 32 bit platforms we need to use strtoull to do the conversion
+#ifdef __LP64__
+#define strto64 strtoul
+#else
+#define strto64 strtoull
+#endif
+ int i;
+ for (i = 0; i < len; ++i)
+ ints[i] = (i < argc) ? strto64(argv[i], NULL, 0) : 0;
+
+#undef strto64
+ return 1;
+}
+
+int
+pack_str_into_regs(const char *str, uint64_t regs[], int num_regs)
+{
+ const size_t RegSize = sizeof(regs[0]);
+ const size_t MaxLen = num_regs * RegSize;
+
+ size_t len = strlen(str);
+
+ if (len > MaxLen)
+ return 0;
+
+ memset(regs, 0, MaxLen);
+
+ while (len) {
+ for (int offset = 0; offset < RegSize && len; offset++, len--) {
+ int shift = offset * 8;
+ *regs |= (uint64_t)(uint8_t)*str++ << shift;
+ }
+ regs++;
+ }
+ return 1;
+}
diff --git a/util/m5/src/args.h b/util/m5/src/args.h
new file mode 100644
index 0000000..530462e
--- /dev/null
+++ b/util/m5/src/args.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2011, 2017 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __ARGS_H__
+#define __ARGS_H__
+
+#include <stdint.h>
+
+int parse_int_args(int argc, char *argv[], uint64_t ints[], int len);
+int pack_str_into_regs(const char *str, uint64_t regs[], int num_regs);
+
+#endif // __ARGS_H__
diff --git a/util/m5/src/m5.c b/util/m5/src/m5.c
index cda6bf6..11e7d60 100644
--- a/util/m5/src/m5.c
+++ b/util/m5/src/m5.c
@@ -51,6 +51,8 @@
#include <gem5/asm/generic/m5ops.h>
#include <gem5/m5ops.h>
+
+#include "args.h"
#include "call_type.h"
#include "dispatch_table.h"
#include "m5_mmap.h"
@@ -59,47 +61,6 @@
char *command = "unspecified";
void usage();
-void
-parse_int_args(int argc, char *argv[], uint64_t ints[], int len)
-{
- if (argc > len)
- usage();
-
-// On 32 bit platforms we need to use strtoull to do the conversion
-#ifdef __LP64__
-#define strto64 strtoul
-#else
-#define strto64 strtoull
-#endif
- int i;
- for (i = 0; i < len; ++i)
- ints[i] = (i < argc) ? strto64(argv[i], NULL, 0) : 0;
-
-#undef strto64
-}
-
-void
-pack_str_into_regs(const char *str, uint64_t regs[], int num_regs)
-{
- const size_t RegSize = sizeof(regs[0]);
- const size_t MaxLen = num_regs * RegSize;
-
- size_t len = strlen(str);
-
- if (len > MaxLen)
- usage();
-
- memset(regs, 0, MaxLen);
-
- while (len) {
- for (int offset = 0; offset < RegSize && len; offset++, len--) {
- int shift = offset * 8;
- *regs |= (uint64_t)(uint8_t)*str++ << shift;
- }
- regs++;
- }
-}
-
int
read_file(DispatchTable *dt, int dest_fid)
{
@@ -168,7 +129,8 @@
usage();
uint64_t ints[1];
- parse_int_args(argc, argv, ints, 1);
+ if (!parse_int_args(argc, argv, ints, 1))
+ usage();
(*dt->m5_exit)(ints[0]);
}
@@ -179,7 +141,8 @@
usage();
uint64_t ints[2] = {0,0};
- parse_int_args(argc, argv, ints, argc);
+ if (!parse_int_args(argc, argv, ints, argc))
+ usage();
(*dt->m5_fail)(ints[1], ints[0]);
}
@@ -187,7 +150,8 @@
do_reset_stats(DispatchTable *dt, int argc, char *argv[])
{
uint64_t ints[2];
- parse_int_args(argc, argv, ints, 2);
+ if (!parse_int_args(argc, argv, ints, 2))
+ usage();
(*dt->m5_reset_stats)(ints[0], ints[1]);
}
@@ -195,7 +159,8 @@
do_dump_stats(DispatchTable *dt, int argc, char *argv[])
{
uint64_t ints[2];
- parse_int_args(argc, argv, ints, 2);
+ if (!parse_int_args(argc, argv, ints, 2))
+ usage();
(*dt->m5_dump_stats)(ints[0], ints[1]);
}
@@ -203,7 +168,8 @@
do_dump_reset_stats(DispatchTable *dt, int argc, char *argv[])
{
uint64_t ints[2];
- parse_int_args(argc, argv, ints, 2);
+ if (!parse_int_args(argc, argv, ints, 2))
+ usage();
(*dt->m5_dump_reset_stats)(ints[0], ints[1]);
}
@@ -232,7 +198,8 @@
do_checkpoint(DispatchTable *dt, int argc, char *argv[])
{
uint64_t ints[2];
- parse_int_args(argc, argv, ints, 2);
+ if (!parse_int_args(argc, argv, ints, 2))
+ usage();
(*dt->m5_checkpoint)(ints[0], ints[1]);
}
@@ -264,7 +231,8 @@
usage();
uint64_t key_str[2];
- pack_str_into_regs(argc == 0 ? "" : argv[0], key_str, 2);
+ if (!pack_str_into_regs(argc == 0 ? "" : argv[0], key_str, 2))
+ usage();
uint64_t val = (*dt->m5_init_param)(key_str[0], key_str[1]);
printf("%"PRIu64, val);
}