blob: 097f36735d4a1cc8028fd4dc0fe2ed165467b42c [file] [log] [blame]
# Copyright (c) 2021 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder. You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''
Definitions for CHI nodes and controller types. These are used by
create_system in configs/ruby/CHI.py or may be used in custom configuration
scripts. When used with create_system, the user may provide an additional
configuration file as the --chi-config parameter to specialize the classes
defined here.
When using the CustomMesh topology, --chi-config must be provided with
specialization of the NoC_Param classes defining the NoC dimensions and
node to router binding. See configs/example/noc_config/2x4.py for an example.
'''
import math
import m5
from m5.objects import *
class Versions:
    '''
    Hands out unique, increasing ids.
    Sequencer ids come from one shared counter, while versions are counted
    separately for each type given to getVersion. The returned value is
    meant to be passed as the 'version' parameter when creating the
    controller.
    '''

    # Next sequencer id to hand out
    _seqs = 0

    # Next version to hand out, keyed by the type passed to getVersion
    _version = {}

    @classmethod
    def getSeqId(cls):
        '''Returns the next unused sequencer id (the first one is 0)'''
        next_id = cls._seqs
        cls._seqs = next_id + 1
        return next_id

    @classmethod
    def getVersion(cls, tp):
        '''Returns the next unused version for 'tp' (the first one is 0)'''
        next_version = cls._version.get(tp, 0)
        cls._version[tp] = next_version + 1
        return next_version
class NoC_Params:
    '''
    Interconnect defaults. Besides configuring the NoC itself, data_width
    is also the value used for the data_channel_size of all CHI controllers
    (see configs/ruby/CHI.py)
    '''

    # Latencies of router-to-router links, node-to-router links, and routers
    router_link_latency = 1
    node_link_latency = 1
    router_latency = 1

    # Router buffering
    router_buffer_size = 4

    # Control message size and data width
    cntrl_msg_size = 8
    data_width = 32

    # Cross links: none by default
    cross_links = []
    cross_link_latency = 0
class CHI_Node(SubSystem):
    '''
    Common base for the CHI RNF, RNFI, HNF, and SNF nodes. It holds the
    shared code used to set up their Cache or Memory controllers.
    Derived classes must implement getNetworkSideControllers and
    getAllControllers.
    '''

    class NoC_Params:
        '''
        NoC config. parameters and bindings required for the CustomMesh
        topology. 'num_nodes_per_router' CHI nodes are mapped to each router
        listed in 'router_list', which assumes that
        len(router_list)*num_nodes_per_router equals the number of nodes.
        When 'num_nodes_per_router' is left undefined, 'router_list' is
        circulated until all nodes are mapped.
        See 'distributeNodes' in configs/topologies/CustomMesh.py
        '''
        num_nodes_per_router = None
        router_list = None

    def __init__(self, ruby_system):
        '''
        Keeps references to 'ruby_system' and to its network for later use
        when connecting controllers.
        '''
        super(CHI_Node, self).__init__()
        self._ruby_system = ruby_system
        self._network = ruby_system.network

    def getNetworkSideControllers(self):
        '''
        Returns the ruby controllers of this node that must be connected to
        the network. Must be overridden by derived classes.
        '''
        raise NotImplementedError()

    def getAllControllers(self):
        '''
        Returns every ruby controller associated with this node. Must be
        overridden by derived classes.
        '''
        raise NotImplementedError()

    def setDownstream(self, cntrls):
        '''
        Uses 'cntrls' as the downstream destinations of each network side
        controller of this node
        '''
        for net_cntrl in self.getNetworkSideControllers():
            net_cntrl.downstream_destinations = cntrls

    def connectController(self, cntrl):
        '''
        Creates the message buffers for the CHI input/output ports of
        'cntrl' and binds them to the network
        '''
        out_names = ('reqOut', 'rspOut', 'snpOut', 'datOut')
        in_names = ('reqIn', 'rspIn', 'snpIn', 'datIn')

        # One buffer per CHI channel and direction; all of them are created
        # before any port is bound
        for name in out_names + in_names:
            setattr(cntrl, name, MessageBuffer())

        # All CHI ports are always connected to the network.
        # Even controllers missing from the getNetworkSideControllers list
        # communicate using internal routers, so their ports must be wired
        # up as well
        for name in out_names:
            getattr(cntrl, name).out_port = self._network.in_port
        for name in in_names:
            getattr(cntrl, name).in_port = self._network.out_port
class TriggerMessageBuffer(MessageBuffer):
    '''
    MessageBuffer used to trigger events internal to a controller.
    Randomization is disabled so the Ruby tester randomization does not
    affect these buffers, and zero latency is allowed so messages enqueued
    in a cycle can be popped in that same cycle.
    '''
    randomization = 'disabled'
    allow_zero_latency = True
class OrderedTriggerMessageBuffer(TriggerMessageBuffer):
    '''
    TriggerMessageBuffer with the 'ordered' parameter set to True.
    '''
    ordered = True
class CHI_Cache_Controller(Cache_Controller):
    '''
    Cache_Controller with default queues and parameters.
    Besides acting as a cache, the Cache_Controller can also be used as a
    DMA requester or as a pure directory if all cache allocation policies
    are disabled.
    '''

    def __init__(self, ruby_system):
        '''
        Creates the controller with a new version id (see Versions) and its
        internal queues. The CHI network ports are not created here.
        '''
        cntrl_version = Versions.getVersion(Cache_Controller)
        super(CHI_Cache_Controller, self).__init__(
            version = cntrl_version,
            ruby_system = ruby_system,
            mandatoryQueue = MessageBuffer(),
            prefetchQueue = MessageBuffer(),
            triggerQueue = TriggerMessageBuffer(),
            retryTriggerQueue = OrderedTriggerMessageBuffer(),
            replTriggerQueue = OrderedTriggerMessageBuffer(),
            reqRdy = TriggerMessageBuffer(),
            snpRdy = TriggerMessageBuffer())

        # Use a somewhat large number since we rely a lot on internal
        # triggers. To limit the controller performance, tweak other
        # params instead, such as: input port buffer size, cache banks, and
        # output port latency
        self.transitions_per_cycle = 128

        # Off by default. The data cache controller should set this to true
        # to enable timeouts on unique lines when a store conditional fails
        self.sc_lock_enabled = False
class CHI_L1Controller(CHI_Cache_Controller):
    '''
    L1 Cache controller with default parameters
    '''

    def __init__(self, ruby_system, sequencer, cache, prefetcher):
        '''
        Binds 'sequencer' and 'cache' to the controller and applies the L1
        defaults. 'prefetcher' is accepted but not used here;
        use_prefetcher is set to False.
        '''
        super(CHI_L1Controller, self).__init__(ruby_system)

        self.sequencer = sequencer
        self.cache = cache
        self.use_prefetcher = False
        self.send_evictions = True

        # Not a home node; DMT and DCT are disabled
        self.is_HN = False
        self.enable_DMT = False
        self.enable_DCT = False

        # Strict inclusive MOESI
        self.allow_SD = True

        # Allocation policies
        self.alloc_on_seq_acc = True
        self.alloc_on_seq_line_write = False
        self.alloc_on_readshared = True
        self.alloc_on_readunique = True
        self.alloc_on_readonce = True
        self.alloc_on_writeback = True

        # Deallocation policies
        self.dealloc_on_unique = False
        self.dealloc_on_shared = False
        self.dealloc_backinv_unique = True
        self.dealloc_backinv_shared = True

        # Some reasonable default TBE params
        self.number_of_TBEs = 16
        self.number_of_repl_TBEs = 16
        self.number_of_snoop_TBEs = 4
        self.unify_repl_TBEs = False
class CHI_L2Controller(CHI_Cache_Controller):
    '''
    L2 Cache controller with default parameters
    '''

    def __init__(self, ruby_system, cache, prefetcher):
        '''
        Binds 'cache' to the controller and applies the L2 defaults. The
        controller has no sequencer. 'prefetcher' is accepted but not used
        here; use_prefetcher is set to False.
        '''
        super(CHI_L2Controller, self).__init__(ruby_system)

        self.sequencer = NULL
        self.cache = cache
        self.use_prefetcher = False
        self.allow_SD = True

        # Not a home node; DMT and DCT are disabled
        self.is_HN = False
        self.enable_DMT = False
        self.enable_DCT = False
        self.send_evictions = False

        # Strict inclusive MOESI
        # Allocation policies
        self.alloc_on_seq_acc = False
        self.alloc_on_seq_line_write = False
        self.alloc_on_readshared = True
        self.alloc_on_readunique = True
        self.alloc_on_readonce = True
        self.alloc_on_writeback = True

        # Deallocation policies
        self.dealloc_on_unique = False
        self.dealloc_on_shared = False
        self.dealloc_backinv_unique = True
        self.dealloc_backinv_shared = True

        # Some reasonable default TBE params
        self.number_of_TBEs = 32
        self.number_of_repl_TBEs = 32
        self.number_of_snoop_TBEs = 16
        self.unify_repl_TBEs = False
class CHI_HNFController(CHI_Cache_Controller):
    '''
    Cache controller for a coherent home node (HNF) with default parameters
    '''

    def __init__(self, ruby_system, cache, prefetcher, addr_ranges):
        '''
        Binds 'cache' and 'addr_ranges' to the controller and applies the
        HNF defaults. The controller has no sequencer. 'prefetcher' is
        accepted but not used here; use_prefetcher is set to False.
        '''
        super(CHI_HNFController, self).__init__(ruby_system)

        self.sequencer = NULL
        self.cache = cache
        self.use_prefetcher = False
        self.addr_ranges = addr_ranges
        self.allow_SD = True

        # This is a home node; DMT and DCT are enabled
        self.is_HN = True
        self.enable_DMT = True
        self.enable_DCT = True
        self.send_evictions = False

        # MOESI / Mostly inclusive for shared / Exclusive for unique
        # Allocation policies
        self.alloc_on_seq_acc = False
        self.alloc_on_seq_line_write = False
        self.alloc_on_readshared = True
        self.alloc_on_readunique = False
        self.alloc_on_readonce = True
        self.alloc_on_writeback = True

        # Deallocation policies
        self.dealloc_on_unique = True
        self.dealloc_on_shared = False
        self.dealloc_backinv_unique = False
        self.dealloc_backinv_shared = False

        # Some reasonable default TBE params
        self.number_of_TBEs = 32
        self.number_of_repl_TBEs = 32
        self.number_of_snoop_TBEs = 1 # should not receive any snoop
        self.unify_repl_TBEs = False
class CHI_DMAController(CHI_Cache_Controller):
    '''
    DMA controller with default parameters
    '''

    def __init__(self, ruby_system, sequencer):
        '''
        Binds 'sequencer' to the controller, gives it a small placeholder
        cache, and turns every allocation policy off.
        '''
        super(CHI_DMAController, self).__init__(ruby_system)
        self.sequencer = sequencer

        # The controller is given a cache object even though all
        # allocation policies below are disabled
        class DummyCache(RubyCache):
            dataAccessLatency = 0
            tagAccessLatency = 1
            size = "128"
            assoc = 1

        self.use_prefetcher = False
        self.cache = DummyCache()
        self.sequencer.dcache = NULL

        # Not a home node; SD, DMT and DCT are disabled
        self.allow_SD = False
        self.is_HN = False
        self.enable_DMT = False
        self.enable_DCT = False

        # All allocations are false
        self.alloc_on_seq_acc = False
        self.alloc_on_seq_line_write = False
        self.alloc_on_readshared = False
        self.alloc_on_readunique = False
        self.alloc_on_readonce = False
        self.alloc_on_writeback = False

        # Deallocations are set to false as well (they don't really matter
        # since nothing is allocated)
        self.dealloc_on_unique = False
        self.dealloc_on_shared = False
        self.dealloc_backinv_unique = False
        self.dealloc_backinv_shared = False

        self.send_evictions = False

        # TBE params
        self.number_of_TBEs = 16
        self.number_of_repl_TBEs = 1
        self.number_of_snoop_TBEs = 1 # should not receive any snoop
        self.unify_repl_TBEs = False
class CPUSequencerWrapper:
    '''
    Bundles the instruction and data sequencers of a cpu into one object.
    Other generic configuration scripts assume a matching number of
    sequencers and cpus, so this wrapper makes the pair compatible with
    them. This assumes all scripts are using connectCpuPorts/connectIOPorts
    to bind ports
    '''

    def __init__(self, iseq, dseq):
        '''
        Wraps the instruction sequencer 'iseq' and data sequencer 'dseq'
        '''
        # Write straight into __dict__: regular attribute assignment is
        # redirected to the wrapped sequencers by __setattr__ below.
        # 'in_ports' is exposed for compatibility with certain scripts that
        # wire up ports without connectCpuPorts
        self.__dict__.update(
            inst_seq = iseq,
            data_seq = dseq,
            support_data_reqs = True,
            support_inst_reqs = True,
            in_ports = dseq.in_ports)

    def connectCpuPorts(self, cpu):
        '''
        Binds icache_port of 'cpu' to the instruction sequencer, and all its
        other cached ports plus the uncached ports to the data sequencer
        '''
        assert(isinstance(cpu, BaseCPU))

        cpu.icache_port = self.inst_seq.in_ports

        for port_name in cpu._cached_ports:
            if str(port_name) == 'icache_port':
                continue
            # NOTE(review): the port name is spliced into an assignment
            # statement that is run through exec, so entries of
            # _cached_ports are assumed to be trusted attribute paths of
            # the cpu
            exec('cpu.%s = self.data_seq.in_ports' % port_name)

        cpu.connectUncachedPorts(
            self.data_seq.in_ports, self.data_seq.interrupt_out_port)

    def connectIOPorts(self, piobus):
        '''Forwards the call to the data sequencer'''
        self.data_seq.connectIOPorts(piobus)

    def __setattr__(self, name, value):
        '''Sets the attribute on both wrapped sequencers, not on the wrapper'''
        for seq in (self.inst_seq, self.data_seq):
            setattr(seq, name, value)
class CHI_RNF(CHI_Node):
    '''
    Defines a CHI request node.
    Notice all controllers and sequencers are set as children of the cpus, so
    this object acts more like a proxy for setting things up and has no
    topology significance unless the cpus are set as its children at the top
    level
    '''

    def __init__(self, cpus, ruby_system,
                 l1Icache_type, l1Dcache_type,
                 cache_line_size,
                 l1Iprefetcher_type=None, l1Dprefetcher_type=None):
        '''
        Creates, for each cpu in 'cpus', an instruction and a data sequencer
        plus the L1I and L1D caches and their controllers.

        'l1Icache_type' and 'l1Dcache_type' are called to build the caches.
        'cache_line_size' is used to derive the start index bit of the
        caches. Prefetchers are not supported yet: m5.fatal is called if
        any prefetcher type is given and there is at least one cpu.
        '''
        super(CHI_RNF, self).__init__(ruby_system)

        self._block_size_bits = int(math.log(cache_line_size, 2))

        # All sequencers and controllers
        self._seqs = []
        self._cntrls = []

        # Last level controllers in this node, i.e., the ones that will send
        # requests to the home nodes
        self._ll_cntrls = []

        self._cpus = cpus

        # First creates L1 caches and sequencers
        for cpu in self._cpus:

            cpu.inst_sequencer = RubySequencer(version = Versions.getSeqId(),
                                               ruby_system = ruby_system)
            cpu.data_sequencer = RubySequencer(version = Versions.getSeqId(),
                                               ruby_system = ruby_system)

            self._seqs.append(CPUSequencerWrapper(cpu.inst_sequencer,
                                                  cpu.data_sequencer))

            # caches
            l1i_cache = l1Icache_type(start_index_bit = self._block_size_bits,
                                      is_icache = True)

            l1d_cache = l1Dcache_type(start_index_bit = self._block_size_bits,
                                      is_icache = False)

            # Placeholders for future prefetcher support
            if (l1Iprefetcher_type is not None or
                    l1Dprefetcher_type is not None):
                m5.fatal('Prefetching not supported yet')
            l1i_pf = NULL
            l1d_pf = NULL

            # cache controllers
            cpu.l1i = CHI_L1Controller(ruby_system, cpu.inst_sequencer,
                                       l1i_cache, l1i_pf)

            cpu.l1d = CHI_L1Controller(ruby_system, cpu.data_sequencer,
                                       l1d_cache, l1d_pf)

            cpu.inst_sequencer.dcache = NULL
            cpu.data_sequencer.dcache = cpu.l1d.cache

            # Only the data cache controller enables the store conditional
            # lock handling
            cpu.l1d.sc_lock_enabled = True

            # Per-cpu list of last level controllers; replaced if a private
            # L2 is added later
            cpu._ll_cntrls = [cpu.l1i, cpu.l1d]
            for c in cpu._ll_cntrls:
                self._cntrls.append(c)
                self.connectController(c)
                self._ll_cntrls.append(c)

    def getSequencers(self):
        '''Returns one CPUSequencerWrapper per cpu'''
        return self._seqs

    def getAllControllers(self):
        '''Returns all controllers created by this node'''
        return self._cntrls

    def getNetworkSideControllers(self):
        '''Returns the same list as getAllControllers'''
        return self._cntrls

    def setDownstream(self, cntrls):
        '''
        Sets cntrls as the downstream list of the last level controllers
        only
        '''
        for c in self._ll_cntrls:
            c.downstream_destinations = cntrls

    def getCpus(self):
        '''Returns the cpus given at construction'''
        return self._cpus

    # Adds a private L2 for each cpu
    def addPrivL2Cache(self, cache_type, pf_type=None):
        '''
        Creates a private L2 cache and controller for each cpu. The L2
        becomes the only last level controller of the cpu and the downstream
        destination of its previous last level controllers.

        'cache_type' is called to build each L2 cache. Prefetchers are not
        supported yet: m5.fatal is called if 'pf_type' is given and there
        is at least one cpu.
        '''
        self._ll_cntrls = []
        for cpu in self._cpus:
            l2_cache = cache_type(start_index_bit = self._block_size_bits,
                                  is_icache = False)
            if pf_type is not None:
                m5.fatal('Prefetching not supported yet')
            l2_pf = NULL

            cpu.l2 = CHI_L2Controller(self._ruby_system, l2_cache, l2_pf)

            self._cntrls.append(cpu.l2)
            self.connectController(cpu.l2)

            self._ll_cntrls.append(cpu.l2)

            # The previous last level controllers now send requests to the L2
            for c in cpu._ll_cntrls:
                c.downstream_destinations = [cpu.l2]
            cpu._ll_cntrls = [cpu.l2]
class CHI_HNF(CHI_Node):
    '''
    Encapsulates an HNF cache/directory controller.
    The class method CHI_HNF.createAddrRanges must be called before creating
    any CHI_HNF object to set-up the interleaved address ranges used by the
    HNFs
    '''

    class NoC_Params(CHI_Node.NoC_Params):
        '''HNFs may also define the 'pairing' parameter to allow pairing'''
        pairing = None

    # One (ranges, intlvHighBit, intlvMatch) tuple per HNF; filled in by
    # createAddrRanges
    _addr_ranges = []

    @classmethod
    def createAddrRanges(cls, sys_mem_ranges, cache_line_size, num_hnfs):
        '''
        Creates the interleaved address ranges of 'num_hnfs' HNFs, replacing
        any previously created ones. Each HNF gets one range for each entry
        of 'sys_mem_ranges', with its index used as the interleaving match.
        '''
        # Create the HNFs interleaved addr ranges
        block_size_bits = int(math.log(cache_line_size, 2))
        cls._addr_ranges = []
        # NOTE(review): the logarithm is truncated, so this presumably
        # expects num_hnfs to be a power of two - confirm with callers
        llc_bits = int(math.log(num_hnfs, 2))
        numa_bit = block_size_bits + llc_bits - 1
        for i in range(num_hnfs):
            ranges = [AddrRange(r.start, size = r.size(),
                                intlvHighBit = numa_bit,
                                intlvBits = llc_bits,
                                intlvMatch = i)
                      for r in sys_mem_ranges]
            cls._addr_ranges.append((ranges, numa_bit, i))

    @classmethod
    def getAddrRanges(cls, hnf_idx):
        '''
        Returns the (ranges, intlvHighBit, intlvMatch) tuple created for the
        HNF 'hnf_idx'. createAddrRanges must have been called before.
        '''
        assert(len(cls._addr_ranges) != 0)
        return cls._addr_ranges[hnf_idx]

    # The CHI controller can be a child of this object or another if
    # 'parent' is specified
    def __init__(self, hnf_idx, ruby_system, llcache_type, parent):
        '''
        Creates the cache and controller of the HNF 'hnf_idx'.
        'llcache_type' is called to build the cache. The controller is set
        as a child of 'parent', or of this object if 'parent' is None.
        '''
        super(CHI_HNF, self).__init__(ruby_system)

        addr_ranges,intlvHighBit,intlvMatch = self.getAddrRanges(hnf_idx)
        # All ranges should have the same interleaving
        assert(len(addr_ranges) >= 1)
        assert(intlvMatch == hnf_idx)

        # The cache is indexed with the bits above the interleaving bits
        ll_cache = llcache_type(start_index_bit = intlvHighBit + 1)
        self._cntrl = CHI_HNFController(ruby_system, ll_cache, NULL,
                                        addr_ranges)

        if parent is None:
            self.cntrl = self._cntrl
        else:
            parent.cntrl = self._cntrl

        self.connectController(self._cntrl)

    def getAllControllers(self):
        '''Returns a list with the single controller of this node'''
        return [self._cntrl]

    def getNetworkSideControllers(self):
        '''Returns a list with the single controller of this node'''
        return [self._cntrl]
class CHI_SNF_Base(CHI_Node):
    '''
    Base for the nodes that hold the CHI controller of a memory controller
    '''

    # The CHI controller can be a child of this object or another if
    # 'parent' is specified
    def __init__(self, ruby_system, parent):
        '''
        Creates the Memory_Controller with a new version id and connects it
        to the network. The controller is set as a child of 'parent' when
        one is given, otherwise as a child of this object.
        '''
        super(CHI_SNF_Base, self).__init__(ruby_system)

        self._cntrl = Memory_Controller(
            version = Versions.getVersion(Memory_Controller),
            ruby_system = ruby_system,
            triggerQueue = TriggerMessageBuffer(),
            responseFromMemory = MessageBuffer(),
            requestToMemory = MessageBuffer(ordered = True),
            reqRdy = TriggerMessageBuffer())

        self.connectController(self._cntrl)

        owner = parent if parent else self
        owner.cntrl = self._cntrl

    def getAllControllers(self):
        '''Returns a list with the single controller of this node'''
        return [self._cntrl]

    def getNetworkSideControllers(self):
        '''Returns a list with the single controller of this node'''
        return [self._cntrl]

    def getMemRange(self, mem_ctrl):
        '''
        Returns the 'range' of 'mem_ctrl' if it has one, otherwise the range
        of its 'dram'
        '''
        # TODO need some kind of transparent API for
        # MemCtrl+DRAM vs SimpleMemory
        if not hasattr(mem_ctrl, 'range'):
            return mem_ctrl.dram.range
        return mem_ctrl.range
class CHI_SNF_BootMem(CHI_SNF_Base):
    '''
    SNF for the boot memory
    '''

    def __init__(self, ruby_system, parent, bootmem):
        '''
        Creates the controller and binds it to the port and memory range of
        'bootmem'
        '''
        super(CHI_SNF_BootMem, self).__init__(ruby_system, parent)
        cntrl = self._cntrl
        cntrl.memory_out_port = bootmem.port
        cntrl.addr_ranges = self.getMemRange(bootmem)
class CHI_SNF_MainMem(CHI_SNF_Base):
    '''
    SNF for a main memory controller
    '''

    def __init__(self, ruby_system, parent, mem_ctrl = None):
        '''
        Creates the controller and, when 'mem_ctrl' is given, binds it to
        the port and memory range of 'mem_ctrl'
        '''
        super(CHI_SNF_MainMem, self).__init__(ruby_system, parent)
        if not mem_ctrl:
            # port and range are left to be bound later
            return
        cntrl = self._cntrl
        cntrl.memory_out_port = mem_ctrl.port
        cntrl.addr_ranges = self.getMemRange(mem_ctrl)
class CHI_RNI_Base(CHI_Node):
    '''
    Request node without cache / DMA
    '''

    # The CHI controller can be a child of this object or another if
    # 'parent' is specified
    def __init__(self, ruby_system, parent):
        '''
        Creates a sequencer and a CHI_DMAController driven by it, then
        connects the controller to the network. The controller is set as a
        child of 'parent' when one is given, otherwise as a child of this
        object.
        '''
        super(CHI_RNI_Base, self).__init__(ruby_system)

        self._sequencer = RubySequencer(version = Versions.getSeqId(),
                                        ruby_system = ruby_system,
                                        clk_domain = ruby_system.clk_domain)
        self._cntrl = CHI_DMAController(ruby_system, self._sequencer)

        owner = parent if parent else self
        owner.cntrl = self._cntrl

        self.connectController(self._cntrl)

    def getAllControllers(self):
        '''Returns a list with the single controller of this node'''
        return [self._cntrl]

    def getNetworkSideControllers(self):
        '''Returns a list with the single controller of this node'''
        return [self._cntrl]
class CHI_RNI_DMA(CHI_RNI_Base):
    '''
    DMA controller wired up to a given dma port
    '''

    def __init__(self, ruby_system, dma_port, parent):
        '''
        Creates the node and binds 'dma_port' to the in_ports of its
        sequencer. 'dma_port' must not be None.
        '''
        super(CHI_RNI_DMA, self).__init__(ruby_system, parent)
        # Identity check: does not depend on how the port object implements
        # comparison operators
        assert(dma_port is not None)
        self._sequencer.in_ports = dma_port
class CHI_RNI_IO(CHI_RNI_Base):
    '''
    DMA controller wired up to the ruby_system IO port
    '''

    def __init__(self, ruby_system, parent):
        '''
        Creates the node and registers its sequencer as the '_io_port' of
        'ruby_system'
        '''
        super(CHI_RNI_IO, self).__init__(ruby_system, parent)
        io_sequencer = self._sequencer
        ruby_system._io_port = io_sequencer