# blob: 740c748d0521c080b386350abf34d070a0e1e59f [file] [log] [blame]
# Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# Configure the TLB hierarchy
# Places that would probably need to be modified if you want a
# different hierarchy are marked with a '< Modify here ... >'
# comment.
import m5
from m5.objects import *
def TLB_constructor(options, level, gpu_ctrl=None, full_system=False):
    """Return the source text of a GPU TLB constructor call for ``level``.

    The returned string is a Python expression, not an object: the caller
    is expected to ``eval`` it in a scope where ``options`` (and, for the
    full-system variant, ``gpu_ctrl``) are bound.  Neither argument is read
    here; they only appear by name inside the generated text.

    Args:
        options: Unused here; referenced by name in the generated text.
        level: TLB level number, substituted with ``%d`` into the option
            names (e.g. ``options.L1TLBentries`` for level 1).
        gpu_ctrl: Unused here; referenced by name in the full-system text.
        full_system: Selects the ``VegaGPUTLB`` text when true, otherwise
            the ``X86GPUTLB`` text.

    Returns:
        The constructor expression as a string.
    """
    # Clock/voltage domain arguments that close both constructor variants.
    clock_domain_args = (
        "clk_domain = SrcClockDomain("
        "clock = options.gpu_clock,"
        "voltage_domain = VoltageDomain("
        "voltage = options.gpu_voltage)))"
    )

    if full_system:
        tlb_args = (
            "VegaGPUTLB("
            "gpu_device = gpu_ctrl, "
            "size = options.L%(level)dTLBentries, "
            "assoc = options.L%(level)dTLBassoc, "
            "hitLatency = options.L%(level)dAccessLatency,"
            "missLatency1 = options.L%(level)dMissLatency,"
            "missLatency2 = options.L%(level)dMissLatency,"
            "maxOutstandingReqs = options.L%(level)dMaxOutstandingReqs,"
        )
    else:
        tlb_args = (
            "X86GPUTLB(size = options.L%(level)dTLBentries, "
            "assoc = options.L%(level)dTLBassoc, "
            "hitLatency = options.L%(level)dAccessLatency,"
            "missLatency2 = options.L%(level)dMissLatency,"
            "maxOutstandingReqs = options.L%(level)dMaxOutstandingReqs,"
            "accessDistance = options.L%(level)dAccessDistanceStat,"
        )

    # Only the level number is substituted into the template.
    return (tlb_args + clock_domain_args) % {"level": level}
def Coalescer_constructor(options, level, full_system):
    """Return the source text of a TLB coalescer constructor call.

    The returned string is a Python expression meant to be passed to
    ``eval`` in a scope where ``options`` is bound; ``options`` is not read
    here, it only appears by name inside the generated text.

    Args:
        options: Unused here; referenced by name in the generated text.
        level: TLB level number, substituted with ``%d`` into the option
            names and, for the full-system variant, into ``tlb_level``.
        full_system: Selects the ``VegaTLBCoalescer`` text when true,
            otherwise the ``TLBCoalescer`` text.

    Returns:
        The constructor expression as a string.
    """
    # Clock/voltage domain arguments that close both constructor variants.
    clock_domain_args = (
        "clk_domain = SrcClockDomain("
        "clock = options.gpu_clock,"
        "voltage_domain = VoltageDomain("
        "voltage = options.gpu_voltage)))"
    )

    if full_system:
        coalescer_args = (
            "VegaTLBCoalescer(probesPerCycle = "
            "options.L%(level)dProbesPerCycle, "
            "tlb_level = %(level)d ,"
            "coalescingWindow = options.L%(level)dCoalescingWindow,"
            "disableCoalescing = options.L%(level)dDisableCoalescing,"
        )
    else:
        coalescer_args = (
            "TLBCoalescer(probesPerCycle = "
            "options.L%(level)dProbesPerCycle, "
            "coalescingWindow = options.L%(level)dCoalescingWindow,"
            "disableCoalescing = options.L%(level)dDisableCoalescing,"
        )

    # Only the level number is substituted into the template.
    return (coalescer_args + clock_domain_args) % {"level": level}
def create_TLB_Coalescer(options, my_level, my_index, tlb_name,
                         coalescer_name, gpu_ctrl=None, full_system=False):
    """Instantiate ``my_index`` TLB/coalescer pairs for one hierarchy level.

    Each pair is built by evaluating the constructor text produced by
    ``TLB_constructor`` and ``Coalescer_constructor``.  The evaluation
    happens in this function's scope on purpose: the generated text refers
    to ``options`` and ``gpu_ctrl`` by name, so those parameters must keep
    their names.

    Args:
        options: Option object the generated constructor text reads from.
        my_level: TLB level number passed to both constructor builders.
        my_index: Number of TLB/coalescer pairs to create.
        tlb_name: List that receives the created TLB objects (mutated).
        coalescer_name: List that receives the created coalescers (mutated).
        gpu_ctrl: Object referenced as ``gpu_device`` by the full-system
            TLB constructor text.
        full_system: Forwarded to both constructor builders.
    """
    for _ in range(my_index):
        tlb_expression = TLB_constructor(
            options, my_level, gpu_ctrl, full_system)
        tlb_name.append(eval(tlb_expression))

        coalescer_expression = Coalescer_constructor(
            options, my_level, full_system)
        coalescer_name.append(eval(coalescer_expression))
def config_tlb_hierarchy(options, system, shader_idx, gpu_ctrl=None,
                         full_system=False):
    """Create the GPU TLB hierarchy, attach it to ``system`` and wire ports.

    Three levels are built.  Level 1 holds the 'sqc', 'scalar' and 'l1'
    TLB groups, levels 2 and 3 hold a single TLB each.  Every TLB gets its
    own coalescer.  The created arrays are stored on ``system`` as
    ``<name>_tlb`` and ``<name>_coalescer``.

    Args:
        options: Option object.  Read here: ``num_compute_units``,
            ``TLB_config``, ``num_sqc``, ``num_scalar_cache``,
            ``tot_L1TLB_size``, ``L1TLBentries``, ``L1TLBassoc``,
            ``cu_per_sqc`` and ``cu_per_scalar_cache``.  ``L1TLBentries``
            and ``L1TLBassoc`` may be overwritten (see below).
        system: Object that receives the TLB/coalescer arrays and whose
            ``cpu[shader_idx].CUs`` ports are connected.
        shader_idx: Index into ``system.cpu`` of the shader to connect.
        gpu_ctrl: Forwarded to the TLB constructors.
        full_system: Forwarded to the constructors; when true the L3 TLB
            walkers are also appended to ``system._dma_ports``.

    Returns:
        The ``system`` object that was passed in.

    Raises:
        SystemExit: With code 1 when ``options.TLB_config`` is not one of
            "perLane", "mono", "perCU" or "2CU".
    """
    # Imported locally: this file never imports sys itself, so the error
    # path below would otherwise only work if the wildcard m5.objects
    # import happened to export the name.
    import sys

    n_cu = options.num_compute_units

    if options.TLB_config == "perLane":
        num_TLBs = 64 * n_cu
    elif options.TLB_config == "mono":
        num_TLBs = 1
    elif options.TLB_config == "perCU":
        num_TLBs = n_cu
    elif options.TLB_config == "2CU":
        # One TLB per pair of CUs (rounded down).
        num_TLBs = n_cu >> 1
    else:
        print("Bad option for TLB Configuration.")
        sys.exit(1)

    #-------------------------------------------------------------------------
    # A visual representation of the TLB hierarchy
    # for ease of configuration
    # < Modify here the width and the number of levels if you want a different
    # configuration >
    # width is the number of TLBs of the given type (i.e., D-TLB, I-TLB etc)
    # for this level
    L1 = [{'name': 'sqc', 'width': options.num_sqc, 'TLBarray': [],
           'CoalescerArray': []},
          {'name': 'scalar', 'width': options.num_scalar_cache,
           'TLBarray': [], 'CoalescerArray': []},
          {'name': 'l1', 'width': num_TLBs, 'TLBarray': [],
           'CoalescerArray': []}]

    L2 = [{'name': 'l2', 'width': 1, 'TLBarray': [], 'CoalescerArray': []}]
    L3 = [{'name': 'l3', 'width': 1, 'TLBarray': [], 'CoalescerArray': []}]

    TLB_hierarchy = [L1, L2, L3]

    #-------------------------------------------------------------------------
    # If the sim calls for a fixed L1 TLB size across CUs, override the TLB
    # entries option.  Done once, before any TLB is constructed; the values
    # do not depend on the TLB type being created.  Floor division keeps
    # the entry count an integer.
    if options.tot_L1TLB_size:
        options.L1TLBentries = options.tot_L1TLB_size // num_TLBs
        if options.L1TLBassoc > options.L1TLBentries:
            options.L1TLBassoc = options.L1TLBentries

    # Create the hierarchy
    # Call the appropriate constructors and add objects to the system
    for level, hierarchy_level in enumerate(TLB_hierarchy, start=1):
        for TLB_type in hierarchy_level:
            TLB_array = TLB_type['TLBarray']
            Coalescer_array = TLB_type['CoalescerArray']

            # call the constructors for the TLB and the Coalescer
            create_TLB_Coalescer(options, level, TLB_type['width'],
                                 TLB_array, Coalescer_array,
                                 gpu_ctrl, full_system)

            # add the different TLB levels to the system
            # Modify here if you want to make the TLB hierarchy a child of
            # the shader.
            setattr(system, TLB_type['name'] + '_tlb', TLB_array)
            setattr(system, TLB_type['name'] + '_coalescer', Coalescer_array)

    #===========================================================
    # Specify the TLB hierarchy (i.e., port connections)
    # All TLBs but the last level TLB need to have a memSidePort
    #===========================================================

    # Each TLB is connected with its Coalescer through a single port.
    # There is a one-to-one mapping of TLBs to Coalescers at a given level
    # This won't be modified no matter what the hierarchy looks like.
    for hierarchy_level in TLB_hierarchy:
        for TLB_type in hierarchy_level:
            tlb_attr = TLB_type['name'] + '_tlb'
            coalescer_attr = TLB_type['name'] + '_coalescer'
            for index in range(TLB_type['width']):
                getattr(system, coalescer_attr)[index].mem_side_ports[0] = \
                    getattr(system, tlb_attr)[index].cpu_side_ports[0]

    # Connect the cpuSidePort of all the coalescers in level 1
    # < Modify here if you want a different configuration >
    for TLB_type in L1:
        name = TLB_type['name']
        width = TLB_type['width']
        if name == 'l1':     # L1 D-TLBs
            tlb_per_cu = width // n_cu
            for cu_idx in range(n_cu):
                if tlb_per_cu:
                    # One or more private TLBs per CU: translation port
                    # number ``tlb`` of the CU goes to its tlb-th coalescer.
                    for tlb in range(tlb_per_cu):
                        system.cpu[shader_idx].CUs[cu_idx] \
                            .translation_port[tlb] = \
                            system.l1_coalescer[cu_idx * tlb_per_cu + tlb] \
                            .cpu_side_ports[0]
                else:
                    # Fewer TLBs than CUs: several CUs share a coalescer.
                    # The group size may be fractional (e.g. an odd CU
                    # count with "2CU"); both indices are truncated to
                    # integers, which is what the previous "%d" string
                    # formatting of these values did.
                    cus_per_tlb = n_cu / width
                    system.cpu[shader_idx].CUs[cu_idx] \
                        .translation_port[0] = \
                        system.l1_coalescer[int(cu_idx / cus_per_tlb)] \
                        .cpu_side_ports[int(cu_idx % cus_per_tlb)]
        elif name == 'sqc':  # I-TLB
            for index in range(n_cu):
                sqc_tlb_index = index // options.cu_per_sqc
                sqc_tlb_port_id = index % options.cu_per_sqc
                system.cpu[shader_idx].CUs[index].sqc_tlb_port = \
                    system.sqc_coalescer[sqc_tlb_index] \
                    .cpu_side_ports[sqc_tlb_port_id]
        elif name == 'scalar':  # Scalar D-TLB
            for index in range(n_cu):
                scalar_tlb_index = index // options.cu_per_scalar_cache
                scalar_tlb_port_id = index % options.cu_per_scalar_cache
                system.cpu[shader_idx].CUs[index].scalar_tlb_port = \
                    system.scalar_coalescer[scalar_tlb_index] \
                    .cpu_side_ports[scalar_tlb_port_id]

    # Connect the memSidePorts of all the TLBs with the
    # cpuSidePorts of the Coalescers of the next level
    # < Modify here if you want a different configuration >
    # L1 <-> L2
    # Every L1-level TLB gets its own, consecutively numbered port on the
    # single L2 coalescer.
    l2_coalescer_index = 0
    for TLB_type in L1:
        tlb_attr = TLB_type['name'] + '_tlb'
        for index in range(TLB_type['width']):
            getattr(system, tlb_attr)[index].mem_side_ports[0] = \
                system.l2_coalescer[0].cpu_side_ports[l2_coalescer_index]
            l2_coalescer_index += 1

    # L2 <-> L3
    system.l2_tlb[0].mem_side_ports[0] = \
        system.l3_coalescer[0].cpu_side_ports[0]

    # L3 TLB Vega page table walker to memory for full system only
    if full_system:
        for TLB_type in L3:
            tlb_attr = TLB_type['name'] + '_tlb'
            for index in range(TLB_type['width']):
                system._dma_ports.append(
                    getattr(system, tlb_attr)[index].walker)

    return system