configs/common/GPUTLBConfig.py - public/gem5 - Git at Google

 # Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are met:
 #
 # 1. Redistributions of source code must retain the above copyright notice,
 # this list of conditions and the following disclaimer.
 #
 # 2. Redistributions in binary form must reproduce the above copyright notice,
 # this list of conditions and the following disclaimer in the documentation
 # and/or other materials provided with the distribution.
 #
 # 3. Neither the name of the copyright holder nor the names of its
 # contributors may be used to endorse or promote products derived from this
 # software without specific prior written permission.
 #
 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.

 # Configure the TLB hierarchy.
 # Places that would probably need to be modified if you want a
 # different hierarchy are marked with a '< Modify here ... >' comment.
 import m5
 from m5.objects import *

 def TLB_constructor(options, level, gpu_ctrl=None, full_system=False):
     """Return the source text of a GPU TLB constructor call.

     Nothing is instantiated here: the result is a string that the caller
     evaluates.  Only the TLB level number is substituted into the text;
     `options` and (for the full-system variant) `gpu_ctrl` appear in it
     as plain names, so they must be bound in the scope that evaluates it.

     Arguments:
         options: not read here; referenced by name in the returned text.
         level: integer TLB level, spliced into the option names
             (e.g. level 1 gives options.L1TLBentries).
         gpu_ctrl: not read here; referenced by name in the full-system
             text.
         full_system: selects the VegaGPUTLB text when true, otherwise
             the X86GPUTLB text.
     """
     substitutions = {'level': level}

     if not full_system:
         return "X86GPUTLB(size = options.L%(level)dTLBentries, \
                 assoc = options.L%(level)dTLBassoc, \
                 hitLatency = options.L%(level)dAccessLatency,\
                 missLatency2 = options.L%(level)dMissLatency,\
                 maxOutstandingReqs = options.L%(level)dMaxOutstandingReqs,\
                 accessDistance = options.L%(level)dAccessDistanceStat,\
                 clk_domain = SrcClockDomain(\
                     clock = options.gpu_clock,\
                     voltage_domain = VoltageDomain(\
                         voltage = options.gpu_voltage)))" % substitutions

     # Full-system variant: both miss latencies use the same option.
     return "VegaGPUTLB(\
                 gpu_device = gpu_ctrl, \
                 size = options.L%(level)dTLBentries, \
                 assoc = options.L%(level)dTLBassoc, \
                 hitLatency = options.L%(level)dAccessLatency,\
                 missLatency1 = options.L%(level)dMissLatency,\
                 missLatency2 = options.L%(level)dMissLatency,\
                 maxOutstandingReqs = options.L%(level)dMaxOutstandingReqs,\
                 clk_domain = SrcClockDomain(\
                     clock = options.gpu_clock,\
                     voltage_domain = VoltageDomain(\
                         voltage = options.gpu_voltage)))" % substitutions

 def Coalescer_constructor(options, level, full_system):
     """Return the source text of a TLB coalescer constructor call.

     The result is a string for the caller to evaluate.  Only the level
     number is substituted; `options` appears in the text as a plain name
     and must be bound in the scope that evaluates it.

     Arguments:
         options: not read here; referenced by name in the returned text.
         level: integer TLB level, spliced into the option names and, in
             the full-system text, passed as tlb_level.
         full_system: selects the VegaTLBCoalescer text when true,
             otherwise the TLBCoalescer text.
     """
     substitutions = {'level': level}

     if not full_system:
         return "TLBCoalescer(probesPerCycle = \
             options.L%(level)dProbesPerCycle, \
             coalescingWindow = options.L%(level)dCoalescingWindow,\
             disableCoalescing = options.L%(level)dDisableCoalescing,\
             clk_domain = SrcClockDomain(\
                 clock = options.gpu_clock,\
                 voltage_domain = VoltageDomain(\
                     voltage = options.gpu_voltage)))" % substitutions

     # The full-system coalescer is additionally told its own TLB level.
     return "VegaTLBCoalescer(probesPerCycle = \
             options.L%(level)dProbesPerCycle, \
             tlb_level  = %(level)d ,\
             coalescingWindow = options.L%(level)dCoalescingWindow,\
             disableCoalescing = options.L%(level)dDisableCoalescing,\
             clk_domain = SrcClockDomain(\
                 clock = options.gpu_clock,\
                 voltage_domain = VoltageDomain(\
                     voltage = options.gpu_voltage)))" % substitutions

 def create_TLB_Coalescer(options, my_level, my_index, tlb_name,
                          coalescer_name, gpu_ctrl=None, full_system=False):
     """Append `my_index` freshly built TLB/coalescer pairs to two lists.

     Arguments:
         options: referenced by name inside the evaluated constructor text.
         my_level: TLB level passed on to the constructor-text builders.
         my_index: number of TLB/coalescer pairs to create.
         tlb_name: list that receives the new TLB objects.
         coalescer_name: list that receives the new coalescer objects.
         gpu_ctrl: referenced by name inside the full-system TLB text.
         full_system: forwarded to the constructor-text builders.
     """
     # Keep this a plain loop in the function body: eval() resolves
     # `options` and `gpu_ctrl` from this function's local scope.
     for _ in range(my_index):
         tlb_source = TLB_constructor(options, my_level, gpu_ctrl,
                                      full_system)
         tlb_name.append(eval(tlb_source))

         coalescer_source = Coalescer_constructor(options, my_level,
                                                  full_system)
         coalescer_name.append(eval(coalescer_source))

 def config_tlb_hierarchy(options, system, shader_idx, gpu_ctrl=None,
                          full_system=False):
     """Create the three-level GPU TLB hierarchy and connect its ports.

     For every entry of the hierarchy table below, `width` TLB/coalescer
     pairs are created and attached to `system` as `<name>_tlb` and
     `<name>_coalescer`.  The ports are then wired as follows:

     * each coalescer's mem_side_ports[0] to its TLB's cpu_side_ports[0];
     * the translation ports of system.cpu[shader_idx].CUs to the level-1
       coalescers;
     * every level-1 TLB to its own CPU-side port of the single L2
       coalescer, and the L2 TLB to the L3 coalescer;
     * in full-system mode, each L3 TLB's `walker` is appended to
       system._dma_ports.

     Arguments:
         options: parsed options.  Read here: num_compute_units,
             TLB_config, num_sqc, num_scalar_cache, tot_L1TLB_size,
             L1TLBentries, L1TLBassoc, cu_per_sqc, cu_per_scalar_cache.
             L1TLBentries and L1TLBassoc are overwritten when
             tot_L1TLB_size is set.
         system: object that receives the TLB/coalescer attributes.
         shader_idx: index into system.cpu of the shader to connect.
         gpu_ctrl: forwarded to create_TLB_Coalescer.
         full_system: forwarded to create_TLB_Coalescer; also enables the
             L3 walker hookup.

     Returns the `system` object that was passed in.  An unrecognized
     options.TLB_config prints a message and exits with status 1.
     """
     # Imported locally because the file header does not import sys
     # explicitly, yet the error path below needs sys.exit().
     import sys

     n_cu = options.num_compute_units

     if options.TLB_config == "perLane":
         num_TLBs = 64 * n_cu
     elif options.TLB_config == "mono":
         num_TLBs = 1
     elif options.TLB_config == "perCU":
         num_TLBs = n_cu
     elif options.TLB_config == "2CU":
         num_TLBs = n_cu >> 1
     else:
         print("Bad option for TLB Configuration.")
         sys.exit(1)

     #-------------------------------------------------------------------------
     # A visual representation of the TLB hierarchy
     # for ease of configuration
     # < Modify here the width and the number of levels if you want a different
     # configuration >
     # width is the number of TLBs of the given type (i.e., D-TLB, I-TLB etc)
     # for this level
     L1 = [{'name': 'sqc', 'width': options.num_sqc, 'TLBarray': [],
            'CoalescerArray': []},
           {'name': 'scalar', 'width': options.num_scalar_cache,
            'TLBarray': [], 'CoalescerArray': []},
           {'name': 'l1', 'width': num_TLBs, 'TLBarray': [],
            'CoalescerArray': []}]

     L2 = [{'name': 'l2', 'width': 1, 'TLBarray': [], 'CoalescerArray': []}]
     L3 = [{'name': 'l3', 'width': 1, 'TLBarray': [], 'CoalescerArray': []}]

     TLB_hierarchy = [L1, L2, L3]

     # If the sim calls for a fixed L1 TLB size across CUs, override the
     # TLB entries option.  This is done once, before any TLB is built;
     # floor division keeps the entry count an integer.
     if options.tot_L1TLB_size:
         options.L1TLBentries = options.tot_L1TLB_size // num_TLBs
         if options.L1TLBassoc > options.L1TLBentries:
             options.L1TLBassoc = options.L1TLBentries

     #-------------------------------------------------------------------------
     # Create the hierarchy
     # Call the appropriate constructors and add objects to the system
     for level, hierarchy_level in enumerate(TLB_hierarchy, start=1):
         for TLB_type in hierarchy_level:
             TLB_array = TLB_type['TLBarray']
             Coalescer_array = TLB_type['CoalescerArray']
             create_TLB_Coalescer(options, level, TLB_type['width'],
                                  TLB_array, Coalescer_array, gpu_ctrl,
                                  full_system)

             # add the different TLB levels to the system
             # Modify here if you want to make the TLB hierarchy a child of
             # the shader.
             setattr(system, TLB_type['name'] + '_tlb', TLB_array)
             setattr(system, TLB_type['name'] + '_coalescer',
                     Coalescer_array)

     #===========================================================
     # Specify the TLB hierarchy (i.e., port connections)
     # All TLBs but the last level TLB need to have a memSidePort
     #===========================================================

     # Each TLB is connected with its Coalescer through a single port.
     # There is a one-to-one mapping of TLBs to Coalescers at a given level
     # This won't be modified no matter what the hierarchy looks like.
     for hierarchy_level in TLB_hierarchy:
         for TLB_type in hierarchy_level:
             name = TLB_type['name']
             for index in range(TLB_type['width']):
                 tlb = getattr(system, name + '_tlb')[index]
                 coalescer = getattr(system, name + '_coalescer')[index]
                 coalescer.mem_side_ports[0] = tlb.cpu_side_ports[0]

     # Connect the cpuSidePort of all the coalescers in level 1
     # < Modify here if you want a different configuration >
     for TLB_type in L1:
         name = TLB_type['name']
         width = TLB_type['width']
         if name == 'l1':     # L1 D-TLBs
             tlb_per_cu = width // n_cu
             for cu_idx in range(n_cu):
                 cu = system.cpu[shader_idx].CUs[cu_idx]
                 if tlb_per_cu:
                     # One or more private TLBs per CU.
                     for tlb in range(tlb_per_cu):
                         coalescer = \
                             system.l1_coalescer[cu_idx * tlb_per_cu + tlb]
                         cu.translation_port[tlb] = \
                             coalescer.cpu_side_ports[0]
                 else:
                     # Fewer TLBs than CUs: several CUs share a coalescer,
                     # each on its own CPU-side port.  True division
                     # followed by truncation is kept on purpose; it gives
                     # the same indices the former '%d' formatting of the
                     # float results produced.
                     cus_per_tlb = n_cu / width
                     coalescer = \
                         system.l1_coalescer[int(cu_idx / cus_per_tlb)]
                     cu.translation_port[tlb_per_cu] = \
                         coalescer.cpu_side_ports[int(cu_idx % cus_per_tlb)]
         elif name == 'sqc': # I-TLB
             # assumes options.cu_per_sqc is a positive int -- TODO confirm
             for index in range(n_cu):
                 coalescer = system.sqc_coalescer[index // options.cu_per_sqc]
                 system.cpu[shader_idx].CUs[index].sqc_tlb_port = \
                     coalescer.cpu_side_ports[index % options.cu_per_sqc]
         elif name == 'scalar': # Scalar D-TLB
             # assumes options.cu_per_scalar_cache is a positive int
             # -- TODO confirm
             for index in range(n_cu):
                 coalescer = system.scalar_coalescer[
                     index // options.cu_per_scalar_cache]
                 system.cpu[shader_idx].CUs[index].scalar_tlb_port = \
                     coalescer.cpu_side_ports[
                         index % options.cu_per_scalar_cache]

     # Connect the memSidePorts of all the TLBs with the
     # cpuSidePorts of the Coalescers of the next level
     # < Modify here if you want a different configuration >
     # L1 <-> L2: every level-1 TLB gets the next free CPU-side port of
     # the single L2 coalescer.
     l2_coalescer_index = 0
     for TLB_type in L1:
         name = TLB_type['name']
         for index in range(TLB_type['width']):
             tlb = getattr(system, name + '_tlb')[index]
             tlb.mem_side_ports[0] = \
                 system.l2_coalescer[0].cpu_side_ports[l2_coalescer_index]
             l2_coalescer_index += 1

     # L2 <-> L3
     system.l2_tlb[0].mem_side_ports[0] = \
         system.l3_coalescer[0].cpu_side_ports[0]

     # L3 TLB Vega page table walker to memory for full system only
     if full_system:
         for TLB_type in L3:
             name = TLB_type['name']
             for index in range(TLB_type['width']):
                 system._dma_ports.append(
                     getattr(system, name + '_tlb')[index].walker)

     return system
	# Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
	# All rights reserved.
	#
	# Redistribution and use in source and binary forms, with or without
	# modification, are permitted provided that the following conditions are met:
	#
	# 1. Redistributions of source code must retain the above copyright notice,
	# this list of conditions and the following disclaimer.
	#
	# 2. Redistributions in binary form must reproduce the above copyright notice,
	# this list of conditions and the following disclaimer in the documentation
	# and/or other materials provided with the distribution.
	#
	# 3. Neither the name of the copyright holder nor the names of its
	# contributors may be used to endorse or promote products derived from this
	# software without specific prior written permission.
	#
	# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
	# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
	# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
	# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
	# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
	# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
	# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
	# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
	# POSSIBILITY OF SUCH DAMAGE.

	# Configure the TLB hierarchy.
	# Places that would probably need to be modified if you want a
	# different hierarchy are marked with a '< Modify here ... >' comment.
	import m5
	from m5.objects import *

	def TLB_constructor(options, level, gpu_ctrl=None, full_system=False):
	    """Return the source text of a GPU TLB constructor call.

	    Nothing is instantiated here: the result is a string that the caller
	    evaluates.  Only the TLB level number is substituted into the text;
	    `options` and (for the full-system variant) `gpu_ctrl` appear in it
	    as plain names, so they must be bound in the scope that evaluates it.

	    Arguments:
	        options: not read here; referenced by name in the returned text.
	        level: integer TLB level, spliced into the option names
	            (e.g. level 1 gives options.L1TLBentries).
	        gpu_ctrl: not read here; referenced by name in the full-system
	            text.
	        full_system: selects the VegaGPUTLB text when true, otherwise
	            the X86GPUTLB text.
	    """
	    # Block structure restored: the original lines had lost all of
	    # their indentation.  String continuation lines are left untouched.
	    substitutions = {'level': level}

	    if not full_system:
	        return "X86GPUTLB(size = options.L%(level)dTLBentries, \
	assoc = options.L%(level)dTLBassoc, \
	hitLatency = options.L%(level)dAccessLatency,\
	missLatency2 = options.L%(level)dMissLatency,\
	maxOutstandingReqs = options.L%(level)dMaxOutstandingReqs,\
	accessDistance = options.L%(level)dAccessDistanceStat,\
	clk_domain = SrcClockDomain(\
	clock = options.gpu_clock,\
	voltage_domain = VoltageDomain(\
	voltage = options.gpu_voltage)))" % substitutions

	    # Full-system variant: both miss latencies use the same option.
	    return "VegaGPUTLB(\
	gpu_device = gpu_ctrl, \
	size = options.L%(level)dTLBentries, \
	assoc = options.L%(level)dTLBassoc, \
	hitLatency = options.L%(level)dAccessLatency,\
	missLatency1 = options.L%(level)dMissLatency,\
	missLatency2 = options.L%(level)dMissLatency,\
	maxOutstandingReqs = options.L%(level)dMaxOutstandingReqs,\
	clk_domain = SrcClockDomain(\
	clock = options.gpu_clock,\
	voltage_domain = VoltageDomain(\
	voltage = options.gpu_voltage)))" % substitutions

	def Coalescer_constructor(options, level, full_system):
	    """Return the source text of a TLB coalescer constructor call.

	    The result is a string for the caller to evaluate.  Only the level
	    number is substituted; `options` appears in the text as a plain name
	    and must be bound in the scope that evaluates it.

	    Arguments:
	        options: not read here; referenced by name in the returned text.
	        level: integer TLB level, spliced into the option names and, in
	            the full-system text, passed as tlb_level.
	        full_system: selects the VegaTLBCoalescer text when true,
	            otherwise the TLBCoalescer text.
	    """
	    # Block structure restored: the original lines had lost all of
	    # their indentation.  String continuation lines are left untouched.
	    substitutions = {'level': level}

	    if not full_system:
	        return "TLBCoalescer(probesPerCycle = \
	options.L%(level)dProbesPerCycle, \
	coalescingWindow = options.L%(level)dCoalescingWindow,\
	disableCoalescing = options.L%(level)dDisableCoalescing,\
	clk_domain = SrcClockDomain(\
	clock = options.gpu_clock,\
	voltage_domain = VoltageDomain(\
	voltage = options.gpu_voltage)))" % substitutions

	    # The full-system coalescer is additionally told its own TLB level.
	    return "VegaTLBCoalescer(probesPerCycle = \
	options.L%(level)dProbesPerCycle, \
	tlb_level = %(level)d ,\
	coalescingWindow = options.L%(level)dCoalescingWindow,\
	disableCoalescing = options.L%(level)dDisableCoalescing,\
	clk_domain = SrcClockDomain(\
	clock = options.gpu_clock,\
	voltage_domain = VoltageDomain(\
	voltage = options.gpu_voltage)))" % substitutions

	def create_TLB_Coalescer(options, my_level, my_index, tlb_name,
	                         coalescer_name, gpu_ctrl=None, full_system=False):
	    """Append `my_index` freshly built TLB/coalescer pairs to two lists.

	    Arguments:
	        options: referenced by name inside the evaluated constructor text.
	        my_level: TLB level passed on to the constructor-text builders.
	        my_index: number of TLB/coalescer pairs to create.
	        tlb_name: list that receives the new TLB objects.
	        coalescer_name: list that receives the new coalescer objects.
	        gpu_ctrl: referenced by name inside the full-system TLB text.
	        full_system: forwarded to the constructor-text builders.
	    """
	    # Block structure restored: the original lines had lost all of
	    # their indentation.
	    # Keep this a plain loop in the function body: eval() resolves
	    # `options` and `gpu_ctrl` from this function's local scope.
	    for _ in range(my_index):
	        tlb_name.append(
	            eval(TLB_constructor(options, my_level, gpu_ctrl, full_system)))
	        coalescer_name.append(
	            eval(Coalescer_constructor(options, my_level, full_system)))

	def config_tlb_hierarchy(options, system, shader_idx, gpu_ctrl=None,
	                         full_system=False):
	    """Create the three-level GPU TLB hierarchy and connect its ports.

	    For every entry of the hierarchy table below, `width` TLB/coalescer
	    pairs are created and attached to `system` as `<name>_tlb` and
	    `<name>_coalescer`.  The ports are then wired as follows:

	    * each coalescer's mem_side_ports[0] to its TLB's cpu_side_ports[0];
	    * the translation ports of system.cpu[shader_idx].CUs to the level-1
	      coalescers;
	    * every level-1 TLB to its own CPU-side port of the single L2
	      coalescer, and the L2 TLB to the L3 coalescer;
	    * in full-system mode, each L3 TLB's `walker` is appended to
	      system._dma_ports.

	    Arguments:
	        options: parsed options.  Read here: num_compute_units,
	            TLB_config, num_sqc, num_scalar_cache, tot_L1TLB_size,
	            L1TLBentries, L1TLBassoc, cu_per_sqc, cu_per_scalar_cache.
	            L1TLBentries and L1TLBassoc are overwritten when
	            tot_L1TLB_size is set.
	        system: object that receives the TLB/coalescer attributes.
	        shader_idx: index into system.cpu of the shader to connect.
	        gpu_ctrl: forwarded to create_TLB_Coalescer.
	        full_system: forwarded to create_TLB_Coalescer; also enables the
	            L3 walker hookup.

	    Returns the `system` object that was passed in.  An unrecognized
	    options.TLB_config prints a message and exits with status 1.
	    """
	    # Block structure restored: the original lines had lost all of
	    # their indentation.
	    # Imported locally because the file header does not import sys
	    # explicitly, yet the error path below needs sys.exit().
	    import sys

	    n_cu = options.num_compute_units

	    if options.TLB_config == "perLane":
	        num_TLBs = 64 * n_cu
	    elif options.TLB_config == "mono":
	        num_TLBs = 1
	    elif options.TLB_config == "perCU":
	        num_TLBs = n_cu
	    elif options.TLB_config == "2CU":
	        num_TLBs = n_cu >> 1
	    else:
	        print("Bad option for TLB Configuration.")
	        sys.exit(1)

	    #-------------------------------------------------------------------------
	    # A visual representation of the TLB hierarchy
	    # for ease of configuration
	    # < Modify here the width and the number of levels if you want a different
	    # configuration >
	    # width is the number of TLBs of the given type (i.e., D-TLB, I-TLB etc)
	    # for this level
	    L1 = [{'name': 'sqc', 'width': options.num_sqc, 'TLBarray': [],
	           'CoalescerArray': []},
	          {'name': 'scalar', 'width': options.num_scalar_cache,
	           'TLBarray': [], 'CoalescerArray': []},
	          {'name': 'l1', 'width': num_TLBs, 'TLBarray': [],
	           'CoalescerArray': []}]

	    L2 = [{'name': 'l2', 'width': 1, 'TLBarray': [], 'CoalescerArray': []}]
	    L3 = [{'name': 'l3', 'width': 1, 'TLBarray': [], 'CoalescerArray': []}]

	    TLB_hierarchy = [L1, L2, L3]

	    # If the sim calls for a fixed L1 TLB size across CUs, override the
	    # TLB entries option.  This is done once, before any TLB is built;
	    # floor division keeps the entry count an integer.
	    if options.tot_L1TLB_size:
	        options.L1TLBentries = options.tot_L1TLB_size // num_TLBs
	        if options.L1TLBassoc > options.L1TLBentries:
	            options.L1TLBassoc = options.L1TLBentries

	    #-------------------------------------------------------------------------
	    # Create the hierarchy
	    # Call the appropriate constructors and add objects to the system
	    for level, hierarchy_level in enumerate(TLB_hierarchy, start=1):
	        for TLB_type in hierarchy_level:
	            TLB_array = TLB_type['TLBarray']
	            Coalescer_array = TLB_type['CoalescerArray']
	            create_TLB_Coalescer(options, level, TLB_type['width'],
	                                 TLB_array, Coalescer_array, gpu_ctrl,
	                                 full_system)

	            # add the different TLB levels to the system
	            # Modify here if you want to make the TLB hierarchy a child of
	            # the shader.
	            setattr(system, TLB_type['name'] + '_tlb', TLB_array)
	            setattr(system, TLB_type['name'] + '_coalescer',
	                    Coalescer_array)

	    #===========================================================
	    # Specify the TLB hierarchy (i.e., port connections)
	    # All TLBs but the last level TLB need to have a memSidePort
	    #===========================================================

	    # Each TLB is connected with its Coalescer through a single port.
	    # There is a one-to-one mapping of TLBs to Coalescers at a given level
	    # This won't be modified no matter what the hierarchy looks like.
	    for hierarchy_level in TLB_hierarchy:
	        for TLB_type in hierarchy_level:
	            name = TLB_type['name']
	            for index in range(TLB_type['width']):
	                tlb = getattr(system, name + '_tlb')[index]
	                coalescer = getattr(system, name + '_coalescer')[index]
	                coalescer.mem_side_ports[0] = tlb.cpu_side_ports[0]

	    # Connect the cpuSidePort of all the coalescers in level 1
	    # < Modify here if you want a different configuration >
	    for TLB_type in L1:
	        name = TLB_type['name']
	        width = TLB_type['width']
	        if name == 'l1':     # L1 D-TLBs
	            tlb_per_cu = width // n_cu
	            for cu_idx in range(n_cu):
	                cu = system.cpu[shader_idx].CUs[cu_idx]
	                if tlb_per_cu:
	                    # One or more private TLBs per CU.
	                    for tlb in range(tlb_per_cu):
	                        coalescer = \
	                            system.l1_coalescer[cu_idx * tlb_per_cu + tlb]
	                        cu.translation_port[tlb] = \
	                            coalescer.cpu_side_ports[0]
	                else:
	                    # Fewer TLBs than CUs: several CUs share a coalescer,
	                    # each on its own CPU-side port.  True division
	                    # followed by truncation is kept on purpose; it gives
	                    # the same indices the former '%d' formatting of the
	                    # float results produced.
	                    cus_per_tlb = n_cu / width
	                    coalescer = \
	                        system.l1_coalescer[int(cu_idx / cus_per_tlb)]
	                    cu.translation_port[tlb_per_cu] = \
	                        coalescer.cpu_side_ports[int(cu_idx % cus_per_tlb)]
	        elif name == 'sqc': # I-TLB
	            # assumes options.cu_per_sqc is a positive int -- TODO confirm
	            for index in range(n_cu):
	                coalescer = system.sqc_coalescer[index // options.cu_per_sqc]
	                system.cpu[shader_idx].CUs[index].sqc_tlb_port = \
	                    coalescer.cpu_side_ports[index % options.cu_per_sqc]
	        elif name == 'scalar': # Scalar D-TLB
	            # assumes options.cu_per_scalar_cache is a positive int
	            # -- TODO confirm
	            for index in range(n_cu):
	                coalescer = system.scalar_coalescer[
	                    index // options.cu_per_scalar_cache]
	                system.cpu[shader_idx].CUs[index].scalar_tlb_port = \
	                    coalescer.cpu_side_ports[
	                        index % options.cu_per_scalar_cache]

	    # Connect the memSidePorts of all the TLBs with the
	    # cpuSidePorts of the Coalescers of the next level
	    # < Modify here if you want a different configuration >
	    # L1 <-> L2: every level-1 TLB gets the next free CPU-side port of
	    # the single L2 coalescer.
	    l2_coalescer_index = 0
	    for TLB_type in L1:
	        name = TLB_type['name']
	        for index in range(TLB_type['width']):
	            tlb = getattr(system, name + '_tlb')[index]
	            tlb.mem_side_ports[0] = \
	                system.l2_coalescer[0].cpu_side_ports[l2_coalescer_index]
	            l2_coalescer_index += 1

	    # L2 <-> L3
	    system.l2_tlb[0].mem_side_ports[0] = \
	        system.l3_coalescer[0].cpu_side_ports[0]

	    # L3 TLB Vega page table walker to memory for full system only
	    if full_system:
	        for TLB_type in L3:
	            name = TLB_type['name']
	            for index in range(TLB_type['width']):
	                system._dma_ports.append(
	                    getattr(system, name + '_tlb')[index].walker)

	    return system