configs/example/gpufs/system/amdgpu.py - public/gem5 - Git at Google

 # Copyright (c) 2021 Advanced Micro Devices, Inc.
 # All rights reserved.
 #
 # For use for simulation and test purposes only
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are met:
 #
 # 1. Redistributions of source code must retain the above copyright notice,
 # this list of conditions and the following disclaimer.
 #
 # 2. Redistributions in binary form must reproduce the above copyright notice,
 # this list of conditions and the following disclaimer in the documentation
 # and/or other materials provided with the distribution.
 #
 # 3. Neither the name of the copyright holder nor the names of its
 # contributors may be used to endorse or promote products derived from this
 # software without specific prior written permission.
 #
 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.

 import m5
 from m5.objects import *

 def createGPU(system, args):
     shader = Shader(n_wf = args.wfs_per_simd,
                     timing = True,
                     clk_domain = system.clk_domain)

     # VIPER GPU protocol implements release consistency at GPU side. So,
     # we make their writes visible to the global memory and should read
     # from global memory during kernal boundary. The pipeline initiates
     # (or do not initiate) the acquire/release operation depending on
     # these impl_kern_launch_rel and impl_kern_end_rel flags. The flag=true
     # means pipeline initiates a acquire/release operation at kernel launch/end
     # VIPER protocol is write-through based, and thus only impl_kern_launch_acq
     # needs to set.
     shader.impl_kern_launch_acq = True
     shader.impl_kern_end_rel = False

     # Switching off per-lane TLB by default
     per_lane = False
     if args.TLB_config == "perLane":
         per_lane = True

     # List of compute units; one GPU can have multiple compute units
     compute_units = []
     for i in range(args.num_compute_units):
         compute_units.append(
                  ComputeUnit(cu_id = i, perLaneTLB = per_lane,
                              num_SIMDs = args.simds_per_cu,
                              wf_size = args.wf_size,
                              spbypass_pipe_length = \
                                  args.sp_bypass_path_length,
                              dpbypass_pipe_length = \
                                  args.dp_bypass_path_length,
                              issue_period = args.issue_period,
                              coalescer_to_vrf_bus_width = \
                                  args.glbmem_rd_bus_width,
                              vrf_to_coalescer_bus_width = \
                                  args.glbmem_wr_bus_width,
                              num_global_mem_pipes = \
                                  args.glb_mem_pipes_per_cu,
                              num_shared_mem_pipes = \
                                  args.shr_mem_pipes_per_cu,
                              n_wf = args.wfs_per_simd,
                              execPolicy = args.CUExecPolicy,
                              localMemBarrier = args.LocalMemBarrier,
                              countPages = args.countPages,
                              localDataStore = \
                              LdsState(banks = args.numLdsBanks,
                                       bankConflictPenalty = \
                                           args.ldsBankConflictPenalty,
                                       size = args.lds_size)))

         wavefronts = []
         vrfs = []
         vrf_pool_mgrs = []
         srfs = []
         srf_pool_mgrs = []
         for j in range(args.simds_per_cu):
             for k in range(shader.n_wf):
                 wavefronts.append(Wavefront(simdId = j, wf_slot_id = k,
                                             wf_size = args.wf_size))

             if args.reg_alloc_policy == "simple":
                 vrf_pool_mgrs.append(SimplePoolManager(pool_size = \
                                                    args.vreg_file_size,
                                                    min_alloc = \
                                                    args.vreg_min_alloc))
                 srf_pool_mgrs.append(SimplePoolManager(pool_size = \
                                                    args.sreg_file_size,
                                                    min_alloc = \
                                                    args.vreg_min_alloc))
             elif args.reg_alloc_policy == "dynamic":
                 vrf_pool_mgrs.append(DynPoolManager(pool_size = \
                                                    args.vreg_file_size,
                                                    min_alloc = \
                                                    args.vreg_min_alloc))
                 srf_pool_mgrs.append(DynPoolManager(pool_size = \
                                                    args.sreg_file_size,
                                                    min_alloc = \
                                                    args.vreg_min_alloc))

             vrfs.append(VectorRegisterFile(simd_id=j, wf_size=args.wf_size,
                                            num_regs=args.vreg_file_size))

             srfs.append(ScalarRegisterFile(simd_id=j, wf_size=args.wf_size,
                                            num_regs=args.sreg_file_size))

         compute_units[-1].wavefronts = wavefronts
         compute_units[-1].vector_register_file = vrfs
         compute_units[-1].scalar_register_file = srfs
         compute_units[-1].register_manager = \
             RegisterManager(policy=args.registerManagerPolicy,
                             vrf_pool_managers=vrf_pool_mgrs,
                             srf_pool_managers=srf_pool_mgrs)
         if args.TLB_prefetch:
             compute_units[-1].prefetch_depth = args.TLB_prefetch
             compute_units[-1].prefetch_prev_type = args.pf_type

         # Attach the LDS and the CU to the bus (actually a Bridge)
         compute_units[-1].ldsPort = compute_units[-1].ldsBus.cpu_side_port
         compute_units[-1].ldsBus.mem_side_port = \
             compute_units[-1].localDataStore.cuPort

     # Attach compute units to GPU
     shader.CUs = compute_units

     shader.cpu_pointer = system.cpu[0]
     shader.eventq_index = 0
     shader.set_parent(system, "Shader")

     return shader

 def connectGPU(system, args):
     system.pc.south_bridge.gpu = AMDGPUDevice(pci_func=0, pci_dev=8, pci_bus=0)

     system.pc.south_bridge.gpu.trace_file = args.gpu_mmio_trace
     system.pc.south_bridge.gpu.rom_binary = args.gpu_rom
     system.pc.south_bridge.gpu.checkpoint_before_mmios = \
         args.checkpoint_before_mmios
	# Copyright (c) 2021 Advanced Micro Devices, Inc.
	# All rights reserved.
	#
	# For use for simulation and test purposes only
	#
	# Redistribution and use in source and binary forms, with or without
	# modification, are permitted provided that the following conditions are met:
	#
	# 1. Redistributions of source code must retain the above copyright notice,
	# this list of conditions and the following disclaimer.
	#
	# 2. Redistributions in binary form must reproduce the above copyright notice,
	# this list of conditions and the following disclaimer in the documentation
	# and/or other materials provided with the distribution.
	#
	# 3. Neither the name of the copyright holder nor the names of its
	# contributors may be used to endorse or promote products derived from this
	# software without specific prior written permission.
	#
	# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
	# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
	# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
	# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
	# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
	# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
	# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
	# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
	# POSSIBILITY OF SUCH DAMAGE.

	import m5
	from m5.objects import *

	def createGPU(system, args):
	shader = Shader(n_wf = args.wfs_per_simd,
	timing = True,
	clk_domain = system.clk_domain)

	# VIPER GPU protocol implements release consistency at GPU side. So,
	# we make their writes visible to the global memory and should read
	# from global memory during kernal boundary. The pipeline initiates
	# (or do not initiate) the acquire/release operation depending on
	# these impl_kern_launch_rel and impl_kern_end_rel flags. The flag=true
	# means pipeline initiates a acquire/release operation at kernel launch/end
	# VIPER protocol is write-through based, and thus only impl_kern_launch_acq
	# needs to set.
	shader.impl_kern_launch_acq = True
	shader.impl_kern_end_rel = False

	# Switching off per-lane TLB by default
	per_lane = False
	if args.TLB_config == "perLane":
	per_lane = True

	# List of compute units; one GPU can have multiple compute units
	compute_units = []
	for i in range(args.num_compute_units):
	compute_units.append(
	ComputeUnit(cu_id = i, perLaneTLB = per_lane,
	num_SIMDs = args.simds_per_cu,
	wf_size = args.wf_size,
	spbypass_pipe_length = \
	args.sp_bypass_path_length,
	dpbypass_pipe_length = \
	args.dp_bypass_path_length,
	issue_period = args.issue_period,
	coalescer_to_vrf_bus_width = \
	args.glbmem_rd_bus_width,
	vrf_to_coalescer_bus_width = \
	args.glbmem_wr_bus_width,
	num_global_mem_pipes = \
	args.glb_mem_pipes_per_cu,
	num_shared_mem_pipes = \
	args.shr_mem_pipes_per_cu,
	n_wf = args.wfs_per_simd,
	execPolicy = args.CUExecPolicy,
	localMemBarrier = args.LocalMemBarrier,
	countPages = args.countPages,
	localDataStore = \
	LdsState(banks = args.numLdsBanks,
	bankConflictPenalty = \
	args.ldsBankConflictPenalty,
	size = args.lds_size)))

	wavefronts = []
	vrfs = []
	vrf_pool_mgrs = []
	srfs = []
	srf_pool_mgrs = []
	for j in range(args.simds_per_cu):
	for k in range(shader.n_wf):
	wavefronts.append(Wavefront(simdId = j, wf_slot_id = k,
	wf_size = args.wf_size))

	if args.reg_alloc_policy == "simple":
	vrf_pool_mgrs.append(SimplePoolManager(pool_size = \
	args.vreg_file_size,
	min_alloc = \
	args.vreg_min_alloc))
	srf_pool_mgrs.append(SimplePoolManager(pool_size = \
	args.sreg_file_size,
	min_alloc = \
	args.vreg_min_alloc))
	elif args.reg_alloc_policy == "dynamic":
	vrf_pool_mgrs.append(DynPoolManager(pool_size = \
	args.vreg_file_size,
	min_alloc = \
	args.vreg_min_alloc))
	srf_pool_mgrs.append(DynPoolManager(pool_size = \
	args.sreg_file_size,
	min_alloc = \
	args.vreg_min_alloc))

	vrfs.append(VectorRegisterFile(simd_id=j, wf_size=args.wf_size,
	num_regs=args.vreg_file_size))

	srfs.append(ScalarRegisterFile(simd_id=j, wf_size=args.wf_size,
	num_regs=args.sreg_file_size))

	compute_units[-1].wavefronts = wavefronts
	compute_units[-1].vector_register_file = vrfs
	compute_units[-1].scalar_register_file = srfs
	compute_units[-1].register_manager = \
	RegisterManager(policy=args.registerManagerPolicy,
	vrf_pool_managers=vrf_pool_mgrs,
	srf_pool_managers=srf_pool_mgrs)
	if args.TLB_prefetch:
	compute_units[-1].prefetch_depth = args.TLB_prefetch
	compute_units[-1].prefetch_prev_type = args.pf_type

	# Attach the LDS and the CU to the bus (actually a Bridge)
	compute_units[-1].ldsPort = compute_units[-1].ldsBus.cpu_side_port
	compute_units[-1].ldsBus.mem_side_port = \
	compute_units[-1].localDataStore.cuPort

	# Attach compute units to GPU
	shader.CUs = compute_units

	shader.cpu_pointer = system.cpu[0]
	shader.eventq_index = 0
	shader.set_parent(system, "Shader")

	return shader

	def connectGPU(system, args):
	system.pc.south_bridge.gpu = AMDGPUDevice(pci_func=0, pci_dev=8, pci_bus=0)

	system.pc.south_bridge.gpu.trace_file = args.gpu_mmio_trace
	system.pc.south_bridge.gpu.rom_binary = args.gpu_rom
	system.pc.south_bridge.gpu.checkpoint_before_mmios = \
	args.checkpoint_before_mmios