| # Copyright (c) 2021 Advanced Micro Devices, Inc. |
| # All rights reserved. |
| # |
| # Redistribution and use in source and binary forms, with or without |
| # modification, are permitted provided that the following conditions are met: |
| # |
| # 1. Redistributions of source code must retain the above copyright notice, |
| # this list of conditions and the following disclaimer. |
| # |
| # 2. Redistributions in binary form must reproduce the above copyright notice, |
| # this list of conditions and the following disclaimer in the documentation |
| # and/or other materials provided with the distribution. |
| # |
| # 3. Neither the name of the copyright holder nor the names of its |
| # contributors may be used to endorse or promote products derived from this |
| # software without specific prior written permission. |
| # |
| # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
| # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| # POSSIBILITY OF SUCH DAMAGE. |
| |
| from system.amdgpu import * |
| |
| from m5.util import panic |
| |
| from common.Benchmarks import * |
| from common.FSConfig import * |
| from common import GPUTLBConfig |
| from common import Simulation |
| from ruby import Ruby |
| |
| from example.gpufs.Disjoint_VIPER import * |
| |
| |
| def makeGpuFSSystem(args): |
| # Boot options are standard gem5 options plus: |
| # - Framebuffer device emulation 0 to reduce driver code paths. |
| # - Blacklist amdgpu as it cannot (currently) load in KVM CPU. |
| # - Blacklist psmouse as amdgpu driver adds proprietary commands that |
| # cause gem5 to panic. |
| boot_options = [ |
| "earlyprintk=ttyS0", |
| "console=ttyS0,9600", |
| "lpj=7999923", |
| "root=/dev/sda1", |
| "drm_kms_helper.fbdev_emulation=0", |
| "modprobe.blacklist=amdgpu", |
| "modprobe.blacklist=psmouse", |
| ] |
| cmdline = " ".join(boot_options) |
| |
| if MemorySize(args.mem_size) < MemorySize("2GB"): |
| panic("Need at least 2GB of system memory to load amdgpu module") |
| |
| # Use the common FSConfig to setup a Linux X86 System |
| (TestCPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(args) |
| disks = [args.disk_image] |
| if args.second_disk is not None: |
| disks.extend([args.second_disk]) |
| bm = SysConfig(disks=disks, mem=args.mem_size) |
| system = makeLinuxX86System( |
| test_mem_mode, args.num_cpus, bm, True, cmdline=cmdline |
| ) |
| system.workload.object_file = binary(args.kernel) |
| |
| # Set the cache line size for the entire system. |
| system.cache_line_size = args.cacheline_size |
| |
| # Create a top-level voltage and clock domain. |
| system.voltage_domain = VoltageDomain(voltage=args.sys_voltage) |
| system.clk_domain = SrcClockDomain( |
| clock=args.sys_clock, voltage_domain=system.voltage_domain |
| ) |
| |
| # Create a CPU voltage and clock domain. |
| system.cpu_voltage_domain = VoltageDomain() |
| system.cpu_clk_domain = SrcClockDomain( |
| clock=args.cpu_clock, voltage_domain=system.cpu_voltage_domain |
| ) |
| |
| # Setup VGA ROM region |
| system.shadow_rom_ranges = [AddrRange(0xC0000, size=Addr("128kB"))] |
| |
| # Create specified number of CPUs. GPUFS really only needs one. |
| system.cpu = [ |
| X86KvmCPU(clk_domain=system.cpu_clk_domain, cpu_id=i) |
| for i in range(args.num_cpus) |
| ] |
| system.kvm_vm = KvmVM() |
| |
| # Create AMDGPU and attach to southbridge |
| shader = createGPU(system, args) |
| connectGPU(system, args) |
| |
| # The shader core will be whatever is after the CPU cores are accounted for |
| shader_idx = args.num_cpus |
| system.cpu.append(shader) |
| |
| # This arbitrary address is something in the X86 I/O hole |
| hsapp_gpu_map_paddr = 0xE00000000 |
| hsapp_pt_walker = VegaPagetableWalker() |
| gpu_hsapp = HSAPacketProcessor( |
| pioAddr=hsapp_gpu_map_paddr, |
| numHWQueues=args.num_hw_queues, |
| walker=hsapp_pt_walker, |
| ) |
| dispatcher = GPUDispatcher() |
| cp_pt_walker = VegaPagetableWalker() |
| gpu_cmd_proc = GPUCommandProcessor( |
| hsapp=gpu_hsapp, dispatcher=dispatcher, walker=cp_pt_walker |
| ) |
| shader.dispatcher = dispatcher |
| shader.gpu_cmd_proc = gpu_cmd_proc |
| |
| system.pc.south_bridge.gpu.cp = gpu_cmd_proc |
| |
| # GPU Interrupt Handler |
| device_ih = AMDGPUInterruptHandler() |
| system.pc.south_bridge.gpu.device_ih = device_ih |
| |
| # Setup the SDMA engines |
| sdma0_pt_walker = VegaPagetableWalker() |
| sdma1_pt_walker = VegaPagetableWalker() |
| |
| sdma0 = SDMAEngine(walker=sdma0_pt_walker) |
| sdma1 = SDMAEngine(walker=sdma1_pt_walker) |
| |
| system.pc.south_bridge.gpu.sdma0 = sdma0 |
| system.pc.south_bridge.gpu.sdma1 = sdma1 |
| |
| # Setup PM4 packet processor |
| pm4_pkt_proc = PM4PacketProcessor() |
| system.pc.south_bridge.gpu.pm4_pkt_proc = pm4_pkt_proc |
| |
| # GPU data path |
| gpu_mem_mgr = AMDGPUMemoryManager() |
| system.pc.south_bridge.gpu.memory_manager = gpu_mem_mgr |
| |
| # CPU data path (SystemHub) |
| system_hub = AMDGPUSystemHub() |
| shader.system_hub = system_hub |
| |
| # GPU, HSAPP, and GPUCommandProc are DMA devices |
| system._dma_ports.append(gpu_hsapp) |
| system._dma_ports.append(gpu_cmd_proc) |
| system._dma_ports.append(system.pc.south_bridge.gpu) |
| system._dma_ports.append(sdma0) |
| system._dma_ports.append(sdma1) |
| system._dma_ports.append(device_ih) |
| system._dma_ports.append(pm4_pkt_proc) |
| system._dma_ports.append(system_hub) |
| system._dma_ports.append(gpu_mem_mgr) |
| system._dma_ports.append(hsapp_pt_walker) |
| system._dma_ports.append(cp_pt_walker) |
| system._dma_ports.append(sdma0_pt_walker) |
| system._dma_ports.append(sdma1_pt_walker) |
| |
| gpu_hsapp.pio = system.iobus.mem_side_ports |
| gpu_cmd_proc.pio = system.iobus.mem_side_ports |
| system.pc.south_bridge.gpu.pio = system.iobus.mem_side_ports |
| sdma0.pio = system.iobus.mem_side_ports |
| sdma1.pio = system.iobus.mem_side_ports |
| device_ih.pio = system.iobus.mem_side_ports |
| pm4_pkt_proc.pio = system.iobus.mem_side_ports |
| system_hub.pio = system.iobus.mem_side_ports |
| |
| # Full system needs special TLBs for SQC, Scalar, and vector data ports |
| args.full_system = True |
| GPUTLBConfig.config_tlb_hierarchy( |
| args, system, shader_idx, system.pc.south_bridge.gpu, True |
| ) |
| |
| # Create Ruby system using disjoint VIPER topology |
| system.ruby = Disjoint_VIPER() |
| system.ruby.create(args, system, system.iobus, system._dma_ports) |
| |
| # Create a seperate clock domain for Ruby |
| system.ruby.clk_domain = SrcClockDomain( |
| clock=args.ruby_clock, voltage_domain=system.voltage_domain |
| ) |
| |
| for (i, cpu) in enumerate(system.cpu): |
| # Break once we reach the shader "CPU" |
| if i == args.num_cpus: |
| break |
| |
| # |
| # Tie the cpu ports to the correct ruby system ports |
| # |
| cpu.clk_domain = system.cpu_clk_domain |
| cpu.createThreads() |
| cpu.createInterruptController() |
| |
| system.ruby._cpu_ports[i].connectCpuPorts(cpu) |
| |
| for j in range(len(system.cpu[i].isa)): |
| system.cpu[i].isa[j].vendor_string = "AuthenticAMD" |
| |
| if args.host_parallel: |
| # To get the KVM CPUs to run on different host CPUs, specify a |
| # different event queue for each CPU. The last CPU is a GPU |
| # shader and should be skipped. |
| for i, cpu in enumerate(system.cpu[:-1]): |
| for obj in cpu.descendants(): |
| obj.eventq_index = 0 |
| cpu.eventq_index = i + 1 |
| |
| gpu_port_idx = ( |
| len(system.ruby._cpu_ports) |
| - args.num_compute_units |
| - args.num_sqc |
| - args.num_scalar_cache |
| ) |
| gpu_port_idx = gpu_port_idx - args.num_cp * 2 |
| |
| # Connect token ports. For this we need to search through the list of all |
| # sequencers, since the TCP coalescers will not necessarily be first. Only |
| # TCP coalescers use a token port for back pressure. |
| token_port_idx = 0 |
| for i in range(len(system.ruby._cpu_ports)): |
| if isinstance(system.ruby._cpu_ports[i], VIPERCoalescer): |
| system.cpu[shader_idx].CUs[ |
| token_port_idx |
| ].gmTokenPort = system.ruby._cpu_ports[i].gmTokenPort |
| token_port_idx += 1 |
| |
| wavefront_size = args.wf_size |
| for i in range(args.num_compute_units): |
| # The pipeline issues wavefront_size number of uncoalesced requests |
| # in one GPU issue cycle. Hence wavefront_size mem ports. |
| for j in range(wavefront_size): |
| system.cpu[shader_idx].CUs[i].memory_port[ |
| j |
| ] = system.ruby._cpu_ports[gpu_port_idx].in_ports[j] |
| gpu_port_idx += 1 |
| |
| for i in range(args.num_compute_units): |
| if i > 0 and not i % args.cu_per_sqc: |
| gpu_port_idx += 1 |
| system.cpu[shader_idx].CUs[i].sqc_port = system.ruby._cpu_ports[ |
| gpu_port_idx |
| ].in_ports |
| gpu_port_idx = gpu_port_idx + 1 |
| |
| for i in range(args.num_compute_units): |
| if i > 0 and not i % args.cu_per_scalar_cache: |
| gpu_port_idx += 1 |
| system.cpu[shader_idx].CUs[i].scalar_port = system.ruby._cpu_ports[ |
| gpu_port_idx |
| ].in_ports |
| gpu_port_idx = gpu_port_idx + 1 |
| |
| return system |