stdlib,arch-arm: Add ruby cache support to the ArmBoard

This change adds Ruby cache support to the ArmBoard. Previously,
only classic caches were supported by the ArmBoard. The ArmBoard
was tested with the CHI, MESI_Two_Level and MI_example cache
hierarchies from gem5's stdlib.

Change-Id: I480fe6ae13e3bd8438a425548ed113d443fcee40
Signed-off-by: Kaustav Goswami <kggoswami@ucdavis.edu>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64011
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
diff --git a/configs/example/gem5_library/arm-ubuntu-boot-exit.py b/configs/example/gem5_library/arm-ubuntu-boot-exit.py
index 201fb23..70608ec 100644
--- a/configs/example/gem5_library/arm-ubuntu-boot-exit.py
+++ b/configs/example/gem5_library/arm-ubuntu-boot-exit.py
@@ -25,11 +25,10 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 """
-This script shows an example of booting an ARM based full system Ubuntu
-disk image using the gem5's standard library. This simulation boots the disk
-image using 2 TIMING CPU cores. The simulation ends when the startup is
-completed successfully (i.e. when an `m5_exit instruction is reached on
-successful boot).
+This script shows an example of booting an ARM-based full-system Ubuntu disk
+image. This simulation boots the disk image using 2 TIMING CPU cores. The
+simulation ends when the startup is completed successfully (i.e. when an
+`m5_exit` instruction is reached on successful boot).
 
 Usage
 -----
@@ -44,27 +43,26 @@
 from gem5.isas import ISA
 from m5.objects import ArmDefaultRelease
 from gem5.utils.requires import requires
+from gem5.resources.workload import Workload
 from gem5.simulate.simulator import Simulator
 from m5.objects import VExpress_GEM5_Foundation
+from gem5.coherence_protocol import CoherenceProtocol
 from gem5.components.boards.arm_board import ArmBoard
 from gem5.components.memory import DualChannelDDR4_2400
 from gem5.components.processors.cpu_types import CPUTypes
 from gem5.components.processors.simple_processor import SimpleProcessor
-from gem5.resources.workload import Workload
 
-# This runs a check to ensure the gem5 binary is compiled for ARM.
+
+# This runs a check to ensure the gem5 binary is compiled for ARM. No
+# coherence protocol is required because classic caches are used below.
 
 requires(isa_required=ISA.ARM)
 
-# With ARM, we use simple caches.
-
 from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import (
     PrivateL1PrivateL2CacheHierarchy,
 )
 
-
 # Here we setup the parameters of the l1 and l2 caches.
-
 cache_hierarchy = PrivateL1PrivateL2CacheHierarchy(
     l1d_size="16kB", l1i_size="16kB", l2_size="256kB"
 )
diff --git a/src/python/gem5/components/boards/arm_board.py b/src/python/gem5/components/boards/arm_board.py
index c814810..eec9432 100644
--- a/src/python/gem5/components/boards/arm_board.py
+++ b/src/python/gem5/components/boards/arm_board.py
@@ -50,10 +50,9 @@
 import m5
 from abc import ABCMeta
 from ...isas import ISA
-from typing import List
-from m5.util import fatal
 from ...utils.requires import requires
 from ...utils.override import overrides
+from typing import List, Sequence, Tuple
 from .abstract_board import AbstractBoard
 from ...resources.resource import AbstractResource
 from .kernel_disk_workload import KernelDiskWorkload
@@ -74,9 +73,7 @@
     Versatile(TM) Express family of boards.
 
     **Limitations**
-    * The board currently does not support ruby caches.
     * stage2 walker ports are ignored.
-    * This version does not support SECURITY extension.
     """
 
     __metaclass__ = ABCMeta
@@ -90,6 +87,11 @@
         platform: VExpress_GEM5_Base = VExpress_GEM5_Foundation(),
         release: ArmRelease = ArmDefaultRelease(),
     ) -> None:
+
+        # The platform and clk_freq must be set before calling the super class
+        self._platform = platform
+        self._clk_freq = clk_freq
+
         super().__init__()
         AbstractBoard.__init__(
             self,
@@ -100,31 +102,14 @@
         )
 
         # This board requires ARM ISA to work.
-
         requires(isa_required=ISA.ARM)
 
-        # Setting the voltage domain here.
-
-        self.voltage_domain = self.clk_domain.voltage_domain
-
         # Setting up ARM release here. We use the ARM default release, which
         # corresponds to an ARMv8 system.
-
         self.release = release
 
-        # RealView sets up most of the on-chip and off-chip devices and GIC
-        # for the ARM board. These devices' iformation is also used to
-        # generate the dtb file.
-
-        self._setup_realview(platform)
-
-        # ArmBoard's memory can only be setup once realview is initialized.
-
-        self._setup_arm_memory_ranges()
-
         # Setting multi_proc of ArmSystem by counting the number of processors.
-
-        if processor.get_num_cores() != 1:
+        if processor.get_num_cores() == 1:
             self.multi_proc = False
         else:
             self.multi_proc = True
@@ -134,113 +119,162 @@
 
         # This board is expected to run full-system simulation.
         # Loading ArmFsLinux() from `src/arch/arm/ArmFsWorkload.py`
-
         self.workload = ArmFsLinux()
 
         # We are fixing the following variable for the ArmSystem to work. The
         # security extension is checked while generating the dtb file in
-        # realview. This board does not have security extention enabled.
-
+        # realview. This board does not have security extension enabled.
         self._have_psci = False
 
         # highest_el_is_64 is set to True. True if the register width of the
         # highest implemented exception level is 64 bits.
-
         self.highest_el_is_64 = True
 
         # Setting up the voltage and the clock domain here for the ARM board.
         # The ArmSystem/RealView expects voltage_domain to be a parameter.
         # The voltage and the clock frequency are taken from the devices.py
-        # file from configs/example/arm
-
+        # file from configs/example/arm. We set the clock to the same frequency
+        # as the user specified in the config script.
         self.voltage_domain = VoltageDomain(voltage="1.0V")
         self.clk_domain = SrcClockDomain(
-            clock="1GHz", voltage_domain=self.voltage_domain
+            clock=self._clk_freq, voltage_domain=self.voltage_domain
         )
 
         # The ARM board supports both Terminal and VncServer.
-
         self.terminal = Terminal()
         self.vncserver = VncServer()
 
         # Incoherent I/O Bus
-
         self.iobus = IOXBar()
         self.iobus.badaddr_responder = BadAddr()
         self.iobus.default = self.iobus.badaddr_responder.pio
 
+        # We now need to setup the dma_ports.
+        self._dma_ports = None
+
+        # An else part is not required as for CHI protocol, the dma_ports has
+        # to be set to []
+
+        # RealView sets up most of the on-chip and off-chip devices and GIC
+        # for the ARM board. These devices' information is also used to
+        # generate the dtb file. We then connect the I/O devices to the
+        # I/O bus.
+        self._setup_io_devices()
+
+        # Once the realview is setup, we can continue setting up the memory
+        # ranges. ArmBoard's memory can only be setup once realview is
+        # initialized.
+        memory = self.get_memory()
+        mem_size = memory.get_size()
+
+        # The following code is taken from configs/example/arm/devices.py. It
+        # sets up all the memory ranges for the board.
+        self.mem_ranges = []
+        success = False
+        for mem_range in self.realview._mem_regions:
+            size_in_range = min(mem_size, mem_range.size())
+            self.mem_ranges.append(
+                AddrRange(start=mem_range.start, size=size_in_range)
+            )
+
+            mem_size -= size_in_range
+            if mem_size == 0:
+                success = True
+                break
+
+        if success:
+            memory.set_memory_range(self.mem_ranges)
+        else:
+            raise ValueError("Memory size too big for platform capabilities")
+
+        # the image is initially set to None as a sanity check. This is
+        # overwritten in the method _setup_pci_devices.
+        self._image = None
+
+        # Calling _setup_pci_devices. The DMA ports have to be set up
+        # beforehand. The PCI devices have to be set up before adding the disk
+        # to the board, as the dma_ports have to be correctly set up before
+        # incorporating Ruby caches. The issue is that the dma_controllers can
+        # only be created correctly when we have the dma_ports for the PCI
+        # device. The current order of function calls is:
+        # ArmBoard                AbstractBoard          KernelDiskWorkload
+        # _setup_pci_devices() -> incorporate_cache() -> _add_disk_to_board()
+        self._setup_pci_devices()
+
     def _setup_io_devices(self) -> None:
         """
-        This method connects the I/O devices to the I/O bus.
-        """
-
-        # We setup the iobridge for the ARM Board. The default
-        # cache_hierarchy's NoCache class has an iobridge has a latency of
-        # 10. We are using an iobridge with latency = 50ns, taken from the
-        # configs/example/arm/devices.py
-
-        self.iobridge = Bridge(delay="50ns")
-        self.iobridge.mem_side_port = self.iobus.cpu_side_ports
-        self.iobridge.cpu_side_port = self.cache_hierarchy.get_mem_side_port()
-
-        # We either have iocache or dmabridge depending upon the
-        # cache_hierarchy. If we have "NoCache", then we use the dmabridge.
-        # Otherwise, we use the iocache on the board.
-
-        if isinstance(self.cache_hierarchy, NoCache) is False:
-
-            # The ArmBoard does not support ruby caches.
-
-            if self.get_cache_hierarchy().is_ruby():
-                fatal("Ruby caches are not supported by the ArmBoard.")
-
-            # The classic caches are setup in the  _setup_io_cache() method,
-            # defined under the cachehierarchy class. Verified it with both
-            # PrivateL1PrivateL2CacheHierarchy and PrivateL1CacheHierarchy
-            # classes.
-
-        else:
-
-            # This corresponds to a machine without caches. We have a DMA
-            # beidge in this case. Parameters of this bridge are also taken
-            # from the common/example/arm/devices.py file.
-
-            self.dmabridge = Bridge(delay="50ns", ranges=self.mem_ranges)
-
-            self.dmabridge.mem_side_port = self.get_dma_ports()[0]
-            self.dmabridge.cpu_side_port = self.get_dma_ports()[1]
-
-        self.realview.attachOnChipIO(
-            self.cache_hierarchy.membus, self.iobridge
-        )
-        self.realview.attachIO(self.iobus)
-
-    def _setup_realview(self, platform) -> None:
-        """
-        Notes:
-        The ARM Board has realview platform. Most of the on-chip and
-        off-chip devices are setup by the RealView platform. Currently, there
-        are 5 different types of realview platforms supported by the ArmBoard.
-
-        :param platform: the user can specify the platform while instantiating
-        an ArmBoard object.
+        This method first sets up the platform. ARM uses `realview` platform.
+        Most of the on-chip and off-chip devices are setup by the realview
+        platform. Once realview is setup, we connect the I/O devices to the
+        I/O bus.
         """
 
         # Currently, the ArmBoard supports VExpress_GEM5_V1,
         # VExpress_GEM5_V1_HDLcd and VExpress_GEM5_Foundation.
         # VExpress_GEM5_V2 and VExpress_GEM5_V2_HDLcd are not supported by the
         # ArmBoard.
-
-        self.realview = platform
+        self.realview = self._platform
 
         # We need to setup the global interrupt controller (GIC) addr for the
         # realview system.
-
         if hasattr(self.realview.gic, "cpu_addr"):
             self.gic_cpu_addr = self.realview.gic.cpu_addr
 
-    def _setup_io_cache(self):
-        pass
+        # The IO devices have to be set up before incorporating the caches in
+        # the case of Ruby caches. Otherwise, the DMA controllers are created
+        # incorrectly. The IO devices have to be attached first. This is done
+        # in the realview class.
+        if self.get_cache_hierarchy().is_ruby():
+
+            # All the on-chip devices are attached in this method.
+            self.realview.attachOnChipIO(
+                self.iobus,
+                dma_ports=self.get_dma_ports(),
+                mem_ports=self.get_memory().get_mem_ports(),
+            )
+            self.realview.attachIO(self.iobus, dma_ports=self.get_dma_ports())
+
+        else:
+            # We either have iocache or dmabridge depending upon the
+            # cache_hierarchy. If we have "NoCache", then we use the dmabridge.
+            # Otherwise, we use the iocache on the board.
+
+            # We set up the iobridge for the ARM Board. The default
+            # cache_hierarchy's NoCache class has an iobridge with a latency
+            # of 10. We are using an iobridge with latency = 50ns, taken
+            # from configs/example/arm/devices.py.
+            self.iobridge = Bridge(delay="50ns")
+            self.iobridge.mem_side_port = self.iobus.cpu_side_ports
+            self.iobridge.cpu_side_port = (
+                self.cache_hierarchy.get_mem_side_port()
+            )
+
+            if isinstance(self.cache_hierarchy, NoCache) is True:
+                # This corresponds to a machine without caches. We have a DMA
+                # bridge in this case. Parameters of this bridge are also taken
+                # from the configs/example/arm/devices.py file.
+                self.dmabridge = Bridge(delay="50ns", ranges=self.mem_ranges)
+                self.dmabridge.mem_side_port = (
+                    self.cache_hierarchy.get_cpu_side_port()
+                )
+                self.dmabridge.cpu_side_port = self.iobus.mem_side_ports
+
+            # The classic caches are setup in the  _setup_io_cache() method
+            # defined under the cachehierarchy class. Verified it with both
+            # PrivateL1PrivateL2CacheHierarchy and PrivateL1CacheHierarchy
+            # classes.
+            self.realview.attachOnChipIO(
+                self.cache_hierarchy.membus, self.iobridge
+            )
+            self.realview.attachIO(self.iobus)
+
+    @overrides(AbstractBoard)
+    def get_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
+        all_ports = [
+            (self.realview.bootmem.range, self.realview.bootmem.port),
+        ] + self.get_memory().get_mem_ports()
+
+        return all_ports
 
     @overrides(AbstractBoard)
     def has_io_bus(self) -> bool:
@@ -248,10 +282,20 @@
 
     @overrides(AbstractBoard)
     def get_io_bus(self) -> IOXBar:
-        return [self.iobus.cpu_side_ports, self.iobus.mem_side_ports]
+        return self.iobus
 
     @overrides(AbstractBoard)
     def has_coherent_io(self) -> bool:
+        # The setup of the caches gets a little tricky here. We need to
+        # override the default cache_hierarchy.iobridge due to different delay
+        # values (see method _setup_io_devices()). One way to do it would be to
+        # prevent creating cache_hierarchy.iobridge altogether. We trick
+        # NoCache() to assume that this board has no coherent_io and we
+        # simply set up our own iobridge in the _setup_io_devices() method.
+        if isinstance(self.cache_hierarchy, NoCache):
+            return False
+        # In all other cases, we use the default values setup in the
+        # respective cache hierarchy class.
         return True
 
     @overrides(AbstractBoard)
@@ -262,15 +306,17 @@
     def has_dma_ports(self) -> bool:
         return True
 
-    def _setup_coherent_io_bridge(self, board: AbstractBoard) -> None:
-        pass
-
     @overrides(AbstractBoard)
     def get_dma_ports(self) -> List[Port]:
-        return [
-            self.cache_hierarchy.get_cpu_side_port(),
-            self.iobus.mem_side_ports,
-        ]
+        # The DMA ports differ depending upon the cache hierarchy. The method
+        # self.set_dma_ports takes care of that. In the case of ruby caches,
+        # this method should initially return an empty list.
+        if self.cache_hierarchy.is_ruby():
+            if self._dma_ports is None:
+                self._dma_ports = []
+
+        # For classic caches, _dma_ports is left as None (its initial value).
+        return self._dma_ports
 
     @overrides(AbstractBoard)
     def connect_system_port(self, port: Port) -> None:
@@ -280,91 +326,60 @@
     def get_disk_device(self):
         return "/dev/vda"
 
-    @overrides(KernelDiskWorkload)
-    def _add_disk_to_board(self, disk_image: AbstractResource):
+    def _setup_pci_devices(self):
 
-        # We define the image.
+        # We define the image. The _image has to be None initially.
+        assert self._image is None
 
-        image = CowDiskImage(
+        self._image = CowDiskImage(
             child=RawDiskImage(read_only=True), read_only=False
         )
 
-        self.pci_devices = [PciVirtIO(vio=VirtIOBlock(image=image))]
-        self.realview.attachPciDevice(self.pci_devices[0], self.iobus)
+        self.pci_devices = [PciVirtIO(vio=VirtIOBlock(image=self._image))]
+
+        for device in self.pci_devices:
+            self.realview.attachPciDevice(
+                device, self.iobus, dma_ports=self.get_dma_ports()
+            )
+
+    @overrides(KernelDiskWorkload)
+    def _add_disk_to_board(self, disk_image: AbstractResource):
+
+        assert self._image is not None
 
         # Now that the disk and workload are set, we can generate the device
         # tree file. We will generate the dtb file everytime the board is
         # boot-up.
-
-        image.child.image_file = disk_image.get_local_path()
-
-        # _setup_io_devices needs to be implemented.
-
-        self._setup_io_devices()
+        self._image.child.image_file = disk_image.get_local_path()
 
         # Specifying the dtb file location to the workload.
-
         self.workload.dtb_filename = os.path.join(
             m5.options.outdir, "device.dtb"
         )
 
         # Calling generateDtb from class ArmSystem to add memory information to
         # the dtb file.
-
         self.generateDtb(self.workload.dtb_filename)
 
         # Finally we need to setup the bootloader for the ArmBoard. An ARM
         # system requires three inputs to simulate a full system: a disk image,
         # the kernel file and the bootloader file(s).
-
         self.realview.setupBootLoader(
             self, self.workload.dtb_filename, self._bootloader
         )
 
-    def _get_memory_ranges(self, mem_size) -> list:
-        """
-        This method is taken from configs/example/arm/devices.py. It sets up
-        all the memory ranges for the board.
-        """
-        mem_ranges = []
-
-        for mem_range in self.realview._mem_regions:
-            size_in_range = min(mem_size, mem_range.size())
-            mem_ranges.append(
-                AddrRange(start=mem_range.start, size=size_in_range)
-            )
-
-            mem_size -= size_in_range
-            if mem_size == 0:
-                return mem_ranges
-
-        raise ValueError("Memory size too big for platform capabilities")
-
     @overrides(AbstractBoard)
     def _setup_memory_ranges(self) -> None:
         """
-        The ArmBoard's memory can only be setup after realview is setup. Once
-        realview is initialized, we call _setup_arm_memory_ranges() to
-        correctly setup the memory ranges.
+        The ArmBoard's memory can only be setup after realview is setup. We set
+        this up in the `_setup_board` function.
         """
         pass
 
-    def _setup_arm_memory_ranges(self) -> None:
-
-        # We setup the memory here. The memory size is specified in the run
-        # script that the user uses.
-
-        memory = self.get_memory()
-        mem_size = memory.get_size()
-
-        self.mem_ranges = self._get_memory_ranges(mem_size)
-        memory.set_memory_range(self.mem_ranges)
-
     @overrides(KernelDiskWorkload)
     def get_default_kernel_args(self) -> List[str]:
 
         # The default kernel string is taken from the devices.py file.
-
         return [
             "console=ttyAMA0",
             "lpj=19988480",
diff --git a/tests/gem5/arm-boot-tests/test_linux_boot.py b/tests/gem5/arm-boot-tests/test_linux_boot.py
index 13a74a3..9a6f671 100644
--- a/tests/gem5/arm-boot-tests/test_linux_boot.py
+++ b/tests/gem5/arm-boot-tests/test_linux_boot.py
@@ -39,13 +39,13 @@
 def test_boot(
     cpu: str,
     num_cpus: int,
-    cache_type: str,
+    mem_system: str,
     memory_class: str,
     length: str,
     to_tick: Optional[int] = None,
 ):
 
-    name = f"{cpu}-cpu_{num_cpus}-cores_{cache_type}_{memory_class}_\
+    name = f"{cpu}-cpu_{num_cpus}-cores_{mem_system}_{memory_class}_\
 arm-boot-test"
 
     verifiers = []
@@ -56,7 +56,7 @@
         "--num-cpus",
         str(num_cpus),
         "--mem-system",
-        cache_type,
+        mem_system,
         "--dram-class",
         memory_class,
         "--resource-directory",
@@ -75,6 +75,19 @@
     else:
         name += "_m5-exit"
 
+    if mem_system == "chi":
+        protocol_to_use = "CHI"
+        isa_to_use = constants.arm_tag
+    elif mem_system == "mesi_two_level":
+        protocol_to_use = None
+        isa_to_use = constants.all_compiled_tag
+    elif mem_system == "mi_example":
+        protocol_to_use = "MI_example"
+        isa_to_use = constants.arm_tag
+    else:
+        protocol_to_use = None
+        isa_to_use = constants.all_compiled_tag
+
     gem5_verify_config(
         name=name,
         verifiers=verifiers,
@@ -87,9 +100,10 @@
             "arm_boot_exit_run.py",
         ),
         config_args=config_args,
-        valid_isas=(constants.all_compiled_tag,),
+        valid_isas=(isa_to_use,),
         valid_hosts=constants.supported_hosts,
         length=length,
+        protocol=protocol_to_use,
     )
 
 
@@ -98,7 +112,7 @@
 test_boot(
     cpu="atomic",
     num_cpus=1,
-    cache_type="classic",
+    mem_system="classic",
     memory_class="SingleChannelDDR3_1600",
     length=constants.quick_tag,
     to_tick=10000000000,
@@ -107,7 +121,7 @@
 test_boot(
     cpu="timing",
     num_cpus=1,
-    cache_type="classic",
+    mem_system="classic",
     memory_class="SingleChannelDDR3_2133",
     length=constants.quick_tag,
     to_tick=10000000000,
@@ -116,7 +130,7 @@
 test_boot(
     cpu="o3",
     num_cpus=1,
-    cache_type="classic",
+    mem_system="classic",
     memory_class="DualChannelDDR3_1600",
     length=constants.quick_tag,
     to_tick=10000000000,
@@ -124,9 +138,45 @@
 
 test_boot(
     cpu="timing",
-    num_cpus=4,
-    cache_type="classic",
-    memory_class="DualChannelDDR3_2133",
+    num_cpus=2,
+    mem_system="classic",
+    memory_class="DualChannelDDR4_2400",
+    length=constants.quick_tag,
+    to_tick=10000000000,
+)
+
+test_boot(
+    cpu="timing",
+    num_cpus=2,
+    mem_system="no_cache",
+    memory_class="DualChannelDDR4_2400",
+    length=constants.quick_tag,
+    to_tick=10000000000,
+)
+
+test_boot(
+    cpu="timing",
+    num_cpus=2,
+    mem_system="chi",
+    memory_class="DualChannelDDR4_2400",
+    length=constants.quick_tag,
+    to_tick=10000000000,
+)
+
+test_boot(
+    cpu="timing",
+    num_cpus=2,
+    mem_system="mesi_two_level",
+    memory_class="DualChannelDDR4_2400",
+    length=constants.quick_tag,
+    to_tick=10000000000,
+)
+
+test_boot(
+    cpu="timing",
+    num_cpus=2,
+    mem_system="mi_example",
+    memory_class="DualChannelDDR4_2400",
     length=constants.quick_tag,
     to_tick=10000000000,
 )
@@ -136,7 +186,7 @@
 test_boot(
     cpu="atomic",
     num_cpus=1,
-    cache_type="classic",
+    mem_system="classic",
     memory_class="SingleChannelDDR3_1600",
     length=constants.long_tag,
 )
@@ -144,7 +194,7 @@
 test_boot(
     cpu="timing",
     num_cpus=1,
-    cache_type="classic",
+    mem_system="classic",
     memory_class="SingleChannelDDR3_2133",
     length=constants.long_tag,
 )
@@ -152,7 +202,7 @@
 test_boot(
     cpu="o3",
     num_cpus=1,
-    cache_type="classic",
+    mem_system="classic",
     memory_class="DualChannelDDR3_1600",
     length=constants.long_tag,
 )
@@ -160,7 +210,47 @@
 test_boot(
     cpu="timing",
     num_cpus=4,
-    cache_type="classic",
+    mem_system="classic",
     memory_class="HBM2Stack",
     length=constants.long_tag,
 )
+
+test_boot(
+    cpu="timing",
+    num_cpus=2,
+    mem_system="classic",
+    memory_class="DualChannelDDR4_2400",
+    length=constants.long_tag,
+)
+
+test_boot(
+    cpu="timing",
+    num_cpus=2,
+    mem_system="no_cache",
+    memory_class="DualChannelDDR4_2400",
+    length=constants.long_tag,
+)
+
+test_boot(
+    cpu="timing",
+    num_cpus=2,
+    mem_system="chi",
+    memory_class="DualChannelDDR4_2400",
+    length=constants.long_tag,
+)
+
+test_boot(
+    cpu="timing",
+    num_cpus=2,
+    mem_system="mesi_two_level",
+    memory_class="DualChannelDDR4_2400",
+    length=constants.long_tag,
+)
+
+test_boot(
+    cpu="timing",
+    num_cpus=2,
+    mem_system="mi_example",
+    memory_class="DualChannelDDR4_2400",
+    length=constants.long_tag,
+)
diff --git a/tests/gem5/configs/arm_boot_exit_run.py b/tests/gem5/configs/arm_boot_exit_run.py
index 1e8b52a..aea3c41 100644
--- a/tests/gem5/configs/arm_boot_exit_run.py
+++ b/tests/gem5/configs/arm_boot_exit_run.py
@@ -39,6 +39,7 @@
 from gem5.resources.resource import Resource
 from gem5.simulate.simulator import Simulator
 from m5.objects import VExpress_GEM5_Foundation
+from gem5.coherence_protocol import CoherenceProtocol
 from gem5.components.boards.arm_board import ArmBoard
 from gem5.components.processors.simple_processor import SimpleProcessor
 from gem5.components.processors.cpu_types import (
@@ -75,7 +76,7 @@
     "-m",
     "--mem-system",
     type=str,
-    choices=("classic"),
+    choices=("no_cache", "classic", "chi", "mesi_two_level", "mi_example"),
     required=True,
     help="The memory system.",
 )
@@ -110,15 +111,54 @@
 # Run a check to ensure the right version of gem5 is being used.
 requires(isa_required=ISA.ARM)
 
-if args.mem_system == "classic":
+if args.mem_system == "no_cache":
+    from gem5.components.cachehierarchies.classic.no_cache import NoCache
+
+    cache_hierarchy = NoCache()
+
+elif args.mem_system == "classic":
     from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import (
         PrivateL1PrivateL2CacheHierarchy,
     )
 
-    # Setup the cache hierarchy.
     cache_hierarchy = PrivateL1PrivateL2CacheHierarchy(
         l1d_size="32KiB", l1i_size="32KiB", l2_size="512KiB"
     )
+
+elif args.mem_system == "chi":
+    requires(coherence_protocol_required=CoherenceProtocol.CHI)
+    from gem5.components.cachehierarchies.chi.private_l1_cache_hierarchy import (
+        PrivateL1CacheHierarchy,
+    )
+
+    cache_hierarchy = PrivateL1CacheHierarchy(
+        size="16kB",
+        assoc=4,
+    )
+
+elif args.mem_system == "mesi_two_level":
+    requires(coherence_protocol_required=CoherenceProtocol.MESI_TWO_LEVEL)
+    from gem5.components.cachehierarchies.ruby.mesi_two_level_cache_hierarchy import (
+        MESITwoLevelCacheHierarchy,
+    )
+
+    cache_hierarchy = MESITwoLevelCacheHierarchy(
+        l1d_size="32kB",
+        l1d_assoc=8,
+        l1i_size="32kB",
+        l1i_assoc=8,
+        l2_size="256kB",
+        l2_assoc=16,
+        num_l2_banks=2,
+    )
+
+elif args.mem_system == "mi_example":
+    requires(coherence_protocol_required=CoherenceProtocol.MI_EXAMPLE)
+    from gem5.components.cachehierarchies.ruby.mi_example_cache_hierarchy import (
+        MIExampleCacheHierarchy,
+    )
+
+    cache_hierarchy = MIExampleCacheHierarchy(size="32kB", assoc=4)
 else:
     raise NotImplementedError(
         "Memory type '{}' is not supported in the boot tests.".format(