dev-arm: Implement a SMMUv3 model

This is an implementation of the SMMUv3 architecture.

What can it do?
- Single-stage and nested translation with 4k or 64k granule.  16k would
  be straightforward to add.
- Large pages are supported.
- Works with any gem5 device as long as it is issuing packets with a
  valid (Sub)StreamId

What can't it do?
- Fragment stage 1 page when the underlying stage 2 page is smaller.  S1
  page size > S2 page size is not supported
- Invalidations take zero time. This wouldn't be hard to fix.
- Checkpointing is not supported
- Stall/resume for faulting transactions is not supported

Additional contributors:
- Michiel W. van Tol <Michiel.VanTol@arm.com>
- Giacomo Travaglini <giacomo.travaglini@arm.com>

Change-Id: Ibc606fccd9199b2c1ba739c6335c846ffaa4d564
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/19008
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
diff --git a/src/dev/arm/SConscript b/src/dev/arm/SConscript
index 7d14abe..0b015e2 100644
--- a/src/dev/arm/SConscript
+++ b/src/dev/arm/SConscript
@@ -44,6 +44,7 @@
     SimObject('FlashDevice.py')
     SimObject('Gic.py')
     SimObject('RealView.py')
+    SimObject('SMMUv3.py')
     SimObject('UFSHostDevice.py')
     SimObject('EnergyCtrl.py')
     SimObject('NoMali.py')
@@ -66,6 +67,15 @@
     Source('pl111.cc')
     Source('hdlcd.cc')
     Source('kmi.cc')
+    Source('smmu_v3.cc')
+    Source('smmu_v3_caches.cc')
+    Source('smmu_v3_cmdexec.cc')
+    Source('smmu_v3_events.cc')
+    Source('smmu_v3_ports.cc')
+    Source('smmu_v3_proc.cc')
+    Source('smmu_v3_ptops.cc')
+    Source('smmu_v3_slaveifc.cc')
+    Source('smmu_v3_transl.cc')
     Source('timer_sp804.cc')
     Source('gpu_nomali.cc')
     Source('pci_host.cc')
@@ -88,6 +98,8 @@
     DebugFlag('GIC')
     DebugFlag('ITS')
     DebugFlag('RVCTRL')
+    DebugFlag('SMMUv3')
+    DebugFlag('SMMUv3Hazard')
     DebugFlag('EnergyCtrl')
     DebugFlag('UFSHostDevice')
     DebugFlag('VGIC')
diff --git a/src/dev/arm/SMMUv3.py b/src/dev/arm/SMMUv3.py
new file mode 100644
index 0000000..3c32508
--- /dev/null
+++ b/src/dev/arm/SMMUv3.py
@@ -0,0 +1,209 @@
+# Copyright (c) 2013, 2018-2019 ARM Limited
+# All rights reserved
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Stan Czerniawski
+#          Giacomo Travaglini
+
+from m5.params import *
+from m5.proxy import *
+from m5.util.fdthelper import *
+from m5.SimObject import *
+from MemObject import MemObject
+
+class SMMUv3SlaveInterface(MemObject):
+    type = 'SMMUv3SlaveInterface'
+    cxx_header = 'dev/arm/smmu_v3_slaveifc.hh'
+
+    slave = SlavePort('Device port')
+    ats_master = MasterPort('ATS master port')
+    ats_slave  = SlavePort('ATS slave port')
+
+    port_width = Param.Unsigned(16, 'Port width in bytes (= 1 beat)')
+    wrbuf_slots = Param.Unsigned(16, 'Write buffer size (in beats)')
+    xlate_slots = Param.Unsigned(16, 'Translation slots')
+
+    utlb_entries = Param.Unsigned(32, 'Micro TLB size (entries)')
+    utlb_assoc = Param.Unsigned(0, 'Micro TLB associativity (0=full)')
+    utlb_policy = Param.String('rr', 'Micro TLB replacement policy')
+    utlb_enable = Param.Bool(True, 'Micro TLB enable')
+    utlb_lat = Param.Cycles(1, 'Micro TLB lookup latency')
+    utlb_slots = Param.Cycles(1, 'Micro TLB lookup slots')
+
+    tlb_entries = Param.Unsigned(2048, 'Main TLB size (entries)')
+    tlb_assoc = Param.Unsigned(4, 'Main TLB associativity (0=full)')
+    tlb_policy = Param.String('rr', 'Main TLB replacement policy')
+    tlb_enable = Param.Bool(True, 'Main TLB enable')
+    tlb_lat = Param.Cycles(3, 'Main TLB lookup latency')
+    tlb_slots = Param.Cycles(3, 'Main TLB lookup slots')
+
+    prefetch_enable = Param.Bool(False,
+        'Enable prefetch')
+    prefetch_reserve_last_way = Param.Bool(True,
+        'Reserve last way of the main TLB for prefetched entries')
+
+class SMMUv3(MemObject):
+    type = 'SMMUv3'
+    cxx_header = 'dev/arm/smmu_v3.hh'
+
+    master = MasterPort('Master port')
+    master_walker = MasterPort(
+        'Master port for SMMU initiated HWTW requests (optional)')
+    control = SlavePort('Control port for accessing memory-mapped registers')
+    sample_period = Param.Clock('10us', 'Stats sample period')
+    reg_map = Param.AddrRange('Address range for control registers')
+    system = Param.System(Parent.any, "System this device is part of")
+
+    slave_interfaces = VectorParam.SMMUv3SlaveInterface([], "Slave interfaces")
+
+    # SLAVE INTERFACE<->SMMU link parameters
+    ifc_smmu_lat = Param.Cycles(8, 'IFC to SMMU communication latency')
+    smmu_ifc_lat = Param.Cycles(8, 'SMMU to IFC communication latency')
+
+    # SMMU parameters
+    xlate_slots = Param.Unsigned(64, 'SMMU translation slots')
+    ptw_slots = Param.Unsigned(16, 'SMMU page table walk slots')
+
+    master_port_width = Param.Unsigned(16,
+        'Master port width in bytes (= 1 beat)')
+
+    tlb_entries = Param.Unsigned(2048, 'TLB size (entries)')
+    tlb_assoc = Param.Unsigned(4, 'TLB associativity (0=full)')
+    tlb_policy = Param.String('rr', 'TLB replacement policy')
+    tlb_enable = Param.Bool(False, 'TLB enable')
+    tlb_lat = Param.Cycles(3, 'TLB lookup latency')
+    tlb_slots = Param.Cycles(3, 'TLB lookup slots')
+
+    cfg_entries = Param.Unsigned(64, 'Config cache size (entries)')
+    cfg_assoc = Param.Unsigned(4, 'Config cache associativity (0=full)')
+    cfg_policy = Param.String('rr', 'Config cache replacement policy')
+    cfg_enable = Param.Bool(True, 'Config cache enable')
+    cfg_lat = Param.Cycles(3, 'Config cache lookup latency')
+    cfg_slots = Param.Cycles(3, 'Config cache lookup slots')
+
+    ipa_entries = Param.Unsigned(128, 'IPA cache size (entries)')
+    ipa_assoc = Param.Unsigned(4, 'IPA cache associativity (0=full)')
+    ipa_policy = Param.String('rr', 'IPA cache replacement policy')
+    ipa_enable = Param.Bool(False, 'IPA cache enable')
+    ipa_lat = Param.Cycles(3, 'IPA cache lookup latency')
+    ipa_slots = Param.Cycles(3, 'IPA cache lookup slots')
+
+    walk_S1L0 = Param.Unsigned(4, 'Walk cache S1L0 size (entries)')
+    walk_S1L1 = Param.Unsigned(28, 'Walk cache S1L1 size (entries)')
+    walk_S1L2 = Param.Unsigned(348, 'Walk cache S1L2 size (entries)')
+    walk_S1L3 = Param.Unsigned(4, 'Walk cache S1L3 size (entries)')
+    walk_S2L0 = Param.Unsigned(4, 'Walk cache S2L0 size (entries)')
+    walk_S2L1 = Param.Unsigned(28, 'Walk cache S2L1 size (entries)')
+    walk_S2L2 = Param.Unsigned(92, 'Walk cache S2L2 size (entries)')
+    walk_S2L3 = Param.Unsigned(4, 'Walk cache S2L3 size (entries)')
+    walk_assoc = Param.Unsigned(4, 'Walk cache associativity (0=full)')
+    walk_policy = Param.String('rr', 'Walk cache replacement policy')
+    walk_enable = Param.Bool(True, 'Walk cache enable')
+    wc_nonfinal_enable = Param.Bool(False,
+        'Nonfinal translations use walk cache')
+    wc_s1_levels = Param.Unsigned(7,
+        'S1 PT levels cached in walk cache (bit 0 is L0, bit 1 is L1, etc)')
+    wc_s2_levels = Param.Unsigned(7,
+        'S2 PT levels cached in walk cache (bit 0 is L0, bit 1 is L1, etc)')
+
+    walk_lat   = Param.Cycles(4, 'Walk cache lookup latency')
+    walk_slots = Param.Cycles(4, 'Walk cache lookup slots')
+
+    # [28:27] ST_LEVEL = 0b01, 2-level Stream Table supported in addition
+    # to Linear Stream table.
+    # [25:24] STALL_MODEL = 0b01, Stall is not supported, all faults
+    # terminate transaction.
+    # [22:21] TTENDIAN = 0b10, Endianness support for translation table walks
+    # (0b10 = Little-endian).
+    # [19] CD2L = 0b1, 2-level CD table supported.
+    # [18] VMID16 = 0b1, 16-bit VMID supported.
+    # [12] ASID16 = 0b1, 16-bit ASID supported.
+    # [3:2] TTF = 0b10, Translation Table Formats (Stage 1/2)
+    # (0b10 = AArch64).
+    # [1] S1P = 0b1, Stage 1 translation supported.
+    # [0] S2P = 0b1, Stage 2 translation supported.
+    smmu_idr0 = Param.UInt32(0x094C100F, "SMMU_IDR0 register")
+
+    # [25:21] CMDQS = 0b00101, Maximum number of Command queue entries
+    # as log 2 (entries) (0b00101 = 32 entries).
+    smmu_idr1 = Param.UInt32(0x00A00000, "SMMU_IDR1 register")
+
+    smmu_idr2 = Param.UInt32(0, "SMMU_IDR2 register")
+    smmu_idr3 = Param.UInt32(0, "SMMU_IDR3 register")
+    smmu_idr4 = Param.UInt32(0, "SMMU_IDR4 register")
+
+    # [6] GRAN64K = 0b1, 64KB translation granule supported.
+    # [4] GRAN4K = 0b1, 4KB translation granule supported.
+    # [2:0] OAS = 0b101, Output Address Size (0b101 = 48-bit).
+    smmu_idr5 = Param.UInt32(0x55, "SMMU_IDR5 register")
+    smmu_iidr = Param.UInt32(0, "SMMU_IIDR register")
+
+    # [7:0] (0 = SMMUv3.0) (1 = SMMUv3.1)
+    smmu_aidr = Param.UInt32(0, "SMMU_AIDR register")
+
+    def generateDeviceTree(self, state):
+        reg_addr = self.reg_map.start
+        reg_size = self.reg_map.size()
+        node = FdtNode("smmuv3@%x" % int(reg_addr))
+        node.appendCompatible("arm,smmu-v3")
+        node.append(FdtPropertyWords("reg",
+            state.addrCells(reg_addr) +
+            state.sizeCells(reg_size)))
+        node.append(FdtPropertyWords("#iommu-cells", [1]))
+
+        node.appendPhandle(self)
+        yield node
+
+    def connect(self, device, bus):
+        """
+        Helper method used to connect the SMMU. The device could
+        expose either a dma port (if the SMMU is attached directly to a
+        dma device) or a master port (this is the case where the SMMU
+        is attached to a bridge).
+        """
+
+        self.master = bus.slave
+        self.control = bus.master
+
+        slave_interface = SMMUv3SlaveInterface()
+
+        if hasattr(device, "master"):
+            slave_interface.slave = device.master
+        elif hasattr(device, "dma"):
+            slave_interface.slave = device.dma
+        else:
+            import sys  # local import: 'sys' is not imported at module level
+            sys.exit("Unable to attach SMMUv3: device has no master/dma port")
+
+        self.slave_interfaces.append(slave_interface)
diff --git a/src/dev/arm/amba.hh b/src/dev/arm/amba.hh
new file mode 100644
index 0000000..cfc1942
--- /dev/null
+++ b/src/dev/arm/amba.hh
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Giacomo Travaglini
+ */
+
+#ifndef __DEV_ARM_AMBA_HH__
+#define __DEV_ARM_AMBA_HH__
+
+#include "mem/packet.hh"
+
+namespace AMBA
+{
+
+typedef MasterID OrderID;
+
+static OrderID
+orderId(PacketPtr pkt)
+{
+    return pkt->req->masterId();
+}
+
+} // namespace AMBA
+
+#endif // __DEV_ARM_AMBA_HH__
diff --git a/src/dev/arm/smmu_v3.cc b/src/dev/arm/smmu_v3.cc
new file mode 100644
index 0000000..2517649
--- /dev/null
+++ b/src/dev/arm/smmu_v3.cc
@@ -0,0 +1,781 @@
+/*
+ * Copyright (c) 2013, 2018-2019 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Stan Czerniawski
+ */
+
+#include "dev/arm/smmu_v3.hh"
+
+#include <cstddef>
+#include <cstring>
+
+#include "base/bitfield.hh"
+#include "base/cast.hh"
+#include "base/logging.hh"
+#include "base/trace.hh"
+#include "base/types.hh"
+#include "debug/Checkpoint.hh"
+#include "debug/SMMUv3.hh"
+#include "dev/arm/smmu_v3_transl.hh"
+#include "mem/packet_access.hh"
+#include "sim/system.hh"
+
+SMMUv3::SMMUv3(SMMUv3Params *params) :
+    MemObject(params),
+    system(*params->system),
+    masterId(params->system->getMasterId(this)),
+    masterPort(name() + ".master", *this),
+    masterTableWalkPort(name() + ".master_walker", *this),
+    controlPort(name() + ".control", *this, params->reg_map),
+    tlb(params->tlb_entries, params->tlb_assoc, params->tlb_policy),
+    configCache(params->cfg_entries, params->cfg_assoc, params->cfg_policy),
+    ipaCache(params->ipa_entries, params->ipa_assoc, params->ipa_policy),
+    walkCache({ { params->walk_S1L0, params->walk_S1L1,
+                  params->walk_S1L2, params->walk_S1L3,
+                  params->walk_S2L0, params->walk_S2L1,
+                  params->walk_S2L2, params->walk_S2L3 } },
+              params->walk_assoc, params->walk_policy),
+    tlbEnable(params->tlb_enable),
+    configCacheEnable(params->cfg_enable),
+    ipaCacheEnable(params->ipa_enable),
+    walkCacheEnable(params->walk_enable),
+    tableWalkPortEnable(false),
+    walkCacheNonfinalEnable(params->wc_nonfinal_enable),
+    walkCacheS1Levels(params->wc_s1_levels),
+    walkCacheS2Levels(params->wc_s2_levels),
+    masterPortWidth(params->master_port_width),
+    tlbSem(params->tlb_slots),
+    ifcSmmuSem(1),
+    smmuIfcSem(1),
+    configSem(params->cfg_slots),
+    ipaSem(params->ipa_slots),
+    walkSem(params->walk_slots),
+    masterPortSem(1),
+    transSem(params->xlate_slots),
+    ptwSem(params->ptw_slots),
+    cycleSem(1),
+    tlbLat(params->tlb_lat),
+    ifcSmmuLat(params->ifc_smmu_lat),
+    smmuIfcLat(params->smmu_ifc_lat),
+    configLat(params->cfg_lat),
+    ipaLat(params->ipa_lat),
+    walkLat(params->walk_lat),
+    slaveInterfaces(params->slave_interfaces),
+    commandExecutor(name() + ".cmd_exec", *this),
+    regsMap(params->reg_map),
+    processCommandsEvent(this)
+{
+    fatal_if(regsMap.size() != SMMU_REG_SIZE,
+        "Invalid register map size: %#x different than SMMU_REG_SIZE = %#x\n",
+        regsMap.size(), SMMU_REG_SIZE);
+
+    // Init smmu registers to 0
+    memset(&regs, 0, sizeof(regs));
+
+    // Setup RO ID registers
+    regs.idr0 = params->smmu_idr0;
+    regs.idr1 = params->smmu_idr1;
+    regs.idr2 = params->smmu_idr2;
+    regs.idr3 = params->smmu_idr3;
+    regs.idr4 = params->smmu_idr4;
+    regs.idr5 = params->smmu_idr5;
+    regs.iidr = params->smmu_iidr;
+    regs.aidr = params->smmu_aidr;
+
+    // TODO: At the moment it is possible to set the ID registers to hold
+    // any possible value. It would be nice to have a sanity check here
+    // at construction time in case some idx registers are programmed to
+    // store disallowed values or if there are configuration conflicts.
+    warn("SMMUv3 IDx register values unchecked\n");
+
+    for (auto ifc : slaveInterfaces)
+        ifc->setSMMU(this);
+}
+
+bool
+SMMUv3::masterRecvTimingResp(PacketPtr pkt)
+{
+    DPRINTF(SMMUv3, "[t] master resp addr=%#x size=%#x\n",
+        pkt->getAddr(), pkt->getSize());
+
+    // @todo: We need to pay for this and not just zero it out
+    pkt->headerDelay = pkt->payloadDelay = 0;
+
+    SMMUProcess *proc =
+        safe_cast<SMMUProcess *>(pkt->popSenderState());
+
+    runProcessTiming(proc, pkt);
+
+    return true;
+}
+
+void
+SMMUv3::masterRecvReqRetry()
+{
+    assert(!packetsToRetry.empty());
+
+    while (!packetsToRetry.empty()) {
+        SMMUAction a = packetsToRetry.front();
+
+        assert(a.type==ACTION_SEND_REQ || a.type==ACTION_SEND_REQ_FINAL);
+
+        DPRINTF(SMMUv3, "[t] master retr addr=%#x size=%#x\n",
+            a.pkt->getAddr(), a.pkt->getSize());
+
+        if (!masterPort.sendTimingReq(a.pkt))
+            break;
+
+        packetsToRetry.pop();
+
+        /*
+         * ACTION_SEND_REQ_FINAL means that we have just forwarded the packet
+         * on the master interface; this means that we no longer hold on to
+         * that transaction and therefore can accept a new one.
+         * If the slave port was stalled then unstall it (send retry).
+         */
+        if (a.type == ACTION_SEND_REQ_FINAL)
+            scheduleSlaveRetries();
+    }
+}
+
+bool
+SMMUv3::masterTableWalkRecvTimingResp(PacketPtr pkt)
+{
+    DPRINTF(SMMUv3, "[t] master HWTW resp addr=%#x size=%#x\n",
+        pkt->getAddr(), pkt->getSize());
+
+    // @todo: We need to pay for this and not just zero it out
+    pkt->headerDelay = pkt->payloadDelay = 0;
+
+    SMMUProcess *proc =
+        safe_cast<SMMUProcess *>(pkt->popSenderState());
+
+    runProcessTiming(proc, pkt);
+
+    return true;
+}
+
+void
+SMMUv3::masterTableWalkRecvReqRetry()
+{
+    assert(tableWalkPortEnable);
+    assert(!packetsTableWalkToRetry.empty());
+
+    while (!packetsTableWalkToRetry.empty()) {
+        SMMUAction a = packetsTableWalkToRetry.front();
+
+        assert(a.type==ACTION_SEND_REQ);
+
+        DPRINTF(SMMUv3, "[t] master HWTW retr addr=%#x size=%#x\n",
+            a.pkt->getAddr(), a.pkt->getSize());
+
+        if (!masterTableWalkPort.sendTimingReq(a.pkt))
+            break;
+
+        packetsTableWalkToRetry.pop();
+    }
+}
+
+void
+SMMUv3::scheduleSlaveRetries()
+{
+    for (auto ifc : slaveInterfaces) {
+        ifc->scheduleDeviceRetry();
+    }
+}
+
+SMMUAction
+SMMUv3::runProcess(SMMUProcess *proc, PacketPtr pkt)
+{
+    if (system.isAtomicMode()) {
+        return runProcessAtomic(proc, pkt);
+    } else if (system.isTimingMode()) {
+        return runProcessTiming(proc, pkt);
+    } else {
+        panic("Not in timing or atomic mode!");
+    }
+}
+
+SMMUAction
+SMMUv3::runProcessAtomic(SMMUProcess *proc, PacketPtr pkt)
+{
+    SMMUAction action;
+    Tick delay = 0;
+    bool finished = false;
+
+    do {
+        action = proc->run(pkt);
+
+        switch (action.type) {
+            case ACTION_SEND_REQ:
+                // Send an MMU initiated request on the table walk port if it is
+                // enabled. Otherwise, fall through and handle same as the final
+                // ACTION_SEND_REQ_FINAL request.
+                if (tableWalkPortEnable) {
+                    delay += masterTableWalkPort.sendAtomic(action.pkt);
+                    pkt = action.pkt;
+                    break;
+                }
+                M5_FALLTHROUGH;
+            case ACTION_SEND_REQ_FINAL:
+                delay += masterPort.sendAtomic(action.pkt);
+                pkt = action.pkt;
+                break;
+
+            case ACTION_SEND_RESP:
+            case ACTION_SEND_RESP_ATS:
+            case ACTION_SLEEP:
+                finished = true;
+                break;
+
+            case ACTION_DELAY:
+                delay += action.delay;
+                break;
+
+            case ACTION_TERMINATE:
+                panic("ACTION_TERMINATE in atomic mode\n");
+
+            default:
+                panic("Unknown action\n");
+        }
+    } while (!finished);
+
+    action.delay = delay;
+
+    return action;
+}
+
+SMMUAction
+SMMUv3::runProcessTiming(SMMUProcess *proc, PacketPtr pkt)
+{
+    SMMUAction action = proc->run(pkt);
+
+    switch (action.type) {
+        case ACTION_SEND_REQ:
+            // Send an MMU initiated request on the table walk port if it is
+            // enabled. Otherwise, fall through and handle same as the final
+            // ACTION_SEND_REQ_FINAL request.
+            if (tableWalkPortEnable) {
+                action.pkt->pushSenderState(proc);
+
+                DPRINTF(SMMUv3, "[t] master HWTW req  addr=%#x size=%#x\n",
+                        action.pkt->getAddr(), action.pkt->getSize());
+
+                if (packetsTableWalkToRetry.empty()
+                        && masterTableWalkPort.sendTimingReq(action.pkt)) {
+                    scheduleSlaveRetries();
+                } else {
+                    DPRINTF(SMMUv3, "[t] master HWTW req  needs retry,"
+                            " qlen=%d\n", packetsTableWalkToRetry.size());
+                    packetsTableWalkToRetry.push(action);
+                }
+
+                break;
+            }
+            M5_FALLTHROUGH;
+        case ACTION_SEND_REQ_FINAL:
+            action.pkt->pushSenderState(proc);
+
+            DPRINTF(SMMUv3, "[t] master req  addr=%#x size=%#x\n",
+                    action.pkt->getAddr(), action.pkt->getSize());
+
+            if (packetsToRetry.empty() && masterPort.sendTimingReq(action.pkt)) {
+                scheduleSlaveRetries();
+            } else {
+                DPRINTF(SMMUv3, "[t] master req  needs retry, qlen=%d\n",
+                        packetsToRetry.size());
+                packetsToRetry.push(action);
+            }
+
+            break;
+
+        case ACTION_SEND_RESP:
+            // @todo: We need to pay for this and not just zero it out
+            action.pkt->headerDelay = action.pkt->payloadDelay = 0;
+
+            DPRINTF(SMMUv3, "[t] slave resp addr=%#x size=%#x\n",
+                    action.pkt->getAddr(),
+                    action.pkt->getSize());
+
+            assert(action.ifc);
+            action.ifc->schedTimingResp(action.pkt);
+
+            delete proc;
+            break;
+
+        case ACTION_SEND_RESP_ATS:
+            // @todo: We need to pay for this and not just zero it out
+            action.pkt->headerDelay = action.pkt->payloadDelay = 0;
+
+            DPRINTF(SMMUv3, "[t] ATS slave resp addr=%#x size=%#x\n",
+                    action.pkt->getAddr(), action.pkt->getSize());
+
+            assert(action.ifc);
+            action.ifc->schedAtsTimingResp(action.pkt);
+
+            delete proc;
+            break;
+
+        case ACTION_DELAY:
+        case ACTION_SLEEP:
+            break;
+
+        case ACTION_TERMINATE:
+            delete proc;
+            break;
+
+        default:
+            panic("Unknown action\n");
+    }
+
+    return action;
+}
+
+void
+SMMUv3::processCommands()
+{
+    DPRINTF(SMMUv3, "processCommands()\n");
+
+    if (system.isAtomicMode()) {
+        SMMUAction a = runProcessAtomic(&commandExecutor, NULL);
+        (void) a;
+    } else if (system.isTimingMode()) {
+        if (!commandExecutor.isBusy())
+            runProcessTiming(&commandExecutor, NULL);
+    } else {
+        panic("Not in timing or atomic mode!");
+    }
+}
+
+void
+SMMUv3::processCommand(const SMMUCommand &cmd)
+{
+    switch (cmd.type) {
+        case CMD_PRF_CONFIG:
+            DPRINTF(SMMUv3, "CMD_PREFETCH_CONFIG - ignored\n");
+            break;
+
+        case CMD_PRF_ADDR:
+            DPRINTF(SMMUv3, "CMD_PREFETCH_ADDR - ignored\n");
+            break;
+
+        case CMD_INV_STE:
+            DPRINTF(SMMUv3, "CMD_INV_STE sid=%#x\n", cmd.data[0]);
+            configCache.invalidateSID(cmd.data[0]);
+            break;
+
+        case CMD_INV_CD:
+            DPRINTF(SMMUv3, "CMD_INV_CD sid=%#x ssid=%#x\n",
+                cmd.data[0], cmd.data[1]);
+            configCache.invalidateSSID(cmd.data[0], cmd.data[1]);
+            break;
+
+        case CMD_INV_CD_ALL:
+            DPRINTF(SMMUv3, "CMD_INV_CD_ALL sid=%#x\n", cmd.data[0]);
+            configCache.invalidateSID(cmd.data[0]);
+            break;
+
+        case CMD_INV_ALL:
+            DPRINTF(SMMUv3, "CMD_INV_ALL\n");
+            configCache.invalidateAll();
+            break;
+
+        case CMD_TLBI_ALL:
+            DPRINTF(SMMUv3, "CMD_TLBI_ALL\n");
+            for (auto slave_interface : slaveInterfaces) {
+                slave_interface->microTLB->invalidateAll();
+                slave_interface->mainTLB->invalidateAll();
+            }
+            tlb.invalidateAll();
+            ipaCache.invalidateAll();
+            walkCache.invalidateAll();
+            break;
+
+        case CMD_TLBI_ASID:
+            DPRINTF(SMMUv3, "CMD_TLBI_ASID asid=%#x vmid=%#x\n",
+                cmd.data[0], cmd.data[1]);
+            for (auto slave_interface : slaveInterfaces) {
+                slave_interface->microTLB->invalidateASID(
+                    cmd.data[0], cmd.data[1]);
+                slave_interface->mainTLB->invalidateASID(
+                    cmd.data[0], cmd.data[1]);
+            }
+            tlb.invalidateASID(cmd.data[0], cmd.data[1]);
+            walkCache.invalidateASID(cmd.data[0], cmd.data[1]);
+            break;
+
+        case CMD_TLBI_VAAL:
+            DPRINTF(SMMUv3, "CMD_TLBI_VAAL va=%#08x vmid=%#x\n",
+                cmd.data[0], cmd.data[1]);
+            for (auto slave_interface : slaveInterfaces) {
+                slave_interface->microTLB->invalidateVAA(
+                    cmd.data[0], cmd.data[1]);
+                slave_interface->mainTLB->invalidateVAA(
+                    cmd.data[0], cmd.data[1]);
+            }
+            tlb.invalidateVAA(cmd.data[0], cmd.data[1]);
+            break;
+
+        case CMD_TLBI_VAA:
+            DPRINTF(SMMUv3, "CMD_TLBI_VAA va=%#08x vmid=%#x\n",
+                cmd.data[0], cmd.data[1]);
+            for (auto slave_interface : slaveInterfaces) {
+                slave_interface->microTLB->invalidateVAA(
+                    cmd.data[0], cmd.data[1]);
+                slave_interface->mainTLB->invalidateVAA(
+                    cmd.data[0], cmd.data[1]);
+            }
+            tlb.invalidateVAA(cmd.data[0], cmd.data[1]);
+            walkCache.invalidateVAA(cmd.data[0], cmd.data[1]);
+            break;
+
+        case CMD_TLBI_VAL:
+            DPRINTF(SMMUv3, "CMD_TLBI_VAL va=%#08x asid=%#x vmid=%#x\n",
+                cmd.data[0], cmd.data[1], cmd.data[2]);
+            for (auto slave_interface : slaveInterfaces) {
+                slave_interface->microTLB->invalidateVA(
+                    cmd.data[0], cmd.data[1], cmd.data[2]);
+                slave_interface->mainTLB->invalidateVA(
+                    cmd.data[0], cmd.data[1], cmd.data[2]);
+            }
+            tlb.invalidateVA(cmd.data[0], cmd.data[1], cmd.data[2]);
+            break;
+
+        case CMD_TLBI_VA:
+            DPRINTF(SMMUv3, "CMD_TLBI_VA va=%#08x asid=%#x vmid=%#x\n",
+                cmd.data[0], cmd.data[1], cmd.data[2]);
+            for (auto slave_interface : slaveInterfaces) {
+                slave_interface->microTLB->invalidateVA(
+                    cmd.data[0], cmd.data[1], cmd.data[2]);
+                slave_interface->mainTLB->invalidateVA(
+                    cmd.data[0], cmd.data[1], cmd.data[2]);
+            }
+            tlb.invalidateVA(cmd.data[0], cmd.data[1], cmd.data[2]);
+            walkCache.invalidateVA(cmd.data[0], cmd.data[1], cmd.data[2]);
+            break;
+
+        case CMD_TLBI_VM_IPAL:
+            DPRINTF(SMMUv3, "CMD_TLBI_VM_IPAL ipa=%#08x vmid=%#x\n",
+                cmd.data[0], cmd.data[1]);
+            // This does not invalidate TLBs containing
+            // combined Stage1 + Stage2 translations, as per the spec.
+            ipaCache.invalidateIPA(cmd.data[0], cmd.data[1]);
+            walkCache.invalidateVMID(cmd.data[1]);
+            break;
+
+        case CMD_TLBI_VM_IPA:
+            DPRINTF(SMMUv3, "CMD_TLBI_VM_IPA ipa=%#08x vmid=%#x\n",
+                cmd.data[0], cmd.data[1]);
+            // This does not invalidate TLBs containing
+            // combined Stage1 + Stage2 translations, as per the spec.
+            ipaCache.invalidateIPA(cmd.data[0], cmd.data[1]);
+            walkCache.invalidateVMID(cmd.data[1]);
+            break;
+
+        case CMD_TLBI_VM_S12:
+            DPRINTF(SMMUv3, "CMD_TLBI_VM_S12 vmid=%#x\n", cmd.data[0]);
+            for (auto slave_interface : slaveInterfaces) {
+                slave_interface->microTLB->invalidateVMID(cmd.data[0]);
+                slave_interface->mainTLB->invalidateVMID(cmd.data[0]);
+            }
+            tlb.invalidateVMID(cmd.data[0]);
+            ipaCache.invalidateVMID(cmd.data[0]);
+            walkCache.invalidateVMID(cmd.data[0]);
+            break;
+
+        case CMD_RESUME_S:
+            DPRINTF(SMMUv3, "CMD_RESUME_S\n");
+            panic("resume unimplemented");
+            break;
+
+        default:
+            warn("Unimplemented command %#x\n", cmd.type);
+            break;
+    }
+}
+
+// Return the page-table walker helper matching the translation
+// granule encoding.  One shared instance exists per supported
+// granule size.
+const PageTableOps*
+SMMUv3::getPageTableOps(uint8_t trans_granule)
+{
+    static V8PageTableOps4k  ops4k;
+    static V8PageTableOps64k ops64k;
+
+    if (trans_granule == TRANS_GRANULE_4K)
+        return &ops4k;
+
+    if (trans_granule == TRANS_GRANULE_64K)
+        return &ops64k;
+
+    panic("Unknown translation granule size %d", trans_granule);
+}
+
+/**
+ * Handle a read of a memory-mapped SMMU register.
+ *
+ * Only 32-bit and 64-bit accesses are supported; anything else
+ * panics.  Reads of the (unimplemented) secure register block only
+ * produce a warning and fall through to the backing storage.
+ * Returns 0, i.e. the access completes with no extra latency.
+ */
+Tick
+SMMUv3::readControl(PacketPtr pkt)
+{
+    DPRINTF(SMMUv3, "readControl:  addr=%08x size=%d\n",
+            pkt->getAddr(), pkt->getSize());
+
+    // Offset of the accessed register within the register file.
+    int offset = pkt->getAddr() - regsMap.start();
+    assert(offset >= 0 && offset < SMMU_REG_SIZE);
+
+    if (inSecureBlock(offset)) {
+        warn("smmu: secure registers (0x%x) are not implemented\n",
+             offset);
+    }
+
+    // Registers are backed by a flat byte array; read it in place.
+    auto reg_ptr = regs.data + offset;
+
+    switch (pkt->getSize()) {
+      case sizeof(uint32_t):
+        pkt->setLE<uint32_t>(*reinterpret_cast<uint32_t *>(reg_ptr));
+        break;
+      case sizeof(uint64_t):
+        pkt->setLE<uint64_t>(*reinterpret_cast<uint64_t *>(reg_ptr));
+        break;
+      default:
+        panic("smmu: unallowed access size: %d bytes\n", pkt->getSize());
+        break;
+    }
+
+    pkt->makeAtomicResponse();
+
+    return 0;
+}
+
+/**
+ * Handle a write to a memory-mapped SMMU register.
+ *
+ * Each writable register has an explicit case; writes to anything
+ * else (read-only, undefined, or the unimplemented secure block)
+ * are dropped with a warning.  Returns 0 (no extra latency).
+ */
+Tick
+SMMUv3::writeControl(PacketPtr pkt)
+{
+    int offset = pkt->getAddr() - regsMap.start();
+    assert(offset >= 0 && offset < SMMU_REG_SIZE);
+
+    DPRINTF(SMMUv3, "writeControl: addr=%08x size=%d data=%16x\n",
+            pkt->getAddr(), pkt->getSize(),
+            pkt->getSize() == sizeof(uint64_t) ?
+            pkt->getLE<uint64_t>() : pkt->getLE<uint32_t>());
+
+    switch (offset) {
+        // CR0 updates are acknowledged immediately by mirroring the
+        // written value into CR0ACK.
+        case offsetof(SMMURegs, cr0):
+            assert(pkt->getSize() == sizeof(uint32_t));
+            regs.cr0 = regs.cr0ack = pkt->getLE<uint32_t>();
+            break;
+
+        // Plain 32-bit registers: store the value, no side effects.
+        case offsetof(SMMURegs, cr1):
+        case offsetof(SMMURegs, cr2):
+        case offsetof(SMMURegs, strtab_base_cfg):
+        case offsetof(SMMURegs, eventq_cons):
+        case offsetof(SMMURegs, eventq_irq_cfg1):
+        case offsetof(SMMURegs, priq_cons):
+            assert(pkt->getSize() == sizeof(uint32_t));
+            *reinterpret_cast<uint32_t *>(regs.data + offset) =
+                pkt->getLE<uint32_t>();
+            break;
+
+        // A new command-queue producer index may expose fresh
+        // commands: kick the command processor on the next cycle.
+        case offsetof(SMMURegs, cmdq_prod):
+            assert(pkt->getSize() == sizeof(uint32_t));
+            *reinterpret_cast<uint32_t *>(regs.data + offset) =
+                pkt->getLE<uint32_t>();
+            schedule(processCommandsEvent, nextCycle());
+            break;
+
+        // Plain 64-bit registers: store the value, no side effects.
+        case offsetof(SMMURegs, strtab_base):
+        case offsetof(SMMURegs, eventq_irq_cfg0):
+            assert(pkt->getSize() == sizeof(uint64_t));
+            *reinterpret_cast<uint64_t *>(regs.data + offset) =
+                pkt->getLE<uint64_t>();
+            break;
+
+        // (Re)programming a queue base resets both of that queue's
+        // producer/consumer indices.
+        case offsetof(SMMURegs, cmdq_base):
+            assert(pkt->getSize() == sizeof(uint64_t));
+            *reinterpret_cast<uint64_t *>(regs.data + offset) =
+                pkt->getLE<uint64_t>();
+            regs.cmdq_cons = 0;
+            regs.cmdq_prod = 0;
+            break;
+
+
+        case offsetof(SMMURegs, eventq_base):
+            assert(pkt->getSize() == sizeof(uint64_t));
+            *reinterpret_cast<uint64_t *>(regs.data + offset) =
+                pkt->getLE<uint64_t>();
+            regs.eventq_cons = 0;
+            regs.eventq_prod = 0;
+            break;
+
+        case offsetof(SMMURegs, priq_base):
+            assert(pkt->getSize() == sizeof(uint64_t));
+            *reinterpret_cast<uint64_t *>(regs.data + offset) =
+                pkt->getLE<uint64_t>();
+            regs.priq_cons = 0;
+            regs.priq_prod = 0;
+            break;
+
+        default:
+            if (inSecureBlock(offset)) {
+                warn("smmu: secure registers (0x%x) are not implemented\n",
+                     offset);
+            } else {
+                warn("smmu: write to read-only/undefined register at 0x%x\n",
+                     offset);
+            }
+    }
+
+    pkt->makeAtomicResponse();
+
+    return 0;
+}
+
+// True if the register-file offset falls in the secure register
+// block (which this model does not implement).
+// NOTE(review): the upper bound compares against SMMU_SECURE_SZ as
+// an absolute end offset rather than _secure_regs + size -- confirm
+// that SMMU_SECURE_SZ is defined that way in smmu_v3_defs.hh.
+bool
+SMMUv3::inSecureBlock(uint32_t offs) const
+{
+    return offs >= offsetof(SMMURegs, _secure_regs) && offs < SMMU_SECURE_SZ;
+}
+
+/**
+ * Post-elaboration initialisation: verify mandatory connectivity,
+ * enable the dedicated table-walk port when present, and publish
+ * the address ranges of the slave interfaces and register file.
+ */
+void
+SMMUv3::init()
+{
+    // make sure both sides are connected and have the same block size
+    if (!masterPort.isConnected())
+        fatal("Master port is not connected.\n");
+
+    // If the second master port is connected for the table walks, enable
+    // the mode to send table walks through this port instead
+    if (masterTableWalkPort.isConnected())
+        tableWalkPortEnable = true;
+
+    // notify the master side of our address ranges
+    for (auto ifc : slaveInterfaces) {
+        ifc->sendRange();
+    }
+
+    // Publish the memory-mapped register file's range, if wired up.
+    if (controlPort.isConnected())
+        controlPort.sendRangeChange();
+}
+
+/**
+ * Register statistics: per-slave-interface TLB stats (named utlbN /
+ * maintlbN), the shared caches' stats, configuration-fetch counters
+ * and latency distributions.
+ */
+void
+SMMUv3::regStats()
+{
+    MemObject::regStats();
+
+    using namespace Stats;
+
+    // Each slave interface owns its own micro and main TLB; index
+    // the stat names by interface position.
+    for (size_t i = 0; i < slaveInterfaces.size(); i++) {
+        slaveInterfaces[i]->microTLB->regStats(
+            csprintf("%s.utlb%d", name(), i));
+        slaveInterfaces[i]->mainTLB->regStats(
+            csprintf("%s.maintlb%d", name(), i));
+    }
+
+    tlb.regStats(name() + ".tlb");
+    configCache.regStats(name() + ".cfg");
+    ipaCache.regStats(name() + ".ipa");
+    walkCache.regStats(name() + ".walk");
+
+    steL1Fetches
+        .name(name() + ".steL1Fetches")
+        .desc("STE L1 fetches")
+        .flags(pdf);
+
+    steFetches
+        .name(name() + ".steFetches")
+        .desc("STE fetches")
+        .flags(pdf);
+
+    cdL1Fetches
+        .name(name() + ".cdL1Fetches")
+        .desc("CD L1 fetches")
+        .flags(pdf);
+
+    cdFetches
+        .name(name() + ".cdFetches")
+        .desc("CD fetches")
+        .flags(pdf);
+
+    // 2000 buckets covering [0, 2000000); presumably in ticks --
+    // confirm against where these distributions are sampled.
+    translationTimeDist
+        .init(0, 2000000, 2000)
+        .name(name() + ".translationTimeDist")
+        .desc("Time to translate address")
+        .flags(pdf);
+
+    ptwTimeDist
+        .init(0, 2000000, 2000)
+        .name(name() + ".ptwTimeDist")
+        .desc("Time to walk page tables")
+        .flags(pdf);
+}
+
+DrainState
+SMMUv3::drain()
+{
+    // Draining is not implemented; fail loudly rather than let a
+    // checkpoint capture inconsistent in-flight state.
+    panic("SMMUv3 doesn't support draining\n");
+}
+
+void
+SMMUv3::serialize(CheckpointOut &cp) const
+{
+    DPRINTF(Checkpoint, "Serializing SMMUv3\n");
+
+    // Only the raw register file is checkpointed; cached
+    // translations and any in-flight state are not saved.
+    SERIALIZE_ARRAY(regs.data, sizeof(regs.data) / sizeof(regs.data[0]));
+}
+
+void
+SMMUv3::unserialize(CheckpointIn &cp)
+{
+    DPRINTF(Checkpoint, "Unserializing SMMUv3\n");
+
+    // Restore the register file; mirror image of serialize().
+    UNSERIALIZE_ARRAY(regs.data, sizeof(regs.data) / sizeof(regs.data[0]));
+}
+
+// Resolve a Python-config port name to the matching C++ port object.
+Port&
+SMMUv3::getPort(const std::string &name, PortID id)
+{
+    if (name == "master")
+        return masterPort;
+
+    if (name == "master_walker")
+        return masterTableWalkPort;
+
+    if (name == "control")
+        return controlPort;
+
+    // Unknown name: defer to the base class.
+    return MemObject::getPort(name, id);
+}
+
+// Standard gem5 factory: build the C++ model from its Python params.
+SMMUv3*
+SMMUv3Params::create()
+{
+    return new SMMUv3(this);
+}
diff --git a/src/dev/arm/smmu_v3.hh b/src/dev/arm/smmu_v3.hh
new file mode 100644
index 0000000..f02ef2f
--- /dev/null
+++ b/src/dev/arm/smmu_v3.hh
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2013, 2018-2019 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Stan Czerniawski
+ */
+
+#ifndef __DEV_ARM_SMMU_V3_HH__
+#define __DEV_ARM_SMMU_V3_HH__
+
+#include <list>
+#include <map>
+#include <queue>
+#include <string>
+#include <vector>
+
+#include "base/statistics.hh"
+#include "dev/arm/smmu_v3_caches.hh"
+#include "dev/arm/smmu_v3_cmdexec.hh"
+#include "dev/arm/smmu_v3_defs.hh"
+#include "dev/arm/smmu_v3_events.hh"
+#include "dev/arm/smmu_v3_ports.hh"
+#include "dev/arm/smmu_v3_proc.hh"
+#include "dev/arm/smmu_v3_ptops.hh"
+#include "dev/arm/smmu_v3_slaveifc.hh"
+#include "mem/mem_object.hh"
+#include "mem/packet.hh"
+#include "params/SMMUv3.hh"
+#include "sim/eventq.hh"
+
+/**
+ * @file:
+ * This is an implementation of the SMMUv3 architecture.
+ *
+ * What can it do?
+ * - Single-stage and nested translation with 4k or 64k granule.  16k would
+ *   be straightforward to add.
+ * - Large pages are supported.
+ * - Works with any gem5 device as long as it is issuing packets with a
+ *   valid (Sub)StreamId
+ *
+ * What can't it do?
+ * - Fragment stage 1 page when the underlying stage 2 page is smaller.  S1
+ *   page size > S2 page size is not supported
+ * - Invalidation latency is not modelled: invalidations complete in
+ *   zero time. This wouldn't be hard to fix.
+ * - Checkpointing is not supported
+ * - Stall/resume for faulting transactions is not supported
+ */
+class SMMUTranslationProcess;
+
+class SMMUv3 : public MemObject
+{
+  protected:
+
+    friend class SMMUProcess;
+    friend class SMMUTranslationProcess;
+    friend class SMMUCommandExecProcess;
+    friend class SMMUv3SlaveInterface;
+
+    const System &system;
+    // Master ID used for memory requests issued by the SMMU itself.
+    const MasterID masterId;
+
+    // Downstream memory port, optional dedicated table-walk port,
+    // and the memory-mapped register (programming) interface.
+    SMMUMasterPort    masterPort;
+    SMMUMasterTableWalkPort masterTableWalkPort;
+    SMMUControlPort   controlPort;
+
+    // Shared caches; per-interface micro/main TLBs live inside the
+    // slave interfaces.
+    ARMArchTLB  tlb;
+    ConfigCache configCache;
+    IPACache    ipaCache;
+    WalkCache   walkCache;
+
+    // Per-structure enable switches (from the Python params).
+    const bool tlbEnable;
+    const bool configCacheEnable;
+    const bool ipaCacheEnable;
+    const bool walkCacheEnable;
+    // Set in init() when masterTableWalkPort is connected.
+    bool tableWalkPortEnable;
+
+    const bool walkCacheNonfinalEnable;
+    const unsigned walkCacheS1Levels;
+    const unsigned walkCacheS2Levels;
+    const unsigned masterPortWidth; // in bytes
+
+    // Semaphores modelling contention on the internal resources.
+    SMMUSemaphore tlbSem;
+    SMMUSemaphore ifcSmmuSem;
+    SMMUSemaphore smmuIfcSem;
+    SMMUSemaphore configSem;
+    SMMUSemaphore ipaSem;
+    SMMUSemaphore walkSem;
+    SMMUSemaphore masterPortSem;
+
+    SMMUSemaphore transSem; // max N transactions in SMMU
+    SMMUSemaphore ptwSem; // max N concurrent PTWs
+    SMMUSemaphore cycleSem; // max 1 table walk per cycle
+
+    // Timing parameters
+    const Cycles tlbLat;
+    const Cycles ifcSmmuLat;
+    const Cycles smmuIfcLat;
+    const Cycles configLat;
+    const Cycles ipaLat;
+    const Cycles walkLat;
+
+    // Stats
+    Stats::Scalar steL1Fetches;
+    Stats::Scalar steFetches;
+    Stats::Scalar cdL1Fetches;
+    Stats::Scalar cdFetches;
+    Stats::Distribution translationTimeDist;
+    Stats::Distribution ptwTimeDist;
+
+    // One interface per attached master device.
+    std::vector<SMMUv3SlaveInterface *> slaveInterfaces;
+
+    SMMUCommandExecProcess commandExecutor;
+
+    // Address range and backing storage of the register file.
+    const AddrRange regsMap;
+    SMMURegs regs;
+
+    bool inSecureBlock(uint32_t offs) const;
+
+    // Packets rejected in timing mode, waiting to be retried.
+    std::queue<SMMUAction> packetsToRetry;
+    std::queue<SMMUAction> packetsTableWalkToRetry;
+
+
+    void scheduleSlaveRetries();
+
+    SMMUAction runProcess(SMMUProcess *proc, PacketPtr pkt);
+    SMMUAction runProcessAtomic(SMMUProcess *proc, PacketPtr pkt);
+    SMMUAction runProcessTiming(SMMUProcess *proc, PacketPtr pkt);
+
+    // Drain the command queue; scheduled on cmdq_prod writes.
+    void processCommands();
+    EventWrapper<SMMUv3, &SMMUv3::processCommands> processCommandsEvent;
+
+    void processCommand(const SMMUCommand &cmd);
+
+    const PageTableOps *getPageTableOps(uint8_t trans_granule);
+
+  public:
+    SMMUv3(SMMUv3Params *p);
+    virtual ~SMMUv3() {}
+
+    virtual void init() override;
+    virtual void regStats() override;
+
+    Tick slaveRecvAtomic(PacketPtr pkt, PortID id);
+    bool slaveRecvTimingReq(PacketPtr pkt, PortID id);
+    bool masterRecvTimingResp(PacketPtr pkt);
+    void masterRecvReqRetry();
+
+    bool masterTableWalkRecvTimingResp(PacketPtr pkt);
+    void masterTableWalkRecvReqRetry();
+
+    // Register-file accessors used by the control port.
+    Tick readControl(PacketPtr pkt);
+    Tick writeControl(PacketPtr pkt);
+
+    DrainState drain() override;
+    void serialize(CheckpointOut &cp) const override;
+    void unserialize(CheckpointIn &cp) override;
+
+    virtual Port &getPort(const std::string &name,
+                          PortID id = InvalidPortID) override;
+};
+
+#endif /* __DEV_ARM_SMMU_V3_HH__ */
diff --git a/src/dev/arm/smmu_v3_caches.cc b/src/dev/arm/smmu_v3_caches.cc
new file mode 100644
index 0000000..6dcaec6
--- /dev/null
+++ b/src/dev/arm/smmu_v3_caches.cc
@@ -0,0 +1,1226 @@
+/*
+ * Copyright (c) 2014, 2018-2019 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Stan Czerniawski
+ *          Damian Richardson
+ */
+
+#include "dev/arm/smmu_v3_caches.hh"
+
+#include <numeric>
+
+#include "base/bitfield.hh"
+#include "base/intmath.hh"
+#include "base/logging.hh"
+#include "sim/stats.hh"
+
+
+// taken from hex expansion of pi
+#define SMMUTLB_SEED     0xEA752DFE
+#define ARMARCHTLB_SEED  0x8B021FA1
+#define IPACACHE_SEED    0xE5A0CC0F
+#define CONFIGCACHE_SEED 0xB56F74E8
+#define WALKCACHE_SEED   0x18ACF3D6
+
+/*
+ * BaseCache
+ *
+ * TODO: move more code into this base class to reduce duplication.
+ */
+
+// Common state shared by all SMMUv3 caches: decoded replacement
+// policy, round-robin cursor, seeded RNG (for random replacement)
+// and a monotonic use stamp for LRU tracking.
+SMMUv3BaseCache::SMMUv3BaseCache(const std::string &policy_name, uint32_t seed) :
+    replacementPolicy(decodePolicyName(policy_name)),
+    nextToReplace(0),
+    random(seed),
+    useStamp(0)
+{}
+
+// Translate the Python-side replacement policy string into the
+// internal enum; unknown names are a configuration error.
+int
+SMMUv3BaseCache::decodePolicyName(const std::string &policy_name)
+{
+    if (policy_name == "rr")
+        return SMMU_CACHE_REPL_ROUND_ROBIN;
+
+    if (policy_name == "rand")
+        return SMMU_CACHE_REPL_RANDOM;
+
+    if (policy_name == "lru")
+        return SMMU_CACHE_REPL_LRU;
+
+    panic("Unknown cache replacement policy '%s'\n", policy_name);
+}
+
+/**
+ * Register the per-cache statistics under the given name prefix.
+ * The average* stats are formulas derived from the corresponding
+ * totals and are evaluated when statistics are dumped.
+ */
+void
+SMMUv3BaseCache::regStats(const std::string &name)
+{
+    using namespace Stats;
+
+
+    averageLookups
+        .name(name + ".averageLookups")
+        .desc("Average number lookups per second")
+        .flags(pdf);
+
+    totalLookups
+        .name(name + ".totalLookups")
+        .desc("Total number of lookups")
+        .flags(pdf);
+
+    averageLookups = totalLookups / simSeconds;
+
+
+    averageMisses
+        .name(name + ".averageMisses")
+        .desc("Average number misses per second")
+        .flags(pdf);
+
+    totalMisses
+        .name(name + ".totalMisses")
+        .desc("Total number of misses")
+        .flags(pdf);
+
+    averageMisses = totalMisses / simSeconds;
+
+
+    averageUpdates
+        .name(name + ".averageUpdates")
+        .desc("Average number updates per second")
+        .flags(pdf);
+
+    totalUpdates
+        .name(name + ".totalUpdates")
+        .desc("Total number of updates")
+        .flags(pdf);
+
+    averageUpdates = totalUpdates / simSeconds;
+
+
+    averageHitRate
+        .name(name + ".averageHitRate")
+        .desc("Average hit rate")
+        .flags(pdf);
+
+    averageHitRate = (totalLookups - totalMisses) / totalLookups;
+
+    insertions
+        .name(name + ".insertions")
+        .desc("Number of insertions (not replacements)")
+        .flags(pdf);
+}
+
+
+
+/*
+ * SMMUTLB
+ */
+
+// Set-associative TLB keyed by (sid, ssid, va).  An associativity
+// of zero selects a fully-associative organisation.
+SMMUTLB::SMMUTLB(unsigned numEntries, unsigned _associativity,
+                 const std::string &policy)
+:
+    SMMUv3BaseCache(policy, SMMUTLB_SEED),
+    associativity(_associativity)
+{
+    if (associativity == 0)
+        associativity = numEntries; // fully associative
+
+    if (numEntries == 0)
+        fatal("SMMUTLB must have at least one entry\n");
+
+    if (associativity > numEntries)
+        fatal("SMMUTLB associativity cannot be higher than "
+              "its number of entries\n");
+
+    unsigned num_sets = numEntries / associativity;
+
+    if (num_sets*associativity != numEntries)
+        fatal("Number of SMMUTLB entries must be divisible "
+              "by its associativity\n");
+
+    Entry e;
+    e.valid = false;
+
+    // Pre-fill every set with invalid entries so lookups are safe
+    // before the first store().
+    Set set(associativity, e);
+    sets.resize(num_sets, set);
+}
+
+/**
+ * Look up a (sid, ssid, va) triple.  Returns the matching entry or
+ * NULL on a miss.  When updStats is set, the hit entry's LRU stamp
+ * and the lookup/miss counters are updated.
+ */
+const SMMUTLB::Entry*
+SMMUTLB::lookup(uint32_t sid, uint32_t ssid,
+                Addr va, bool updStats)
+{
+    const Entry *result = NULL;
+
+    Set &set = sets[pickSetIdx(va)];
+
+    for (size_t i = 0; i < set.size(); i++) {
+        const Entry &e = set[i];
+
+        if (e.valid && (e.va & e.vaMask) == (va & e.vaMask) &&
+            e.sid==sid && e.ssid==ssid)
+        {
+            // Scan the whole set so this sanity check can actually
+            // fire: the previous early 'break' on the first hit made
+            // the duplicate-entry panic unreachable.
+            if (result != NULL)
+                panic("SMMUTLB: duplicate entry found!\n");
+
+            result = &e;
+        }
+    }
+
+    if (updStats) {
+        if (result)
+            result->lastUsed = useStamp++;
+
+        totalLookups++;
+        if (result == NULL)
+            totalMisses++;
+    }
+
+    return result;
+}
+
+/**
+ * Return any valid entry for this (sid, ssid), regardless of VA, or
+ * NULL if none exists.  The original version kept scanning (and
+ * silently overwriting 'result') after a hit; since any match is
+ * acceptable here, stop at the first one.
+ */
+const SMMUTLB::Entry*
+SMMUTLB::lookupAnyVA(uint32_t sid, uint32_t ssid, bool updStats)
+{
+    const Entry *result = NULL;
+
+    for (size_t s = 0; s < sets.size() && result == NULL; s++) {
+        Set &set = sets[s];
+
+        for (size_t i = 0; i < set.size(); i++) {
+            const Entry &e = set[i];
+
+            if (e.valid && e.sid==sid && e.ssid==ssid) {
+                result = &e;
+                break;
+            }
+        }
+    }
+
+    if (updStats) {
+        totalLookups++;
+        if (result == NULL)
+            totalMisses++;
+    }
+
+    return result;
+}
+
+/**
+ * Insert 'incoming' into the TLB, replacing an existing entry for
+ * the same (sid, ssid, va) in place, or evicting a victim chosen
+ * according to 'alloc' otherwise.
+ */
+void
+SMMUTLB::store(const Entry &incoming, AllocPolicy alloc)
+{
+    if (!incoming.valid)
+        panic("Tried to store an invalid entry\n");
+
+    // Writing through a const reference: Entry::lastUsed must be
+    // mutable (lookup() updates it the same way).
+    incoming.lastUsed = 0;
+
+    const Entry *existing =
+        lookup(incoming.sid, incoming.ssid, incoming.va, false);
+
+    if (existing) {
+        // const_cast is safe: the pointer refers into the non-const
+        // 'sets' storage owned by this object.
+        *const_cast<Entry *> (existing) = incoming;
+    } else {
+        Set &set = sets[pickSetIdx(incoming.va)];
+        set[pickEntryIdxToReplace(set, alloc)] = incoming;
+    }
+
+    totalUpdates++;
+}
+
+// Invalidate entries matching (va, asid, vmid).  The set index
+// depends only on the VA, so a single set has to be scanned.
+void
+SMMUTLB::invalidateVA(Addr va, uint16_t asid, uint16_t vmid)
+{
+    for (auto &e : sets[pickSetIdx(va)]) {
+        const bool va_match = (e.va & e.vaMask) == (va & e.vaMask);
+
+        if (va_match && e.asid == asid && e.vmid == vmid)
+            e.valid = false;
+    }
+}
+
+// Invalidate entries matching (va, vmid) for any ASID.
+void
+SMMUTLB::invalidateVAA(Addr va, uint16_t vmid)
+{
+    for (auto &e : sets[pickSetIdx(va)]) {
+        const bool va_match = (e.va & e.vaMask) == (va & e.vaMask);
+
+        if (va_match && e.vmid == vmid)
+            e.valid = false;
+    }
+}
+
+// Invalidate all entries of an (asid, vmid) pair.  The ASID is not
+// part of the set index, so the whole cache is scanned.
+void
+SMMUTLB::invalidateASID(uint16_t asid, uint16_t vmid)
+{
+    for (auto &set : sets) {
+        for (auto &e : set) {
+            if (e.asid == asid && e.vmid == vmid)
+                e.valid = false;
+        }
+    }
+}
+
+// Invalidate every entry belonging to the given VMID.
+void
+SMMUTLB::invalidateVMID(uint16_t vmid)
+{
+    for (auto &set : sets) {
+        for (auto &e : set) {
+            if (e.vmid == vmid)
+                e.valid = false;
+        }
+    }
+}
+
+// Drop every entry in the TLB.
+void
+SMMUTLB::invalidateAll()
+{
+    for (auto &set : sets) {
+        for (auto &e : set)
+            e.valid = false;
+    }
+}
+
+// Index by (4k) page number; in-page offset bits are ignored.
+size_t
+SMMUTLB::pickSetIdx(Addr va) const
+{
+    const Addr page_num = va >> 12;
+    return page_num % sets.size();
+}
+
+/**
+ * Choose a victim way in 'set' honouring the allocation policy.  A
+ * free (invalid) way is always preferred; otherwise fall back to
+ * the configured replacement policy.  LRU state is gathered during
+ * the same scan that looks for a free way.
+ */
+size_t
+SMMUTLB::pickEntryIdxToReplace(const Set &set, AllocPolicy alloc)
+{
+    if (alloc == ALLOC_LAST_WAY)
+        return associativity - 1;
+
+    uint32_t lru_tick = UINT32_MAX;
+    size_t lru_idx = 0;
+    // ALLOC_ANY_BUT_LAST_WAY reserves the last way, so exclude it
+    // from the free/LRU search.
+    size_t max_idx =
+        alloc==ALLOC_ANY_BUT_LAST_WAY ?
+            set.size()-1 : set.size();
+
+    for (size_t i = 0; i < max_idx; i++) {
+        if (!set[i].valid) {
+            insertions++;
+            return i;
+        }
+
+        if (set[i].lastUsed < lru_tick) {
+            lru_idx = i;
+            lru_tick = set[i].lastUsed;
+        }
+    }
+
+    switch (replacementPolicy) {
+    case SMMU_CACHE_REPL_ROUND_ROBIN:
+        switch (alloc) {
+        case ALLOC_ANY_WAY:
+            return nextToReplace = ((nextToReplace+1) % associativity);
+        case ALLOC_ANY_BUT_LAST_WAY:
+            return nextToReplace = ((nextToReplace+1) % (associativity-1));
+        default:
+            panic("Unknown allocation mode %d\n", alloc);
+        }
+
+    case SMMU_CACHE_REPL_RANDOM:
+        switch (alloc) {
+        case ALLOC_ANY_WAY:
+            return random.random<size_t>(0, associativity-1);
+        case ALLOC_ANY_BUT_LAST_WAY:
+            return random.random<size_t>(0, associativity-2);
+        default:
+            panic("Unknown allocation mode %d\n", alloc);
+        }
+
+    case SMMU_CACHE_REPL_LRU:
+        return lru_idx;
+
+    default:
+        panic("Unknown replacement policy %d\n", replacementPolicy);
+    }
+}
+
+
+
+/*
+ * ARMArchTLB
+ */
+
+// Set-associative TLB keyed by (va, asid, vmid).  An associativity
+// of zero selects a fully-associative organisation.
+ARMArchTLB::ARMArchTLB(unsigned numEntries, unsigned _associativity,
+                       const std::string &policy)
+:
+    SMMUv3BaseCache(policy, ARMARCHTLB_SEED),
+    associativity(_associativity)
+{
+    if (associativity == 0)
+        associativity = numEntries; // fully associative
+
+    if (numEntries == 0)
+        fatal("ARMArchTLB must have at least one entry\n");
+
+    if (associativity > numEntries)
+        fatal("ARMArchTLB associativity cannot be higher than "
+              "its number of entries\n");
+
+    unsigned num_sets = numEntries / associativity;
+
+    if (num_sets*associativity != numEntries)
+        fatal("Number of ARMArchTLB entries must be divisible "
+              "by its associativity\n");
+
+    Entry e;
+    e.valid = false;
+
+    // Pre-fill every set with invalid entries so lookups are safe
+    // before the first store().
+    Set set(associativity, e);
+    sets.resize(num_sets, set);
+}
+
+/**
+ * Look up a (va, asid, vmid) triple.  Returns the matching entry or
+ * NULL on a miss.  When updStats is set, the hit entry's LRU stamp
+ * and the lookup/miss counters are updated.
+ */
+const ARMArchTLB::Entry *
+ARMArchTLB::lookup(Addr va, uint16_t asid, uint16_t vmid, bool updStats)
+{
+    const Entry *result = NULL;
+
+    Set &set = sets[pickSetIdx(va, asid, vmid)];
+
+    for (size_t i = 0; i < set.size(); i++) {
+        const Entry &e = set[i];
+
+        if (e.valid && (e.va & e.vaMask) == (va & e.vaMask) &&
+            e.asid==asid && e.vmid==vmid)
+        {
+            // Scan the whole set so this sanity check can actually
+            // fire: the previous early 'break' on the first hit made
+            // the duplicate-entry panic unreachable.
+            if (result != NULL)
+                panic("ARMArchTLB: duplicate entry found!\n");
+
+            result = &e;
+        }
+    }
+
+    if (updStats) {
+        if (result)
+            result->lastUsed = useStamp++;
+
+        totalLookups++;
+        if (result == NULL)
+            totalMisses++;
+    }
+
+    return result;
+}
+
+/**
+ * Insert 'incoming', replacing an existing entry for the same
+ * (va, asid, vmid) in place, or evicting a victim otherwise.
+ */
+void
+ARMArchTLB::store(const Entry &incoming)
+{
+    if (!incoming.valid)
+        panic("Tried to store an invalid entry\n");
+
+    // Writing through a const reference: Entry::lastUsed must be
+    // mutable (lookup() updates it the same way).
+    incoming.lastUsed = 0;
+
+    const Entry *existing =
+        lookup(incoming.va, incoming.asid, incoming.vmid, false);
+
+    if (existing) {
+        // const_cast is safe: the pointer refers into the non-const
+        // 'sets' storage owned by this object.
+        *const_cast<Entry *> (existing) = incoming;
+    } else {
+        Set &set = sets[pickSetIdx(incoming.va, incoming.asid, incoming.vmid)];
+        set[pickEntryIdxToReplace(set)] = incoming;
+    }
+
+    totalUpdates++;
+}
+
+// Invalidate entries matching the full (va, asid, vmid) key; only
+// the set indexed by that key can hold matches.
+void
+ARMArchTLB::invalidateVA(Addr va, uint16_t asid, uint16_t vmid)
+{
+    for (auto &e : sets[pickSetIdx(va, asid, vmid)]) {
+        const bool va_match = (e.va & e.vaMask) == (va & e.vaMask);
+
+        if (va_match && e.asid == asid && e.vmid == vmid)
+            e.valid = false;
+    }
+}
+
+// Invalidate entries matching (va, vmid) for any ASID.  The set
+// index mixes in the ASID, which is unknown here, so the whole
+// cache has to be scanned.
+void
+ARMArchTLB::invalidateVAA(Addr va, uint16_t vmid)
+{
+    for (auto &set : sets) {
+        for (auto &e : set) {
+            const bool va_match = (e.va & e.vaMask) == (va & e.vaMask);
+
+            if (va_match && e.vmid == vmid)
+                e.valid = false;
+        }
+    }
+}
+
+// Invalidate all entries of an (asid, vmid) pair.
+void
+ARMArchTLB::invalidateASID(uint16_t asid, uint16_t vmid)
+{
+    for (auto &set : sets) {
+        for (auto &e : set) {
+            if (e.asid == asid && e.vmid == vmid)
+                e.valid = false;
+        }
+    }
+}
+
+// Invalidate every entry belonging to the given VMID.
+void
+ARMArchTLB::invalidateVMID(uint16_t vmid)
+{
+    for (auto &set : sets) {
+        for (auto &e : set) {
+            if (e.vmid == vmid)
+                e.valid = false;
+        }
+    }
+}
+
+// Drop every entry in the TLB.
+void
+ARMArchTLB::invalidateAll()
+{
+    for (auto &set : sets) {
+        for (auto &e : set)
+            e.valid = false;
+    }
+}
+
+// Hash the page number with both context IDs to spread entries
+// across sets.
+size_t
+ARMArchTLB::pickSetIdx(Addr va, uint16_t asid, uint16_t vmid) const
+{
+    const size_t key = (va >> 12) ^ asid ^ vmid;
+    return key % sets.size();
+}
+
+/**
+ * Choose a victim way in 'set'.  A free (invalid) way is always
+ * preferred; otherwise fall back to the configured replacement
+ * policy.  LRU state is gathered during the same scan that looks
+ * for a free way.
+ */
+size_t
+ARMArchTLB::pickEntryIdxToReplace(const Set &set)
+{
+    size_t lru_idx = 0;
+    uint32_t lru_tick = UINT32_MAX;
+
+    for (size_t i = 0; i < set.size(); i++) {
+        if (!set[i].valid) {
+            insertions++;
+            return i;
+        }
+
+        if (set[i].lastUsed < lru_tick) {
+            lru_idx = i;
+            lru_tick = set[i].lastUsed;
+        }
+    }
+
+    switch (replacementPolicy) {
+    case SMMU_CACHE_REPL_ROUND_ROBIN:
+        return nextToReplace = ((nextToReplace+1) % associativity);
+
+    case SMMU_CACHE_REPL_RANDOM:
+        return random.random<size_t>(0, associativity-1);
+
+    case SMMU_CACHE_REPL_LRU:
+        return lru_idx;
+
+    default:
+        panic("Unknown replacement policy %d\n", replacementPolicy);
+    }
+
+}
+
+/*
+ * IPACache
+ */
+
+// Set-associative cache of stage-2 (IPA -> PA) translations, keyed
+// by (ipa, vmid).  An associativity of zero selects a
+// fully-associative organisation.
+IPACache::IPACache(unsigned numEntries, unsigned _associativity,
+                   const std::string &policy)
+:
+    SMMUv3BaseCache(policy, IPACACHE_SEED),
+    associativity(_associativity)
+{
+    if (associativity == 0)
+        associativity = numEntries; // fully associative
+
+    if (numEntries == 0)
+        fatal("IPACache must have at least one entry\n");
+
+    if (associativity > numEntries)
+        fatal("IPACache associativity cannot be higher than "
+              "its number of entries\n");
+
+    unsigned num_sets = numEntries / associativity;
+
+    if (num_sets*associativity != numEntries)
+        fatal("Number of IPACache entries must be divisible "
+              "by its associativity\n");
+
+    Entry e;
+    e.valid = false;
+
+    // Pre-fill every set with invalid entries so lookups are safe
+    // before the first store().
+    Set set(associativity, e);
+    sets.resize(num_sets, set);
+}
+
+/**
+ * Look up an (ipa, vmid) pair.  Returns the matching entry or NULL
+ * on a miss.  When updStats is set, the hit entry's LRU stamp and
+ * the lookup/miss counters are updated.
+ */
+const IPACache::Entry*
+IPACache::lookup(Addr ipa, uint16_t vmid, bool updStats)
+{
+    const Entry *result = NULL;
+
+    Set &set = sets[pickSetIdx(ipa, vmid)];
+
+    for (size_t i = 0; i < set.size(); i++) {
+        const Entry &e = set[i];
+
+        if (e.valid && (e.ipa & e.ipaMask) == (ipa & e.ipaMask) &&
+            e.vmid==vmid)
+        {
+            // Scan the whole set so this sanity check can actually
+            // fire: the previous early 'break' on the first hit made
+            // the duplicate-entry panic unreachable.
+            if (result != NULL)
+                panic("IPACache: duplicate entry found!\n");
+
+            result = &e;
+        }
+    }
+
+    if (updStats) {
+        if (result)
+            result->lastUsed = useStamp++;
+
+        totalLookups++;
+        if (result == NULL)
+            totalMisses++;
+    }
+
+    return result;
+}
+
+void
+IPACache::store(const Entry &incoming)
+{
+    // Insert or refresh the translation tagged (incoming.ipa, incoming.vmid).
+    if (!incoming.valid)
+        panic("Tried to store an invalid entry\n");
+
+    // lastUsed is declared mutable, hence writable via a const reference.
+    incoming.lastUsed = 0;
+
+    const Entry *existing = lookup(incoming.ipa, incoming.vmid, false);
+
+    if (existing) {
+        // Overwrite in place; the underlying storage is non-const.
+        *const_cast<Entry *> (existing) = incoming;
+    } else {
+        Set &set = sets[pickSetIdx(incoming.ipa, incoming.vmid)];
+        set[pickEntryIdxToReplace(set)] = incoming;
+    }
+
+    totalUpdates++;
+}
+
+void
+IPACache::invalidateIPA(Addr ipa, uint16_t vmid)
+{
+    // Drop any entry covering this IPA for the given VMID.  The valid
+    // bit is not tested; clearing an already-invalid way is harmless.
+    Set &set = sets[pickSetIdx(ipa, vmid)];
+
+    for (size_t i = 0; i < set.size(); i++) {
+        Entry &e = set[i];
+
+        if ((e.ipa & e.ipaMask) == (ipa & e.ipaMask) && e.vmid==vmid)
+            e.valid = false;
+    }
+}
+
+void
+IPACache::invalidateIPAA(Addr ipa)
+{
+    // Drop entries covering this IPA regardless of VMID.  A full scan
+    // is required because the set index depends on the VMID.
+    for (size_t s = 0; s < sets.size(); s++) {
+        Set &set = sets[s];
+
+        for (size_t i = 0; i < set.size(); i++) {
+            Entry &e = set[i];
+
+            if ((e.ipa & e.ipaMask) == (ipa & e.ipaMask))
+                e.valid = false;
+        }
+    }
+}
+
+void
+IPACache::invalidateVMID(uint16_t vmid)
+{
+    // Drop every entry belonging to the given VMID (full scan).
+    for (size_t s = 0; s < sets.size(); s++) {
+        Set &set = sets[s];
+
+        for (size_t i = 0; i < set.size(); i++) {
+            Entry &e = set[i];
+
+            if (e.vmid == vmid)
+                e.valid = false;
+        }
+    }
+}
+
+void
+IPACache::invalidateAll()
+{
+    // Clear the valid bit on every way of every set.
+    for (size_t s = 0; s < sets.size(); s++) {
+        Set &set = sets[s];
+
+        for (size_t i = 0; i < set.size(); i++)
+            set[i].valid = false;
+    }
+}
+
+size_t
+IPACache::pickSetIdx(Addr va, uint16_t vmid) const
+{
+    // Hash the IPA page number (4k granularity) with the VMID.
+    return ((va >> 12) ^ vmid) % sets.size();
+}
+
+size_t
+IPACache::pickEntryIdxToReplace(const Set &set)
+{
+    // Victim selection: prefer an invalid way (insertion), otherwise
+    // defer to the configured replacement policy.
+    size_t lru_idx = 0;
+    uint32_t lru_tick = UINT32_MAX;
+
+    for (size_t i = 0; i < set.size(); i++) {
+        if (!set[i].valid) {
+            insertions++;
+            return i;
+        }
+
+        if (set[i].lastUsed < lru_tick) {
+            lru_idx = i;
+            lru_tick = set[i].lastUsed;
+        }
+    }
+
+    switch (replacementPolicy) {
+    case SMMU_CACHE_REPL_ROUND_ROBIN:
+        return nextToReplace = ((nextToReplace+1) % associativity);
+
+    case SMMU_CACHE_REPL_RANDOM:
+        return random.random<size_t>(0, associativity-1);
+
+    case SMMU_CACHE_REPL_LRU:
+        return lru_idx;
+
+    default:
+        panic("Unknown replacement policy %d\n", replacementPolicy);
+    }
+
+}
+
+/*
+ * ConfigCache
+ */
+
+ConfigCache::ConfigCache(unsigned numEntries, unsigned _associativity,
+                         const std::string &policy)
+:
+    SMMUv3BaseCache(policy, CONFIGCACHE_SEED),
+    associativity(_associativity)
+{
+    // An associativity of 0 selects a fully-associative organisation.
+    if (associativity == 0)
+        associativity = numEntries; // fully associative
+
+    if (numEntries == 0)
+        fatal("ConfigCache must have at least one entry\n");
+
+    if (associativity > numEntries)
+        fatal("ConfigCache associativity cannot be higher than "
+              "its number of entries\n");
+
+    unsigned num_sets = numEntries / associativity;
+
+    if (num_sets*associativity != numEntries)
+        fatal("Number of ConfigCache entries must be divisible "
+              "by its associativity\n");
+
+    // Pre-populate every set with invalid entries.
+    Entry e;
+    e.valid = false;
+
+    Set set(associativity, e);
+    sets.resize(num_sets, set);
+}
+
+const ConfigCache::Entry *
+ConfigCache::lookup(uint32_t sid, uint32_t ssid, bool updStats)
+{
+    // Look up the cached configuration for (sid, ssid).  The whole set
+    // is scanned (no early exit) so the duplicate check can actually
+    // fire; previously the loop broke out on the first hit, which made
+    // the panic unreachable.
+    const Entry *result = NULL;
+
+    Set &set = sets[pickSetIdx(sid, ssid)];
+
+    for (size_t i = 0; i < set.size(); i++) {
+        const Entry &e = set[i];
+
+        if (e.valid && e.sid==sid && e.ssid==ssid)
+        {
+            if (result != NULL)
+                panic("ConfigCache: duplicate entry found!\n");
+
+            result = &e;
+        }
+    }
+
+    if (updStats) {
+        // lastUsed is mutable, so it can be bumped through a const ref.
+        if (result)
+            result->lastUsed = useStamp++;
+
+        totalLookups++;
+        if (result == NULL)
+            totalMisses++;
+    }
+
+    return result;
+}
+
+void
+ConfigCache::store(const Entry &incoming)
+{
+    // Insert or refresh the configuration tagged (incoming.sid, incoming.ssid).
+    if (!incoming.valid)
+        panic("Tried to store an invalid entry\n");
+
+    // lastUsed is declared mutable, hence writable via a const reference.
+    incoming.lastUsed = 0;
+
+    const Entry *existing = lookup(incoming.sid, incoming.ssid, false);
+
+    if (existing) {
+        // Overwrite in place; the underlying storage is non-const.
+        *const_cast<Entry *> (existing) = incoming;
+    } else {
+        Set &set = sets[pickSetIdx(incoming.sid, incoming.ssid)];
+        set[pickEntryIdxToReplace(set)] = incoming;
+    }
+
+    totalUpdates++;
+}
+
+void
+ConfigCache::invalidateSSID(uint32_t sid, uint32_t ssid)
+{
+    // Drop the entry for a single (sid, ssid) pair.  The valid bit is
+    // not tested; clearing an already-invalid way is harmless.
+    Set &set = sets[pickSetIdx(sid, ssid)];
+
+    for (size_t i = 0; i < set.size(); i++) {
+        Entry &e = set[i];
+
+        if (e.sid==sid && e.ssid==ssid)
+            e.valid = false;
+    }
+}
+
+void
+ConfigCache::invalidateSID(uint32_t sid)
+{
+    // Drop every entry for the given StreamID across all SubstreamIDs.
+    // A full scan is required because the set index depends on the SSID.
+    for (size_t s = 0; s < sets.size(); s++) {
+        Set &set = sets[s];
+
+        for (size_t i = 0; i < set.size(); i++) {
+            Entry &e = set[i];
+
+            if (e.sid == sid)
+                e.valid = false;
+        }
+    }
+}
+
+void
+ConfigCache::invalidateAll()
+{
+    // Clear the valid bit on every way of every set.
+    for (size_t s = 0; s < sets.size(); s++) {
+        Set &set = sets[s];
+
+        for (size_t i = 0; i < set.size(); i++)
+            set[i].valid = false;
+    }
+}
+
+size_t
+ConfigCache::pickSetIdx(uint32_t sid, uint32_t ssid) const
+{
+    // Simple XOR hash of StreamID and SubstreamID.
+    return (sid^ssid) % sets.size();
+}
+
+size_t
+ConfigCache::pickEntryIdxToReplace(const Set &set)
+{
+    // Victim selection: prefer an invalid way (insertion), otherwise
+    // defer to the configured replacement policy.
+    size_t lru_idx = 0;
+    uint32_t lru_tick = UINT32_MAX;
+
+    for (size_t i = 0; i < set.size(); i++) {
+        if (!set[i].valid) {
+            insertions++;
+            return i;
+        }
+
+        if (set[i].lastUsed < lru_tick) {
+            lru_idx = i;
+            lru_tick = set[i].lastUsed;
+        }
+    }
+
+    switch (replacementPolicy) {
+    case SMMU_CACHE_REPL_ROUND_ROBIN:
+        return nextToReplace = ((nextToReplace+1) % associativity);
+
+    case SMMU_CACHE_REPL_RANDOM:
+        return random.random<size_t>(0, associativity-1);
+
+    case SMMU_CACHE_REPL_LRU:
+        return lru_idx;
+
+    default:
+        panic("Unknown replacement policy %d\n", replacementPolicy);
+    }
+
+}
+
+/*
+ * WalkCache
+ */
+
+WalkCache::WalkCache(const std::array<unsigned, 2*WALK_CACHE_LEVELS> &_sizes,
+                     unsigned _associativity, const std::string &policy) :
+    SMMUv3BaseCache(policy, WALKCACHE_SEED),
+    associativity(_associativity),
+    sizes()
+{
+    // _sizes holds the number of entries per (stage, level) pair:
+    // stage 1 occupies indices [0, WALK_CACHE_LEVELS), stage 2 the rest.
+    unsigned numEntries = std::accumulate(&_sizes[0],
+                                          &_sizes[2*WALK_CACHE_LEVELS], 0);
+
+    // An associativity of 0 selects a fully-associative organisation.
+    if (associativity == 0)
+        associativity = numEntries; // fully associative
+
+    if (numEntries == 0)
+        fatal("WalkCache must have at least one entry\n");
+
+    for (size_t i = 0; i < 2*WALK_CACHE_LEVELS; i++){
+        if (_sizes[i] % associativity != 0)
+              fatal("Number of WalkCache entries at each level must be "
+                    "divisible by WalkCache associativity\n");
+
+        // Convert entry counts into per-partition set counts and record
+        // each (stage, level) partition's starting set index.
+        sizes[i] = _sizes[i] /  associativity;
+        offsets[i] = i==0 ? 0 : offsets[i-1] + sizes[i-1];
+    }
+
+    if (associativity > numEntries)
+        fatal("WalkCache associativity cannot be higher than "
+              "its number of entries\n");
+
+    unsigned num_sets = numEntries / associativity;
+
+    if (num_sets*associativity != numEntries)
+        fatal("Number of WalkCache entries must be divisible "
+              "by its associativity\n");
+
+    // Pre-populate every set with invalid entries.
+    Entry e;
+    e.valid = false;
+
+    Set set(associativity, e);
+    sets.resize(num_sets, set);
+}
+
+const WalkCache::Entry*
+WalkCache::lookup(Addr va, Addr vaMask,
+                  uint16_t asid, uint16_t vmid,
+                  unsigned stage, unsigned level,
+                  bool updStats)
+{
+    // Find a cached partial walk for (va, asid, vmid, stage, level).
+    // The whole set is scanned (no early exit) so the duplicate check
+    // can actually fire; previously the loop broke out on the first
+    // hit, which made the panic unreachable.
+    const Entry *result = NULL;
+
+    Set &set = sets[pickSetIdx(va, vaMask, stage, level)];
+
+    for (size_t i = 0; i < set.size(); i++) {
+        const Entry &e = set[i];
+
+        if (e.valid && (e.va & e.vaMask) == (va & e.vaMask) &&
+            e.asid==asid && e.vmid==vmid && e.stage==stage && e.level==level)
+        {
+            if (result != NULL)
+                panic("WalkCache: duplicate entry found!\n");
+
+            result = &e;
+        }
+    }
+
+    if (updStats) {
+        // lastUsed is mutable, so it can be bumped through a const ref.
+        if (result)
+            result->lastUsed = useStamp++;
+
+        totalLookups++;
+        if (result == NULL)
+            totalMisses++;
+
+        // Per-(stage, level) statistics; stage is 1-based here.
+        lookupsByStageLevel[stage-1][level]++;
+        totalLookupsByStageLevel[stage-1][level]++;
+        if (result == NULL) {
+            missesByStageLevel[stage-1][level]++;
+            totalMissesByStageLevel[stage-1][level]++;
+        }
+    }
+
+    return result;
+}
+
+void
+WalkCache::store(const Entry &incoming)
+{
+    // Insert or refresh a partial-walk entry.  Valid levels are
+    // 0 .. WALK_CACHE_LEVELS-1: they index the per-stage/level stats
+    // arrays below, so the previous "<= WALK_CACHE_LEVELS" assertion
+    // allowed an out-of-bounds access at level == WALK_CACHE_LEVELS.
+    if (!incoming.valid)
+        panic("Tried to store an invalid entry\n");
+
+    assert(incoming.stage==1 || incoming.stage==2);
+    assert(incoming.level < WALK_CACHE_LEVELS);
+
+    // lastUsed is declared mutable, hence writable via a const reference.
+    incoming.lastUsed = 0;
+
+    const Entry *existing = lookup(incoming.va, incoming.vaMask,
+                                   incoming.asid, incoming.vmid,
+                                   incoming.stage, incoming.level, false);
+
+    if (existing) {
+        // Overwrite in place; the underlying storage is non-const.
+        *const_cast<Entry *> (existing) = incoming;
+    } else {
+        Set &set = sets[pickSetIdx(incoming.va, incoming.vaMask,
+                                   incoming.stage, incoming.level)];
+        set[pickEntryIdxToReplace(set, incoming.stage, incoming.level)] =
+            incoming;
+    }
+
+    totalUpdates++;
+    updatesByStageLevel[incoming.stage-1][incoming.level]++;
+    totalUpdatesByStageLevel[incoming.stage-1][incoming.level]++;
+}
+
+void
+WalkCache::invalidateVA(Addr va, uint16_t asid, uint16_t vmid)
+{
+    // Not yet implemented (see commit message: invalidations are
+    // currently limited); panics if ever invoked.
+    panic("%s unimplemented\n", __func__);
+}
+
+void
+WalkCache::invalidateVAA(Addr va, uint16_t vmid)
+{
+    // Not yet implemented; panics if ever invoked.
+    panic("%s unimplemented\n", __func__);
+}
+
+void
+WalkCache::invalidateASID(uint16_t asid, uint16_t vmid)
+{
+    // Not yet implemented; panics if ever invoked.
+    panic("%s unimplemented\n", __func__);
+}
+
+void
+WalkCache::invalidateVMID(uint16_t vmid)
+{
+    // Drop every entry belonging to the given VMID (full scan).
+    for (size_t s = 0; s < sets.size(); s++) {
+        Set &set = sets[s];
+
+        for (size_t i = 0; i < set.size(); i++) {
+            Entry &e = set[i];
+
+            if (e.vmid == vmid)
+                e.valid = false;
+        }
+    }
+}
+
+void
+WalkCache::invalidateAll()
+{
+    // Clear the valid bit on every way of every set.
+    for (size_t s = 0; s < sets.size(); s++) {
+        Set &set = sets[s];
+
+        for (size_t i = 0; i < set.size(); i++)
+            set[i].valid = false;
+    }
+}
+
+size_t
+WalkCache::pickSetIdx(Addr va, Addr vaMask,
+                      unsigned stage, unsigned level) const
+{
+    // Map (va, stage, level) to a set index.  Each (stage, level) pair
+    // owns a contiguous partition of 'size' sets starting at 'offset'.
+    // The stale "(void) stage;" unused-parameter suppression has been
+    // dropped: stage is clearly used by the switch below.
+    int size, offset;
+
+    switch (stage) {
+        case 1:
+            assert (level<=3);
+            size = sizes[0*WALK_CACHE_LEVELS + level];
+            offset = offsets[0*WALK_CACHE_LEVELS + level];
+            break;
+
+        case 2:
+            assert (level<=3);
+            size = sizes[1*WALK_CACHE_LEVELS + level];
+            offset = offsets[1*WALK_CACHE_LEVELS + level];
+            break;
+
+        default:
+            panic("bad stage");
+    }
+
+    // Index within the partition using the page-number bits of the VA.
+    return ((va >> findLsbSet(vaMask)) % size) + offset;
+}
+
+size_t
+WalkCache::pickEntryIdxToReplace(const Set &set,
+                                 unsigned stage, unsigned level)
+{
+    // Victim selection: prefer an invalid way (insertion, counted per
+    // stage/level), otherwise defer to the configured policy.
+    size_t lru_idx = 0;
+    uint32_t lru_tick = UINT32_MAX;
+
+    for (size_t i = 0; i < set.size(); i++) {
+        if (!set[i].valid) {
+            insertions++;
+            insertionsByStageLevel[stage-1][level]++;
+            return i;
+        }
+
+        if (set[i].lastUsed < lru_tick) {
+            lru_idx = i;
+            lru_tick = set[i].lastUsed;
+        }
+    }
+
+    switch (replacementPolicy) {
+    case SMMU_CACHE_REPL_ROUND_ROBIN:
+        return nextToReplace = ((nextToReplace+1) % associativity);
+
+    case SMMU_CACHE_REPL_RANDOM:
+        return random.random<size_t>(0, associativity-1);
+
+    case SMMU_CACHE_REPL_LRU:
+        return lru_idx;
+
+    default:
+        panic("Unknown replacement policy %d\n", replacementPolicy);
+    }
+
+}
+
+void
+WalkCache::regStats(const std::string &name)
+{
+    // Register the per-(stage, level) statistics on top of the generic
+    // counters registered by the base class.
+    using namespace Stats;
+
+    SMMUv3BaseCache::regStats(name);
+
+    for (int s = 0; s < 2; s++) {
+        for (int l = 0; l < WALK_CACHE_LEVELS; l++) {
+            averageLookupsByStageLevel[s][l]
+                .name(csprintf("%s.averageLookupsS%dL%d", name, s+1, l))
+                .desc("Average number lookups per second")
+                .flags(pdf);
+
+            totalLookupsByStageLevel[s][l]
+                .name(csprintf("%s.totalLookupsS%dL%d", name, s+1, l))
+                .desc("Total number of lookups")
+                .flags(pdf);
+
+            averageLookupsByStageLevel[s][l] =
+                totalLookupsByStageLevel[s][l] / simSeconds;
+
+
+            averageMissesByStageLevel[s][l]
+                .name(csprintf("%s.averageMissesS%dL%d", name, s+1, l))
+                .desc("Average number misses per second")
+                .flags(pdf);
+
+            totalMissesByStageLevel[s][l]
+                .name(csprintf("%s.totalMissesS%dL%d", name, s+1, l))
+                .desc("Total number of misses")
+                .flags(pdf);
+
+            averageMissesByStageLevel[s][l] =
+                totalMissesByStageLevel[s][l] / simSeconds;
+
+
+            averageUpdatesByStageLevel[s][l]
+                .name(csprintf("%s.averageUpdatesS%dL%d", name, s+1, l))
+                .desc("Average number updates per second")
+                .flags(pdf);
+
+            totalUpdatesByStageLevel[s][l]
+                .name(csprintf("%s.totalUpdatesS%dL%d", name, s+1, l))
+                .desc("Total number of updates")
+                .flags(pdf);
+
+            averageUpdatesByStageLevel[s][l] =
+                totalUpdatesByStageLevel[s][l] / simSeconds;
+
+
+            averageHitRateByStageLevel[s][l]
+                .name(csprintf("%s.averageHitRateS%dL%d", name, s+1, l))
+                .desc("Average hit rate")
+                .flags(pdf);
+
+            // NOTE(review): this formula divides by total lookups;
+            // confirm the stats framework handles a zero denominator.
+            averageHitRateByStageLevel[s][l] =
+                (totalLookupsByStageLevel[s][l] -
+                 totalMissesByStageLevel[s][l])
+                / totalLookupsByStageLevel[s][l];
+
+            insertionsByStageLevel[s][l]
+                .name(csprintf("%s.insertionsS%dL%d", name, s+1, l))
+                .desc("Number of insertions (not replacements)")
+                .flags(pdf);
+        }
+    }
+}
diff --git a/src/dev/arm/smmu_v3_caches.hh b/src/dev/arm/smmu_v3_caches.hh
new file mode 100644
index 0000000..ce5bb45
--- /dev/null
+++ b/src/dev/arm/smmu_v3_caches.hh
@@ -0,0 +1,348 @@
+/*
+ * Copyright (c) 2014, 2018-2019 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Stan Czerniawski
+ */
+
+#ifndef __DEV_ARM_SMMU_V3_CACHES_HH__
+#define __DEV_ARM_SMMU_V3_CACHES_HH__
+
+#include <stdint.h>
+
+#include <array>
+#include <cstddef>
+#include <string>
+#include <vector>
+
+#include "base/random.hh"
+#include "base/statistics.hh"
+#include "base/types.hh"
+
+// Number of page-table levels tracked per translation stage by the
+// WalkCache (levels 0..3).
+#define WALK_CACHE_LEVELS 4
+
+// Cache replacement policies selectable via the policy name string
+// passed to each cache constructor.
+enum {
+    SMMU_CACHE_REPL_ROUND_ROBIN,
+    SMMU_CACHE_REPL_RANDOM,
+    SMMU_CACHE_REPL_LRU,
+};
+
+// Common base class for the SMMUv3 model caches: holds the replacement
+// policy state, the RNG used for random replacement, the LRU timestamp
+// counter and the statistics shared by all cache types.
+class SMMUv3BaseCache
+{
+  protected:
+    int replacementPolicy;  // one of the SMMU_CACHE_REPL_* values
+    size_t nextToReplace;   // round-robin victim cursor
+    Random random;
+    uint32_t useStamp;      // monotonically increasing LRU timestamp
+
+    Stats::Formula averageLookups;
+    Stats::Scalar totalLookups;
+
+    Stats::Formula averageMisses;
+    Stats::Scalar totalMisses;
+
+    Stats::Formula averageUpdates;
+    Stats::Scalar totalUpdates;
+
+    Stats::Formula averageHitRate;
+
+    Stats::Scalar insertions;
+
+    // Map the configuration policy name string to one of the
+    // SMMU_CACHE_REPL_* constants.
+    static int decodePolicyName(const std::string &policy_name);
+
+  public:
+    SMMUv3BaseCache(const std::string &policy_name, uint32_t seed);
+    virtual ~SMMUv3BaseCache() {}
+
+    virtual void regStats(const std::string &name);
+};
+
+// Per-slave-interface TLB caching final translations, tagged by
+// (sid, ssid, va) with extra (asid, vmid) tags used for invalidation.
+class SMMUTLB : public SMMUv3BaseCache
+{
+  public:
+    // Controls which ways store() may allocate into; the last way can
+    // be excluded from, or dedicated to, particular allocations.
+    enum AllocPolicy {
+        ALLOC_ANY_WAY,
+        ALLOC_ANY_BUT_LAST_WAY,
+        ALLOC_LAST_WAY,
+    };
+
+    struct Entry
+    {
+        bool valid;
+        bool prefetched;
+        mutable uint32_t lastUsed; // mutable: updated on const lookups
+
+        // TAGS
+        uint32_t sid;
+        uint32_t ssid;
+        Addr va;
+        Addr vaMask;
+
+        // EXTRA TAGS
+        uint16_t asid;
+        uint16_t vmid;
+
+        // OUTPUTS
+        Addr pa;
+        uint8_t permissions;
+    };
+
+    SMMUTLB(unsigned numEntries, unsigned _associativity,
+            const std::string &policy);
+    SMMUTLB(const SMMUTLB& tlb) = delete;
+    virtual ~SMMUTLB() {}
+
+    const Entry *lookup(uint32_t sid, uint32_t ssid, Addr va,
+                        bool updStats=true);
+    const Entry *lookupAnyVA(uint32_t sid, uint32_t ssid,
+                             bool updStats=true);
+    void store(const Entry &incoming, AllocPolicy alloc);
+
+    void invalidateVA(Addr va, uint16_t asid, uint16_t vmid);
+    void invalidateVAA(Addr va, uint16_t vmid);
+    void invalidateASID(uint16_t asid, uint16_t vmid);
+    void invalidateVMID(uint16_t vmid);
+    void invalidateAll();
+
+  private:
+    typedef std::vector<Entry> Set;
+    std::vector<Set> sets;
+
+    size_t associativity;
+
+    size_t pickSetIdx(Addr va) const;
+    size_t pickEntryIdxToReplace(const Set &set, AllocPolicy alloc);
+};
+
+// Architectural TLB caching final translations, tagged by
+// (va, asid, vmid).
+class ARMArchTLB : public SMMUv3BaseCache
+{
+  public:
+    struct Entry
+    {
+        bool valid;
+        mutable uint32_t lastUsed; // mutable: updated on const lookups
+
+        // TAGS
+        Addr va;
+        Addr vaMask;
+        uint16_t asid;
+        uint16_t vmid;
+
+        // OUTPUTS
+        Addr pa;
+        uint8_t permissions;
+    };
+
+    ARMArchTLB(unsigned numEntries, unsigned _associativity,
+               const std::string &policy);
+    virtual ~ARMArchTLB() {}
+
+    const Entry *lookup(Addr va, uint16_t asid, uint16_t vmid,
+                        bool updStats=true);
+
+    void store(const Entry &incoming);
+
+    void invalidateVA(Addr va, uint16_t asid, uint16_t vmid);
+    void invalidateVAA(Addr va, uint16_t vmid);
+    void invalidateASID(uint16_t asid, uint16_t vmid);
+    void invalidateVMID(uint16_t vmid);
+    void invalidateAll();
+
+  private:
+    typedef std::vector<Entry> Set;
+    std::vector<Set> sets;
+
+    size_t associativity;
+
+    size_t pickSetIdx(Addr va, uint16_t asid, uint16_t vmid) const;
+    size_t pickEntryIdxToReplace(const Set &set);
+};
+
+// Caches stage 2 translations from intermediate physical addresses
+// (IPA) to physical addresses, tagged by (ipa, vmid).
+class IPACache : public SMMUv3BaseCache
+{
+  public:
+    struct Entry
+    {
+        bool valid;
+        mutable uint32_t lastUsed; // mutable: updated on const lookups
+
+        // TAGS
+        Addr ipa;
+        Addr ipaMask;
+        uint16_t vmid;
+
+        // OUTPUTS
+        Addr pa;
+        uint8_t permissions;
+    };
+
+    IPACache(unsigned numEntries, unsigned _associativity,
+             const std::string &policy);
+    virtual ~IPACache() {}
+
+    const Entry *lookup(Addr ipa, uint16_t vmid, bool updStats=true);
+    void store(const Entry &incoming);
+
+    void invalidateIPA(Addr ipa, uint16_t vmid);
+    void invalidateIPAA(Addr ipa);
+    void invalidateVMID(uint16_t vmid);
+    void invalidateAll();
+
+  private:
+    typedef std::vector<Entry> Set;
+    std::vector<Set> sets;
+
+    size_t associativity;
+
+    size_t pickSetIdx(Addr ipa, uint16_t vmid) const;
+    size_t pickEntryIdxToReplace(const Set &set);
+};
+
+// Caches per-(sid, ssid) translation configuration (translation table
+// bases, ASID/VMID, granule sizes and stage enables).
+class ConfigCache : public SMMUv3BaseCache
+{
+  public:
+    struct Entry
+    {
+        bool valid;
+        mutable uint32_t lastUsed; // mutable: updated on const lookups
+
+        // TAGS
+        uint32_t sid;
+        uint32_t ssid;
+
+        // OUTPUTS
+        bool stage1_en;
+        bool stage2_en;
+        Addr ttb0;
+        Addr ttb1;
+        Addr httb;
+        uint16_t asid;
+        uint16_t vmid;
+        uint8_t stage1_tg;
+        uint8_t stage2_tg;
+    };
+
+    ConfigCache(unsigned numEntries, unsigned _associativity,
+                const std::string &policy);
+    virtual ~ConfigCache() {}
+
+    const Entry *lookup(uint32_t sid, uint32_t ssid, bool updStats=true);
+    void store(const Entry &incoming);
+
+    void invalidateSSID(uint32_t sid, uint32_t ssid);
+    void invalidateSID(uint32_t sid);
+    void invalidateAll();
+
+  private:
+    typedef std::vector<Entry> Set;
+    std::vector<Set> sets;
+
+    size_t associativity;
+
+    size_t pickSetIdx(uint32_t sid, uint32_t ssid) const;
+    size_t pickEntryIdxToReplace(const Set &set);
+};
+
+// Caches intermediate page-table walk results, tagged by
+// (va, asid, vmid, stage, level).  The backing store is partitioned so
+// each (stage, level) pair owns a private range of sets (see pickSetIdx).
+class WalkCache : public SMMUv3BaseCache
+{
+  public:
+    struct Entry
+    {
+        bool valid;
+        mutable uint32_t lastUsed; // mutable: updated on const lookups
+
+        // TAGS
+        Addr va;
+        Addr vaMask;
+        uint16_t asid;
+        uint16_t vmid;
+        unsigned stage;  // 1 or 2 (asserted in store())
+        unsigned level;  // 0 .. WALK_CACHE_LEVELS-1
+
+        // OUTPUTS
+        bool leaf;
+        Addr pa;
+        uint8_t permissions;
+    };
+
+    WalkCache(const std::array<unsigned, 2*WALK_CACHE_LEVELS> &_sizes,
+              unsigned _associativity, const std::string &policy);
+    virtual ~WalkCache() {}
+
+    const Entry *lookup(Addr va, Addr vaMask, uint16_t asid, uint16_t vmid,
+                        unsigned stage, unsigned level, bool updStats=true);
+    void store(const Entry &incoming);
+
+    void invalidateVA(Addr va, uint16_t asid, uint16_t vmid);
+    void invalidateVAA(Addr va, uint16_t vmid);
+    void invalidateASID(uint16_t asid, uint16_t vmid);
+    void invalidateVMID(uint16_t vmid);
+    void invalidateAll();
+
+    void regStats(const std::string &name) override;
+
+  protected:
+    // The raw counters below are incremented in lookup()/store() but
+    // were never initialised anywhere, so their initial values were
+    // indeterminate; zero-initialise them in-class.
+    unsigned int lookupsByStageLevel[2][WALK_CACHE_LEVELS] = {};
+    Stats::Formula averageLookupsByStageLevel[2][WALK_CACHE_LEVELS];
+    Stats::Scalar totalLookupsByStageLevel[2][WALK_CACHE_LEVELS];
+
+    unsigned int missesByStageLevel[2][WALK_CACHE_LEVELS] = {};
+    Stats::Formula averageMissesByStageLevel[2][WALK_CACHE_LEVELS];
+    Stats::Scalar totalMissesByStageLevel[2][WALK_CACHE_LEVELS];
+
+    unsigned int updatesByStageLevel[2][WALK_CACHE_LEVELS] = {};
+    Stats::Formula averageUpdatesByStageLevel[2][WALK_CACHE_LEVELS];
+    Stats::Scalar totalUpdatesByStageLevel[2][WALK_CACHE_LEVELS];
+
+    Stats::Formula averageHitRateByStageLevel[2][WALK_CACHE_LEVELS];
+
+    Stats::Scalar insertionsByStageLevel[2][WALK_CACHE_LEVELS];
+
+  private:
+    typedef std::vector<Entry> Set;
+    std::vector<Set> sets;
+
+    size_t associativity;
+    // Per (stage, level): number of sets and starting set index of
+    // that partition within 'sets'.
+    std::array<unsigned, 2*WALK_CACHE_LEVELS> sizes;
+    std::array<unsigned, 2*WALK_CACHE_LEVELS> offsets;
+
+    size_t pickSetIdx(Addr va, Addr vaMask,
+                      unsigned stage, unsigned level) const;
+
+    size_t pickEntryIdxToReplace(const Set &set,
+                                 unsigned stage, unsigned level);
+};
+
+#endif /* __DEV_ARM_SMMU_V3_CACHES_HH__ */
diff --git a/src/dev/arm/smmu_v3_cmdexec.cc b/src/dev/arm/smmu_v3_cmdexec.cc
new file mode 100644
index 0000000..8660846
--- /dev/null
+++ b/src/dev/arm/smmu_v3_cmdexec.cc
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2013, 2018-2019 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Stan Czerniawski
+ */
+
+#include "dev/arm/smmu_v3_cmdexec.hh"
+
+#include "base/bitfield.hh"
+#include "dev/arm/smmu_v3.hh"
+
+void
+SMMUCommandExecProcess::main(Yield &yield)
+{
+    // Coroutine body of the command-queue consumer: yield an initial
+    // no-op action, then loop forever draining the command queue and
+    // sleeping whenever it is empty.
+    SMMUAction a;
+    a.type = ACTION_INITIAL_NOP;
+    a.pkt = NULL;
+    a.ifc = nullptr;
+    a.delay = 0;
+    yield(a);
+
+    while (true) {
+        busy = true;
+
+        while (true) {
+            // The queue capacity is encoded in the size field of
+            // CMDQ_BASE; the same mask bounds both queue indices.
+            int sizeMask =
+                mask(smmu.regs.cmdq_base & Q_BASE_SIZE_MASK) & Q_CONS_PROD_MASK;
+
+            if ((smmu.regs.cmdq_cons & sizeMask) ==
+                    (smmu.regs.cmdq_prod & sizeMask))
+                break; // command queue empty
+
+            Addr cmdAddr =
+                (smmu.regs.cmdq_base & Q_BASE_ADDR_MASK) +
+                (smmu.regs.cmdq_cons & sizeMask) * sizeof(SMMUCommand);
+
+            // This deliberately resets the error field in cmdq_cons!
+            // NOTE(review): the consumer index is advanced before the
+            // command is read and executed; confirm this ordering is
+            // safe w.r.t. software polling cmdq_cons.
+            smmu.regs.cmdq_cons = (smmu.regs.cmdq_cons + 1) & sizeMask;
+
+            doRead(yield, cmdAddr, &cmd, sizeof(SMMUCommand));
+            smmu.processCommand(cmd);
+        }
+
+        busy = false;
+
+        // Suspend until new work is signalled.
+        doSleep(yield);
+    }
+}
diff --git a/src/dev/arm/smmu_v3_cmdexec.hh b/src/dev/arm/smmu_v3_cmdexec.hh
new file mode 100644
index 0000000..4ce3958
--- /dev/null
+++ b/src/dev/arm/smmu_v3_cmdexec.hh
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2013, 2018-2019 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Stan Czerniawski
+ */
+
+#ifndef __DEV_ARM_SMMU_V3_CMDEXEC_HH__
+#define __DEV_ARM_SMMU_V3_CMDEXEC_HH__
+
+#include "dev/arm/smmu_v3_defs.hh"
+#include "dev/arm/smmu_v3_proc.hh"
+
+class SMMUv3;
+
+// Coroutine-based process that drains the SMMU command queue.  The
+// busy flag lets other SMMU code query whether command execution is
+// currently in progress.
+class SMMUCommandExecProcess : public SMMUProcess
+{
+  private:
+    SMMUCommand cmd;  // staging buffer for the command being read
+
+    bool busy;        // true while commands are being consumed
+
+    virtual void main(Yield &yield);
+
+  public:
+    SMMUCommandExecProcess(const std::string &name, SMMUv3 &_smmu) :
+        SMMUProcess(name, _smmu),
+        busy(false)
+    {
+        // Prime the coroutine so it runs up to its first yield.
+        reinit();
+    }
+
+    virtual ~SMMUCommandExecProcess() {}
+
+    bool isBusy() const { return busy; }
+};
+
+#endif  /* __DEV_ARM_SMMU_V3_CMDEXEC_HH__ */
diff --git a/src/dev/arm/smmu_v3_defs.hh b/src/dev/arm/smmu_v3_defs.hh
new file mode 100644
index 0000000..f74f819
--- /dev/null
+++ b/src/dev/arm/smmu_v3_defs.hh
@@ -0,0 +1,363 @@
+/*
+ * Copyright (c) 2013, 2018-2019 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Stan Czerniawski
+ */
+
+#ifndef __DEV_ARM_SMMU_V3_DEFS_HH__
+#define __DEV_ARM_SMMU_V3_DEFS_HH__
+
+#include <stdint.h>
+
+#include "base/bitunion.hh"
+
+enum {
+    SMMU_SECURE_SZ = 0x184, // Secure regs are within page0
+    SMMU_PAGE_ZERO_SZ = 0x10000,
+    SMMU_PAGE_ONE_SZ = 0x10000,
+    SMMU_REG_SIZE = SMMU_PAGE_ONE_SZ + SMMU_PAGE_ZERO_SZ
+};
+
+enum {
+    STE_CONFIG_ABORT        = 0x0,
+    STE_CONFIG_BYPASS       = 0x4,
+    STE_CONFIG_STAGE1_ONLY  = 0x5,
+    STE_CONFIG_STAGE2_ONLY  = 0x6,
+    STE_CONFIG_STAGE1_AND_2 = 0x7,
+};
+
+enum {
+    STAGE1_CFG_1L     = 0x0,
+    STAGE1_CFG_2L_4K  = 0x1,
+    STAGE1_CFG_2L_64K = 0x2,
+};
+
+enum {
+    ST_CFG_SPLIT_SHIFT = 6,
+    ST_CD_ADDR_SHIFT   = 5,
+    CD_TTB_SHIFT       = 4,
+    STE_S2TTB_SHIFT    = 4,
+};
+
+enum {
+    TRANS_GRANULE_4K      = 0x0,
+    TRANS_GRANULE_64K     = 0x1,
+    TRANS_GRANULE_16K     = 0x2,
+    TRANS_GRANULE_INVALID = 0x3,
+};
+
+enum {
+    ST_BASE_ADDR_MASK  = 0x0000ffffffffffe0ULL,
+    ST_CFG_SIZE_MASK   = 0x000000000000003fULL,
+    ST_CFG_SPLIT_MASK  = 0x00000000000007c0ULL,
+    ST_CFG_FMT_MASK    = 0x0000000000030000ULL,
+    ST_CFG_FMT_LINEAR  = 0x0000000000000000ULL,
+    ST_CFG_FMT_2LEVEL  = 0x0000000000010000ULL,
+    ST_L2_SPAN_MASK    = 0x000000000000001fULL,
+    ST_L2_ADDR_MASK    = 0x0000ffffffffffe0ULL,
+
+    VMT_BASE_ADDR_MASK = 0x0000ffffffffffe0ULL,
+    VMT_BASE_SIZE_MASK = 0x000000000000001fULL,
+
+    Q_CONS_PROD_MASK   = 0x00000000000fffffULL,
+    Q_BASE_ADDR_MASK   = 0x0000ffffffffffe0ULL,
+    Q_BASE_SIZE_MASK   = 0x000000000000001fULL,
+
+    E_BASE_ENABLE_MASK = 0x8000000000000000ULL,
+    E_BASE_ADDR_MASK   = 0x0000fffffffffffcULL,
+};
+
+union SMMURegs
+{
+    uint8_t data[SMMU_REG_SIZE];
+
+    struct
+    {
+        uint32_t idr0;        // 0x0000
+        uint32_t idr1;        // 0x0004
+        uint32_t idr2;        // 0x0008
+        uint32_t idr3;        // 0x000c
+        uint32_t idr4;        // 0x0010
+        uint32_t idr5;        // 0x0014
+        uint32_t iidr;        // 0x0018
+        uint32_t aidr;        // 0x001c
+        uint32_t cr0;         // 0x0020
+        uint32_t cr0ack;      // 0x0024
+        uint32_t cr1;         // 0x0028
+        uint32_t cr2;         // 0x002c
+        uint32_t _pad1;       // 0x0030
+        uint32_t _pad2;       // 0x0034
+        uint32_t _pad3;       // 0x0038
+        uint32_t _pad4;       // 0x003c
+        uint32_t statusr;     // 0x0040
+        uint32_t gbpa;        // 0x0044
+        uint32_t agbpa;       // 0x0048
+        uint32_t _pad5;       // 0x004c
+        uint32_t irq_ctrl;    // 0x0050
+        uint32_t irq_ctrlack; // 0x0054
+        uint32_t _pad6;       // 0x0058
+        uint32_t _pad7;       // 0x005c
+
+        uint32_t gerror;          // 0x0060
+        uint32_t gerrorn;         // 0x0064
+        uint64_t gerror_irq_cfg0; // 0x0068, 64 bit
+        uint32_t gerror_irq_cfg1; // 0x0070
+        uint32_t gerror_irq_cfg2; // 0x0074
+        uint32_t _pad_1;          // 0x0078
+        uint32_t _pad_2;          // 0x007c
+
+        uint64_t strtab_base;     // 0x0080, 64 bit
+        uint32_t strtab_base_cfg; // 0x0088
+
+        uint64_t cmdq_base;       // 0x0090, 64 bit
+        uint32_t cmdq_prod;       // 0x0098
+        uint32_t cmdq_cons;       // 0x009c
+        uint64_t eventq_base;     // 0x00a0, 64 bit
+        uint32_t _pad8;           // 0x00a8
+        uint32_t _pad9;           // 0x00ac
+        uint64_t eventq_irq_cfg0; // 0x00b0, 64 bit
+        uint32_t eventq_irq_cfg1; // 0x00b8
+        uint32_t eventq_irq_cfg2; // 0x00bc
+        uint64_t priq_base;       // 0x00c0, 64 bit
+        uint32_t _pad10;          // 0x00c8
+        uint32_t _pad11;          // 0x00cc
+
+        uint64_t priq_irq_cfg0;   // 0x00d0
+        uint32_t priq_irq_cfg1;   // 0x00d8
+        uint32_t priq_irq_cfg2;   // 0x00dc
+
+        uint32_t _pad12[8];       // 0x00e0 - 0x0100
+        uint32_t gatos_ctrl;      // 0x0100
+        uint32_t _pad13;          // 0x0104
+        uint64_t gatos_sid;       // 0x0108
+        uint64_t gatos_addr;      // 0x0110
+        uint64_t gatos_par;       // 0x0118
+        uint32_t _pad14[24];      // 0x0120
+        uint32_t vatos_sel;       // 0x0180
+
+        uint32_t _pad15[8095];    // 0x184 - 0x7ffc
+
+        uint8_t  _secure_regs[SMMU_SECURE_SZ]; // 0x8000 - 0x8180
+
+        uint32_t _pad16[8095];    // 0x8184 - 0x10000
+
+        // Page 1
+        uint32_t _pad17[42];      // 0x10000
+        uint32_t eventq_prod;     // 0x100A8
+        uint32_t eventq_cons;     // 0x100AC
+
+        uint32_t _pad18[6];       // 0x100B0
+        uint32_t priq_prod;       // 0x100C8
+        uint32_t priq_cons;       // 0x100CC
+    };
+};
+
+struct StreamTableEntry
+{
+    BitUnion64(DWORD0)
+        Bitfield<0>       valid;
+        Bitfield<3, 1>    config;
+        Bitfield<5, 4>    s1fmt;
+        Bitfield<51, 6>   s1ctxptr;
+        Bitfield<63, 59>  s1cdmax;
+    EndBitUnion(DWORD0)
+    DWORD0 dw0;
+
+    BitUnion64(DWORD1)
+        Bitfield<1, 0>   s1dss;
+        Bitfield<3, 2>   s1cir;
+        Bitfield<5, 4>   s1cor;
+        Bitfield<7, 6>   s1csh;
+        Bitfield<8>      s2hwu59;
+        Bitfield<9>      s2hwu60;
+        Bitfield<10>     s2hwu61;
+        Bitfield<11>     s2hwu62;
+        Bitfield<12>     dre;
+        Bitfield<16, 13> cont;
+        Bitfield<17>     dcp;
+        Bitfield<18>     ppar;
+        Bitfield<19>     mev;
+        Bitfield<27>     s1stalld;
+        Bitfield<29, 28> eats;
+        Bitfield<31, 30> strw;
+        Bitfield<35, 32> memattr;
+        Bitfield<36>     mtcfg;
+        Bitfield<40, 37> alloccfg;
+        Bitfield<45, 44> shcfg;
+        Bitfield<47, 46> nscfg;
+        Bitfield<49, 48> privcfg;
+        Bitfield<51, 50> instcfg;
+    EndBitUnion(DWORD1)
+    DWORD1 dw1;
+
+    BitUnion64(DWORD2)
+        Bitfield<15, 0>  s2vmid;
+        Bitfield<37, 32> s2t0sz;
+        Bitfield<39, 38> s2sl0;
+        Bitfield<41, 40> s2ir0;
+        Bitfield<43, 42> s2or0;
+        Bitfield<45, 44> s2sh0;
+        Bitfield<47, 46> s2tg;
+        Bitfield<50, 48> s2ps;
+        Bitfield<51>     s2aa64;
+        Bitfield<52>     s2endi;
+        Bitfield<53>     s2affd;
+        Bitfield<54>     s2ptw;
+        Bitfield<55>     s2hd;
+        Bitfield<56>     s2ha;
+        Bitfield<57>     s2s;
+        Bitfield<58>     s2r;
+    EndBitUnion(DWORD2)
+    DWORD2 dw2;
+
+    BitUnion64(DWORD3)
+        Bitfield<51, 4> s2ttb;
+    EndBitUnion(DWORD3)
+    DWORD3 dw3;
+
+    uint64_t _pad[4];
+};
+
+struct ContextDescriptor
+{
+    BitUnion64(DWORD0)
+        Bitfield<5, 0>   t0sz;
+        Bitfield<7, 6>   tg0;
+        Bitfield<9, 8>   ir0;
+        Bitfield<11, 10> or0;
+        Bitfield<13, 12> sh0;
+        Bitfield<14>     epd0;
+        Bitfield<15>     endi;
+        Bitfield<21, 16> t1sz;
+        Bitfield<23, 22> tg1;
+        Bitfield<25, 24> ir1;
+        Bitfield<27, 26> or1;
+        Bitfield<29, 28> sh1;
+        Bitfield<30>     epd1;
+        Bitfield<31>     valid;
+        Bitfield<34, 32> ips;
+        Bitfield<35>     affd;
+        Bitfield<36>     wxn;
+        Bitfield<37>     uwxn;
+        Bitfield<39, 38> tbi;
+        Bitfield<40>     pan;
+        Bitfield<41>     aa64;
+        Bitfield<42>     hd;
+        Bitfield<43>     ha;
+        Bitfield<44>     s;
+        Bitfield<45>     r;
+        Bitfield<46>     a;
+        Bitfield<47>     aset;
+        Bitfield<63, 48> asid;
+    EndBitUnion(DWORD0)
+    DWORD0 dw0;
+
+    BitUnion64(DWORD1)
+        Bitfield<0>      nscfg0;
+        Bitfield<1>      had0;
+        Bitfield<51, 4>  ttb0;
+        Bitfield<60>     hwu0g59;
+        Bitfield<61>     hwu0g60;
+        Bitfield<62>     hwu0g61;
+        Bitfield<63>     hwu0g62;
+    EndBitUnion(DWORD1)
+    DWORD1 dw1;
+
+    BitUnion64(DWORD2)
+        Bitfield<0>      nscfg1;
+        Bitfield<1>      had1;
+        Bitfield<51, 4>  ttb1;
+        Bitfield<60>     hwu1g59;
+        Bitfield<61>     hwu1g60;
+        Bitfield<62>     hwu1g61;
+        Bitfield<63>     hwu1g62;
+    EndBitUnion(DWORD2)
+    DWORD2 dw2;
+
+    uint64_t mair;
+    uint64_t amair;
+    uint64_t _pad[3];
+};
+
+enum SMMUCommandType {
+    CMD_PRF_CONFIG   = 0x1000,
+    CMD_PRF_ADDR     = 0x1001,
+    CMD_INV_STE      = 0x1100,
+    CMD_INV_CD       = 0x1101,
+    CMD_INV_CD_ALL   = 0x1102,
+    CMD_INV_ALL      = 0x1104,
+    CMD_TLBI_ALL     = 0x1110,
+    CMD_TLBI_ASID    = 0x1111,
+    CMD_TLBI_VAAL    = 0x1112,
+    CMD_TLBI_VAA     = 0x1113,
+    CMD_TLBI_VAL     = 0x1114,
+    CMD_TLBI_VA      = 0x1115,
+    CMD_TLBI_VM_IPAL = 0x1120,
+    CMD_TLBI_VM_IPA  = 0x1121,
+    CMD_TLBI_VM_S12  = 0x1122,
+    CMD_RESUME_S     = 0x1200,
+};
+
+struct SMMUCommand
+{
+    uint32_t type;
+    uint32_t data[3];
+};
+
+enum SMMUEventTypes {
+    EVT_FAULT = 0x0001,
+};
+
+enum SMMUEventFlags {
+    EVF_WRITE = 0x0001,
+};
+
+struct SMMUEvent
+{
+    uint16_t type;
+    uint16_t stag;
+    uint32_t flags;
+    uint32_t streamId;
+    uint32_t substreamId;
+    uint64_t va;
+    uint64_t ipa;
+};
+
+enum {
+    SMMU_MAX_TRANS_ID = 64
+};
+
+#endif /* __DEV_ARM_SMMU_V3_DEFS_HH__ */
diff --git a/src/dev/arm/smmu_v3_events.cc b/src/dev/arm/smmu_v3_events.cc
new file mode 100644
index 0000000..e548fb0
--- /dev/null
+++ b/src/dev/arm/smmu_v3_events.cc
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2014, 2018-2019 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Stan Czerniawski
+ */
+
+#include "dev/arm/smmu_v3_events.hh"
+
+#include "dev/arm/smmu_v3_slaveifc.hh"
+
+void
+SMMUDeviceRetryEvent::process()
+{
+    smmuIfc.sendDeviceRetry();
+}
+
+const std::string
+SMMUDeviceRetryEvent::name() const
+{
+    return smmuIfc.name() + ".device_retry_event";
+}
diff --git a/src/dev/arm/smmu_v3_events.hh b/src/dev/arm/smmu_v3_events.hh
new file mode 100644
index 0000000..21dbca8
--- /dev/null
+++ b/src/dev/arm/smmu_v3_events.hh
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2014, 2018-2019 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Stan Czerniawski
+ */
+
+#ifndef __DEV_ARM_SMMU_V3_EVENTS_HH__
+#define __DEV_ARM_SMMU_V3_EVENTS_HH__
+
+#include "base/types.hh"
+#include "sim/eventq.hh"
+
+class SMMUv3SlaveInterface;
+
+// Event used to retry a previously rejected request on an SMMU slave
+// interface; process() forwards to the interface's sendDeviceRetry()
+// (see smmu_v3_events.cc).
+class SMMUDeviceRetryEvent : public Event
+{
+  private:
+    SMMUv3SlaveInterface &smmuIfc;
+
+  public:
+    SMMUDeviceRetryEvent(SMMUv3SlaveInterface &ifc)
+        : smmuIfc(ifc)
+    {}
+
+    void process();
+
+    // Name is derived from the owning interface for tracing.
+    const std::string name() const;
+
+    const char *description() const
+    { return "SlaveRetryEvent"; }
+};
+
+#endif /* __DEV_ARM_SMMU_V3_EVENTS_HH__ */
diff --git a/src/dev/arm/smmu_v3_ports.cc b/src/dev/arm/smmu_v3_ports.cc
new file mode 100644
index 0000000..1785925
--- /dev/null
+++ b/src/dev/arm/smmu_v3_ports.cc
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2013, 2018-2019 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Stan Czerniawski
+ */
+
+#include "dev/arm/smmu_v3_ports.hh"
+
+#include "base/logging.hh"
+#include "dev/arm/smmu_v3.hh"
+#include "dev/arm/smmu_v3_slaveifc.hh"
+
+SMMUMasterPort::SMMUMasterPort(const std::string &_name, SMMUv3 &_smmu) :
+    MasterPort(_name, &_smmu),
+    smmu(_smmu)
+{}
+
+bool
+SMMUMasterPort::recvTimingResp(PacketPtr pkt)
+{
+    return smmu.masterRecvTimingResp(pkt);
+}
+
+void
+SMMUMasterPort::recvReqRetry()
+{
+    return smmu.masterRecvReqRetry();
+}
+
+SMMUMasterTableWalkPort::SMMUMasterTableWalkPort(const std::string &_name,
+                                                 SMMUv3 &_smmu) :
+    MasterPort(_name, &_smmu),
+    smmu(_smmu)
+{}
+
+bool
+SMMUMasterTableWalkPort::recvTimingResp(PacketPtr pkt)
+{
+    return smmu.masterTableWalkRecvTimingResp(pkt);
+}
+
+void
+SMMUMasterTableWalkPort::recvReqRetry()
+{
+    return smmu.masterTableWalkRecvReqRetry();
+}
+
+SMMUSlavePort::SMMUSlavePort(const std::string &_name,
+                             SMMUv3SlaveInterface &_ifc,
+                             PortID _id)
+:
+    QueuedSlavePort(_name, &_ifc, respQueue, _id),
+    ifc(_ifc),
+    respQueue(_ifc, *this)
+{}
+
+void
+SMMUSlavePort::recvFunctional(PacketPtr pkt)
+{
+    // Try to satisfy the access from any queued responses first; if
+    // that fails, fall back to the atomic path to perform the access.
+    if (!respQueue.trySatisfyFunctional(pkt))
+        recvAtomic(pkt);
+}
+
+Tick
+SMMUSlavePort::recvAtomic(PacketPtr pkt)
+{
+    return ifc.recvAtomic(pkt);
+}
+
+bool
+SMMUSlavePort::recvTimingReq(PacketPtr pkt)
+{
+    return ifc.recvTimingReq(pkt);
+}
+
+SMMUControlPort::SMMUControlPort(const std::string &_name,
+                                 SMMUv3 &_smmu, AddrRange _addrRange)
+:
+    SimpleTimingPort(_name, &_smmu),
+    smmu(_smmu),
+    addrRange(_addrRange)
+{}
+
+Tick
+SMMUControlPort::recvAtomic(PacketPtr pkt)
+{
+    Addr addr = pkt->getAddr();
+    unsigned size = pkt->getSize();
+
+    // The access covers [addr, addr + size - 1].  Checking addr + size
+    // (one past the end) would spuriously panic on a legal access that
+    // ends exactly at the top of the register range.
+    if (!addrRange.contains(addr) || !addrRange.contains(addr + size - 1))
+        panic("SMMU: invalid address on control port %x, packet size %d",
+                addr, size);
+
+    // @todo: We need to pay for this and not just zero it out
+    pkt->headerDelay = pkt->payloadDelay = 0;
+
+    return pkt->isRead() ? smmu.readControl(pkt) : smmu.writeControl(pkt);
+}
+
+AddrRangeList
+SMMUControlPort::getAddrRanges() const
+{
+    AddrRangeList list;
+    list.push_back(addrRange);
+    return list;
+}
+
+SMMUATSMasterPort::SMMUATSMasterPort(const std::string &_name,
+                                     SMMUv3SlaveInterface &_ifc) :
+    QueuedMasterPort(_name, &_ifc, reqQueue, snoopRespQueue),
+    ifc(_ifc),
+    reqQueue(_ifc, *this),
+    snoopRespQueue(_ifc, *this)
+{}
+
+bool
+SMMUATSMasterPort::recvTimingResp(PacketPtr pkt)
+{
+    return ifc.atsMasterRecvTimingResp(pkt);
+}
+
+SMMUATSSlavePort::SMMUATSSlavePort(const std::string &_name,
+                                   SMMUv3SlaveInterface &_ifc) :
+    QueuedSlavePort(_name, &_ifc, respQueue),
+    ifc(_ifc),
+    respQueue(_ifc, *this)
+{}
+
+void
+SMMUATSSlavePort::recvFunctional(PacketPtr pkt)
+{
+    panic("Functional access on ATS port!");
+}
+
+Tick
+SMMUATSSlavePort::recvAtomic(PacketPtr pkt)
+{
+    return ifc.atsSlaveRecvAtomic(pkt);
+}
+
+bool
+SMMUATSSlavePort::recvTimingReq(PacketPtr pkt)
+{
+    return ifc.atsSlaveRecvTimingReq(pkt);
+}
diff --git a/src/dev/arm/smmu_v3_ports.hh b/src/dev/arm/smmu_v3_ports.hh
new file mode 100644
index 0000000..bdd10e5
--- /dev/null
+++ b/src/dev/arm/smmu_v3_ports.hh
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2013, 2018-2019 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Stan Czerniawski
+ */
+
+#ifndef __DEV_ARM_SMMU_V3_PORTS_HH__
+#define __DEV_ARM_SMMU_V3_PORTS_HH__
+
+#include "mem/qport.hh"
+#include "mem/tport.hh"
+
+class SMMUv3;
+class SMMUv3SlaveInterface;
+
+class SMMUMasterPort : public MasterPort
+{
+  protected:
+    SMMUv3 &smmu;
+
+    virtual bool recvTimingResp(PacketPtr pkt);
+    virtual void recvReqRetry();
+
+  public:
+    SMMUMasterPort(const std::string &_name, SMMUv3 &_smmu);
+    virtual ~SMMUMasterPort() {}
+};
+
+// Separate master port to send MMU initiated requests on
+class SMMUMasterTableWalkPort : public MasterPort
+{
+  protected:
+    SMMUv3 &smmu;
+
+    virtual bool recvTimingResp(PacketPtr pkt);
+    virtual void recvReqRetry();
+
+  public:
+    SMMUMasterTableWalkPort(const std::string &_name, SMMUv3 &_smmu);
+    virtual ~SMMUMasterTableWalkPort() {}
+};
+
+class SMMUSlavePort : public QueuedSlavePort
+{
+  protected:
+    SMMUv3SlaveInterface &ifc;
+    RespPacketQueue respQueue;
+
+    virtual void recvFunctional(PacketPtr pkt);
+    virtual Tick recvAtomic(PacketPtr pkt);
+    virtual bool recvTimingReq(PacketPtr pkt);
+
+  public:
+    SMMUSlavePort(const std::string &_name,
+                  SMMUv3SlaveInterface &_ifc,
+                  PortID _id = InvalidPortID);
+    virtual ~SMMUSlavePort() {}
+
+    virtual AddrRangeList getAddrRanges() const
+    { return AddrRangeList { AddrRange(0, UINT64_MAX) }; }
+};
+
+class SMMUControlPort : public SimpleTimingPort
+{
+  protected:
+    SMMUv3 &smmu;
+    AddrRange addrRange;
+
+    virtual Tick recvAtomic(PacketPtr pkt);
+    virtual AddrRangeList getAddrRanges() const;
+
+  public:
+    SMMUControlPort(const std::string &_name, SMMUv3 &_smmu,
+                    AddrRange _addrRange);
+    virtual ~SMMUControlPort() {}
+};
+
+class SMMUATSMasterPort : public QueuedMasterPort
+{
+  protected:
+    SMMUv3SlaveInterface &ifc;
+    ReqPacketQueue reqQueue;
+    SnoopRespPacketQueue snoopRespQueue;
+
+    virtual bool recvTimingResp(PacketPtr pkt);
+
+  public:
+    SMMUATSMasterPort(const std::string &_name, SMMUv3SlaveInterface &_ifc);
+    virtual ~SMMUATSMasterPort() {}
+};
+
+class SMMUATSSlavePort : public QueuedSlavePort
+{
+  protected:
+    SMMUv3SlaveInterface &ifc;
+    RespPacketQueue respQueue;
+
+    virtual void recvFunctional(PacketPtr pkt);
+    virtual Tick recvAtomic(PacketPtr pkt);
+    virtual bool recvTimingReq(PacketPtr pkt);
+
+    virtual AddrRangeList getAddrRanges() const
+    { return AddrRangeList(); }
+
+  public:
+    SMMUATSSlavePort(const std::string &_name, SMMUv3SlaveInterface &_ifc);
+    virtual ~SMMUATSSlavePort() {}
+};
+
+#endif /* __DEV_ARM_SMMU_V3_PORTS_HH__ */
diff --git a/src/dev/arm/smmu_v3_proc.cc b/src/dev/arm/smmu_v3_proc.cc
new file mode 100644
index 0000000..71888bd
--- /dev/null
+++ b/src/dev/arm/smmu_v3_proc.cc
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2013, 2018-2019 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Stan Czerniawski
+ */
+
+#include "dev/arm/smmu_v3_proc.hh"
+
+#include "dev/arm/smmu_v3.hh"
+#include "sim/system.hh"
+
+SMMUProcess::SMMUProcess(const std::string &name, SMMUv3 &_smmu) :
+    coroutine(NULL),
+    myName(name),
+    smmu(_smmu)
+{}
+
+SMMUProcess::~SMMUProcess()
+{
+    delete coroutine;
+}
+
+void
+SMMUProcess::wakeup()
+{
+    smmu.runProcess(this, NULL);
+}
+
+void
+SMMUProcess::reinit()
+{
+    delete coroutine;
+    coroutine = new Coroutine(
+        std::bind(&SMMUProcess::main, this, std::placeholders::_1));
+}
+
+// Issue a read of 'size' bytes at 'addr' on behalf of this coroutine
+// process; the response data is written directly into 'ptr'.  Suspends
+// the coroutine until the response packet arrives.
+void
+SMMUProcess::doRead(Yield &yield, Addr addr, void *ptr, size_t size)
+{
+    // Model master-port contention: hold the port for one cycle to
+    // issue the request.
+    doSemaphoreDown(yield, smmu.masterPortSem);
+    doDelay(yield, Cycles(1)); // request - assume 1 cycle
+    doSemaphoreUp(smmu.masterPortSem);
+
+    SMMUAction a;
+    a.type = ACTION_SEND_REQ;
+
+    RequestPtr req = std::make_shared<Request>(
+        addr, size, 0, smmu.masterId);
+
+    req->taskId(ContextSwitchTaskId::DMA);
+
+    a.pkt = new Packet(req, MemCmd::ReadReq);
+    // Response data lands straight in the caller-provided buffer.
+    a.pkt->dataStatic(ptr);
+
+    a.delay = 0;
+
+    // Yield the action to the SMMU; the coroutine resumes with the
+    // response packet.
+    PacketPtr pkt = yield(a).get();
+
+    assert(pkt);
+    // >= because we may get the whole cache line
+    assert(pkt->getSize() >= size);
+
+    delete pkt;
+}
+
+// Issue a write of 'size' bytes from 'ptr' to 'addr'.  Unlike doRead,
+// the port is held for one cycle per data beat of the master port.
+void
+SMMUProcess::doWrite(Yield &yield, Addr addr, const void *ptr, size_t size)
+{
+    // Number of beats needed to transfer 'size' bytes (ceiling divide).
+    unsigned nbeats = (size + (smmu.masterPortWidth-1)) / smmu.masterPortWidth;
+
+    doSemaphoreDown(yield, smmu.masterPortSem);
+    doDelay(yield, Cycles(nbeats));
+    doSemaphoreUp(smmu.masterPortSem);
+
+
+    SMMUAction a;
+    a.type = ACTION_SEND_REQ;
+
+    RequestPtr req = std::make_shared<Request>(
+        addr, size, 0, smmu.masterId);
+
+    req->taskId(ContextSwitchTaskId::DMA);
+
+    a.pkt = new Packet(req, MemCmd::WriteReq);
+    a.pkt->dataStatic(ptr);
+
+    // Suspend until the write response comes back.
+    PacketPtr pkt = yield(a).get();
+
+    delete pkt;
+}
+
+// Stall this process for 'cycles' SMMU clock cycles.
+void
+SMMUProcess::doDelay(Yield &yield, Cycles cycles)
+{
+    // In timing mode schedule our own wakeup event; in atomic mode the
+    // delay is only reported back to the caller through the action.
+    if (smmu.system.isTimingMode())
+        scheduleWakeup(smmu.clockEdge(cycles));
+
+    SMMUAction a;
+    a.type = ACTION_DELAY;
+    a.delay = cycles * smmu.clockPeriod();
+    yield(a);
+}
+
+void
+SMMUProcess::doSleep(Yield &yield)
+{
+    SMMUAction a;
+    a.type = ACTION_SLEEP;
+    yield(a);
+}
+
+// Acquire 'sem', sleeping while no units are available.
+void
+SMMUProcess::doSemaphoreDown(Yield &yield, SMMUSemaphore &sem)
+{
+    // Re-check after every wakeup: another process may have taken the
+    // unit between our wakeup being scheduled and us running again.
+    while (sem.count == 0) {
+        sem.queue.push(this);
+        doSleep(yield);
+    }
+
+    sem.count--;
+    return;
+}
+
+// Release one unit of 'sem' and wake the next waiter, if any.
+void
+SMMUProcess::doSemaphoreUp(SMMUSemaphore &sem)
+{
+    sem.count++;
+    if (!sem.queue.empty()) {
+        SMMUProcess *next_proc = sem.queue.front();
+        sem.queue.pop();
+
+        // Schedule event in the current tick instead of
+        // calling the function directly to avoid overflowing
+        // the stack in this coroutine.
+        next_proc->scheduleWakeup(curTick());
+    }
+}
+
+void
+SMMUProcess::doWaitForSignal(Yield &yield, SMMUSignal &sig)
+{
+    sig.waiting.push_back(this);
+    doSleep(yield);
+}
+
+// Wake every process waiting on 'sig' and clear the wait list.
+void
+SMMUProcess::doBroadcastSignal(SMMUSignal &sig)
+{
+    if (!sig.waiting.empty()) {
+        for (auto it : sig.waiting) {
+            // Schedule event in the current tick instead of
+            // calling the function directly to avoid overflowing
+            // the stack in this coroutine.
+            it->scheduleWakeup(curTick());
+        }
+
+        sig.waiting.clear();
+    }
+}
+
+void
+SMMUProcess::scheduleWakeup(Tick when)
+{
+    auto *ep = new EventWrapper<
+        SMMUProcess, &SMMUProcess::wakeup> (this, true);
+
+    smmu.schedule(ep, when);
+}
+
+// Resume the coroutine, handing it 'pkt' (may be NULL), and return the
+// next action it yields.
+SMMUAction
+SMMUProcess::run(PacketPtr pkt)
+{
+    assert(coroutine != NULL);
+    assert(*coroutine); // the coroutine must not have finished
+    return (*coroutine)(pkt).get();
+}
diff --git a/src/dev/arm/smmu_v3_proc.hh b/src/dev/arm/smmu_v3_proc.hh
new file mode 100644
index 0000000..9444c01
--- /dev/null
+++ b/src/dev/arm/smmu_v3_proc.hh
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2013, 2018-2019 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Stan Czerniawski
+ */
+
+#ifndef __DEV_ARM_SMMU_V3_PROC_HH__
+#define __DEV_ARM_SMMU_V3_PROC_HH__
+
+#include <list>
+#include <queue>
+#include <string>
+
+#include "base/coroutine.hh"
+#include "base/types.hh"
+#include "mem/packet.hh"
+
+class SMMUv3SlaveInterface;
+
+/*
+ * The meaning of these becomes apparent when you
+ * look at runProcessAtomic()/runProcessTiming().
+ */
+enum SMMUActionType {
+    ACTION_INITIAL_NOP,
+    ACTION_SEND_REQ,
+    ACTION_SEND_REQ_FINAL,
+    ACTION_SEND_RESP,
+    ACTION_SEND_RESP_ATS,
+    ACTION_DELAY,
+    ACTION_SLEEP,
+    ACTION_TERMINATE,
+};
+
+// Request yielded by a process coroutine back to the SMMU model,
+// telling it what to do next on behalf of the process.
+struct SMMUAction
+{
+    SMMUActionType type;
+    PacketPtr pkt;
+    SMMUv3SlaveInterface *ifc;
+    Tick delay;
+};
+
+class SMMUv3;
+class SMMUProcess;
+
+// Counting semaphore used to model contention on SMMU resources.
+// See SMMUProcess::doSemaphoreDown()/doSemaphoreUp().
+struct SMMUSemaphore
+{
+    explicit SMMUSemaphore(unsigned _max) :
+        count(_max), max(_max)
+    {}
+
+    unsigned count;     // units currently available
+    unsigned max;       // capacity
+    std::queue<SMMUProcess *> queue;    // processes blocked in P()
+};
+
+// Broadcast condition variable: all waiters are woken at once.
+// See SMMUProcess::doWaitForSignal()/doBroadcastSignal().
+struct SMMUSignal
+{
+    std::list<SMMUProcess *> waiting;
+};
+
+// Base class for the coroutine-based processes that model concurrent
+// activity inside the SMMU.
+class SMMUProcess : public Packet::SenderState
+{
+  private:
+    typedef m5::Coroutine<PacketPtr, SMMUAction> Coroutine;
+
+    Coroutine *coroutine;
+    std::string myName;
+
+    void wakeup();
+
+  protected:
+    typedef Coroutine::CallerType Yield;
+
+    SMMUv3 &smmu;
+
+    void reinit();
+
+    // Process body, implemented by derived classes.
+    virtual void main(Yield &yield) = 0;
+
+    void doRead(Yield &yield, Addr addr, void *ptr, size_t size);
+    void doWrite(Yield &yield, Addr addr, const void *ptr, size_t size);
+    void doDelay(Yield &yield, Cycles cycles);
+    void doSleep(Yield &yield);
+
+    void doSemaphoreDown(Yield &yield, SMMUSemaphore &sem);
+    void doSemaphoreUp(SMMUSemaphore &sem);
+
+    void doWaitForSignal(Yield &yield, SMMUSignal &sig);
+    void doBroadcastSignal(SMMUSignal &sig);
+
+    void scheduleWakeup(Tick when);
+
+  public:
+    SMMUProcess(const std::string &name, SMMUv3 &_smmu);
+    virtual ~SMMUProcess();
+
+    // Resume the coroutine with a packet; returns the yielded action.
+    SMMUAction run(PacketPtr pkt);
+
+    const std::string name() const { return myName; }
+};
+
+#endif /* __DEV_ARM_SMMU_V3_PROC_HH__ */
diff --git a/src/dev/arm/smmu_v3_ptops.cc b/src/dev/arm/smmu_v3_ptops.cc
new file mode 100644
index 0000000..6490b34
--- /dev/null
+++ b/src/dev/arm/smmu_v3_ptops.cc
@@ -0,0 +1,316 @@
+/*
+ * Copyright (c) 2013, 2018-2019 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Stan Czerniawski
+ */
+
+#include "dev/arm/smmu_v3_ptops.hh"
+
+#include "base/bitfield.hh"
+#include "base/logging.hh"
+
+// A descriptor is valid when bit 0 is set; a level-3 descriptor must
+// also have bit 1 set, otherwise it is reserved/invalid.
+bool
+V7LPageTableOps::isValid(pte_t pte, unsigned level) const
+{
+    switch (level) {
+        case 1: return  pte & 0x1;
+        case 2: return  pte & 0x1;
+        case 3: return (pte & 0x1) && (pte & 0x2);
+        default: panic("bad level %d", level);
+    }
+}
+
+// Levels 1-2 hold a block (leaf) descriptor when bit 1 is clear;
+// level-3 descriptors are always leaves.
+bool
+V7LPageTableOps::isLeaf(pte_t pte, unsigned level) const
+{
+    switch (level) {
+        case 1: return !(pte & 0x2);
+        case 2: return !(pte & 0x2);
+        case 3: return true;
+        default: panic("bad level %d", level);
+    }
+}
+
+// Stage 2: bits[7:6] (S2AP) == 0b11 grants read/write access.
+// Stage 1: bit 7 (AP[2]) clear means writable.
+bool
+V7LPageTableOps::isWritable(pte_t pte, unsigned level, bool stage2) const
+{
+    return stage2 ? bits(pte, 7, 6)==3 : bits(pte, 7)==0;
+}
+
+// Output address held in the descriptor: base of the translated
+// block/page for a leaf, base of the next-level table otherwise.
+Addr
+V7LPageTableOps::nextLevelPointer(pte_t pte, unsigned level) const
+{
+    if (isLeaf(pte, level)) {
+        switch (level) {
+            case 1: return mbits(pte, 39, 30);
+            case 2: return mbits(pte, 39, 21);
+            case 3: return mbits(pte, 39, 12);
+            default: panic("bad level %d", level);
+        }
+    } else {
+        return mbits(pte, 39, 12);
+    }
+}
+
+// Byte offset of the descriptor selected by va at the given level
+// (descriptor index << 3, descriptors being 8 bytes).  The original
+// `break`s after the `return`s were unreachable and have been dropped.
+Addr
+V7LPageTableOps::index(Addr va, unsigned level) const
+{
+    // In theory this should be configurable...
+    const int n = 12;
+
+    switch (level) {
+        case 1: return bits(va, 26+n, 30) << 3;
+        case 2: return bits(va, 29, 21) << 3;
+        case 3: return bits(va, 20, 12) << 3;
+        default: panic("bad level %d", level);
+    }
+}
+
+// Mask clearing the in-page offset bits of an address for a leaf at
+// this level.  Bit 52 widens a level-3 page to 64KB (presumably the
+// contiguous hint -- TODO confirm against the translation code).
+Addr
+V7LPageTableOps::pageMask(pte_t pte, unsigned level) const
+{
+    switch (level) {
+        case 1: return ~mask(30);
+        case 2: return ~mask(21);
+        case 3: return bits(pte, 52) ? ~mask(16) : ~mask(12);
+        default: panic("bad level %d", level);
+    }
+}
+
+// VA bits resolved by the walk once this level has been processed.
+Addr
+V7LPageTableOps::walkMask(unsigned level) const
+{
+    switch (level) {
+        case 1: return mask(39, 30);
+        case 2: return mask(39, 21);
+        case 3: return mask(39, 12);
+        default: panic("bad level %d", level);
+    }
+}
+
+// V7 long-descriptor walks span levels 1..3.
+unsigned
+V7LPageTableOps::firstLevel() const
+{
+    return 1;
+}
+
+unsigned
+V7LPageTableOps::lastLevel() const
+{
+    return 3;
+}
+
+// A descriptor is valid when bit 0 is set; a level-3 descriptor must
+// also have bit 1 set, otherwise it is reserved/invalid.
+bool
+V8PageTableOps4k::isValid(pte_t pte, unsigned level) const
+{
+    switch (level) {
+        case 0: return  pte & 0x1;
+        case 1: return  pte & 0x1;
+        case 2: return  pte & 0x1;
+        case 3: return (pte & 0x1) && (pte & 0x2);
+        default: panic("bad level %d", level);
+    }
+}
+
+// Level 0 can only point to a table; levels 1-2 hold a block (leaf)
+// when bit 1 is clear; level-3 descriptors are always leaves.
+bool
+V8PageTableOps4k::isLeaf(pte_t pte, unsigned level) const
+{
+    switch (level) {
+        case 0: return false;
+        case 1: return !(pte & 0x2);
+        case 2: return !(pte & 0x2);
+        case 3: return true;
+        default: panic("bad level %d", level);
+    }
+}
+
+// Stage 2: bits[7:6] (S2AP) == 0b11 grants read/write access.
+// Stage 1: bit 7 (AP[2]) clear means writable.
+bool
+V8PageTableOps4k::isWritable(pte_t pte, unsigned level, bool stage2) const
+{
+    return stage2 ? bits(pte, 7, 6)==3 : bits(pte, 7)==0;
+}
+
+// Output address held in the descriptor: base of the translated
+// block/page for a leaf, base of the next-level table otherwise.
+Addr
+V8PageTableOps4k::nextLevelPointer(pte_t pte, unsigned level) const
+{
+    if (isLeaf(pte, level)) {
+        switch (level) {
+            // no level 0 here
+            case 1: return mbits(pte, 47, 30);
+            case 2: return mbits(pte, 47, 21);
+            case 3: return mbits(pte, 47, 12);
+            default: panic("bad level %d", level);
+        }
+    } else {
+        return mbits(pte, 47, 12);
+    }
+}
+
+// Byte offset of the descriptor selected by va at the given level
+// (descriptor index << 3, descriptors being 8 bytes).  The original
+// `break`s after the `return`s were unreachable and have been dropped.
+Addr
+V8PageTableOps4k::index(Addr va, unsigned level) const
+{
+    switch (level) {
+        case 0: return bits(va, 47, 39) << 3;
+        case 1: return bits(va, 38, 30) << 3;
+        case 2: return bits(va, 29, 21) << 3;
+        case 3: return bits(va, 20, 12) << 3;
+        default: panic("bad level %d", level);
+    }
+}
+
+// Mask clearing the in-page offset bits of an address for a leaf at
+// this level.  Bit 52 widens a level-3 page to 64KB (presumably the
+// contiguous hint -- TODO confirm against the translation code).
+Addr
+V8PageTableOps4k::pageMask(pte_t pte, unsigned level) const
+{
+    switch (level) {
+        // no level 0 here
+        case 1: return ~mask(30);
+        case 2: return ~mask(21);
+        case 3: return bits(pte, 52) ? ~mask(16) : ~mask(12);
+        default: panic("bad level %d", level);
+    }
+}
+
+// VA bits resolved by the walk once this level has been processed.
+Addr
+V8PageTableOps4k::walkMask(unsigned level) const
+{
+    switch (level) {
+        case 0: return mask(47, 39);
+        case 1: return mask(47, 30);
+        case 2: return mask(47, 21);
+        case 3: return mask(47, 12);
+        default: panic("bad level %d", level);
+    }
+}
+
+// VMSAv8 4k-granule walks span levels 0..3.
+unsigned
+V8PageTableOps4k::firstLevel() const
+{
+    return 0;
+}
+
+unsigned
+V8PageTableOps4k::lastLevel() const
+{
+    return 3;
+}
+
+// A descriptor is valid when bit 0 is set; a level-3 descriptor must
+// also have bit 1 set, otherwise it is reserved/invalid.
+bool
+V8PageTableOps64k::isValid(pte_t pte, unsigned level) const
+{
+    switch (level) {
+        case 1: return  pte & 0x1;
+        case 2: return  pte & 0x1;
+        case 3: return (pte & 0x1) && (pte & 0x2);
+        default: panic("bad level %d", level);
+    }
+}
+
+// With a 64k granule level 1 can only point to a table; level 2 holds
+// a block (leaf) when bit 1 is clear; level 3 is always a leaf.
+bool
+V8PageTableOps64k::isLeaf(pte_t pte, unsigned level) const
+{
+    switch (level) {
+        case 1: return false;
+        case 2: return !(pte & 0x2);
+        case 3: return true;
+        default: panic("bad level %d", level);
+    }
+}
+
+// Stage 2: bits[7:6] (S2AP) == 0b11 grants read/write access.
+// Stage 1: bit 7 (AP[2]) clear means writable.
+bool
+V8PageTableOps64k::isWritable(pte_t pte, unsigned level, bool stage2) const
+{
+    return stage2 ? bits(pte, 7, 6)==3 : bits(pte, 7)==0;
+}
+
+// Output address held in the descriptor: base of the translated
+// block/page for a leaf, base of the next-level table otherwise.
+Addr
+V8PageTableOps64k::nextLevelPointer(pte_t pte, unsigned level) const
+{
+    if (isLeaf(pte, level)) {
+        switch (level) {
+            // no level 1 here
+            case 2: return mbits(pte, 47, 29);
+            case 3: return mbits(pte, 47, 16);
+            default: panic("bad level %d", level);
+        }
+    } else {
+        return mbits(pte, 47, 16);
+    }
+}
+
+// Byte offset of the descriptor selected by va at the given level
+// (descriptor index << 3, descriptors being 8 bytes).  The original
+// `break`s after the `return`s were unreachable and have been dropped.
+Addr
+V8PageTableOps64k::index(Addr va, unsigned level) const
+{
+    switch (level) {
+        case 1: return bits(va, 47, 42) << 3;
+        case 2: return bits(va, 41, 29) << 3;
+        case 3: return bits(va, 28, 16) << 3;
+        default: panic("bad level %d", level);
+    }
+}
+
+// Mask clearing the in-page offset bits of an address for a leaf at
+// this level.  Bit 52 widens a level-3 page to 2MB (presumably the
+// contiguous hint -- TODO confirm against the translation code).
+Addr
+V8PageTableOps64k::pageMask(pte_t pte, unsigned level) const
+{
+    switch (level) {
+        // no level 1 here
+        case 2: return ~mask(29);
+        case 3: return bits(pte, 52) ? ~mask(21) : ~mask(16);
+        default: panic("bad level %d", level);
+    }
+}
+
+// VA bits resolved by the walk once this level has been processed.
+Addr
+V8PageTableOps64k::walkMask(unsigned level) const
+{
+    switch (level) {
+        case 1: return mask(47, 42);
+        case 2: return mask(47, 29);
+        case 3: return mask(47, 16);
+        default: panic("bad level %d", level);
+    }
+}
+
+// VMSAv8 64k-granule walks span levels 1..3.
+unsigned
+V8PageTableOps64k::firstLevel() const
+{
+    return 1;
+}
+
+unsigned
+V8PageTableOps64k::lastLevel() const
+{
+    return 3;
+}
diff --git a/src/dev/arm/smmu_v3_ptops.hh b/src/dev/arm/smmu_v3_ptops.hh
new file mode 100644
index 0000000..ef9ef04
--- /dev/null
+++ b/src/dev/arm/smmu_v3_ptops.hh
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2013, 2018-2019 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Stan Czerniawski
+ */
+
+#ifndef __DEV_ARM_SMMU_V3_PTOPS_HH__
+#define __DEV_ARM_SMMU_V3_PTOPS_HH__
+
+#include <stdint.h>
+
+#include "base/types.hh"
+
+// Abstract interface describing a page-table format: how to decode a
+// descriptor (pte_t) and how to index the tables at each walk level.
+struct PageTableOps
+{
+    typedef int64_t pte_t;
+
+    // Virtual destructor: implementations are used polymorphically, so
+    // deletion through a base pointer must dispatch correctly.
+    virtual ~PageTableOps() {}
+
+    virtual bool isValid(pte_t pte, unsigned level) const = 0;
+    virtual bool isLeaf(pte_t pte, unsigned level) const = 0;
+    virtual bool isWritable(pte_t pte, unsigned level, bool stage2) const = 0;
+    virtual Addr nextLevelPointer(pte_t pte, unsigned level) const = 0;
+    virtual Addr index(Addr va, unsigned level) const = 0;
+    virtual Addr pageMask(pte_t pte, unsigned level) const = 0;
+    virtual Addr walkMask(unsigned level) const = 0;
+    virtual unsigned firstLevel() const = 0;
+    virtual unsigned lastLevel() const = 0;
+};
+
+// VMSAv7 long-descriptor (LPAE) format, levels 1-3.
+struct V7LPageTableOps : public PageTableOps
+{
+    bool isValid(pte_t pte, unsigned level) const override;
+    bool isLeaf(pte_t pte, unsigned level) const override;
+    bool isWritable(pte_t pte, unsigned level, bool stage2) const override;
+    Addr nextLevelPointer(pte_t pte, unsigned level) const override;
+    Addr index(Addr va, unsigned level) const override;
+    Addr pageMask(pte_t pte, unsigned level) const override;
+    Addr walkMask(unsigned level) const override;
+    unsigned firstLevel() const override;
+    unsigned lastLevel() const override;
+};
+
+// VMSAv8 4k-granule format, levels 0-3.
+struct V8PageTableOps4k : public PageTableOps
+{
+    bool isValid(pte_t pte, unsigned level) const override;
+    bool isLeaf(pte_t pte, unsigned level) const override;
+    bool isWritable(pte_t pte, unsigned level, bool stage2) const override;
+    Addr nextLevelPointer(pte_t pte, unsigned level) const override;
+    Addr index(Addr va, unsigned level) const override;
+    Addr pageMask(pte_t pte, unsigned level) const override;
+    Addr walkMask(unsigned level) const override;
+    unsigned firstLevel() const override;
+    unsigned lastLevel() const override;
+};
+
+// VMSAv8 64k-granule format, levels 1-3.
+struct V8PageTableOps64k : public PageTableOps
+{
+    bool isValid(pte_t pte, unsigned level) const override;
+    bool isLeaf(pte_t pte, unsigned level) const override;
+    bool isWritable(pte_t pte, unsigned level, bool stage2) const override;
+    Addr nextLevelPointer(pte_t pte, unsigned level) const override;
+    Addr index(Addr va, unsigned level) const override;
+    Addr pageMask(pte_t pte, unsigned level) const override;
+    Addr walkMask(unsigned level) const override;
+    unsigned firstLevel() const override;
+    unsigned lastLevel() const override;
+};
+
+#endif /* __DEV_ARM_SMMU_V3_PTOPS_HH__ */
diff --git a/src/dev/arm/smmu_v3_slaveifc.cc b/src/dev/arm/smmu_v3_slaveifc.cc
new file mode 100644
index 0000000..72c319d
--- /dev/null
+++ b/src/dev/arm/smmu_v3_slaveifc.cc
@@ -0,0 +1,263 @@
+/*
+ * Copyright (c) 2019 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Stan Czerniawski
+ *          Giacomo Travaglini
+ */
+
+#include "dev/arm/smmu_v3_slaveifc.hh"
+
+#include "debug/SMMUv3.hh"
+#include "dev/arm/smmu_v3.hh"
+#include "dev/arm/smmu_v3_transl.hh"
+
+// Per-device slave interface: owns the micro/main TLBs and the three
+// ports (slave, ATS slave, ATS master) through which a master device
+// talks to the SMMU.  `smmu` is filled in later via setSMMU().
+SMMUv3SlaveInterface::SMMUv3SlaveInterface(
+    const SMMUv3SlaveInterfaceParams *p) :
+    MemObject(p),
+    smmu(nullptr),
+    microTLB(new SMMUTLB(p->utlb_entries,
+                         p->utlb_assoc,
+                         p->utlb_policy)),
+    mainTLB(new SMMUTLB(p->tlb_entries,
+                        p->tlb_assoc,
+                        p->tlb_policy)),
+    microTLBEnable(p->utlb_enable),
+    mainTLBEnable(p->tlb_enable),
+    slavePortSem(1),
+    microTLBSem(p->utlb_slots),
+    mainTLBSem(p->tlb_slots),
+    microTLBLat(p->utlb_lat),
+    mainTLBLat(p->tlb_lat),
+    slavePort(new SMMUSlavePort(csprintf("%s.slave", name()), *this)),
+    atsSlavePort(name() + ".atsSlave", *this),
+    atsMasterPort(name() + ".atsMaster", *this),
+    portWidth(p->port_width),
+    wrBufSlotsRemaining(p->wrbuf_slots),
+    xlateSlotsRemaining(p->xlate_slots),
+    prefetchEnable(p->prefetch_enable),
+    prefetchReserveLastWay(
+        p->prefetch_reserve_last_way),
+    deviceNeedsRetry(false),
+    atsDeviceNeedsRetry(false),
+    sendDeviceRetryEvent(*this),
+    atsSendDeviceRetryEvent(this)
+{}
+
+// Propagate the address-range change through the slave port; fatal if
+// the device-facing port was left unconnected.
+void
+SMMUv3SlaveInterface::sendRange()
+{
+    if (slavePort->isConnected()) {
+        // The peer's name is a string: use %s (the original %d was a
+        // format-specifier mismatch).
+        inform("Slave port is connected to %s\n",
+                slavePort->getMasterPort().name());
+
+        slavePort->sendRangeChange();
+    } else {
+        fatal("Slave port is not connected.\n");
+    }
+}
+
+// Resolve one of this interface's three ports by name; any other name
+// is delegated to the MemObject base class.
+Port&
+SMMUv3SlaveInterface::getPort(const std::string &name, PortID id)
+{
+    if (name == "ats_master") {
+        return atsMasterPort;
+    } else if (name == "slave") {
+        return *slavePort;
+    } else if (name == "ats_slave") {
+        return atsSlavePort;
+    } else {
+        return MemObject::getPort(name, id);
+    }
+}
+
+// Queue a translated response back to the master device on the next
+// clock edge.
+void
+SMMUv3SlaveInterface::schedTimingResp(PacketPtr pkt)
+{
+    slavePort->schedTimingResp(pkt, nextCycle());
+}
+
+// Queue an ATS response; if an earlier ATS request was refused,
+// schedule a retry now that the response frees capacity.
+void
+SMMUv3SlaveInterface::schedAtsTimingResp(PacketPtr pkt)
+{
+    atsSlavePort.schedTimingResp(pkt, nextCycle());
+
+    if (atsDeviceNeedsRetry) {
+        atsDeviceNeedsRetry = false;
+        schedule(atsSendDeviceRetryEvent, nextCycle());
+    }
+}
+
+// Atomic-mode translation request: run the whole translation process
+// to completion on a stack-allocated process object and return the
+// accumulated delay.
+Tick
+SMMUv3SlaveInterface::recvAtomic(PacketPtr pkt)
+{
+    DPRINTF(SMMUv3, "[a] req from %s addr=%#x size=%#x\n",
+            slavePort->getMasterPort().name(),
+            pkt->getAddr(), pkt->getSize());
+
+    std::string proc_name = csprintf("%s.port", name());
+    SMMUTranslationProcess proc(proc_name, *smmu, *this);
+    proc.beginTransaction(SMMUTranslRequest::fromPacket(pkt));
+
+    SMMUAction a = smmu->runProcessAtomic(&proc, pkt);
+    assert(a.type == ACTION_SEND_RESP);
+
+    return a.delay;
+}
+
+// Timing-mode translation request from the master device.  Applies
+// flow control (translation slots and, for writes, write-buffer beats)
+// and, if accepted, starts a heap-allocated translation process.
+bool
+SMMUv3SlaveInterface::recvTimingReq(PacketPtr pkt)
+{
+    DPRINTF(SMMUv3, "[t] req from %s addr=%#x size=%#x\n",
+            slavePort->getMasterPort().name(),
+            pkt->getAddr(), pkt->getSize());
+
+    // @todo: We need to pay for this and not just zero it out
+    pkt->headerDelay = pkt->payloadDelay = 0;
+
+    // Number of port beats needed to transfer this packet.
+    unsigned nbeats =
+        (pkt->getSize() + (portWidth-1)) / portWidth;
+
+    if (xlateSlotsRemaining==0 ||
+        (pkt->isWrite() && wrBufSlotsRemaining < nbeats))
+    {
+        // Out of resources: NACK and remember to send a retry later.
+        deviceNeedsRetry = true;
+        return false;
+    }
+
+    xlateSlotsRemaining--;
+    if (pkt->isWrite())
+        wrBufSlotsRemaining -= nbeats;
+
+    std::string proc_name = csprintf("%s.port", name());
+    SMMUTranslationProcess *proc =
+        new SMMUTranslationProcess(proc_name, *smmu, *this);
+    proc->beginTransaction(SMMUTranslRequest::fromPacket(pkt));
+
+    smmu->runProcessTiming(proc, pkt);
+
+    return true;
+}
+
+// Atomic-mode ATS translation request: like recvAtomic(), but the
+// request is tagged as ATS and yields an ATS response action.
+Tick
+SMMUv3SlaveInterface::atsSlaveRecvAtomic(PacketPtr pkt)
+{
+    DPRINTF(SMMUv3, "[a] ATS slave  req  addr=%#x size=%#x\n",
+            pkt->getAddr(), pkt->getSize());
+
+    std::string proc_name = csprintf("%s.atsport", name());
+    const bool ats_request = true;
+    SMMUTranslationProcess proc(
+        proc_name, *smmu, *this);
+    proc.beginTransaction(SMMUTranslRequest::fromPacket(pkt, ats_request));
+
+    SMMUAction a = smmu->runProcessAtomic(&proc, pkt);
+    assert(a.type == ACTION_SEND_RESP_ATS);
+
+    return a.delay;
+}
+
+// Timing-mode ATS translation request from the master device.
+bool
+SMMUv3SlaveInterface::atsSlaveRecvTimingReq(PacketPtr pkt)
+{
+    DPRINTF(SMMUv3, "[t] ATS slave  req  addr=%#x size=%#x\n",
+            pkt->getAddr(), pkt->getSize());
+
+    // @todo: We need to pay for this and not just zero it out
+    pkt->headerDelay = pkt->payloadDelay = 0;
+
+    if (xlateSlotsRemaining == 0) {
+        // NOTE(review): this sets deviceNeedsRetry although the NACK
+        // is on the ATS slave port (schedAtsTimingResp retries via
+        // atsDeviceNeedsRetry) -- confirm this is intended.
+        deviceNeedsRetry = true;
+        return false;
+    }
+
+    xlateSlotsRemaining--;
+
+    std::string proc_name = csprintf("%s.atsport", name());
+    const bool ats_request = true;
+    SMMUTranslationProcess *proc =
+        new SMMUTranslationProcess(proc_name, *smmu, *this);
+    proc->beginTransaction(SMMUTranslRequest::fromPacket(pkt, ats_request));
+
+    smmu->runProcessTiming(proc, pkt);
+
+    return true;
+}
+
+// Response to an SMMU-initiated ATS request: recover the process that
+// sent the request from the packet's sender state and resume it.
+bool
+SMMUv3SlaveInterface::atsMasterRecvTimingResp(PacketPtr pkt)
+{
+    DPRINTF(SMMUv3, "[t] ATS master resp addr=%#x size=%#x\n",
+            pkt->getAddr(), pkt->getSize());
+
+    // @todo: We need to pay for this and not just zero it out
+    pkt->headerDelay = pkt->payloadDelay = 0;
+
+    SMMUProcess *proc =
+        safe_cast<SMMUProcess *>(pkt->popSenderState());
+
+    smmu->runProcessTiming(proc, pkt);
+
+    return true;
+}
+
+// Send a retry to the device on the main slave port.
+void
+SMMUv3SlaveInterface::sendDeviceRetry()
+{
+    slavePort->sendRetryReq();
+}
+
+// Send a retry to the device on the ATS slave port.
+void
+SMMUv3SlaveInterface::atsSendDeviceRetry()
+{
+    DPRINTF(SMMUv3, "ATS retry\n");
+    atsSlavePort.sendRetryReq();
+}
+
+// Schedule a main-port retry for the next cycle if the device was
+// refused earlier and no retry event is already pending.
+void
+SMMUv3SlaveInterface::scheduleDeviceRetry()
+{
+    if (deviceNeedsRetry && !sendDeviceRetryEvent.scheduled()) {
+        DPRINTF(SMMUv3, "sched slave retry\n");
+        deviceNeedsRetry = false;
+        schedule(sendDeviceRetryEvent, nextCycle());
+    }
+}
+
+// Params factory hook used by the generated Python parameter wrapper.
+SMMUv3SlaveInterface*
+SMMUv3SlaveInterfaceParams::create()
+{
+    return new SMMUv3SlaveInterface(this);
+}
diff --git a/src/dev/arm/smmu_v3_slaveifc.hh b/src/dev/arm/smmu_v3_slaveifc.hh
new file mode 100644
index 0000000..a782ff9
--- /dev/null
+++ b/src/dev/arm/smmu_v3_slaveifc.hh
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2013, 2018-2019 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Stan Czerniawski
+ */
+
+#ifndef __DEV_ARM_SMMU_V3_SLAVEIFC_HH__
+#define __DEV_ARM_SMMU_V3_SLAVEIFC_HH__
+
+#include <list>
+
+#include "dev/arm/smmu_v3_caches.hh"
+#include "dev/arm/smmu_v3_defs.hh"
+#include "dev/arm/smmu_v3_events.hh"
+#include "dev/arm/smmu_v3_ports.hh"
+#include "dev/arm/smmu_v3_proc.hh"
+#include "mem/mem_object.hh"
+#include "params/SMMUv3SlaveInterface.hh"
+
+class SMMUTranslationProcess;
+class SMMUv3;
+class SMMUSlavePort;
+
+// Bridge between one master device and the SMMU: implements the
+// device-facing slave port, the ATS ports, the per-device TLBs and
+// the flow-control state for outstanding transactions.  (The original
+// declaration had a second, redundant `public:` label; it has been
+// removed -- the class already starts with a public section.)
+class SMMUv3SlaveInterface : public MemObject
+{
+  public:
+    SMMUv3 *smmu;       // set after construction via setSMMU()
+    SMMUTLB* microTLB;
+    SMMUTLB* mainTLB;
+
+    const bool microTLBEnable;
+    const bool mainTLBEnable;
+
+    // Model contention on the port and on the TLB lookup slots.
+    SMMUSemaphore slavePortSem;
+    SMMUSemaphore microTLBSem;
+    SMMUSemaphore mainTLBSem;
+
+    const Cycles microTLBLat;
+    const Cycles mainTLBLat;
+
+    SMMUSlavePort *slavePort;
+    SMMUATSSlavePort  atsSlavePort;
+    SMMUATSMasterPort atsMasterPort;
+
+    // in bytes
+    const unsigned portWidth;
+
+    // Flow control: remaining write-buffer beats / translation slots.
+    unsigned wrBufSlotsRemaining;
+    unsigned xlateSlotsRemaining;
+
+    const bool prefetchEnable;
+    const bool prefetchReserveLastWay;
+
+    std::list<SMMUTranslationProcess *> duplicateReqs;
+    SMMUSignal duplicateReqRemoved;
+
+    std::list<SMMUTranslationProcess *> dependentReads[SMMU_MAX_TRANS_ID];
+    std::list<SMMUTranslationProcess *> dependentWrites[SMMU_MAX_TRANS_ID];
+    SMMUSignal dependentReqRemoved;
+
+    // Receiving translation requests from the master device
+    Tick recvAtomic(PacketPtr pkt);
+    bool recvTimingReq(PacketPtr pkt);
+    void schedTimingResp(PacketPtr pkt);
+
+    Tick atsSlaveRecvAtomic(PacketPtr pkt);
+    bool atsSlaveRecvTimingReq(PacketPtr pkt);
+    bool atsMasterRecvTimingResp(PacketPtr pkt);
+    void schedAtsTimingResp(PacketPtr pkt);
+
+    void scheduleDeviceRetry();
+    void sendDeviceRetry();
+    void atsSendDeviceRetry();
+
+    bool deviceNeedsRetry;
+    bool atsDeviceNeedsRetry;
+
+    SMMUDeviceRetryEvent sendDeviceRetryEvent;
+    EventWrapper<
+        SMMUv3SlaveInterface,
+        &SMMUv3SlaveInterface::atsSendDeviceRetry> atsSendDeviceRetryEvent;
+
+    Port& getPort(const std::string &name, PortID id);
+
+    SMMUv3SlaveInterface(const SMMUv3SlaveInterfaceParams *p);
+
+    ~SMMUv3SlaveInterface()
+    {
+        delete microTLB;
+        delete mainTLB;
+    }
+
+    void setSMMU(SMMUv3 *_smmu) { smmu = _smmu; }
+    void sendRange();
+};
+
+#endif /* __DEV_ARM_SMMU_V3_SLAVEIFC_HH__ */
diff --git a/src/dev/arm/smmu_v3_transl.cc b/src/dev/arm/smmu_v3_transl.cc
new file mode 100644
index 0000000..4a3efc6
--- /dev/null
+++ b/src/dev/arm/smmu_v3_transl.cc
@@ -0,0 +1,1443 @@
+/*
+ * Copyright (c) 2013, 2018-2019 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Stan Czerniawski
+ */
+
+#include "dev/arm/smmu_v3_transl.hh"
+
+#include "debug/SMMUv3.hh"
+#include "debug/SMMUv3Hazard.hh"
+#include "dev/arm/amba.hh"
+#include "dev/arm/smmu_v3.hh"
+#include "sim/system.hh"
+
+/**
+ * Build a translation request out of an incoming packet, copying the
+ * (Sub)StreamId and access attributes the packet carries.
+ */
+SMMUTranslRequest
+SMMUTranslRequest::fromPacket(PacketPtr pkt, bool ats)
+{
+    SMMUTranslRequest result;
+
+    result.pkt          = pkt;
+    result.addr         = pkt->getAddr();
+    result.size         = pkt->getSize();
+    result.sid          = pkt->req->streamId();
+    // The SubstreamId is optional; default to 0 when absent.
+    if (pkt->req->hasSubstreamId())
+        result.ssid = pkt->req->substreamId();
+    else
+        result.ssid = 0;
+    result.isWrite      = pkt->isWrite();
+    result.isPrefetch   = false;
+    result.isAtsRequest = ats;
+
+    return result;
+}
+
+/**
+ * Build a synthetic prefetch request for the given stream/substream at
+ * the given virtual address.  No packet is attached and no data is
+ * transferred; the request exists only to warm up the TLBs.
+ */
+SMMUTranslRequest
+SMMUTranslRequest::prefetch(Addr addr, uint32_t sid, uint32_t ssid)
+{
+    SMMUTranslRequest result;
+
+    result.pkt          = NULL;
+    result.addr         = addr;
+    result.size         = 0;
+    result.sid          = sid;
+    result.ssid         = ssid;
+    result.isWrite      = false;
+    result.isPrefetch   = true;
+    result.isAtsRequest = false;
+
+    return result;
+}
+
+// Latch the request and reset the coroutine so the translation process
+// starts from scratch for this transaction.
+void
+SMMUTranslationProcess::beginTransaction(const SMMUTranslRequest &req)
+{
+    request = req;
+    reinit();
+}
+
+// Restart a previously faulted transaction.  NOTE: stall/resume is not
+// supported by this model (see the assert below); this path is kept for
+// future use and aborts the simulation if ever reached.
+void
+SMMUTranslationProcess::resumeTransaction()
+{
+    // Resuming only makes sense in timing mode.
+    assert(smmu.system.isTimingMode());
+
+    // Stall/resume for faulting transactions is unimplemented.
+    assert(!"Stalls are broken");
+
+    Tick resumeTick = curTick();
+
+    (void) resumeTick; // only consumed by the DPRINTF below
+    DPRINTF(SMMUv3, "Resume at tick = %d. Fault duration = %d (%.3fus)\n",
+        resumeTick, resumeTick-faultTick, (resumeTick-faultTick) / 1e6);
+
+    beginTransaction(request);
+
+    smmu.runProcessTiming(this, request.pkt);
+}
+
+/**
+ * Coroutine body of a translation request: models arrival over the
+ * slave port, the bypass/prefetch/demand paths through the TLB
+ * hierarchy, 4k-page and order-ID hazard tracking, and finally
+ * completes (or, for prefetches, retires) the transaction.
+ */
+void
+SMMUTranslationProcess::main(Yield &yield)
+{
+    // Hack:
+    // The coroutine starts running as soon as it's created.
+    // But we need to wait for request data esp. in atomic mode.
+    SMMUAction a;
+    a.type = ACTION_INITIAL_NOP;
+    a.pkt = NULL;
+    yield(a);
+
+    // Base address of the 4k page following the requested address.
+    const Addr next4k = (request.addr + 0x1000ULL) & ~0xfffULL;
+
+    if ((request.addr + request.size) > next4k)
+        panic("Transaction crosses 4k boundary (addr=%#x size=%#x)!\n",
+                request.addr, request.size);
+
+
+    // Occupy the slave port for as many beats as the payload needs;
+    // reads take a single beat on this side.
+    unsigned numSlaveBeats = request.isWrite ?
+        (request.size + (ifc.portWidth - 1)) / ifc.portWidth : 1;
+
+    doSemaphoreDown(yield, ifc.slavePortSem);
+    doDelay(yield, Cycles(numSlaveBeats));
+    doSemaphoreUp(ifc.slavePortSem);
+
+
+    recvTick = curTick();
+
+
+    if (!(smmu.regs.cr0 & 0x1)) {
+        // SMMU disabled
+        doDelay(yield, Cycles(1));
+        completeTransaction(yield, bypass(request.addr));
+        return;
+    }
+
+    TranslResult tr;
+    bool wasPrefetched = false;
+
+    if (request.isPrefetch) {
+        // Abort prefetch if:
+        //   - there's already a transaction looking up the same 4k page, OR
+        //   - requested address is already in the TLB.
+        if (hazard4kCheck() || ifcTLBLookup(yield, tr, wasPrefetched))
+            completePrefetch(yield); // this never returns
+
+        hazard4kRegister();
+
+        tr = smmuTranslation(yield);
+
+        if (tr.fault == FAULT_NONE)
+            ifcTLBUpdate(yield, tr);
+
+        hazard4kRelease();
+
+        completePrefetch(yield);
+    } else {
+        hazardIdRegister();
+
+        if (!microTLBLookup(yield, tr)) {
+            bool hit = ifcTLBLookup(yield, tr, wasPrefetched);
+            if (!hit) {
+                // Wait out any in-flight lookup of the same 4k page,
+                // then retry the TLB which that lookup may have filled.
+                while (!hit && hazard4kCheck()) {
+                    hazard4kHold(yield);
+                    hit = ifcTLBLookup(yield, tr, wasPrefetched);
+                }
+            }
+
+            // Issue prefetch if:
+            //   - there was a TLB hit and the entry was prefetched, OR
+            //   - TLB miss was successfully serviced
+            if (hit) {
+                if (wasPrefetched)
+                    issuePrefetch(next4k);
+            } else {
+                hazard4kRegister();
+
+                tr = smmuTranslation(yield);
+
+                if (tr.fault == FAULT_NONE) {
+                    ifcTLBUpdate(yield, tr);
+
+                    issuePrefetch(next4k);
+                }
+
+                hazard4kRelease();
+            }
+
+            if (tr.fault == FAULT_NONE)
+                microTLBUpdate(yield, tr);
+        }
+
+        // Preserve ordering with other in-flight same-ID requests
+        // before completing.
+        hazardIdHold(yield);
+        hazardIdRelease();
+
+        // Faulting transactions are not supported (no stall/resume).
+        if (tr.fault != FAULT_NONE)
+            panic("fault\n");
+
+        completeTransaction(yield, tr);
+    }
+}
+
+// Identity translation used when the SMMU (or the stream) is bypassed:
+// output address equals input address, write permission granted.
+SMMUTranslationProcess::TranslResult
+SMMUTranslationProcess::bypass(Addr addr) const
+{
+    TranslResult result;
+
+    result.fault    = FAULT_NONE;
+    result.addr     = addr;
+    result.addrMask = 0;
+    result.writable = 1;
+
+    return result;
+}
+
+/**
+ * Full SMMU translation path, taken after a miss in the slave
+ * interface TLBs: resolve the stream configuration (config cache or
+ * STE/CD fetch), consult the SMMU main TLB and, on a miss, perform the
+ * page table walk(s).  The IFC<->SMMU links and the shared SMMU
+ * resources are modelled with semaphores and pipeline delays.
+ */
+SMMUTranslationProcess::TranslResult
+SMMUTranslationProcess::smmuTranslation(Yield &yield)
+{
+    TranslResult tr;
+
+    // Need SMMU credit to proceed
+    doSemaphoreDown(yield, smmu.transSem);
+
+    // Simulate pipelined IFC->SMMU link
+    doSemaphoreDown(yield, smmu.ifcSmmuSem);
+    doDelay(yield, Cycles(1)); // serialize transactions
+    doSemaphoreUp(smmu.ifcSmmuSem);
+    doDelay(yield, smmu.ifcSmmuLat - Cycles(1)); // remaining pipeline delay
+
+    bool haveConfig = true;
+    if (!configCacheLookup(yield, context)) {
+        // Fixed spacing: 'if(' -> 'if (' for consistency with the file.
+        if (findConfig(yield, context, tr)) {
+            configCacheUpdate(yield, context);
+        } else {
+            haveConfig = false;
+        }
+    }
+
+    if (haveConfig && !smmuTLBLookup(yield, tr)) {
+        // SMMU main TLB miss
+
+        // Need PTW slot to proceed
+        doSemaphoreDown(yield, smmu.ptwSem);
+
+        // Page table walk
+        Tick ptwStartTick = curTick();
+
+        if (context.stage1Enable) {
+            tr = translateStage1And2(yield, request.addr);
+        } else if (context.stage2Enable) {
+            tr = translateStage2(yield, request.addr, true);
+        } else {
+            tr = bypass(request.addr);
+        }
+
+        if (context.stage1Enable || context.stage2Enable)
+            smmu.ptwTimeDist.sample(curTick() - ptwStartTick);
+
+        // Free PTW slot
+        doSemaphoreUp(smmu.ptwSem);
+
+        if (tr.fault == FAULT_NONE)
+            smmuTLBUpdate(yield, tr);
+    }
+
+    // Simulate pipelined SMMU->SLAVE INTERFACE link
+    doSemaphoreDown(yield, smmu.smmuIfcSem);
+    doDelay(yield, Cycles(1)); // serialize transactions
+    doSemaphoreUp(smmu.smmuIfcSem);
+    doDelay(yield, smmu.smmuIfcLat - Cycles(1)); // remaining pipeline delay
+
+    // return SMMU credit
+    doSemaphoreUp(smmu.transSem);
+
+    return tr;
+}
+
+/**
+ * Probe the per-interface micro TLB.  On a hit, @a tr is filled with a
+ * successful translation.
+ *
+ * @return true on a hit, false on a miss or if the micro TLB is off.
+ */
+bool
+SMMUTranslationProcess::microTLBLookup(Yield &yield, TranslResult &tr)
+{
+    if (!ifc.microTLBEnable)
+        return false;
+
+    doSemaphoreDown(yield, ifc.microTLBSem);
+    doDelay(yield, ifc.microTLBLat);
+    const SMMUTLB::Entry *e =
+        ifc.microTLB->lookup(request.sid, request.ssid, request.addr);
+    doSemaphoreUp(ifc.microTLBSem);
+
+    if (!e) {
+        DPRINTF(SMMUv3, "micro TLB miss vaddr=%#x sid=%#x ssid=%#x\n",
+            request.addr, request.sid, request.ssid);
+
+        return false;
+    }
+
+    DPRINTF(SMMUv3,
+        "micro TLB hit vaddr=%#x amask=%#x sid=%#x ssid=%#x paddr=%#x\n",
+        request.addr, e->vaMask, request.sid, request.ssid, e->pa);
+
+    tr.fault = FAULT_NONE;
+    // (stray double semicolon removed)
+    tr.addr = e->pa + (request.addr & ~e->vaMask);
+    tr.addrMask = e->vaMask;
+    tr.writable = e->permissions;
+
+    return true;
+}
+
+/**
+ * Probe the slave interface main TLB.  On a hit, @a tr is filled with
+ * a successful translation and @a wasPrefetched reports whether the
+ * entry was originally installed by a prefetch.
+ *
+ * @return true on a hit, false on a miss or if the TLB is off.
+ */
+bool
+SMMUTranslationProcess::ifcTLBLookup(Yield &yield, TranslResult &tr,
+                                     bool &wasPrefetched)
+{
+    if (!ifc.mainTLBEnable)
+        return false;
+
+    doSemaphoreDown(yield, ifc.mainTLBSem);
+    doDelay(yield, ifc.mainTLBLat);
+    const SMMUTLB::Entry *e =
+        ifc.mainTLB->lookup(request.sid, request.ssid, request.addr);
+    doSemaphoreUp(ifc.mainTLBSem);
+
+    if (!e) {
+        DPRINTF(SMMUv3,
+                "SLAVE Interface TLB miss vaddr=%#x sid=%#x ssid=%#x\n",
+                request.addr, request.sid, request.ssid);
+
+        return false;
+    }
+
+    DPRINTF(SMMUv3,
+            "SLAVE Interface TLB hit vaddr=%#x amask=%#x sid=%#x ssid=%#x "
+            "paddr=%#x\n", request.addr, e->vaMask, request.sid,
+            request.ssid, e->pa);
+
+    tr.fault = FAULT_NONE;
+    // (stray double semicolon removed)
+    tr.addr = e->pa + (request.addr & ~e->vaMask);
+    tr.addrMask = e->vaMask;
+    tr.writable = e->permissions;
+    wasPrefetched = e->prefetched;
+
+    return true;
+}
+
+/**
+ * Probe the shared SMMU main TLB, tagged by (ASID, VMID) from the
+ * current translation context.  On a hit, @a tr is filled with a
+ * successful translation.
+ *
+ * @return true on a hit, false on a miss or if the TLB is off.
+ */
+bool
+SMMUTranslationProcess::smmuTLBLookup(Yield &yield, TranslResult &tr)
+{
+    if (!smmu.tlbEnable)
+        return false;
+
+    doSemaphoreDown(yield, smmu.tlbSem);
+    doDelay(yield, smmu.tlbLat);
+    const ARMArchTLB::Entry *e =
+        smmu.tlb.lookup(request.addr, context.asid, context.vmid);
+    doSemaphoreUp(smmu.tlbSem);
+
+    if (!e) {
+        DPRINTF(SMMUv3, "SMMU TLB miss vaddr=%#x asid=%#x vmid=%#x\n",
+            request.addr, context.asid, context.vmid);
+
+        return false;
+    }
+
+    DPRINTF(SMMUv3,
+            "SMMU TLB hit vaddr=%#x amask=%#x asid=%#x vmid=%#x paddr=%#x\n",
+            request.addr, e->vaMask, context.asid, context.vmid, e->pa);
+
+    tr.fault = FAULT_NONE;
+    // (stray double semicolon removed)
+    tr.addr = e->pa + (request.addr & ~e->vaMask);
+    tr.addrMask = e->vaMask;
+    tr.writable = e->permissions;
+
+    return true;
+}
+
+// Install a successful translation into the per-interface micro TLB.
+void
+SMMUTranslationProcess::microTLBUpdate(Yield &yield,
+                                       const TranslResult &tr)
+{
+    assert(tr.fault == FAULT_NONE);
+
+    if (!ifc.microTLBEnable)
+        return;
+
+    SMMUTLB::Entry entry;
+    entry.valid = true;
+    entry.prefetched = false;
+    entry.sid = request.sid;
+    entry.ssid = request.ssid;
+    entry.asid = context.asid;
+    entry.vmid = context.vmid;
+    entry.vaMask = tr.addrMask;
+    entry.va = request.addr & entry.vaMask;
+    entry.pa = tr.addr & entry.vaMask;
+    entry.permissions = tr.writable;
+
+    doSemaphoreDown(yield, ifc.microTLBSem);
+
+    DPRINTF(SMMUv3,
+        "micro TLB upd vaddr=%#x amask=%#x paddr=%#x sid=%#x ssid=%#x\n",
+        entry.va, entry.vaMask, entry.pa, entry.sid, entry.ssid);
+
+    ifc.microTLB->store(entry, SMMUTLB::ALLOC_ANY_WAY);
+
+    doSemaphoreUp(ifc.microTLBSem);
+}
+
+// Install a successful translation into the slave interface main TLB.
+// When prefetch way reservation is active, prefetched entries go into
+// the last way and demand entries into all other ways.
+void
+SMMUTranslationProcess::ifcTLBUpdate(Yield &yield,
+                                     const TranslResult &tr)
+{
+    assert(tr.fault == FAULT_NONE);
+
+    if (!ifc.mainTLBEnable)
+        return;
+
+    SMMUTLB::Entry entry;
+    entry.valid = true;
+    entry.prefetched = request.isPrefetch;
+    entry.sid = request.sid;
+    entry.ssid = request.ssid;
+    entry.asid = context.asid;
+    entry.vmid = context.vmid;
+    entry.vaMask = tr.addrMask;
+    entry.va = request.addr & entry.vaMask;
+    entry.pa = tr.addr & entry.vaMask;
+    entry.permissions = tr.writable;
+
+    SMMUTLB::AllocPolicy alloc;
+    if (ifc.prefetchEnable && ifc.prefetchReserveLastWay) {
+        alloc = request.isPrefetch ?
+            SMMUTLB::ALLOC_LAST_WAY : SMMUTLB::ALLOC_ANY_BUT_LAST_WAY;
+    } else {
+        alloc = SMMUTLB::ALLOC_ANY_WAY;
+    }
+
+    doSemaphoreDown(yield, ifc.mainTLBSem);
+
+    DPRINTF(SMMUv3,
+            "SLAVE Interface upd vaddr=%#x amask=%#x paddr=%#x sid=%#x "
+            "ssid=%#x\n", entry.va, entry.vaMask, entry.pa, entry.sid,
+            entry.ssid);
+
+    ifc.mainTLB->store(entry, alloc);
+
+    doSemaphoreUp(ifc.mainTLBSem);
+}
+
+// Install a successful translation into the shared SMMU main TLB,
+// tagged with the translation context's ASID/VMID.
+void
+SMMUTranslationProcess::smmuTLBUpdate(Yield &yield,
+                                      const TranslResult &tr)
+{
+    assert(tr.fault == FAULT_NONE);
+
+    if (!smmu.tlbEnable)
+        return;
+
+    ARMArchTLB::Entry entry;
+    entry.valid = true;
+    entry.asid = context.asid;
+    entry.vmid = context.vmid;
+    entry.vaMask = tr.addrMask;
+    entry.va = request.addr & entry.vaMask;
+    entry.pa = tr.addr & entry.vaMask;
+    entry.permissions = tr.writable;
+
+    doSemaphoreDown(yield, smmu.tlbSem);
+
+    DPRINTF(SMMUv3,
+            "SMMU TLB upd vaddr=%#x amask=%#x paddr=%#x asid=%#x vmid=%#x\n",
+            entry.va, entry.vaMask, entry.pa, entry.asid, entry.vmid);
+
+    smmu.tlb.store(entry);
+
+    doSemaphoreUp(smmu.tlbSem);
+}
+
+/**
+ * Look up the (sid, ssid) translation context in the config cache.
+ *
+ * @param tc filled with the cached context on a hit.
+ * @return true on a hit, false on a miss or if the cache is disabled.
+ */
+bool
+SMMUTranslationProcess::configCacheLookup(Yield &yield, TranslContext &tc)
+{
+    if (!smmu.configCacheEnable)
+        return false;
+
+    // Lookup consumes config-cache bandwidth (semaphore + latency).
+    doSemaphoreDown(yield, smmu.configSem);
+    doDelay(yield, smmu.configLat);
+    const ConfigCache::Entry *e =
+        smmu.configCache.lookup(request.sid, request.ssid);
+    doSemaphoreUp(smmu.configSem);
+
+    if (!e) {
+        DPRINTF(SMMUv3, "Config miss sid=%#x ssid=%#x\n",
+                request.sid, request.ssid);
+
+        return false;
+    }
+
+    DPRINTF(SMMUv3, "Config hit sid=%#x ssid=%#x ttb=%#08x asid=%#x\n",
+            request.sid, request.ssid, e->ttb0, e->asid);
+
+    tc.stage1Enable = e->stage1_en;
+    tc.stage2Enable = e->stage2_en;
+
+    tc.ttb0 = e->ttb0;
+    tc.ttb1 = e->ttb1;
+    tc.asid = e->asid;
+    tc.httb = e->httb;
+    tc.vmid = e->vmid;
+
+    tc.stage1TranslGranule = e->stage1_tg;
+    tc.stage2TranslGranule = e->stage2_tg;
+
+    return true;
+}
+
+// Cache the resolved translation context for this (sid, ssid) so later
+// transactions can skip the STE/CD fetch.
+void
+SMMUTranslationProcess::configCacheUpdate(Yield &yield,
+                                          const TranslContext &tc)
+{
+    if (!smmu.configCacheEnable)
+        return;
+
+    ConfigCache::Entry entry;
+    entry.valid = true;
+    entry.sid = request.sid;
+    entry.ssid = request.ssid;
+    entry.stage1_en = tc.stage1Enable;
+    entry.stage2_en = tc.stage2Enable;
+    entry.stage1_tg = tc.stage1TranslGranule;
+    entry.stage2_tg = tc.stage2TranslGranule;
+    entry.ttb0 = tc.ttb0;
+    entry.ttb1 = tc.ttb1;
+    entry.asid = tc.asid;
+    entry.httb = tc.httb;
+    entry.vmid = tc.vmid;
+
+    doSemaphoreDown(yield, smmu.configSem);
+
+    DPRINTF(SMMUv3, "Config upd  sid=%#x ssid=%#x\n", entry.sid, entry.ssid);
+
+    smmu.configCache.store(entry);
+
+    doSemaphoreUp(smmu.configSem);
+}
+
+/**
+ * Resolve the translation context for the current stream: fetch the
+ * STE and, when stage 1 is enabled, the CD, and fill @a tc.
+ *
+ * @param tc  output translation context.
+ * @param tr  unused here; reserved for reporting config faults.
+ * @return true when a usable configuration was established.
+ */
+bool
+SMMUTranslationProcess::findConfig(Yield &yield,
+                                   TranslContext &tc,
+                                   TranslResult &tr)
+{
+    tc.stage1Enable = false;
+    tc.stage2Enable = false;
+
+    StreamTableEntry ste;
+    doReadSTE(yield, ste, request.sid);
+
+    switch (ste.dw0.config) {
+        case STE_CONFIG_BYPASS:
+            break;
+
+        case STE_CONFIG_STAGE1_ONLY:
+            tc.stage1Enable = true;
+            break;
+
+        case STE_CONFIG_STAGE2_ONLY:
+            tc.stage2Enable = true;
+            break;
+
+        case STE_CONFIG_STAGE1_AND_2:
+            tc.stage1Enable = true;
+            tc.stage2Enable = true;
+            break;
+
+        default:
+            panic("Bad or unimplemented STE config %d\n",
+                ste.dw0.config);
+    }
+
+
+    // Establish stage 2 context first since
+    // Context Descriptors can be in IPA space.
+    if (tc.stage2Enable) {
+        tc.httb = ste.dw3.s2ttb << STE_S2TTB_SHIFT;
+        tc.vmid = ste.dw2.s2vmid;
+        tc.stage2TranslGranule = ste.dw2.s2tg;
+    } else {
+        // Poison value: must never be dereferenced with stage 2 off.
+        tc.httb = 0xdeadbeef;
+        tc.vmid = 0;
+        tc.stage2TranslGranule = TRANS_GRANULE_INVALID;
+    }
+
+
+    // Now fetch stage 1 config.
+    // Fixed: test the context being built (tc) rather than the member
+    // 'context'.  They alias for the current caller, but tc is what the
+    // switch above actually populated.
+    if (tc.stage1Enable) {
+        ContextDescriptor cd;
+        doReadCD(yield, cd, ste, request.sid, request.ssid);
+
+        tc.ttb0 = cd.dw1.ttb0 << CD_TTB_SHIFT;
+        tc.ttb1 = cd.dw2.ttb1 << CD_TTB_SHIFT;
+        tc.asid = cd.dw0.asid;
+        tc.stage1TranslGranule = cd.dw0.tg0;
+    } else {
+        // Poison values: must never be dereferenced with stage 1 off.
+        tc.ttb0 = 0xcafebabe;
+        tc.ttb1 = 0xcafed00d;
+        tc.asid = 0;
+        tc.stage1TranslGranule = TRANS_GRANULE_INVALID;
+    }
+
+    return true;
+}
+
+/**
+ * Probe the walk cache for a table pointer at the given stage/level.
+ * Only levels enabled in walkCacheS1Levels / walkCacheS2Levels are
+ * looked up; otherwise @a walkEntry is left untouched.  A lookup
+ * consumes walk-cache bandwidth (semaphore + latency).
+ */
+void
+SMMUTranslationProcess::walkCacheLookup(
+        Yield &yield,
+        const WalkCache::Entry *&walkEntry,
+        Addr addr, uint16_t asid, uint16_t vmid,
+        unsigned stage, unsigned level)
+{
+    // Stage 2 trace lines are indented for readability.
+    const char *indent = stage==2 ? "  " : "";
+    (void) indent; // this is only used in DPRINTFs
+
+    const PageTableOps *pt_ops =
+        stage == 1 ?
+            smmu.getPageTableOps(context.stage1TranslGranule) :
+            smmu.getPageTableOps(context.stage2TranslGranule);
+
+    // Bitmask of levels cached for this stage; 0 disables the lookup.
+    unsigned walkCacheLevels =
+        smmu.walkCacheEnable ?
+            (stage == 1 ? smmu.walkCacheS1Levels : smmu.walkCacheS2Levels) :
+            0;
+
+    if ((1 << level) & walkCacheLevels) {
+        doSemaphoreDown(yield, smmu.walkSem);
+        doDelay(yield, smmu.walkLat);
+
+        walkEntry = smmu.walkCache.lookup(addr, pt_ops->walkMask(level),
+                                          asid, vmid, stage, level);
+
+        if (walkEntry) {
+            DPRINTF(SMMUv3, "%sWalkCache hit  va=%#x asid=%#x vmid=%#x "
+                            "base=%#x (S%d, L%d)\n",
+                    indent, addr, asid, vmid, walkEntry->pa, stage, level);
+        } else {
+            DPRINTF(SMMUv3, "%sWalkCache miss va=%#x asid=%#x vmid=%#x "
+                            "(S%d, L%d)\n",
+                    indent, addr, asid, vmid, stage, level);
+        }
+
+        doSemaphoreUp(smmu.walkSem);
+    }
+}
+
+/**
+ * Insert a translation-table walk result into the walk cache, provided
+ * the walk cache is enabled for this stage/level.  Stage 2 entries are
+ * tagged with ASID 0 (stage 2 entries are identified by VMID only).
+ */
+void
+SMMUTranslationProcess::walkCacheUpdate(Yield &yield, Addr va,
+                                        Addr vaMask, Addr pa,
+                                        unsigned stage, unsigned level,
+                                        bool leaf, uint8_t permissions)
+{
+    unsigned walkCacheLevels =
+        stage == 1 ? smmu.walkCacheS1Levels : smmu.walkCacheS2Levels;
+
+    if (smmu.walkCacheEnable && ((1<<level) & walkCacheLevels)) {
+        WalkCache::Entry e;
+        e.valid = true;
+        e.va = va;
+        e.vaMask = vaMask;
+        e.asid = stage==1 ? context.asid : 0;
+        e.vmid = context.vmid;
+        e.stage = stage;
+        e.level = level;
+        e.leaf = leaf;
+        e.pa = pa;
+        e.permissions = permissions;
+
+        doSemaphoreDown(yield, smmu.walkSem);
+
+        DPRINTF(SMMUv3, "%sWalkCache upd  va=%#x mask=%#x asid=%#x vmid=%#x "
+                        "tpa=%#x leaf=%s (S%d, L%d)\n",
+                e.stage==2 ? "  " : "",
+                e.va, e.vaMask, e.asid, e.vmid,
+                e.pa, e.leaf, e.stage, e.level);
+
+        smmu.walkCache.store(e);
+
+        doSemaphoreUp(smmu.walkSem);
+    }
+}
+
+/*
+ * Walk the stage 1 table (nested under stage 2 when enabled) starting
+ * from the given level and table pointer, returning the combined
+ * translation result.
+ *
+ * Please note:
+ * This does not deal with the case where stage 1 page size
+ * is larger than stage 2 page size.
+ */
+SMMUTranslationProcess::TranslResult
+SMMUTranslationProcess::walkStage1And2(Yield &yield, Addr addr,
+                                       const PageTableOps *pt_ops,
+                                       unsigned level, Addr walkPtr)
+{
+    PageTableOps::pte_t pte = 0;
+
+    // Charge one cycle on the shared walker resource before the first
+    // fetch.
+    doSemaphoreDown(yield, smmu.cycleSem);
+    doDelay(yield, Cycles(1));
+    doSemaphoreUp(smmu.cycleSem);
+
+    for (; level <= pt_ops->lastLevel(); level++) {
+        Addr pte_addr = walkPtr + pt_ops->index(addr, level);
+
+        DPRINTF(SMMUv3, "Fetching S1 L%d PTE from pa=%#08x\n",
+                level, pte_addr);
+
+        doReadPTE(yield, addr, pte_addr, &pte, 1, level);
+
+        DPRINTF(SMMUv3, "Got S1 L%d PTE=%#x from pa=%#08x\n",
+                level, pte, pte_addr);
+
+        doSemaphoreDown(yield, smmu.cycleSem);
+        doDelay(yield, Cycles(1));
+        doSemaphoreUp(smmu.cycleSem);
+
+        bool valid = pt_ops->isValid(pte, level);
+        bool leaf  = pt_ops->isLeaf(pte, level);
+
+        if (!valid) {
+            DPRINTF(SMMUv3, "S1 PTE not valid - fault\n");
+
+            TranslResult tr;
+            tr.fault = FAULT_TRANSLATION;
+            return tr;
+        }
+
+        // Write permission is only checked on leaf entries.
+        if (valid && leaf && request.isWrite &&
+            !pt_ops->isWritable(pte, level, false))
+        {
+            DPRINTF(SMMUv3, "S1 page not writable - fault\n");
+
+            TranslResult tr;
+            tr.fault = FAULT_PERMISSION;
+            return tr;
+        }
+
+        walkPtr = pt_ops->nextLevelPointer(pte, level);
+
+        if (leaf)
+            break;
+
+        // With stage 2 enabled, next-level table pointers are IPAs and
+        // must be translated before the next fetch.
+        if (context.stage2Enable) {
+            TranslResult s2tr = translateStage2(yield, walkPtr, false);
+            if (s2tr.fault != FAULT_NONE)
+                return s2tr;
+
+            walkPtr = s2tr.addr;
+        }
+
+        walkCacheUpdate(yield, addr, pt_ops->walkMask(level), walkPtr,
+                        1, level, leaf, 0);
+    }
+
+    TranslResult tr;
+    tr.fault    = FAULT_NONE;
+    tr.addrMask = pt_ops->pageMask(pte, level);
+    tr.addr     = walkPtr + (addr & ~tr.addrMask);
+    tr.writable = pt_ops->isWritable(pte, level, false);
+
+    // Translate the stage 1 output address through stage 2 and merge
+    // the two results into the final translation.
+    if (context.stage2Enable) {
+        TranslResult s2tr = translateStage2(yield, tr.addr, true);
+        if (s2tr.fault != FAULT_NONE)
+            return s2tr;
+
+        tr = combineTranslations(tr, s2tr);
+    }
+
+    walkCacheUpdate(yield, addr, tr.addrMask, tr.addr,
+                    1, level, true, tr.writable);
+
+    return tr;
+}
+
+/**
+ * Walk the stage 2 table from the given level and table pointer and
+ * return the translation result.  Walk cache entries are only
+ * installed for final translations unless caching of non-final walks
+ * is enabled.
+ */
+SMMUTranslationProcess::TranslResult
+SMMUTranslationProcess::walkStage2(Yield &yield, Addr addr, bool final_tr,
+                                   const PageTableOps *pt_ops,
+                                   unsigned level, Addr walkPtr)
+{
+    // Initialised for consistency with walkStage1And2() and so pte is
+    // never read indeterminate if the loop were ever skipped.
+    PageTableOps::pte_t pte = 0;
+
+    // Charge one cycle on the shared walker resource.
+    doSemaphoreDown(yield, smmu.cycleSem);
+    doDelay(yield, Cycles(1));
+    doSemaphoreUp(smmu.cycleSem);
+
+    for (; level <= pt_ops->lastLevel(); level++) {
+        Addr pte_addr = walkPtr + pt_ops->index(addr, level);
+
+        DPRINTF(SMMUv3, "  Fetching S2 L%d PTE from pa=%#08x\n",
+                level, pte_addr);
+
+        doReadPTE(yield, addr, pte_addr, &pte, 2, level);
+
+        DPRINTF(SMMUv3, "  Got S2 L%d PTE=%#x from pa=%#08x\n",
+                level, pte, pte_addr);
+
+        doSemaphoreDown(yield, smmu.cycleSem);
+        doDelay(yield, Cycles(1));
+        doSemaphoreUp(smmu.cycleSem);
+
+        bool valid = pt_ops->isValid(pte, level);
+        bool leaf  = pt_ops->isLeaf(pte, level);
+
+        if (!valid) {
+            DPRINTF(SMMUv3, "  S2 PTE not valid - fault\n");
+
+            TranslResult tr;
+            tr.fault = FAULT_TRANSLATION;
+            return tr;
+        }
+
+        // Write permission is only checked on leaf entries.
+        if (valid && leaf && request.isWrite &&
+            !pt_ops->isWritable(pte, level, true))
+        {
+            DPRINTF(SMMUv3, "  S2 PTE not writable - fault\n");
+
+            TranslResult tr;
+            tr.fault = FAULT_PERMISSION;
+            return tr;
+        }
+
+        walkPtr = pt_ops->nextLevelPointer(pte, level);
+
+        if (final_tr || smmu.walkCacheNonfinalEnable)
+            walkCacheUpdate(yield, addr, pt_ops->walkMask(level), walkPtr,
+                            2, level, leaf,
+                            leaf ? pt_ops->isWritable(pte, level, true) : 0);
+        if (leaf)
+            break;
+    }
+
+    TranslResult tr;
+    tr.fault    = FAULT_NONE;
+    tr.addrMask = pt_ops->pageMask(pte, level);
+    tr.addr     = walkPtr + (addr & ~tr.addrMask);
+    tr.writable = pt_ops->isWritable(pte, level, true);
+
+    return tr;
+}
+
+/**
+ * Stage 1 (optionally nested with stage 2) translation of a vaddr:
+ * probe the stage 1 walk cache from the deepest level upwards, then
+ * either finish from a cached leaf, resume a partial walk, or start a
+ * full walk from TTB0.
+ */
+SMMUTranslationProcess::TranslResult
+SMMUTranslationProcess::translateStage1And2(Yield &yield, Addr addr)
+{
+    const PageTableOps *pt_ops =
+        smmu.getPageTableOps(context.stage1TranslGranule);
+
+    const WalkCache::Entry *walk_ep = NULL;
+    unsigned level;
+
+    // Level here is actually (level+1) so we can count down
+    // to 0 using unsigned int.
+    for (level = pt_ops->lastLevel() + 1;
+        level > pt_ops->firstLevel();
+        level--)
+    {
+        walkCacheLookup(yield, walk_ep, addr,
+                        context.asid, context.vmid, 1, level-1);
+
+        if (walk_ep)
+            break;
+    }
+
+    // Correct level (see above).
+    level -= 1;
+
+    TranslResult tr;
+    if (walk_ep) {
+        if (walk_ep->leaf) {
+            // Cached leaf: translation completes without any walk.
+            tr.fault    = FAULT_NONE;
+            tr.addr     = walk_ep->pa + (addr & ~walk_ep->vaMask);
+            tr.addrMask = walk_ep->vaMask;
+            tr.writable = walk_ep->permissions;
+        } else {
+            // Resume the walk below the cached intermediate level.
+            tr = walkStage1And2(yield, addr, pt_ops, level+1, walk_ep->pa);
+        }
+    } else {
+        // NOTE(review): only ttb0 is consulted here; upper-half (ttb1)
+        // walks appear unimplemented — confirm against the SMMU spec.
+        Addr table_addr = context.ttb0;
+        if (context.stage2Enable) {
+            // The table base is an IPA when stage 2 is enabled.
+            TranslResult s2tr = translateStage2(yield, table_addr, false);
+            if (s2tr.fault != FAULT_NONE)
+                return s2tr;
+
+            table_addr = s2tr.addr;
+        }
+
+        tr = walkStage1And2(yield, addr, pt_ops, pt_ops->firstLevel(),
+                            table_addr);
+    }
+
+    if (tr.fault == FAULT_NONE)
+        DPRINTF(SMMUv3, "Translated vaddr %#x to paddr %#x\n", addr, tr.addr);
+
+    return tr;
+}
+
+/**
+ * Stage 2 translation of an IPA (or of a stage 1 table pointer when
+ * @a final_tr is false).  Checks the IPA cache first, then the stage 2
+ * walk cache, and falls back to a table walk from HTTB.  Successful
+ * results are installed into the IPA cache.
+ */
+SMMUTranslationProcess::TranslResult
+SMMUTranslationProcess::translateStage2(Yield &yield, Addr addr, bool final_tr)
+{
+    const PageTableOps *pt_ops =
+            smmu.getPageTableOps(context.stage2TranslGranule);
+
+    const IPACache::Entry *ipa_ep = NULL;
+    if (smmu.ipaCacheEnable) {
+        doSemaphoreDown(yield, smmu.ipaSem);
+        doDelay(yield, smmu.ipaLat);
+        ipa_ep = smmu.ipaCache.lookup(addr, context.vmid);
+        doSemaphoreUp(smmu.ipaSem);
+    }
+
+    if (ipa_ep) {
+        TranslResult tr;
+        tr.fault    = FAULT_NONE;
+        tr.addr     = ipa_ep->pa + (addr & ~ipa_ep->ipaMask);
+        tr.addrMask = ipa_ep->ipaMask;
+        tr.writable = ipa_ep->permissions;
+
+        DPRINTF(SMMUv3, "  IPACache hit  ipa=%#x vmid=%#x pa=%#x\n",
+            addr, context.vmid, tr.addr);
+
+        return tr;
+    } else if (smmu.ipaCacheEnable) {
+        DPRINTF(SMMUv3, "  IPACache miss ipa=%#x vmid=%#x\n",
+                addr, context.vmid);
+    }
+
+    const WalkCache::Entry *walk_ep = NULL;
+    unsigned level = pt_ops->firstLevel();
+
+    // Non-final walks only consult the walk cache when caching of
+    // non-final entries is enabled.
+    if (final_tr || smmu.walkCacheNonfinalEnable) {
+        // Level here is actually (level+1) so we can count down
+        // to 0 using unsigned int.
+        for (level = pt_ops->lastLevel() + 1;
+            level > pt_ops->firstLevel();
+            level--)
+        {
+            walkCacheLookup(yield, walk_ep, addr,
+                            0, context.vmid, 2, level-1);
+
+            if (walk_ep)
+                break;
+        }
+
+        // Correct level (see above).
+        level -= 1;
+    }
+
+    TranslResult tr;
+    if (walk_ep) {
+        if (walk_ep->leaf) {
+            // Cached leaf: translation completes without any walk.
+            tr.fault    = FAULT_NONE;
+            tr.addr     = walk_ep->pa + (addr & ~walk_ep->vaMask);
+            tr.addrMask = walk_ep->vaMask;
+            tr.writable = walk_ep->permissions;
+        } else {
+            // Resume the walk below the cached intermediate level.
+            tr = walkStage2(yield, addr, final_tr, pt_ops,
+                            level + 1, walk_ep->pa);
+        }
+    } else {
+        tr = walkStage2(yield, addr, final_tr, pt_ops, pt_ops->firstLevel(),
+                        context.httb);
+    }
+
+    if (tr.fault == FAULT_NONE)
+        DPRINTF(SMMUv3, "  Translated %saddr %#x to paddr %#x\n",
+            context.stage1Enable ? "ip" : "v", addr, tr.addr);
+
+    // Only cache successful translations; a faulted result carries no
+    // meaningful output address and would poison the IPA cache with a
+    // valid-looking entry.
+    if (smmu.ipaCacheEnable && tr.fault == FAULT_NONE) {
+        IPACache::Entry e;
+        e.valid = true;
+        e.ipaMask = tr.addrMask;
+        e.ipa = addr & e.ipaMask;
+        e.pa = tr.addr & tr.addrMask;
+        e.permissions = tr.writable;
+        e.vmid = context.vmid;
+
+        doSemaphoreDown(yield, smmu.ipaSem);
+        smmu.ipaCache.store(e);
+        doSemaphoreUp(smmu.ipaSem);
+    }
+
+    return tr;
+}
+
+// Merge a stage 1 and a stage 2 result into the final translation.
+SMMUTranslationProcess::TranslResult
+SMMUTranslationProcess::combineTranslations(const TranslResult &s1tr,
+                                            const TranslResult &s2tr) const
+{
+    // A stage 2 fault takes precedence over any stage 1 outcome.
+    if (s2tr.fault != FAULT_NONE)
+        return s2tr;
+
+    assert(s1tr.fault == FAULT_NONE);
+
+    TranslResult combined;
+    combined.fault    = FAULT_NONE;
+    // The output address comes from stage 2.
+    combined.addr     = s2tr.addr;
+    // OR-ing the masks keeps the smaller (more restrictive) page size.
+    combined.addrMask = s1tr.addrMask | s2tr.addrMask;
+    // Writable only if both stages permit writes.
+    combined.writable = s1tr.writable & s2tr.writable;
+
+    return combined;
+}
+
+// Return true if another in-flight request on this interface targets
+// the same 4k page as this request.
+bool
+SMMUTranslationProcess::hazard4kCheck()
+{
+    const Addr addr4k = request.addr & ~0xfffULL;
+
+    for (const auto *other : ifc.duplicateReqs) {
+        if ((other->request.addr & ~0xfffULL) == addr4k)
+            return true;
+    }
+
+    return false;
+}
+
+// Announce this request as in flight for its 4k page so that later
+// requests to the same page can wait on it.
+void
+SMMUTranslationProcess::hazard4kRegister()
+{
+    const Addr addr4k = request.addr & ~0xfffULL;
+
+    DPRINTF(SMMUv3Hazard, "4kReg:  p=%p a4k=%#x\n", this, addr4k);
+
+    ifc.duplicateReqs.push_back(this);
+}
+
+/**
+ * Block until no request registered ahead of us in the duplicateReqs
+ * queue targets the same 4k page.  The queue is rescanned from the
+ * start after every wakeup because entries may have been deleted while
+ * we slept.
+ */
+void
+SMMUTranslationProcess::hazard4kHold(Yield &yield)
+{
+    Addr addr4k = request.addr & ~0xfffULL;
+
+    bool found_hazard;
+
+    do {
+        found_hazard = false;
+
+        // Only entries ahead of us (registered earlier) count.
+        for (auto it = ifc.duplicateReqs.begin();
+             it!=ifc.duplicateReqs.end() && *it!=this;
+             ++it)
+        {
+            Addr other4k = (*it)->request.addr & ~0xfffULL;
+
+            DPRINTF(SMMUv3Hazard, "4kHold: p=%p a4k=%#x Q: p=%p a4k=%#x\n",
+                    this, addr4k, *it, other4k);
+
+            if (addr4k == other4k) {
+                DPRINTF(SMMUv3Hazard,
+                        "4kHold: p=%p a4k=%#x WAIT on p=%p a4k=%#x\n",
+                        this, addr4k, *it, other4k);
+
+                doWaitForSignal(yield, ifc.duplicateReqRemoved);
+
+                DPRINTF(SMMUv3Hazard, "4kHold: p=%p a4k=%#x RESUME\n",
+                        this, addr4k);
+
+                // This is to avoid checking *it!=this after doWaitForSignal()
+                // since it could have been deleted.
+                found_hazard = true;
+                break;
+            }
+        }
+    } while (found_hazard);
+}
+
+// Remove this request from the 4k-hazard queue and wake any processes
+// waiting for a same-page slot.
+void
+SMMUTranslationProcess::hazard4kRelease()
+{
+    DPRINTF(SMMUv3Hazard, "4kRel:  p=%p a4k=%#x\n",
+            this, request.addr & ~0xfffULL);
+
+    auto it = ifc.duplicateReqs.begin();
+    while (it != ifc.duplicateReqs.end() && *it != this)
+        ++it;
+
+    if (it == ifc.duplicateReqs.end())
+        panic("hazard4kRelease: request not found");
+
+    ifc.duplicateReqs.erase(it);
+
+    doBroadcastSignal(ifc.duplicateReqRemoved);
+}
+
+// Register this request in the per-direction queue for its AMBA order
+// ID so that same-ID ordering can be enforced at completion time.
+void
+SMMUTranslationProcess::hazardIdRegister()
+{
+    auto orderId = AMBA::orderId(request.pkt);
+
+    DPRINTF(SMMUv3Hazard, "IdReg:  p=%p oid=%d\n", this, orderId);
+
+    assert(orderId < SMMU_MAX_TRANS_ID);
+
+    if (request.isWrite)
+        ifc.dependentWrites[orderId].push_back(this);
+    else
+        ifc.dependentReads[orderId].push_back(this);
+}
+
+// Block this transaction until no earlier transaction with the same
+// orderId (and same direction) is still in flight.
+void
+SMMUTranslationProcess::hazardIdHold(Yield &yield)
+{
+    auto orderId = AMBA::orderId(request.pkt);
+
+    DPRINTF(SMMUv3Hazard, "IdHold: p=%p oid=%d\n", this, orderId);
+
+    // Dependency queue matching this transaction's direction.
+    std::list<SMMUTranslationProcess *> &depReqs =
+        request.isWrite ?
+            ifc.dependentWrites[orderId] : ifc.dependentReads[orderId];
+
+    bool found_hazard;
+
+    do {
+        found_hazard = false;
+
+        // Only requests queued ahead of this one (the scan stops at
+        // *it == this) can create an ordering hazard.
+        for (auto it = depReqs.begin(); it!=depReqs.end() && *it!=this; ++it) {
+            DPRINTF(SMMUv3Hazard, "IdHold: p=%p oid=%d Q: %p\n",
+                    this, orderId, *it);
+
+            if (AMBA::orderId((*it)->request.pkt) == orderId) {
+                DPRINTF(SMMUv3Hazard, "IdHold: p=%p oid=%d WAIT on=%p\n",
+                        this, orderId, *it);
+
+                doWaitForSignal(yield, ifc.dependentReqRemoved);
+
+                DPRINTF(SMMUv3Hazard, "IdHold: p=%p oid=%d RESUME\n",
+                        this, orderId);
+
+                // This is to avoid checking *it!=this after doWaitForSignal()
+                // since it could have been deleted.
+                found_hazard = true;
+                break;
+            }
+        }
+    } while (found_hazard);
+}
+
+void
+SMMUTranslationProcess::hazardIdRelease()
+{
+    // Remove this transaction from its orderId dependency queue and wake
+    // any transactions waiting behind it.
+    const auto orderId = AMBA::orderId(request.pkt);
+
+    DPRINTF(SMMUv3Hazard, "IdRel:  p=%p oid=%d\n", this, orderId);
+
+    std::list<SMMUTranslationProcess *> &depReqs =
+        request.isWrite ?
+            ifc.dependentWrites[orderId] : ifc.dependentReads[orderId];
+
+    auto it = depReqs.begin();
+    while (it != depReqs.end() && *it != this)
+        ++it;
+
+    if (it == depReqs.end())
+        panic("hazardIdRelease: request not found");
+
+    depReqs.erase(it);
+
+    doBroadcastSignal(ifc.dependentReqRemoved);
+}
+
+void
+SMMUTranslationProcess::issuePrefetch(Addr addr)
+{
+    // Spawn a prefetch translation for addr.  Prefetches are only
+    // modelled in timing mode, and only while prefetching is enabled
+    // and the interface still has a free translation slot.
+    if (!smmu.system.isTimingMode() ||
+        !ifc.prefetchEnable || ifc.xlateSlotsRemaining == 0)
+        return;
+
+    // Claim a translation slot for the new process; it is returned in
+    // completePrefetch().
+    ifc.xlateSlotsRemaining--;
+
+    std::string proc_name = csprintf("%sprf", name());
+    auto *proc = new SMMUTranslationProcess(proc_name, smmu, ifc);
+
+    proc->beginTransaction(
+            SMMUTranslRequest::prefetch(addr, request.sid, request.ssid));
+    proc->scheduleWakeup(smmu.clockEdge(Cycles(1)));
+}
+
+// Finish a successfully-translated transaction: model master port
+// occupancy, release the resources claimed on acceptance, then forward
+// the request downstream (or answer the ATS request) and finally send
+// the response back upstream.
+void
+SMMUTranslationProcess::completeTransaction(Yield &yield,
+                                            const TranslResult &tr)
+{
+    assert(tr.fault == FAULT_NONE);
+
+    // Beats the transaction occupies on the master port: writes are
+    // split by the master port width; reads take a single beat here.
+    unsigned numMasterBeats = request.isWrite ?
+        (request.size + (smmu.masterPortWidth-1))
+            / smmu.masterPortWidth :
+        1;
+
+    // Hold the master port semaphore for the duration of the beats.
+    doSemaphoreDown(yield, smmu.masterPortSem);
+    doDelay(yield, Cycles(numMasterBeats));
+    doSemaphoreUp(smmu.masterPortSem);
+
+
+    // Record translation latency and return the slots claimed when the
+    // transaction was accepted; retry any stalled slave requests.
+    smmu.translationTimeDist.sample(curTick() - recvTick);
+    ifc.xlateSlotsRemaining++;
+    if (!request.isAtsRequest && request.isWrite)
+        ifc.wrBufSlotsRemaining +=
+            (request.size + (ifc.portWidth-1)) / ifc.portWidth;
+
+    smmu.scheduleSlaveRetries();
+
+
+    SMMUAction a;
+
+    if (request.isAtsRequest) {
+        // ATS requests get an immediate translation response.
+        a.type = ACTION_SEND_RESP_ATS;
+
+        if (smmu.system.isAtomicMode()) {
+            request.pkt->makeAtomicResponse();
+        } else if (smmu.system.isTimingMode()) {
+            request.pkt->makeTimingResponse();
+        } else {
+            panic("Not in atomic or timing mode");
+        }
+    } else {
+        // Normal transactions are forwarded downstream with the
+        // translated address.
+        a.type = ACTION_SEND_REQ_FINAL;
+        a.ifc = &ifc;
+    }
+
+    a.pkt = request.pkt;
+    a.delay = 0;
+
+    // Patch the packet to carry the translated (output) address.
+    a.pkt->setAddr(tr.addr);
+    a.pkt->req->setPaddr(tr.addr);
+
+    yield(a);
+
+    if (!request.isAtsRequest) {
+        // The downstream response arrives through the yield; restore the
+        // original (untranslated) address before responding upstream.
+        PacketPtr pkt = yield.get();
+        pkt->setAddr(request.addr);
+
+        a.type = ACTION_SEND_RESP;
+        a.pkt = pkt;
+        a.ifc = &ifc;
+        a.delay = 0;
+        yield(a);
+    }
+}
+
+void
+SMMUTranslationProcess::completePrefetch(Yield &yield)
+{
+    // Return the translation slot claimed by issuePrefetch() and
+    // terminate the process; prefetches send no response packet.
+    ifc.xlateSlotsRemaining++;
+
+    SMMUAction action;
+    action.type = ACTION_TERMINATE;
+    action.pkt = NULL;
+    action.ifc = &ifc;
+    action.delay = 0;
+
+    yield(action);
+}
+
+// Write an event record into the guest-visible event queue and raise the
+// event queue MSI.  Only MSI notification is supported (the code panics
+// if the MSI is not enabled).
+void
+SMMUTranslationProcess::sendEvent(Yield &yield, const SMMUEvent &ev)
+{
+    // Index mask derived from the queue size field of EVENTQ_BASE.
+    int sizeMask = mask(smmu.regs.eventq_base & Q_BASE_SIZE_MASK) &
+            Q_CONS_PROD_MASK;
+
+    // The model cannot make forward progress with a full event queue.
+    if (((smmu.regs.eventq_prod+1) & sizeMask) ==
+            (smmu.regs.eventq_cons & sizeMask))
+        panic("Event queue full - aborting\n");
+
+    // Address of the slot the event record is written to.
+    Addr event_addr =
+        (smmu.regs.eventq_base & Q_BASE_ADDR_MASK) +
+        (smmu.regs.eventq_prod & sizeMask) * sizeof(ev);
+
+    DPRINTF(SMMUv3, "Sending event to addr=%#08x (pos=%d): type=%#x stag=%#x "
+        "flags=%#x sid=%#x ssid=%#x va=%#08x ipa=%#x\n",
+        event_addr, smmu.regs.eventq_prod, ev.type, ev.stag,
+        ev.flags, ev.streamId, ev.substreamId, ev.va, ev.ipa);
+
+    // This deliberately resets the overflow field in eventq_prod!
+    smmu.regs.eventq_prod = (smmu.regs.eventq_prod + 1) & sizeMask;
+
+    doWrite(yield, event_addr, &ev, sizeof(ev));
+
+    // Notify the host by writing the MSI payload (eventq_irq_cfg1) to
+    // the configured MSI address (eventq_irq_cfg0).
+    if (!(smmu.regs.eventq_irq_cfg0 & E_BASE_ENABLE_MASK))
+        panic("eventq msi not enabled\n");
+
+    doWrite(yield, smmu.regs.eventq_irq_cfg0 & E_BASE_ADDR_MASK,
+            &smmu.regs.eventq_irq_cfg1, sizeof(smmu.regs.eventq_irq_cfg1));
+}
+
+// Fetch and validate the Stream Table Entry for the given StreamID,
+// supporting both the linear and the two-level stream table formats
+// selected by STRTAB_BASE_CFG.
+void
+SMMUTranslationProcess::doReadSTE(Yield &yield,
+                                  StreamTableEntry &ste,
+                                  uint32_t sid)
+{
+    // The size field of STRTAB_BASE_CFG is log2 of the number of STEs.
+    unsigned max_sid = 1 << (smmu.regs.strtab_base_cfg & ST_CFG_SIZE_MASK);
+    if (sid >= max_sid)
+        panic("SID %#x out of range, max=%#x", sid, max_sid);
+
+    Addr ste_addr;
+
+    if ((smmu.regs.strtab_base_cfg & ST_CFG_FMT_MASK) == ST_CFG_FMT_2LEVEL) {
+        // Two-level format: the split determines how many low SID bits
+        // index the level-2 table; the rest index the level-1 table.
+        unsigned split =
+            (smmu.regs.strtab_base_cfg & ST_CFG_SPLIT_MASK) >> ST_CFG_SPLIT_SHIFT;
+
+        if (split!= 7 && split!=8 && split!=16)
+            panic("Invalid stream table split %d", split);
+
+        // Fetch the level-1 descriptor for the upper SID bits.
+        uint64_t l2_ptr;
+        uint64_t l2_addr =
+            (smmu.regs.strtab_base & VMT_BASE_ADDR_MASK) +
+            bits(sid, 32, split) * sizeof(l2_ptr);
+
+        DPRINTF(SMMUv3, "Read L1STE at %#x\n", l2_addr);
+
+        doReadConfig(yield, l2_addr, &l2_ptr, sizeof(l2_ptr), sid, 0);
+
+        DPRINTF(SMMUv3, "Got L1STE L1 at %#x: 0x%016x\n", l2_addr, l2_ptr);
+
+        // The span encodes the valid size of the level-2 table; zero
+        // marks an invalid descriptor.
+        unsigned span = l2_ptr & ST_L2_SPAN_MASK;
+        if (span == 0)
+            panic("Invalid level 1 stream table descriptor");
+
+        unsigned index = bits(sid, split-1, 0);
+        if (index >= (1 << span))
+            panic("StreamID %d out of level 1 descriptor range %d",
+                  sid, 1<<span);
+
+        ste_addr = (l2_ptr & ST_L2_ADDR_MASK) + index * sizeof(ste);
+
+        smmu.steL1Fetches++;
+    } else if ((smmu.regs.strtab_base_cfg & ST_CFG_FMT_MASK) == ST_CFG_FMT_LINEAR) {
+        // Linear format: the table is indexed directly by the SID.
+        ste_addr =
+            (smmu.regs.strtab_base & VMT_BASE_ADDR_MASK) + sid * sizeof(ste);
+    } else {
+        panic("Invalid stream table format");
+    }
+
+    DPRINTF(SMMUv3, "Read STE at %#x\n", ste_addr);
+
+    doReadConfig(yield, ste_addr, &ste, sizeof(ste), sid, 0);
+
+    DPRINTF(SMMUv3, "Got STE at %#x [0]: 0x%016x\n", ste_addr, ste.dw0);
+    DPRINTF(SMMUv3, "    STE at %#x [1]: 0x%016x\n", ste_addr, ste.dw1);
+    DPRINTF(SMMUv3, "    STE at %#x [2]: 0x%016x\n", ste_addr, ste.dw2);
+    DPRINTF(SMMUv3, "    STE at %#x [3]: 0x%016x\n", ste_addr, ste.dw3);
+    DPRINTF(SMMUv3, "    STE at %#x [4]: 0x%016x\n", ste_addr, ste._pad[0]);
+    DPRINTF(SMMUv3, "    STE at %#x [5]: 0x%016x\n", ste_addr, ste._pad[1]);
+    DPRINTF(SMMUv3, "    STE at %#x [6]: 0x%016x\n", ste_addr, ste._pad[2]);
+    DPRINTF(SMMUv3, "    STE at %#x [7]: 0x%016x\n", ste_addr, ste._pad[3]);
+
+    if (!ste.dw0.valid)
+        panic("STE @ %#x not valid\n", ste_addr);
+
+    smmu.steFetches++;
+}
+
+// Fetch and validate the Context Descriptor for (sid, ssid), supporting
+// single-CD, one-level and two-level CD table formats as configured in
+// the STE.  CD table accesses go through stage 2 translation when it is
+// enabled.
+void
+SMMUTranslationProcess::doReadCD(Yield &yield,
+                                 ContextDescriptor &cd,
+                                 const StreamTableEntry &ste,
+                                 uint32_t sid, uint32_t ssid)
+{
+    Addr cd_addr = 0;
+
+    if (ste.dw0.s1cdmax == 0) {
+        // Single context descriptor; substreams are not in use.
+        cd_addr = ste.dw0.s1ctxptr << ST_CD_ADDR_SHIFT;
+    } else {
+        unsigned max_ssid = 1 << ste.dw0.s1cdmax;
+        if (ssid >= max_ssid)
+            panic("SSID %#x out of range, max=%#x", ssid, max_ssid);
+
+        if (ste.dw0.s1fmt==STAGE1_CFG_2L_4K ||
+            ste.dw0.s1fmt==STAGE1_CFG_2L_64K)
+        {
+            // Two-level CD table; the split point depends on the leaf
+            // table size (4k -> 7 SSID bits, 64k -> 11 SSID bits).
+            unsigned split = ste.dw0.s1fmt==STAGE1_CFG_2L_4K ? 7 : 11;
+
+            uint64_t l2_ptr;
+            uint64_t l2_addr = (ste.dw0.s1ctxptr << ST_CD_ADDR_SHIFT) +
+                bits(ssid, 24, split) * sizeof(l2_ptr);
+
+            if (context.stage2Enable)
+                l2_addr = translateStage2(yield, l2_addr, false).addr;
+
+            DPRINTF(SMMUv3, "Read L1CD at %#x\n", l2_addr);
+
+            doReadConfig(yield, l2_addr, &l2_ptr, sizeof(l2_ptr), sid, ssid);
+
+            DPRINTF(SMMUv3, "Got L1CD at %#x: 0x%016x\n", l2_addr, l2_ptr);
+
+            cd_addr = l2_ptr + bits(ssid, split-1, 0) * sizeof(cd);
+
+            smmu.cdL1Fetches++;
+        } else if (ste.dw0.s1fmt == STAGE1_CFG_1L) {
+            cd_addr = (ste.dw0.s1ctxptr << ST_CD_ADDR_SHIFT) + ssid*sizeof(cd);
+        } else {
+            // Previously an unrecognised s1fmt fell through and cd_addr
+            // was used uninitialised; fail loudly instead.
+            panic("Unsupported context descriptor format %d",
+                  (unsigned) ste.dw0.s1fmt);
+        }
+    }
+
+    if (context.stage2Enable)
+        cd_addr = translateStage2(yield, cd_addr, false).addr;
+
+    DPRINTF(SMMUv3, "Read CD at %#x\n", cd_addr);
+
+    doReadConfig(yield, cd_addr, &cd, sizeof(cd), sid, ssid);
+
+    DPRINTF(SMMUv3, "Got CD at %#x [0]: 0x%016x\n", cd_addr, cd.dw0);
+    DPRINTF(SMMUv3, "    CD at %#x [1]: 0x%016x\n", cd_addr, cd.dw1);
+    DPRINTF(SMMUv3, "    CD at %#x [2]: 0x%016x\n", cd_addr, cd.dw2);
+    DPRINTF(SMMUv3, "    CD at %#x [3]: 0x%016x\n", cd_addr, cd.mair);
+    DPRINTF(SMMUv3, "    CD at %#x [4]: 0x%016x\n", cd_addr, cd.amair);
+    DPRINTF(SMMUv3, "    CD at %#x [5]: 0x%016x\n", cd_addr, cd._pad[0]);
+    DPRINTF(SMMUv3, "    CD at %#x [6]: 0x%016x\n", cd_addr, cd._pad[1]);
+    DPRINTF(SMMUv3, "    CD at %#x [7]: 0x%016x\n", cd_addr, cd._pad[2]);
+
+
+    if (!cd.dw0.valid)
+        panic("CD @ %#x not valid\n", cd_addr);
+
+    smmu.cdFetches++;
+}
+
+// Read configuration data (STE/CD and their level-1 descriptors) from
+// memory.  sid/ssid are accepted for interface symmetry but are unused
+// here; the access is a plain memory read.
+void
+SMMUTranslationProcess::doReadConfig(Yield &yield, Addr addr,
+                                     void *ptr, size_t size,
+                                     uint32_t sid, uint32_t ssid)
+{
+    doRead(yield, addr, ptr, size);
+}
+
+// Read one page table entry.  The access is aligned down to the PTE
+// size; va, stage and level are not used by this implementation.
+void
+SMMUTranslationProcess::doReadPTE(Yield &yield, Addr va, Addr addr,
+                                  void *ptr, unsigned stage,
+                                  unsigned level)
+{
+    size_t pte_size = sizeof(PageTableOps::pte_t);
+
+    // Align the requested address down to a PTE boundary.
+    Addr mask = pte_size - 1;
+    Addr base = addr & ~mask;
+
+    doRead(yield, base, ptr, pte_size);
+}
diff --git a/src/dev/arm/smmu_v3_transl.hh b/src/dev/arm/smmu_v3_transl.hh
new file mode 100644
index 0000000..ac0dc77
--- /dev/null
+++ b/src/dev/arm/smmu_v3_transl.hh
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2013, 2018-2019 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Stan Czerniawski
+ */
+
+#ifndef __DEV_ARM_SMMU_V3_TRANSL_HH__
+#define __DEV_ARM_SMMU_V3_TRANSL_HH__
+
+#include "dev/arm/smmu_v3_proc.hh"
+#include "dev/arm/smmu_v3_ptops.hh"
+#include "dev/arm/smmu_v3_slaveifc.hh"
+#include "mem/packet.hh"
+
+/**
+ * Parameters of a single translation request handed to an
+ * SMMUTranslationProcess.
+ */
+struct SMMUTranslRequest
+{
+    Addr     addr;         // address to translate
+    unsigned size;         // transaction size in bytes
+    uint32_t sid;  // streamId
+    uint32_t ssid; // substreamId
+    bool     isWrite;      // true for writes, false for reads
+    bool     isPrefetch;   // true for SMMU-generated prefetches
+    bool     isAtsRequest; // true for ATS translation requests
+
+    // Packet that triggered the request, if any.
+    PacketPtr pkt;
+
+    // Build a request from an incoming packet.
+    static SMMUTranslRequest fromPacket(PacketPtr pkt, bool ats = false);
+    // Build an SMMU-generated prefetch request for the given address.
+    static SMMUTranslRequest prefetch(Addr addr, uint32_t sid, uint32_t ssid);
+};
+
+/**
+ * Coroutine-based process (see SMMUProcess) that services one
+ * translation request on behalf of a slave interface: configuration
+ * (STE/CD) lookup, TLB and walk-cache queries, stage 1 / stage 2 page
+ * table walks, and delivery of the translated transaction.
+ */
+class SMMUTranslationProcess : public SMMUProcess
+{
+  private:
+    // Translation configuration distilled from the STE/CD for the
+    // requesting (SID, SSID) pair.
+    struct TranslContext
+    {
+        bool stage1Enable;
+        bool stage2Enable;
+        Addr ttb0, ttb1, httb; // stage 1 table bases and stage 2 base
+        uint16_t asid;
+        uint16_t vmid;
+        uint8_t stage1TranslGranule;
+        uint8_t stage2TranslGranule;
+    };
+
+    enum FaultType
+    {
+        FAULT_NONE,
+        FAULT_TRANSLATION, // F_TRANSLATION
+        FAULT_PERMISSION,  // F_PERMISSION
+    };
+
+    // Outcome of a (partial or complete) translation.
+    struct TranslResult
+    {
+        FaultType  fault;    // FAULT_NONE on success
+        Addr       addr;     // translated output address
+        Addr       addrMask; // mask covering the translated page/block
+        bool       writable;
+    };
+
+    // Slave interface the request arrived on.
+    SMMUv3SlaveInterface &ifc;
+
+    SMMUTranslRequest request;
+    TranslContext context;
+
+    // Tick at which the request was received; used for translation
+    // latency statistics.
+    Tick recvTick;
+    // NOTE(review): appears to record the time of a fault — confirm
+    // against the fault-handling path before relying on it.
+    Tick faultTick;
+
+    virtual void main(Yield &yield);
+
+    TranslResult bypass(Addr addr) const;
+    TranslResult smmuTranslation(Yield &yield);
+
+    // TLB lookups at the three levels of the hierarchy: per-interface
+    // micro TLB, per-interface main TLB, and shared SMMU TLB.
+    bool microTLBLookup(Yield &yield, TranslResult &tr);
+    bool ifcTLBLookup(Yield &yield, TranslResult &tr, bool &wasPrefetched);
+    bool smmuTLBLookup(Yield &yield, TranslResult &tr);
+
+    void microTLBUpdate(Yield &yield, const TranslResult &tr);
+    void ifcTLBUpdate(Yield &yield, const TranslResult &tr);
+    void smmuTLBUpdate(Yield &yield, const TranslResult &tr);
+
+    bool configCacheLookup(Yield &yield, TranslContext &tc);
+    void configCacheUpdate(Yield &yield, const TranslContext &tc);
+    bool findConfig(Yield &yield, TranslContext &tc, TranslResult &tr);
+
+    void walkCacheLookup(Yield &yield,
+                         const WalkCache::Entry *&walkEntry,
+                         Addr addr, uint16_t asid, uint16_t vmid,
+                         unsigned stage, unsigned level);
+
+    void walkCacheUpdate(Yield &yield, Addr va, Addr vaMask, Addr pa,
+                         unsigned stage, unsigned level,
+                         bool leaf, uint8_t permissions);
+
+    TranslResult walkStage1And2(Yield &yield, Addr addr,
+                                const PageTableOps *pt_ops,
+                                unsigned level, Addr walkPtr);
+
+    TranslResult walkStage2(Yield &yield, Addr addr, bool final_tr,
+                            const PageTableOps *pt_ops,
+                            unsigned level, Addr walkPtr);
+
+    TranslResult translateStage1And2(Yield &yield, Addr addr);
+    TranslResult translateStage2(Yield &yield, Addr addr, bool final_tr);
+
+    TranslResult combineTranslations(const TranslResult &s1tr,
+                                     const TranslResult &s2tr) const;
+
+    /**
+     * Used to force ordering on transactions with same
+     * (SID, SSID, 4k page) to avoid multiple identical
+     * page-table walks.
+     */
+    bool hazard4kCheck();
+    void hazard4kRegister();
+    void hazard4kHold(Yield &yield);
+    void hazard4kRelease();
+
+    /**
+     * Used to force ordering on transactions with the same orderId.
+     * This attempts to model AXI IDs.
+     */
+    void hazardIdRegister();
+    void hazardIdHold(Yield &yield);
+    void hazardIdRelease();
+
+    void issuePrefetch(Addr addr);
+
+    void completeTransaction(Yield &yield, const TranslResult &tr);
+    void completePrefetch(Yield &yield);
+
+    void sendEvent(Yield &yield, const SMMUEvent &ev);
+
+    // Stream table / context descriptor / page table accessors.
+    void doReadSTE(Yield &yield, StreamTableEntry &ste, uint32_t sid);
+    void doReadCD(Yield &yield, ContextDescriptor &cd,
+                  const StreamTableEntry &ste, uint32_t sid, uint32_t ssid);
+    void doReadConfig(Yield &yield, Addr addr, void *ptr, size_t size,
+                      uint32_t sid, uint32_t ssid);
+    void doReadPTE(Yield &yield, Addr va, Addr addr, void *ptr,
+                   unsigned stage, unsigned level);
+
+  public:
+    SMMUTranslationProcess(const std::string &name, SMMUv3 &_smmu,
+        SMMUv3SlaveInterface &_ifc)
+      :
+        SMMUProcess(name, _smmu),
+        ifc(_ifc)
+    {
+        // Set up the coroutine so the process is ready to run.
+        reinit();
+    }
+
+    virtual ~SMMUTranslationProcess() {}
+
+    void beginTransaction(const SMMUTranslRequest &req);
+    void resumeTransaction();
+};
+
+#endif /* __DEV_ARM_SMMU_V3_TRANSL_HH__ */