diff --git a/src/arch/arm/fastmodel/iris/thread_context.hh b/src/arch/arm/fastmodel/iris/thread_context.hh
index e0a07d1..9a1eaba 100644
--- a/src/arch/arm/fastmodel/iris/thread_context.hh
+++ b/src/arch/arm/fastmodel/iris/thread_context.hh
@@ -211,7 +211,7 @@
     }
 
     CheckerCPU *getCheckerCpuPtr() override { return nullptr; }
-    ArmISA::Decoder *
+    InstDecoder *
     getDecoderPtr() override
     {
         panic("%s not implemented.", __FUNCTION__);
diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc
index a2d9700..2bc335b 100644
--- a/src/arch/arm/isa.cc
+++ b/src/arch/arm/isa.cc
@@ -964,7 +964,8 @@
         pc.illegalExec(cpsr.il == 1);
         selfDebug->setDebugMask(cpsr.d == 1);
 
-        tc->getDecoderPtr()->setSveLen((getCurSveVecLenInBits() >> 7) - 1);
+        tc->getDecoderPtr()->as<Decoder>().setSveLen(
+                (getCurSveVecLenInBits() >> 7) - 1);
 
         // Follow slightly different semantics if a CheckerCPU object
         // is connected
@@ -1130,7 +1131,7 @@
                 newVal = (newVal & (uint32_t)fpscrMask) |
                          (readMiscRegNoEffect(MISCREG_FPSCR) &
                           ~(uint32_t)fpscrMask);
-                tc->getDecoderPtr()->setContext(newVal);
+                tc->getDecoderPtr()->as<Decoder>().setContext(newVal);
             }
             break;
           case MISCREG_FPSR:
@@ -2492,7 +2493,8 @@
           case MISCREG_ZCR_EL3:
           case MISCREG_ZCR_EL2:
           case MISCREG_ZCR_EL1:
-            tc->getDecoderPtr()->setSveLen((getCurSveVecLenInBits() >> 7) - 1);
+            tc->getDecoderPtr()->as<Decoder>().setSveLen(
+                    (getCurSveVecLenInBits() >> 7) - 1);
             break;
         }
         setMiscRegNoEffect(misc_reg, newVal);
diff --git a/src/arch/generic/decoder.hh b/src/arch/generic/decoder.hh
index 6d828a2..9f0fc2a 100644
--- a/src/arch/generic/decoder.hh
+++ b/src/arch/generic/decoder.hh
@@ -63,6 +63,30 @@
         outOfBytes = true;
     }
 
+    /**
+     * Downcast this decoder to a concrete decoder type.
+     *
+     * The conversion is a static_cast, so nothing is checked at run
+     * time; the caller must know that this object really is a Type.
+     *
+     * @tparam Type The concrete decoder class to convert to.
+     * @return A reference to this object, typed as Type.
+     */
+    template <class Type>
+    Type &
+    as()
+    {
+        return static_cast<Type &>(*this);
+    }
+
+    /** Const-qualified overload of as(), using the same unchecked cast. */
+    template <class Type>
+    const Type &
+    as() const
+    {
+        return static_cast<const Type &>(*this);
+    }
+
     /**
      * Take over the state from an old decoder when switching CPUs.
      *
diff --git a/src/arch/sparc/isa.cc b/src/arch/sparc/isa.cc
index ed43abd..2debbc5 100644
--- a/src/arch/sparc/isa.cc
+++ b/src/arch/sparc/isa.cc
@@ -749,7 +749,7 @@
 
     switch (miscReg) {
       case MISCREG_ASI:
-        tc->getDecoderPtr()->setContext(val);
+        tc->getDecoderPtr()->as<Decoder>().setContext(val);
         break;
       case MISCREG_STICK:
       case MISCREG_TICK:
diff --git a/src/arch/x86/isa.cc b/src/arch/x86/isa.cc
index 8f1d401..89e0d29 100644
--- a/src/arch/x86/isa.cc
+++ b/src/arch/x86/isa.cc
@@ -103,7 +103,7 @@
 
     regVal[MISCREG_M5_REG] = m5reg;
     if (tc)
-        tc->getDecoderPtr()->setM5Reg(m5reg);
+        tc->getDecoderPtr()->as<Decoder>().setM5Reg(m5reg);
 }
 
 void
@@ -479,7 +479,7 @@
 ISA::setThreadContext(ThreadContext *_tc)
 {
     BaseISA::setThreadContext(_tc);
-    tc->getDecoderPtr()->setM5Reg(regVal[MISCREG_M5_REG]);
+    tc->getDecoderPtr()->as<Decoder>().setM5Reg(regVal[MISCREG_M5_REG]);
 }
 
 std::string
diff --git a/src/cpu/checker/cpu_impl.hh b/src/cpu/checker/cpu_impl.hh
index b157145..218d87f 100644
--- a/src/cpu/checker/cpu_impl.hh
+++ b/src/cpu/checker/cpu_impl.hh
@@ -69,7 +69,7 @@
     if (fault != NoFault) {
         curMacroStaticInst = nullStaticInstPtr;
         fault->invoke(tc, curStaticInst);
-        thread->decoder.reset();
+        thread->decoder->reset();
     } else {
         if (curStaticInst) {
             if (curStaticInst->isLastMicroop())
@@ -111,7 +111,7 @@
               "a non-interuptable instruction!", curTick());
     }
     boundaryInst = NULL;
-    thread->decoder.reset();
+    thread->decoder->reset();
     curMacroStaticInst = nullStaticInstPtr;
 }
 
@@ -182,7 +182,7 @@
     inst = NULL;
 
     auto &decoder = thread->decoder;
-    const Addr pc_mask = decoder.pcMask();
+    const Addr pc_mask = decoder->pcMask();
 
     // Try to check all instructions that are completed, ending if we
     // run out of instructions to check or if an instruction is not
@@ -235,10 +235,10 @@
             if (!curMacroStaticInst) {
                 // set up memory request for instruction fetch
                 auto mem_req = std::make_shared<Request>(
-                    fetch_PC, decoder.moreBytesSize(), 0, requestorId,
+                    fetch_PC, decoder->moreBytesSize(), 0, requestorId,
                     fetch_PC, thread->contextId());
 
-                mem_req->setVirt(fetch_PC, decoder.moreBytesSize(),
+                mem_req->setVirt(fetch_PC, decoder->moreBytesSize(),
                                  Request::INST_FETCH, requestorId,
                                  thread->pcState().instAddr());
 
@@ -272,7 +272,7 @@
                 } else {
                     PacketPtr pkt = new Packet(mem_req, MemCmd::ReadReq);
 
-                    pkt->dataStatic(decoder.moreBytesPtr());
+                    pkt->dataStatic(decoder->moreBytesPtr());
                     icachePort->sendFunctional(pkt);
 
                     delete pkt;
@@ -285,7 +285,7 @@
 
                 if (isRomMicroPC(pc_state->microPC())) {
                     fetchDone = true;
-                    curStaticInst = decoder.fetchRomMicroop(
+                    curStaticInst = decoder->fetchRomMicroop(
                             pc_state->microPC(), nullptr);
                 } else if (!curMacroStaticInst) {
                     //We're not in the middle of a macro instruction
@@ -295,18 +295,18 @@
                     //If more fetch data is needed, pass it in.
                     Addr fetch_pc =
                         (pc_state->instAddr() & pc_mask) + fetchOffset;
-                    decoder.moreBytes(*pc_state, fetch_pc);
+                    decoder->moreBytes(*pc_state, fetch_pc);
 
                     //If an instruction is ready, decode it.
                     //Otherwise, we'll have to fetch beyond the
                     //memory chunk at the current pc.
-                    if (decoder.instReady()) {
+                    if (decoder->instReady()) {
                         fetchDone = true;
-                        instPtr = decoder.decode(*pc_state);
+                        instPtr = decoder->decode(*pc_state);
                         thread->pcState(*pc_state);
                     } else {
                         fetchDone = false;
-                        fetchOffset += decoder.moreBytesSize();
+                        fetchOffset += decoder->moreBytesSize();
                     }
 
                     //If we decoded an instruction and it's microcoded,
@@ -327,7 +327,7 @@
             }
         }
         // reset decoder on Checker
-        decoder.reset();
+        decoder->reset();
 
         // Check Checker and CPU get same instruction, and record
         // any faults the CPU may have had.
diff --git a/src/cpu/checker/thread_context.hh b/src/cpu/checker/thread_context.hh
index 9e6c812..8093a9d 100644
--- a/src/cpu/checker/thread_context.hh
+++ b/src/cpu/checker/thread_context.hh
@@ -155,7 +155,7 @@
 
     BaseISA *getIsaPtr() override { return actualTC->getIsaPtr(); }
 
-    TheISA::Decoder *
+    InstDecoder *
     getDecoderPtr() override
     {
         return actualTC->getDecoderPtr();
diff --git a/src/cpu/minor/fetch1.cc b/src/cpu/minor/fetch1.cc
index 72e9957..3a5b310 100644
--- a/src/cpu/minor/fetch1.cc
+++ b/src/cpu/minor/fetch1.cc
@@ -98,7 +98,7 @@
             maxLineWidth);
     }
 
-    size_t inst_size = cpu.threads[0]->decoder.moreBytesSize();
+    size_t inst_size = cpu.threads[0]->decoder->moreBytesSize();
 
     /* These assertions should be copied to the Python config. as well */
     if ((lineSnap % inst_size) != 0) {
diff --git a/src/cpu/minor/fetch2.cc b/src/cpu/minor/fetch2.cc
index 44789e2..c5a7045 100644
--- a/src/cpu/minor/fetch2.cc
+++ b/src/cpu/minor/fetch2.cc
@@ -39,7 +39,7 @@
 
 #include <string>
 
-#include "arch/decoder.hh"
+#include "arch/generic/decoder.hh"
 #include "base/logging.hh"
 #include "base/trace.hh"
 #include "cpu/minor/pipeline.hh"
@@ -313,7 +313,7 @@
             prediction.isBubble() /* No predicted branch */)
         {
             ThreadContext *thread = cpu.getContext(line_in->id.threadId);
-            TheISA::Decoder *decoder = thread->getDecoderPtr();
+            InstDecoder *decoder = thread->getDecoderPtr();
 
             /* Discard line due to prediction sequence number being wrong but
              * without the streamSeqNum number having changed */
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index 2d903b2..c625a61 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -356,7 +356,7 @@
     }
 
     /** The decoder. */
-    TheISA::Decoder *decoder[MaxThreads];
+    InstDecoder *decoder[MaxThreads];
 
     RequestPort &getInstPort() { return icachePort; }
 
diff --git a/src/cpu/o3/thread_context.cc b/src/cpu/o3/thread_context.cc
index 9154012..01b92b7 100644
--- a/src/cpu/o3/thread_context.cc
+++ b/src/cpu/o3/thread_context.cc
@@ -58,8 +58,8 @@
 
     getIsaPtr()->takeOverFrom(this, old_context);
 
-    TheISA::Decoder *newDecoder = getDecoderPtr();
-    TheISA::Decoder *oldDecoder = old_context->getDecoderPtr();
+    InstDecoder *newDecoder = getDecoderPtr();
+    InstDecoder *oldDecoder = old_context->getDecoderPtr();
     newDecoder->takeOverFrom(oldDecoder);
 
     thread->noSquashFromTC = false;
diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh
index 03943f4..14bc7f5 100644
--- a/src/cpu/o3/thread_context.hh
+++ b/src/cpu/o3/thread_context.hh
@@ -112,7 +112,7 @@
         return cpu->isa[thread->threadId()];
     }
 
-    TheISA::Decoder *
+    InstDecoder *
     getDecoderPtr() override
     {
         return cpu->fetch.decoder[thread->threadId()];
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index 8ee10b6..eee0564 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -748,7 +748,7 @@
 
     // ifetch_req is initialized to read the instruction
     // directly into the CPU object's inst field.
-    pkt.dataStatic(decoder.moreBytesPtr());
+    pkt.dataStatic(decoder->moreBytesPtr());
 
     Tick latency = sendPacket(icachePort, &pkt);
     assert(!pkt.isError());
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index 7067f20..5d4ee3e 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -269,7 +269,7 @@
             t_info.fetchOffset = 0;
             interrupts[curThread]->updateIntrInfo();
             interrupt->invoke(tc);
-            thread->decoder.reset();
+            thread->decoder->reset();
         }
     }
 }
@@ -283,12 +283,12 @@
 
     auto &decoder = thread->decoder;
     Addr instAddr = thread->pcState().instAddr();
-    Addr fetchPC = (instAddr & decoder.pcMask()) + t_info.fetchOffset;
+    Addr fetchPC = (instAddr & decoder->pcMask()) + t_info.fetchOffset;
 
     // set up memory request for instruction fetch
     DPRINTF(Fetch, "Fetch: Inst PC:%08p, Fetch PC:%08p\n", instAddr, fetchPC);
 
-    req->setVirt(fetchPC, decoder.moreBytesSize(), Request::INST_FETCH,
+    req->setVirt(fetchPC, decoder->moreBytesSize(), Request::INST_FETCH,
                  instRequestorId(), instAddr);
 }
 
@@ -320,7 +320,7 @@
 
     if (isRomMicroPC(pc_state.microPC())) {
         t_info.stayAtPC = false;
-        curStaticInst = decoder.fetchRomMicroop(
+        curStaticInst = decoder->fetchRomMicroop(
                 pc_state.microPC(), curMacroStaticInst);
     } else if (!curMacroStaticInst) {
         //We're not in the middle of a macro instruction
@@ -329,19 +329,19 @@
         //Predecode, ie bundle up an ExtMachInst
         //If more fetch data is needed, pass it in.
         Addr fetch_pc =
-            (pc_state.instAddr() & decoder.pcMask()) + t_info.fetchOffset;
+            (pc_state.instAddr() & decoder->pcMask()) + t_info.fetchOffset;
 
-        decoder.moreBytes(pc_state, fetch_pc);
+        decoder->moreBytes(pc_state, fetch_pc);
 
         //Decode an instruction if one is ready. Otherwise, we'll have to
         //fetch beyond the MachInst at the current pc.
-        instPtr = decoder.decode(pc_state);
+        instPtr = decoder->decode(pc_state);
         if (instPtr) {
             t_info.stayAtPC = false;
             thread->pcState(pc_state);
         } else {
             t_info.stayAtPC = true;
-            t_info.fetchOffset += decoder.moreBytesSize();
+            t_info.fetchOffset += decoder->moreBytesSize();
         }
 
         //If we decoded an instruction and it's microcoded, start pulling
@@ -469,7 +469,7 @@
     if (fault != NoFault) {
         curMacroStaticInst = nullStaticInstPtr;
         fault->invoke(threadContexts[curThread], curStaticInst);
-        thread->decoder.reset();
+        thread->decoder->reset();
     } else {
         if (curStaticInst) {
             if (curStaticInst->isLastMicroop())
diff --git a/src/cpu/simple/noncaching.cc b/src/cpu/simple/noncaching.cc
index 0423b2e..f6ef530 100644
--- a/src/cpu/simple/noncaching.cc
+++ b/src/cpu/simple/noncaching.cc
@@ -95,7 +95,7 @@
 
     auto *bd = bd_it->second;
     Addr offset = ifetch_req->getPaddr() - bd->range().start();
-    memcpy(decoder.moreBytesPtr(), bd->ptr() + offset, ifetch_req->getSize());
+    memcpy(decoder->moreBytesPtr(), bd->ptr() + offset, ifetch_req->getSize());
     return 0;
 }
 
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index dbbc3ad..0ae1a61 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -725,7 +725,7 @@
         DPRINTF(SimpleCPU, "Sending fetch for addr %#x(pa: %#x)\n",
                 req->getVaddr(), req->getPaddr());
         ifetch_pkt = new Packet(req, MemCmd::ReadReq);
-        ifetch_pkt->dataStatic(decoder.moreBytesPtr());
+        ifetch_pkt->dataStatic(decoder->moreBytesPtr());
         DPRINTF(SimpleCPU, " -- pkt addr: %#x\n", ifetch_pkt->getAddr());
 
         if (!icachePort.sendTimingReq(ifetch_pkt)) {
diff --git a/src/cpu/simple_thread.cc b/src/cpu/simple_thread.cc
index c132d72..67a0ce6 100644
--- a/src/cpu/simple_thread.cc
+++ b/src/cpu/simple_thread.cc
@@ -72,7 +72,7 @@
       isa(dynamic_cast<TheISA::ISA *>(_isa)),
       predicate(true), memAccPredicate(true),
       comInstEventQueue("instruction-based event queue"),
-      system(_sys), mmu(_mmu), decoder(isa),
+      system(_sys), mmu(_mmu), decoder(new TheISA::Decoder(isa)),
       htmTransactionStarts(0), htmTransactionStops(0)
 {
     assert(isa);
@@ -95,7 +95,7 @@
 SimpleThread::takeOverFrom(ThreadContext *oldContext)
 {
     gem5::takeOverFrom(*this, *oldContext);
-    decoder.takeOverFrom(oldContext->getDecoderPtr());
+    decoder->takeOverFrom(oldContext->getDecoderPtr());
 
     isa->takeOverFrom(this, oldContext);
 
diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh
index bddc1f5..f896071 100644
--- a/src/cpu/simple_thread.hh
+++ b/src/cpu/simple_thread.hh
@@ -134,7 +134,7 @@
 
     BaseMMU *mmu;
 
-    TheISA::Decoder decoder;
+    InstDecoder *decoder; // NOTE(review): heap-allocated; confirm owner
 
     // hardware transactional memory
     int64_t htmTransactionStarts;
@@ -213,7 +213,8 @@
 
     BaseISA *getIsaPtr() override { return isa; }
 
-    TheISA::Decoder *getDecoderPtr() override { return &decoder; }
+    /** Return the decoder pointer stored in this thread. */
+    InstDecoder *getDecoderPtr() override { return this->decoder; }
 
     System *getSystemPtr() override { return system; }
 
diff --git a/src/cpu/thread_context.hh b/src/cpu/thread_context.hh
index 3844d0c..e978f37 100644
--- a/src/cpu/thread_context.hh
+++ b/src/cpu/thread_context.hh
@@ -68,6 +68,7 @@
 class BaseTLB;
 class CheckerCPU;
 class Checkpoint;
+class InstDecoder;
 class PortProxy;
 class Process;
 class System;
@@ -143,7 +144,7 @@
 
     virtual BaseISA *getIsaPtr() = 0;
 
-    virtual TheISA::Decoder *getDecoderPtr() = 0;
+    virtual InstDecoder *getDecoderPtr() = 0;
 
     virtual System *getSystemPtr() = 0;
 
diff --git a/src/sim/faults.cc b/src/sim/faults.cc
index f7ca203..98778f2 100644
--- a/src/sim/faults.cc
+++ b/src/sim/faults.cc
@@ -42,7 +42,7 @@
 
 #include <csignal>
 
-#include "arch/decoder.hh"
+#include "arch/generic/decoder.hh"
 #include "base/logging.hh"
 #include "cpu/base.hh"
 #include "cpu/thread_context.hh"
@@ -114,7 +114,7 @@
                                     const StaticInstPtr &inst)
 {
     // reset decoder
-    TheISA::Decoder* dcdr = tc->getDecoderPtr();
+    InstDecoder* dcdr = tc->getDecoderPtr();
     dcdr->reset();
 
     // restore transaction checkpoint
