cpu-o3: Refactor how registers are handled in the DynInst class.

The "Regs" structure in the DynInst class was using placement new to
allocate register arrays in a dynamically allocated blob which can be
resized based on the number of source and destination registers.
Unfortunately, it was assumed that the alignment of the components of
that structure would work out because they were ordered from largest to
smallest, which should imply largest alignment to smallest.

This change instead uses an overloaded new operator to allocate extra
memory for the DynInst itself, and then initialize arrays within that
extra space. The DynInst class then gets pointers to the arrays so it
can access them. This has the benefit that only one chunk of memory is
allocated, instead of one for the DynInst and then a second for the
arrays.

Also, this new version uses the alignof operator to figure out what
alignment is needed for each array, which should avoid any undefined
behavior. The new-ing, initialization, destructing, and delete-ing are
also more carefully orchestrated. Hopefully one or both of these will
squash potential memory management bugs.

Change-Id: Id2fa090b53909f14a8cb39801e9930d4608e42f7
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/52485
Reviewed-by: Gabe Black <gabe.black@gmail.com>
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Maintainer: Gabe Black <gabe.black@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
diff --git a/src/cpu/o3/commit.cc b/src/cpu/o3/commit.cc
index 01ec0c8..c10ced0 100644
--- a/src/cpu/o3/commit.cc
+++ b/src/cpu/o3/commit.cc
@@ -1300,8 +1300,8 @@
 
     // Update the commit rename map
     for (int i = 0; i < head_inst->numDestRegs(); i++) {
-        renameMap[tid]->setEntry(head_inst->regs.flattenedDestIdx(i),
-                                 head_inst->regs.renamedDestIdx(i));
+        renameMap[tid]->setEntry(head_inst->flattenedDestIdx(i),
+                                 head_inst->renamedDestIdx(i));
     }
 
     // hardware transactional memory
diff --git a/src/cpu/o3/dyn_inst.cc b/src/cpu/o3/dyn_inst.cc
index 8284c9a..36591ae 100644
--- a/src/cpu/o3/dyn_inst.cc
+++ b/src/cpu/o3/dyn_inst.cc
@@ -42,6 +42,7 @@
 
 #include <algorithm>
 
+#include "base/intmath.hh"
 #include "debug/DynInst.hh"
 #include "debug/IQ.hh"
 #include "debug/O3PipeView.hh"
@@ -52,13 +53,15 @@
 namespace o3
 {
 
-DynInst::DynInst(const StaticInstPtr &static_inst,
+DynInst::DynInst(const Arrays &arrays, const StaticInstPtr &static_inst,
         const StaticInstPtr &_macroop, InstSeqNum seq_num, CPU *_cpu)
     : seqNum(seq_num), staticInst(static_inst), cpu(_cpu),
-      regs(staticInst->numSrcRegs(), staticInst->numDestRegs()),
-      macroop(_macroop)
+      _numSrcs(arrays.numSrcs), _numDests(arrays.numDests),
+      _flatDestIdx(arrays.flatDestIdx), _destIdx(arrays.destIdx),
+      _prevDestIdx(arrays.prevDestIdx), _srcIdx(arrays.srcIdx),
+      _readySrcIdx(arrays.readySrcIdx), macroop(_macroop)
 {
-    regs.init();
+    std::fill(_readySrcIdx, _readySrcIdx + (numSrcs() + 7) / 8, 0);
 
     status.reset();
 
@@ -89,22 +92,122 @@
 
 }
 
-DynInst::DynInst(const StaticInstPtr &static_inst,
+DynInst::DynInst(const Arrays &arrays, const StaticInstPtr &static_inst,
         const StaticInstPtr &_macroop, const PCStateBase &_pc,
         const PCStateBase &pred_pc, InstSeqNum seq_num, CPU *_cpu)
-    : DynInst(static_inst, _macroop, seq_num, _cpu)
+    : DynInst(arrays, static_inst, _macroop, seq_num, _cpu)
 {
     set(pc, _pc);
     set(predPC, pred_pc);
 }
 
-DynInst::DynInst(const StaticInstPtr &_staticInst,
+DynInst::DynInst(const Arrays &arrays, const StaticInstPtr &_staticInst,
         const StaticInstPtr &_macroop)
-    : DynInst(_staticInst, _macroop, 0, nullptr)
+    : DynInst(arrays, _staticInst, _macroop, 0, nullptr)
 {}
 
+/*
+ * This custom "new" operator uses the default "new" operator to allocate space
+ * for a DynInst, but also pads out the number of bytes to make room for some
+ * extra structures the DynInst needs. We save time and improve performance by
+ * only going to the heap once to get space for all these structures.
+ *
+ * When a DynInst is allocated with new, the compiler will call this "new"
+ * operator with "count" set to the number of bytes it needs to store the
+ * DynInst. We ultimately call into the default new operator to get those
+ * bytes, but before we do, we pad out "count" so that there will be extra
+ * space for some structures the DynInst needs. We take into account both the
+ * absolute size of these structures, and also what alignment they need.
+ *
+ * Once we've gotten a buffer large enough to hold the DynInst itself and these
+ * extra structures, we construct the extra bits using placement new. This
+ * constructs the structures in place in the space we created for them.
+ *
+ * Next, we return the buffer as the result of our operator. The compiler takes
+ * that buffer and constructs the DynInst in the beginning of it using the
+ * DynInst constructor.
+ *
+ * To avoid having to calculate where these extra structures are twice, once
+ * when making room for them and initializing them, and then once again in the
+ * DynInst constructor, we also pass in a structure called "arrays" which holds
+ * pointers to them. The fields of "arrays" are initialized in this operator,
+ * and are then consumed in the DynInst constructor.
+ */
+void *
+DynInst::operator new(size_t count, Arrays &arrays)
+{
+    // Convenience variables for brevity.
+    const auto num_dests = arrays.numDests;
+    const auto num_srcs = arrays.numSrcs;
+
+    // Figure out where everything will go.
+    uintptr_t inst = 0;
+    size_t inst_size = count;
+
+    uintptr_t flat_dest_idx = roundUp(inst + inst_size, alignof(RegId));
+    size_t flat_dest_idx_size = sizeof(*arrays.flatDestIdx) * num_dests;
+
+    uintptr_t dest_idx =
+        roundUp(flat_dest_idx + flat_dest_idx_size, alignof(PhysRegIdPtr));
+    size_t dest_idx_size = sizeof(*arrays.destIdx) * num_dests;
+
+    uintptr_t prev_dest_idx =
+        roundUp(dest_idx + dest_idx_size, alignof(PhysRegIdPtr));
+    size_t prev_dest_idx_size = sizeof(*arrays.prevDestIdx) * num_dests;
+
+    uintptr_t src_idx =
+        roundUp(prev_dest_idx + prev_dest_idx_size, alignof(PhysRegIdPtr));
+    size_t src_idx_size = sizeof(*arrays.srcIdx) * num_srcs;
+
+    uintptr_t ready_src_idx =
+        roundUp(src_idx + src_idx_size, alignof(uint8_t));
+    size_t ready_src_idx_size =
+        sizeof(*arrays.readySrcIdx) * ((num_srcs + 7) / 8);
+
+    // Figure out how much space we need in total.
+    size_t total_size = ready_src_idx + ready_src_idx_size;
+
+    // Allocate once. NOTE(review): no matching operator delete seen; confirm.
+    uint8_t *buf = (uint8_t *)::operator new(total_size);
+
+    // Fill in "arrays" with pointers to all the arrays.
+    arrays.flatDestIdx = (RegId *)(buf + flat_dest_idx);
+    arrays.destIdx = (PhysRegIdPtr *)(buf + dest_idx);
+    arrays.prevDestIdx = (PhysRegIdPtr *)(buf + prev_dest_idx);
+    arrays.srcIdx = (PhysRegIdPtr *)(buf + src_idx);
+    arrays.readySrcIdx = (uint8_t *)(buf + ready_src_idx);
+
+    // Initialize the extra components; readySrcIdx holds one bit per source.
+    new (arrays.flatDestIdx) RegId[num_dests];
+    new (arrays.destIdx) PhysRegIdPtr[num_dests];
+    new (arrays.prevDestIdx) PhysRegIdPtr[num_dests];
+    new (arrays.srcIdx) PhysRegIdPtr[num_srcs];
+    new (arrays.readySrcIdx) uint8_t[(num_srcs + 7) / 8];
+
+    return buf;
+}
+
 DynInst::~DynInst()
 {
+    /*
+     * The buffer this DynInst occupies also holds some of the structures it
+     * points to. We need to call their destructors manually to make sure that
+     * they're cleaned up appropriately, but we don't need to free their memory
+     * explicitly since that's part of the DynInst's buffer and is already
+     * going to be freed as part of deleting the DynInst.
+     */
+    for (int i = 0; i < _numDests; i++) {
+        _flatDestIdx[i].~RegId();
+        _destIdx[i].~PhysRegIdPtr();
+        _prevDestIdx[i].~PhysRegIdPtr();
+    }
+
+    for (int i = 0; i < _numSrcs; i++)
+        _srcIdx[i].~PhysRegIdPtr();
+
+    for (int i = 0; i < ((_numSrcs + 7) / 8); i++)
+        _readySrcIdx[i].~uint8_t();
+
 #if TRACING_ON
     if (debug::O3PipeView) {
         Tick fetch = fetchTick;
@@ -202,7 +305,7 @@
 void
 DynInst::markSrcRegReady(RegIndex src_idx)
 {
-    regs.readySrcIdx(src_idx, true);
+    readySrcIdx(src_idx, true);
     markSrcRegReady();
 }
 
@@ -222,7 +325,7 @@
     // ensures that dest regs will be pinned to the same phys register if
     // re-rename happens.
     for (int idx = 0; idx < numDestRegs(); idx++) {
-        PhysRegIdPtr phys_dest_reg = regs.renamedDestIdx(idx);
+        PhysRegIdPtr phys_dest_reg = renamedDestIdx(idx);
         if (phys_dest_reg->isPinned()) {
             phys_dest_reg->incrNumPinnedWrites();
             if (isPinnedRegsWritten())
diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh
index 4b4e926..8c8a99a 100644
--- a/src/cpu/o3/dyn_inst.hh
+++ b/src/cpu/o3/dyn_inst.hh
@@ -83,13 +83,31 @@
     // The list of instructions iterator type.
     typedef typename std::list<DynInstPtr>::iterator ListIt;
 
+    struct Arrays  // Passed to operator new, then to the constructor.
+    {
+        size_t numSrcs;   // Source register count; set by the caller.
+        size_t numDests;  // Destination register count; set by the caller.
+
+        RegId *flatDestIdx;          // numDests entries; set by operator new.
+        PhysRegIdPtr *destIdx;       // numDests entries; set by operator new.
+        PhysRegIdPtr *prevDestIdx;   // numDests entries; set by operator new.
+        PhysRegIdPtr *srcIdx;        // numSrcs entries; set by operator new.
+        uint8_t *readySrcIdx;        // (numSrcs + 7) / 8 bytes, 1 bit per src.
+    };
+
+    static void *operator new(size_t count, Arrays &arrays);
+
     /** BaseDynInst constructor given a binary instruction. */
-    DynInst(const StaticInstPtr &staticInst, const StaticInstPtr
-            &macroop, const PCStateBase &pc, const PCStateBase &pred_pc,
-            InstSeqNum seq_num, CPU *cpu);
+    DynInst(const Arrays &arrays, const StaticInstPtr &staticInst,
+            const StaticInstPtr &macroop, InstSeqNum seq_num, CPU *cpu);
+
+    DynInst(const Arrays &arrays, const StaticInstPtr &staticInst,
+            const StaticInstPtr &macroop, const PCStateBase &pc,
+            const PCStateBase &pred_pc, InstSeqNum seq_num, CPU *cpu);
 
     /** BaseDynInst constructor given a static inst pointer. */
-    DynInst(const StaticInstPtr &_staticInst, const StaticInstPtr &_macroop);
+    DynInst(const Arrays &arrays, const StaticInstPtr &_staticInst,
+            const StaticInstPtr &_macroop);
 
     ~DynInst();
 
@@ -197,165 +215,103 @@
      */
     std::vector<short> _destMiscRegIdx;
 
-    /**
-     * Collect register related information into a single struct. The number of
-     * source and destination registers can vary, and storage for information
-     * about them needs to be allocated dynamically. This class figures out
-     * how much space is needed and allocates it all at once, and then
-     * trivially divies it up for each type of per-register array.
-     */
-    struct Regs
-    {
-      private:
-        size_t _numSrcs;
-        size_t _numDests;
+    size_t _numSrcs;
+    size_t _numDests;
 
-        using BackingStorePtr = std::unique_ptr<uint8_t[]>;
-        using BufCursor = BackingStorePtr::pointer;
+    // Flattened register index of the destination registers of this
+    // instruction.
+    RegId *_flatDestIdx;
 
-        BackingStorePtr buf;
+    // Physical register index of the destination registers of this
+    // instruction.
+    PhysRegIdPtr *_destIdx;
 
-        // Members should be ordered based on required alignment so that they
-        // can be allocated contiguously.
+    // Physical register index of the previous producers of the
+    // architected destinations.
+    PhysRegIdPtr *_prevDestIdx;
 
-        // Flattened register index of the destination registers of this
-        // instruction.
-        RegId *_flatDestIdx;
+    // Physical register index of the source registers of this instruction.
+    PhysRegIdPtr *_srcIdx;
 
-        // Physical register index of the destination registers of this
-        // instruction.
-        PhysRegIdPtr *_destIdx;
-
-        // Physical register index of the previous producers of the
-        // architected destinations.
-        PhysRegIdPtr *_prevDestIdx;
-
-        static inline size_t
-        bytesForDests(size_t num)
-        {
-            return (sizeof(RegId) + 2 * sizeof(PhysRegIdPtr)) * num;
-        }
-
-        // Physical register index of the source registers of this instruction.
-        PhysRegIdPtr *_srcIdx;
-
-        // Whether or not the source register is ready, one bit per register.
-        uint8_t *_readySrcIdx;
-
-        static inline size_t
-        bytesForSources(size_t num)
-        {
-            return sizeof(PhysRegIdPtr) * num +
-                sizeof(uint8_t) * ((num + 7) / 8);
-        }
-
-        template <class T>
-        static inline void
-        allocate(T *&ptr, BufCursor &cur, size_t count)
-        {
-            ptr = new (cur) T[count];
-            cur += sizeof(T) * count;
-        }
-
-      public:
-        size_t numSrcs() const { return _numSrcs; }
-        size_t numDests() const { return _numDests; }
-
-        void
-        init()
-        {
-            std::fill(_readySrcIdx, _readySrcIdx + (numSrcs() + 7) / 8, 0);
-        }
-
-        Regs(size_t srcs, size_t dests) : _numSrcs(srcs), _numDests(dests),
-            buf(new uint8_t[bytesForSources(srcs) + bytesForDests(dests)])
-        {
-            BufCursor cur = buf.get();
-            allocate(_flatDestIdx, cur, dests);
-            allocate(_destIdx, cur, dests);
-            allocate(_prevDestIdx, cur, dests);
-            allocate(_srcIdx, cur, srcs);
-            allocate(_readySrcIdx, cur, (srcs + 7) / 8);
-
-            init();
-        }
-
-        // Returns the flattened register index of the idx'th destination
-        // register.
-        const RegId &
-        flattenedDestIdx(int idx) const
-        {
-            return _flatDestIdx[idx];
-        }
-
-        // Flattens a destination architectural register index into a logical
-        // index.
-        void
-        flattenedDestIdx(int idx, const RegId &reg_id)
-        {
-            _flatDestIdx[idx] = reg_id;
-        }
-
-        // Returns the physical register index of the idx'th destination
-        // register.
-        PhysRegIdPtr
-        renamedDestIdx(int idx) const
-        {
-            return _destIdx[idx];
-        }
-
-        // Set the renamed dest register id.
-        void
-        renamedDestIdx(int idx, PhysRegIdPtr phys_reg_id)
-        {
-            _destIdx[idx] = phys_reg_id;
-        }
-
-        // Returns the physical register index of the previous physical
-        // register that remapped to the same logical register index.
-        PhysRegIdPtr
-        prevDestIdx(int idx) const
-        {
-            return _prevDestIdx[idx];
-        }
-
-        // Set the previous renamed dest register id.
-        void
-        prevDestIdx(int idx, PhysRegIdPtr phys_reg_id)
-        {
-            _prevDestIdx[idx] = phys_reg_id;
-        }
-
-        // Returns the physical register index of the i'th source register.
-        PhysRegIdPtr
-        renamedSrcIdx(int idx) const
-        {
-            return _srcIdx[idx];
-        }
-
-        void
-        renamedSrcIdx(int idx, PhysRegIdPtr phys_reg_id)
-        {
-            _srcIdx[idx] = phys_reg_id;
-        }
-
-        bool
-        readySrcIdx(int idx) const
-        {
-            uint8_t &byte = _readySrcIdx[idx / 8];
-            return bits(byte, idx % 8);
-        }
-
-        void
-        readySrcIdx(int idx, bool ready)
-        {
-            uint8_t &byte = _readySrcIdx[idx / 8];
-            replaceBits(byte, idx % 8, ready ? 1 : 0);
-        }
-    };
+    // Whether or not the source register is ready, one bit per register.
+    uint8_t *_readySrcIdx;
 
   public:
-    Regs regs;
+    size_t numSrcs() const { return _numSrcs; }
+    size_t numDests() const { return _numDests; }
+
+    // Returns the flattened register index of the idx'th destination
+    // register.
+    const RegId &
+    flattenedDestIdx(int idx) const
+    {
+        return _flatDestIdx[idx];
+    }
+
+    // Records reg_id as the flattened register index of the idx'th
+    // destination register. No flattening or bounds checking is done here.
+    void
+    flattenedDestIdx(int idx, const RegId &reg_id)
+    {
+        _flatDestIdx[idx] = reg_id;
+    }
+
+    // Returns the physical register index of the idx'th destination
+    // register.
+    PhysRegIdPtr
+    renamedDestIdx(int idx) const
+    {
+        return _destIdx[idx];
+    }
+
+    // Set the renamed dest register id.
+    void
+    renamedDestIdx(int idx, PhysRegIdPtr phys_reg_id)
+    {
+        _destIdx[idx] = phys_reg_id;
+    }
+
+    // Returns the physical register index of the previous physical
+    // register that remapped to the same logical register index.
+    PhysRegIdPtr
+    prevDestIdx(int idx) const
+    {
+        return _prevDestIdx[idx];
+    }
+
+    // Set the previous renamed dest register id.
+    void
+    prevDestIdx(int idx, PhysRegIdPtr phys_reg_id)
+    {
+        _prevDestIdx[idx] = phys_reg_id;
+    }
+
+    // Returns the physical register index of the i'th source register.
+    PhysRegIdPtr
+    renamedSrcIdx(int idx) const
+    {
+        return _srcIdx[idx];
+    }
+
+    void
+    renamedSrcIdx(int idx, PhysRegIdPtr phys_reg_id)
+    {
+        _srcIdx[idx] = phys_reg_id;
+    }
+
+    bool
+    readySrcIdx(int idx) const  // Is the idx'th source marked ready?
+    {
+        uint8_t &byte = _readySrcIdx[idx / 8];  // Eight ready bits per byte.
+        return bits(byte, idx % 8);
+    }
+
+    void
+    readySrcIdx(int idx, bool ready)  // Mark the idx'th source ready or not.
+    {
+        uint8_t &byte = _readySrcIdx[idx / 8];  // Byte with this bit.
+        replaceBits(byte, idx % 8, ready ? 1 : 0);
+    }
 
     /** The thread this instruction is from. */
     ThreadID threadNumber = 0;
@@ -507,8 +463,8 @@
     renameDestReg(int idx, PhysRegIdPtr renamed_dest,
                   PhysRegIdPtr previous_rename)
     {
-        regs.renamedDestIdx(idx, renamed_dest);
-        regs.prevDestIdx(idx, previous_rename);
+        renamedDestIdx(idx, renamed_dest);
+        prevDestIdx(idx, previous_rename);
         if (renamed_dest->isPinned())
             setPinnedRegsRenamed();
     }
@@ -520,7 +476,7 @@
     void
     renameSrcReg(int idx, PhysRegIdPtr renamed_src)
     {
-        regs.renamedSrcIdx(idx, renamed_src);
+        renamedSrcIdx(idx, renamed_src);
     }
 
     /** Dumps out contents of this BaseDynInst. */
@@ -725,10 +681,10 @@
     }
 
     /** Returns the number of source registers. */
-    size_t numSrcRegs() const { return regs.numSrcs(); }
+    size_t numSrcRegs() const { return numSrcs(); }
 
     /** Returns the number of destination registers. */
-    size_t numDestRegs() const { return regs.numDests(); }
+    size_t numDestRegs() const { return numDests(); }
 
     // the following are used to track physical register usage
     // for machines with separate int & FP reg files
@@ -1143,7 +1099,7 @@
     {
 
         for (int idx = 0; idx < numDestRegs(); idx++) {
-            PhysRegIdPtr prev_phys_reg = regs.prevDestIdx(idx);
+            PhysRegIdPtr prev_phys_reg = prevDestIdx(idx);
             const RegId& original_dest_reg = staticInst->destRegIdx(idx);
             switch (original_dest_reg.classValue()) {
               case IntRegClass:
@@ -1198,19 +1154,19 @@
     RegVal
     readIntRegOperand(const StaticInst *si, int idx) override
     {
-        return cpu->readIntReg(regs.renamedSrcIdx(idx));
+        return cpu->readIntReg(renamedSrcIdx(idx));
     }
 
     RegVal
     readFloatRegOperandBits(const StaticInst *si, int idx) override
     {
-        return cpu->readFloatReg(regs.renamedSrcIdx(idx));
+        return cpu->readFloatReg(renamedSrcIdx(idx));
     }
 
     const TheISA::VecRegContainer&
     readVecRegOperand(const StaticInst *si, int idx) const override
     {
-        return cpu->readVecReg(regs.renamedSrcIdx(idx));
+        return cpu->readVecReg(renamedSrcIdx(idx));
     }
 
     /**
@@ -1219,31 +1175,31 @@
     TheISA::VecRegContainer&
     getWritableVecRegOperand(const StaticInst *si, int idx) override
     {
-        return cpu->getWritableVecReg(regs.renamedDestIdx(idx));
+        return cpu->getWritableVecReg(renamedDestIdx(idx));
     }
 
     RegVal
     readVecElemOperand(const StaticInst *si, int idx) const override
     {
-        return cpu->readVecElem(regs.renamedSrcIdx(idx));
+        return cpu->readVecElem(renamedSrcIdx(idx));
     }
 
     const TheISA::VecPredRegContainer&
     readVecPredRegOperand(const StaticInst *si, int idx) const override
     {
-        return cpu->readVecPredReg(regs.renamedSrcIdx(idx));
+        return cpu->readVecPredReg(renamedSrcIdx(idx));
     }
 
     TheISA::VecPredRegContainer&
     getWritableVecPredRegOperand(const StaticInst *si, int idx) override
     {
-        return cpu->getWritableVecPredReg(regs.renamedDestIdx(idx));
+        return cpu->getWritableVecPredReg(renamedDestIdx(idx));
     }
 
     RegVal
     readCCRegOperand(const StaticInst *si, int idx) override
     {
-        return cpu->readCCReg(regs.renamedSrcIdx(idx));
+        return cpu->readCCReg(renamedSrcIdx(idx));
     }
 
     /** @todo: Make results into arrays so they can handle multiple dest
@@ -1252,14 +1208,14 @@
     void
     setIntRegOperand(const StaticInst *si, int idx, RegVal val) override
     {
-        cpu->setIntReg(regs.renamedDestIdx(idx), val);
+        cpu->setIntReg(renamedDestIdx(idx), val);
         setResult(val);
     }
 
     void
     setFloatRegOperandBits(const StaticInst *si, int idx, RegVal val) override
     {
-        cpu->setFloatReg(regs.renamedDestIdx(idx), val);
+        cpu->setFloatReg(renamedDestIdx(idx), val);
         setResult(val);
     }
 
@@ -1267,7 +1223,7 @@
     setVecRegOperand(const StaticInst *si, int idx,
                      const TheISA::VecRegContainer& val) override
     {
-        cpu->setVecReg(regs.renamedDestIdx(idx), val);
+        cpu->setVecReg(renamedDestIdx(idx), val);
         setResult(val);
     }
 
@@ -1275,7 +1231,7 @@
     setVecElemOperand(const StaticInst *si, int idx, RegVal val) override
     {
         int reg_idx = idx;
-        cpu->setVecElem(regs.renamedDestIdx(reg_idx), val);
+        cpu->setVecElem(renamedDestIdx(reg_idx), val);
         setResult(val);
     }
 
@@ -1283,14 +1239,14 @@
     setVecPredRegOperand(const StaticInst *si, int idx,
                          const TheISA::VecPredRegContainer& val) override
     {
-        cpu->setVecPredReg(regs.renamedDestIdx(idx), val);
+        cpu->setVecPredReg(renamedDestIdx(idx), val);
         setResult(val);
     }
 
     void
     setCCRegOperand(const StaticInst *si, int idx, RegVal val) override
     {
-        cpu->setCCReg(regs.renamedDestIdx(idx), val);
+        cpu->setCCReg(renamedDestIdx(idx), val);
         setResult(val);
     }
 };
diff --git a/src/cpu/o3/fetch.cc b/src/cpu/o3/fetch.cc
index b87af34..3b93160 100644
--- a/src/cpu/o3/fetch.cc
+++ b/src/cpu/o3/fetch.cc
@@ -1048,9 +1048,13 @@
     // Get a sequence number.
     InstSeqNum seq = cpu->getAndIncrementInstSeq();
 
+    DynInst::Arrays arrays;
+    arrays.numSrcs = staticInst->numSrcRegs();
+    arrays.numDests = staticInst->numDestRegs();
+
     // Create a new DynInst from the instruction fetched.
-    DynInstPtr instruction =
-        new DynInst(staticInst, curMacroop, this_pc, next_pc, seq, cpu);
+    DynInstPtr instruction = new (arrays) DynInst(
+            arrays, staticInst, curMacroop, this_pc, next_pc, seq, cpu);
     instruction->setTid(tid);
 
     instruction->setThreadState(cpu->thread[tid]);
diff --git a/src/cpu/o3/iew.cc b/src/cpu/o3/iew.cc
index 2795919..8dbd5b3 100644
--- a/src/cpu/o3/iew.cc
+++ b/src/cpu/o3/iew.cc
@@ -1410,12 +1410,12 @@
 
             for (int i = 0; i < inst->numDestRegs(); i++) {
                 // Mark register as ready if not pinned
-                if (inst->regs.renamedDestIdx(i)->
+                if (inst->renamedDestIdx(i)->
                         getNumPinnedWritesToComplete() == 0) {
                     DPRINTF(IEW,"Setting Destination Register %i (%s)\n",
-                            inst->regs.renamedDestIdx(i)->index(),
-                            inst->regs.renamedDestIdx(i)->className());
-                    scoreboard->setReg(inst->regs.renamedDestIdx(i));
+                            inst->renamedDestIdx(i)->index(),
+                            inst->renamedDestIdx(i)->className());
+                    scoreboard->setReg(inst->renamedDestIdx(i));
                 }
             }
 
diff --git a/src/cpu/o3/inst_queue.cc b/src/cpu/o3/inst_queue.cc
index 775fbcd..0fac84f 100644
--- a/src/cpu/o3/inst_queue.cc
+++ b/src/cpu/o3/inst_queue.cc
@@ -1002,7 +1002,7 @@
          dest_reg_idx++)
     {
         PhysRegIdPtr dest_reg =
-            completed_inst->regs.renamedDestIdx(dest_reg_idx);
+            completed_inst->renamedDestIdx(dest_reg_idx);
 
         // Special case of uniq or control registers.  They are not
         // handled by the IQ and thus have no dependency graph entry.
@@ -1234,7 +1234,7 @@
                      src_reg_idx++)
                 {
                     PhysRegIdPtr src_reg =
-                        squashed_inst->regs.renamedSrcIdx(src_reg_idx);
+                        squashed_inst->renamedSrcIdx(src_reg_idx);
 
                     // Only remove it from the dependency graph if it
                     // was placed there in the first place.
@@ -1245,7 +1245,7 @@
                     // overwritten.  The only downside to this is it
                     // leaves more room for error.
 
-                    if (!squashed_inst->regs.readySrcIdx(src_reg_idx) &&
+                    if (!squashed_inst->readySrcIdx(src_reg_idx) &&
                         !src_reg->isFixedMapping()) {
                         dependGraph.remove(src_reg->flatIndex(),
                                            squashed_inst);
@@ -1307,7 +1307,7 @@
              dest_reg_idx++)
         {
             PhysRegIdPtr dest_reg =
-                squashed_inst->regs.renamedDestIdx(dest_reg_idx);
+                squashed_inst->renamedDestIdx(dest_reg_idx);
             if (dest_reg->isFixedMapping()){
                 continue;
             }
@@ -1339,8 +1339,8 @@
          src_reg_idx++)
     {
         // Only add it to the dependency graph if it's not ready.
-        if (!new_inst->regs.readySrcIdx(src_reg_idx)) {
-            PhysRegIdPtr src_reg = new_inst->regs.renamedSrcIdx(src_reg_idx);
+        if (!new_inst->readySrcIdx(src_reg_idx)) {
+            PhysRegIdPtr src_reg = new_inst->renamedSrcIdx(src_reg_idx);
 
             // Check the IQ's scoreboard to make sure the register
             // hasn't become ready while the instruction was in flight
@@ -1386,7 +1386,7 @@
          dest_reg_idx < total_dest_regs;
          dest_reg_idx++)
     {
-        PhysRegIdPtr dest_reg = new_inst->regs.renamedDestIdx(dest_reg_idx);
+        PhysRegIdPtr dest_reg = new_inst->renamedDestIdx(dest_reg_idx);
 
         // Some registers have fixed mapping, and there is no need to track
         // dependencies as these instructions must be executed at commit.
diff --git a/src/cpu/o3/probe/elastic_trace.cc b/src/cpu/o3/probe/elastic_trace.cc
index 95baa59..e93dad3 100644
--- a/src/cpu/o3/probe/elastic_trace.cc
+++ b/src/cpu/o3/probe/elastic_trace.cc
@@ -254,7 +254,7 @@
         if (!src_reg.is(MiscRegClass) &&
                 !(src_reg.is(IntRegClass) && src_reg.index() == zeroReg)) {
             // Get the physical register index of the i'th source register.
-            PhysRegIdPtr phys_src_reg = dyn_inst->regs.renamedSrcIdx(src_idx);
+            PhysRegIdPtr phys_src_reg = dyn_inst->renamedSrcIdx(src_idx);
             DPRINTFR(ElasticTrace, "[sn:%lli] Check map for src reg"
                      " %i (%s)\n", seq_num,
                      phys_src_reg->flatIndex(), phys_src_reg->className());
@@ -288,7 +288,7 @@
             // Get the physical register index of the i'th destination
             // register.
             PhysRegIdPtr phys_dest_reg =
-                dyn_inst->regs.renamedDestIdx(dest_idx);
+                dyn_inst->renamedDestIdx(dest_idx);
             DPRINTFR(ElasticTrace, "[sn:%lli] Update map for dest reg"
                      " %i (%s)\n", seq_num, phys_dest_reg->flatIndex(),
                      dest_reg.className());
diff --git a/src/cpu/o3/rename.cc b/src/cpu/o3/rename.cc
index 12408cd..457b1e9 100644
--- a/src/cpu/o3/rename.cc
+++ b/src/cpu/o3/rename.cc
@@ -1089,7 +1089,7 @@
 
         rename_result = map->rename(flat_dest_regid);
 
-        inst->regs.flattenedDestIdx(dest_idx, flat_dest_regid);
+        inst->flattenedDestIdx(dest_idx, flat_dest_regid);
 
         scoreboard->unsetReg(rename_result.first);