Merge zizzer:/bk/newmem
into  zower.eecs.umich.edu:/eecshome/m5/newmem

src/arch/sparc/isa/formats/mem/util.isa:
src/arch/sparc/isa_traits.hh:
src/arch/sparc/system.cc:
    Hand Merge

--HG--
extra : convert_revision : d5e0c97caebb616493e2f642e915969d7028109c
diff --git a/configs/example/fs.py b/configs/example/fs.py
index 5d7d496..b878f2b 100644
--- a/configs/example/fs.py
+++ b/configs/example/fs.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2006 The Regents of The University of Michigan
+# Copyright (c) 2006-2007 The Regents of The University of Michigan
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
diff --git a/src/SConscript b/src/SConscript
index a94682b..6c179f8 100644
--- a/src/SConscript
+++ b/src/SConscript
@@ -62,6 +62,7 @@
 	base/pollevent.cc
 	base/range.cc
 	base/random.cc
+	base/remote_gdb.cc
 	base/sat_counter.cc
 	base/socket.cc
 	base/statistics.cc
@@ -172,7 +173,6 @@
 full_system_sources = Split('''
 	base/crc.cc
 	base/inet.cc
-	base/remote_gdb.cc
 
 	cpu/intr_control.cc
         cpu/profile.cc
diff --git a/src/arch/alpha/SConscript b/src/arch/alpha/SConscript
index 3cc5ec2..addd498 100644
--- a/src/arch/alpha/SConscript
+++ b/src/arch/alpha/SConscript
@@ -52,6 +52,7 @@
 	intregfile.cc
 	miscregfile.cc
 	regfile.cc
+	remote_gdb.cc
 	''')
 
 # Full-system sources
@@ -66,7 +67,6 @@
 	osfpal.cc
 	pagetable.cc
 	stacktrace.cc
-	remote_gdb.cc
 	system.cc
 	tlb.cc
 	tru64/system.cc
diff --git a/src/arch/alpha/isa/branch.isa b/src/arch/alpha/isa/branch.isa
index 7438e7e..974193e 100644
--- a/src/arch/alpha/isa/branch.isa
+++ b/src/arch/alpha/isa/branch.isa
@@ -218,7 +218,7 @@
 
 def format CondBranch(code) {{
     code = 'bool cond;\n' + code + '\nif (cond) NPC = NPC + disp;\n';
-    iop = InstObjParams(name, Name, 'Branch', CodeBlock(code),
+    iop = InstObjParams(name, Name, 'Branch', code,
                         ('IsDirectControl', 'IsCondControl'))
     header_output = BasicDeclare.subst(iop)
     decoder_output = BasicConstructor.subst(iop)
@@ -230,8 +230,7 @@
 def UncondCtrlBase(name, Name, base_class, npc_expr, flags):
     # Declare basic control transfer w/o link (i.e. link reg is R31)
     nolink_code = 'NPC = %s;\n' % npc_expr
-    nolink_iop = InstObjParams(name, Name, base_class,
-                               CodeBlock(nolink_code), flags)
+    nolink_iop = InstObjParams(name, Name, base_class, nolink_code, flags)
     header_output = BasicDeclare.subst(nolink_iop)
     decoder_output = BasicConstructor.subst(nolink_iop)
     exec_output = BasicExecute.subst(nolink_iop)
@@ -239,7 +238,7 @@
     # Generate declaration of '*AndLink' version, append to decls
     link_code = 'Ra = NPC & ~3;\n' + nolink_code
     link_iop = InstObjParams(name, Name + 'AndLink', base_class,
-                             CodeBlock(link_code), flags)
+                             link_code, flags)
     header_output += BasicDeclare.subst(link_iop)
     decoder_output += BasicConstructor.subst(link_iop)
     exec_output += BasicExecute.subst(link_iop)
diff --git a/src/arch/alpha/isa/fp.isa b/src/arch/alpha/isa/fp.isa
index 3b5575f..c845ea4 100644
--- a/src/arch/alpha/isa/fp.isa
+++ b/src/arch/alpha/isa/fp.isa
@@ -293,7 +293,7 @@
 //   currently unimplemented (will fail).
 // - Generates NOP if FC == 31.
 def format FloatingPointOperate(code, *opt_args) {{
-    iop = InstObjParams(name, Name, 'AlphaFP', CodeBlock(code), opt_args)
+    iop = InstObjParams(name, Name, 'AlphaFP', code, opt_args)
     decode_block = FloatingPointDecode.subst(iop)
     header_output = BasicDeclare.subst(iop)
     decoder_output = BasicConstructor.subst(iop)
@@ -303,7 +303,7 @@
 // Special format for cvttq where rounding mode is pre-decoded
 def format FPFixedRounding(code, class_suffix, *opt_args) {{
     Name += class_suffix
-    iop = InstObjParams(name, Name, 'AlphaFP', CodeBlock(code), opt_args)
+    iop = InstObjParams(name, Name, 'AlphaFP', code, opt_args)
     decode_block = FloatingPointDecode.subst(iop)
     header_output = BasicDeclare.subst(iop)
     decoder_output = BasicConstructor.subst(iop)
diff --git a/src/arch/alpha/isa/int.isa b/src/arch/alpha/isa/int.isa
index 45e096e..bd9c3cc 100644
--- a/src/arch/alpha/isa/int.isa
+++ b/src/arch/alpha/isa/int.isa
@@ -113,16 +113,14 @@
         imm_code = re.sub(r'Rb_or_imm(\.\w+)?', 'imm', orig_code)
 
     # generate declaration for register version
-    cblk = CodeBlock(code)
-    iop = InstObjParams(name, Name, 'AlphaStaticInst', cblk, opt_flags)
+    iop = InstObjParams(name, Name, 'AlphaStaticInst', code, opt_flags)
     header_output = BasicDeclare.subst(iop)
     decoder_output = BasicConstructor.subst(iop)
     exec_output = BasicExecute.subst(iop)
 
     if uses_imm:
         # append declaration for imm version
-        imm_cblk = CodeBlock(imm_code)
-        imm_iop = InstObjParams(name, Name + 'Imm', 'IntegerImm', imm_cblk,
+        imm_iop = InstObjParams(name, Name + 'Imm', 'IntegerImm', imm_code,
                                 opt_flags)
         header_output += BasicDeclare.subst(imm_iop)
         decoder_output += BasicConstructor.subst(imm_iop)
diff --git a/src/arch/alpha/isa/main.isa b/src/arch/alpha/isa/main.isa
index 6e65cf9..d72dfe3 100644
--- a/src/arch/alpha/isa/main.isa
+++ b/src/arch/alpha/isa/main.isa
@@ -338,7 +338,7 @@
 
 // The most basic instruction format... used only for a few misc. insts
 def format BasicOperate(code, *flags) {{
-    iop = InstObjParams(name, Name, 'AlphaStaticInst', CodeBlock(code), flags)
+    iop = InstObjParams(name, Name, 'AlphaStaticInst', code, flags)
     header_output = BasicDeclare.subst(iop)
     decoder_output = BasicConstructor.subst(iop)
     decode_block = BasicDecode.subst(iop)
@@ -424,8 +424,7 @@
 
 // Like BasicOperate format, but generates NOP if RC/FC == 31
 def format BasicOperateWithNopCheck(code, *opt_args) {{
-    iop = InstObjParams(name, Name, 'AlphaStaticInst', CodeBlock(code),
-                        opt_args)
+    iop = InstObjParams(name, Name, 'AlphaStaticInst', code, opt_args)
     header_output = BasicDeclare.subst(iop)
     decoder_output = BasicConstructor.subst(iop)
     decode_block = OperateNopCheckDecode.subst(iop)
diff --git a/src/arch/alpha/isa/mem.isa b/src/arch/alpha/isa/mem.isa
index 02291ed..c0bdd2c 100644
--- a/src/arch/alpha/isa/mem.isa
+++ b/src/arch/alpha/isa/mem.isa
@@ -126,7 +126,7 @@
 }};
 
 def format LoadAddress(code) {{
-    iop = InstObjParams(name, Name, 'MemoryDisp32', CodeBlock(code))
+    iop = InstObjParams(name, Name, 'MemoryDisp32', code)
     header_output = BasicDeclare.subst(iop)
     decoder_output = BasicConstructor.subst(iop)
     decode_block = BasicDecode.subst(iop)
@@ -191,22 +191,28 @@
 }};
 
 
-def template LoadStoreConstructor {{
+def template EACompConstructor {{
     /** TODO: change op_class to AddrGenOp or something (requires
      * creating new member of OpClass enum in op_class.hh, updating
      * config files, etc.). */
     inline %(class_name)s::EAComp::EAComp(ExtMachInst machInst)
         : %(base_class)s("%(mnemonic)s (EAComp)", machInst, IntAluOp)
     {
-        %(ea_constructor)s;
+        %(constructor)s;
     }
+}};
 
+
+def template MemAccConstructor {{
     inline %(class_name)s::MemAcc::MemAcc(ExtMachInst machInst)
         : %(base_class)s("%(mnemonic)s (MemAcc)", machInst, %(op_class)s)
     {
-        %(memacc_constructor)s;
+        %(constructor)s;
     }
+}};
 
+
+def template LoadStoreConstructor {{
     inline %(class_name)s::%(class_name)s(ExtMachInst machInst)
          : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
                           new EAComp(machInst), new MemAcc(machInst))
@@ -227,7 +233,7 @@
         %(fp_enable_check)s;
         %(op_decl)s;
         %(op_rd)s;
-        %(code)s;
+        %(ea_code)s;
 
         if (fault == NoFault) {
             %(op_wb)s;
@@ -253,7 +259,7 @@
 
         if (fault == NoFault) {
             fault = xc->read(EA, (uint%(mem_acc_size)d_t&)Mem, memAccessFlags);
-            %(code)s;
+            %(memacc_code)s;
         }
 
         if (fault == NoFault) {
@@ -352,7 +358,7 @@
         EA = xc->getEA();
 
         if (fault == NoFault) {
-            %(code)s;
+            %(memacc_code)s;
         }
 
         if (fault == NoFault) {
@@ -497,7 +503,7 @@
         EA = xc->getEA();
 
         if (fault == NoFault) {
-            %(code)s;
+            %(memacc_code)s;
         }
 
         return NoFault;
@@ -582,63 +588,24 @@
     # add hook to get effective addresses into execution trace output.
     ea_code += '\nif (traceData) { traceData->setAddr(EA); }\n'
 
-    # generate code block objects
-    ea_cblk = CodeBlock(ea_code)
-    memacc_cblk = CodeBlock(memacc_code)
-    postacc_cblk = CodeBlock(postacc_code)
-
     # Some CPU models execute the memory operation as an atomic unit,
     # while others want to separate them into an effective address
     # computation and a memory access operation.  As a result, we need
     # to generate three StaticInst objects.  Note that the latter two
     # are nested inside the larger "atomic" one.
 
-    # generate InstObjParams for EAComp object
-    ea_iop = InstObjParams(name, Name, base_class, ea_cblk, inst_flags)
-
-    # generate InstObjParams for MemAcc object
-    memacc_iop = InstObjParams(name, Name, base_class, memacc_cblk, inst_flags)
-    # in the split execution model, the MemAcc portion is responsible
-    # for the post-access code.
-    memacc_iop.postacc_code = postacc_cblk.code
-
-    # generate InstObjParams for InitiateAcc, CompleteAcc object
-    # The code used depends on the template being used
-    if (exec_template_base == 'Load'):
-        initiateacc_cblk = CodeBlock(ea_code + memacc_code)
-        completeacc_cblk = CodeBlock(memacc_code + postacc_code)
-    elif (exec_template_base.startswith('Store')):
-        initiateacc_cblk = CodeBlock(ea_code + memacc_code)
-        completeacc_cblk = CodeBlock(postacc_code)
-    else:
-        initiateacc_cblk = ''
-        completeacc_cblk = ''
-
-    initiateacc_iop = InstObjParams(name, Name, base_class, initiateacc_cblk,
-                                    inst_flags)
-
-    completeacc_iop = InstObjParams(name, Name, base_class, completeacc_cblk,
-                                    inst_flags)
-
-    if (exec_template_base == 'Load'):
-        initiateacc_iop.ea_code = ea_cblk.code
-        initiateacc_iop.memacc_code = memacc_cblk.code
-        completeacc_iop.memacc_code = memacc_cblk.code
-        completeacc_iop.postacc_code = postacc_cblk.code
-    elif (exec_template_base.startswith('Store')):
-        initiateacc_iop.ea_code = ea_cblk.code
-        initiateacc_iop.memacc_code = memacc_cblk.code
-        completeacc_iop.postacc_code = postacc_cblk.code
-
-    # generate InstObjParams for unified execution
-    cblk = CodeBlock(ea_code + memacc_code + postacc_code)
-    iop = InstObjParams(name, Name, base_class, cblk, inst_flags)
-
-    iop.ea_constructor = ea_cblk.constructor
-    iop.ea_code = ea_cblk.code
-    iop.memacc_constructor = memacc_cblk.constructor
-    iop.memacc_code = memacc_cblk.code
-    iop.postacc_code = postacc_cblk.code
+    # Generate InstObjParams for each of the three objects.  Note that
+    # they differ only in the set of code objects contained (which in
+    # turn affects the object's overall operand list).
+    iop = InstObjParams(name, Name, base_class,
+                        { 'ea_code':ea_code, 'memacc_code':memacc_code, 'postacc_code':postacc_code },
+                        inst_flags)
+    ea_iop = InstObjParams(name, Name, base_class,
+                        { 'ea_code':ea_code },
+                        inst_flags)
+    memacc_iop = InstObjParams(name, Name, base_class,
+                        { 'memacc_code':memacc_code, 'postacc_code':postacc_code },
+                        inst_flags)
 
     if mem_flags:
         s = '\n\tmemAccessFlags = ' + string.join(mem_flags, '|') + ';'
@@ -659,13 +626,16 @@
     completeAccTemplate = eval(exec_template_base + 'CompleteAcc')
 
     # (header_output, decoder_output, decode_block, exec_output)
-    return (LoadStoreDeclare.subst(iop), LoadStoreConstructor.subst(iop),
+    return (LoadStoreDeclare.subst(iop),
+            EACompConstructor.subst(ea_iop)
+            + MemAccConstructor.subst(memacc_iop)
+            + LoadStoreConstructor.subst(iop),
             decode_template.subst(iop),
             EACompExecute.subst(ea_iop)
             + memAccExecTemplate.subst(memacc_iop)
             + fullExecTemplate.subst(iop)
-            + initiateAccTemplate.subst(initiateacc_iop)
-            + completeAccTemplate.subst(completeacc_iop))
+            + initiateAccTemplate.subst(iop)
+            + completeAccTemplate.subst(iop))
 }};
 
 def format LoadOrNop(memacc_code, ea_code = {{ EA = Rb + disp; }},
diff --git a/src/arch/alpha/isa/pal.isa b/src/arch/alpha/isa/pal.isa
index f4c10da..294b92e 100644
--- a/src/arch/alpha/isa/pal.isa
+++ b/src/arch/alpha/isa/pal.isa
@@ -68,7 +68,7 @@
 }};
 
 def format EmulatedCallPal(code, *flags) {{
-    iop = InstObjParams(name, Name, 'EmulatedCallPal', CodeBlock(code), flags)
+    iop = InstObjParams(name, Name, 'EmulatedCallPal', code, flags)
     header_output = BasicDeclare.subst(iop)
     decoder_output = BasicConstructor.subst(iop)
     decode_block = BasicDecode.subst(iop)
@@ -131,7 +131,7 @@
 }};
 
 def format CallPal(code, *flags) {{
-    iop = InstObjParams(name, Name, 'CallPalBase', CodeBlock(code), flags)
+    iop = InstObjParams(name, Name, 'CallPalBase', code, flags)
     header_output = BasicDeclare.subst(iop)
     decoder_output = BasicConstructor.subst(iop)
     decode_block = BasicDecode.subst(iop)
@@ -269,8 +269,7 @@
 def format HwMoveIPR(code, *flags) {{
     all_flags = ['IprAccessOp']
     all_flags += flags
-    iop = InstObjParams(name, Name, 'HwMoveIPR', CodeBlock(code),
-                        all_flags)
+    iop = InstObjParams(name, Name, 'HwMoveIPR', code, all_flags)
     header_output = BasicDeclare.subst(iop)
     decoder_output = BasicConstructor.subst(iop)
     decode_block = BasicDecode.subst(iop)
diff --git a/src/arch/alpha/isa_traits.hh b/src/arch/alpha/isa_traits.hh
index 35d9ce8..a267ac0 100644
--- a/src/arch/alpha/isa_traits.hh
+++ b/src/arch/alpha/isa_traits.hh
@@ -132,7 +132,7 @@
     const int NumPALShadowRegs = 8;
     const int NumFloatArchRegs = 32;
     // @todo: Figure out what this number really should be.
-    const int NumMiscArchRegs = 32;
+    const int NumMiscArchRegs = 77;
 
     const int NumIntRegs = NumIntArchRegs + NumPALShadowRegs;
     const int NumFloatRegs = NumFloatArchRegs;
diff --git a/src/arch/alpha/regfile.hh b/src/arch/alpha/regfile.hh
index ff58308..54372da 100644
--- a/src/arch/alpha/regfile.hh
+++ b/src/arch/alpha/regfile.hh
@@ -78,13 +78,11 @@
 
         Addr readNextNPC()
         {
-            return nnpc;
+            return npc + sizeof(MachInst);
         }
 
         void setNextNPC(Addr val)
-        {
-            nnpc = val;
-        }
+        { }
 
       protected:
         IntRegFile intRegFile;		// (signed) integer register file
@@ -189,6 +187,11 @@
         }
     };
 
+    static inline int flattenIntIndex(ThreadContext * tc, int reg)
+    {
+        return reg;
+    }
+
     void copyRegs(ThreadContext *src, ThreadContext *dest);
 
     void copyMiscRegs(ThreadContext *src, ThreadContext *dest);
diff --git a/src/arch/alpha/remote_gdb.cc b/src/arch/alpha/remote_gdb.cc
index f23fc32..4637bd7 100644
--- a/src/arch/alpha/remote_gdb.cc
+++ b/src/arch/alpha/remote_gdb.cc
@@ -121,14 +121,18 @@
 #include <string>
 #include <unistd.h>
 
+#include "config/full_system.hh"
+#if FULL_SYSTEM
+#include "arch/alpha/vtophys.hh"
+#endif
+
 #include "arch/alpha/kgdb.h"
+#include "arch/alpha/utility.hh"
 #include "arch/alpha/remote_gdb.hh"
-#include "arch/vtophys.hh"
 #include "base/intmath.hh"
 #include "base/remote_gdb.hh"
 #include "base/socket.hh"
 #include "base/trace.hh"
-#include "config/full_system.hh"
 #include "cpu/thread_context.hh"
 #include "cpu/static_inst.hh"
 #include "mem/physical.hh"
@@ -152,6 +156,9 @@
 bool
 RemoteGDB::acc(Addr va, size_t len)
 {
+#if !FULL_SYSTEM
+    panic("acc function needs to be rewritten for SE mode\n");
+#else
     Addr last_va;
 
     va = TheISA::TruncPage(va);
@@ -191,6 +198,7 @@
 
     DPRINTF(GDBAcc, "acc:   %#x mapping is valid\n", va);
     return true;
+#endif
 }
 
 ///////////////////////////////////////////////////////////
diff --git a/src/arch/alpha/syscallreturn.hh b/src/arch/alpha/syscallreturn.hh
index 803c3b7..47b4ac8 100644
--- a/src/arch/alpha/syscallreturn.hh
+++ b/src/arch/alpha/syscallreturn.hh
@@ -32,54 +32,25 @@
 #ifndef __ARCH_ALPHA_SYSCALLRETURN_HH__
 #define __ARCH_ALPHA_SYSCALLRETURN_HH__
 
-class SyscallReturn {
-    public:
-       template <class T>
-       SyscallReturn(T v, bool s)
-       {
-           retval = (uint64_t)v;
-           success = s;
-       }
-
-       template <class T>
-       SyscallReturn(T v)
-       {
-           success = (v >= 0);
-           retval = (uint64_t)v;
-       }
-
-       ~SyscallReturn() {}
-
-       SyscallReturn& operator=(const SyscallReturn& s) {
-           retval = s.retval;
-           success = s.success;
-           return *this;
-       }
-
-       bool successful() { return success; }
-       uint64_t value() { return retval; }
-
-
-   private:
-       uint64_t retval;
-       bool success;
-};
+#include "cpu/thread_context.hh"
+#include "sim/syscallreturn.hh"
 
 namespace AlphaISA
 {
-    static inline void setSyscallReturn(SyscallReturn return_value, RegFile *regs)
+    static inline void setSyscallReturn(SyscallReturn return_value,
+            ThreadContext * tc)
     {
         // check for error condition.  Alpha syscall convention is to
         // indicate success/failure in reg a3 (r19) and put the
         // return value itself in the standard return value reg (v0).
         if (return_value.successful()) {
             // no error
-            regs->setIntReg(SyscallSuccessReg, 0);
-            regs->setIntReg(ReturnValueReg, return_value.value());
+            tc->setIntReg(SyscallSuccessReg, 0);
+            tc->setIntReg(ReturnValueReg, return_value.value());
         } else {
             // got an error, return details
-            regs->setIntReg(SyscallSuccessReg, (IntReg)-1);
-            regs->setIntReg(ReturnValueReg, -return_value.value());
+            tc->setIntReg(SyscallSuccessReg, (IntReg)-1);
+            tc->setIntReg(ReturnValueReg, -return_value.value());
         }
     }
 }
diff --git a/src/arch/alpha/system.cc b/src/arch/alpha/system.cc
index cd92394..ed0938a 100644
--- a/src/arch/alpha/system.cc
+++ b/src/arch/alpha/system.cc
@@ -195,12 +195,6 @@
         panic("could not find m5AlphaAccess\n");
 }
 
-bool
-AlphaSystem::breakpoint()
-{
-    return remoteGDB[0]->trap(SIGTRAP);
-}
-
 void
 AlphaSystem::serialize(std::ostream &os)
 {
diff --git a/src/arch/alpha/system.hh b/src/arch/alpha/system.hh
index 0c073a6..f92b71c 100644
--- a/src/arch/alpha/system.hh
+++ b/src/arch/alpha/system.hh
@@ -56,8 +56,6 @@
 
     ~AlphaSystem();
 
-    virtual bool breakpoint();
-
 /**
  * Serialization stuff
  */
diff --git a/src/arch/alpha/utility.hh b/src/arch/alpha/utility.hh
index 1007365..9a06cc2 100644
--- a/src/arch/alpha/utility.hh
+++ b/src/arch/alpha/utility.hh
@@ -121,9 +121,9 @@
     template <class TC>
     void zeroRegisters(TC *tc);
 
-#if FULL_SYSTEM
     // Alpha IPR register accessors
-    inline bool PcPAL(Addr addr) { return addr & 0x1; }
+    inline bool PcPAL(Addr addr) { return addr & 0x3; }
+#if FULL_SYSTEM
 
     ////////////////////////////////////////////////////////////////////////
     //
diff --git a/src/arch/isa_parser.py b/src/arch/isa_parser.py
index 59eb18c..07ae72c 100755
--- a/src/arch/isa_parser.py
+++ b/src/arch/isa_parser.py
@@ -808,8 +808,7 @@
 # a defineInst() method that generates the code for an instruction
 # definition.
 
-exportContextSymbols = ('InstObjParams', 'CodeBlock',
-                        'makeList', 're', 'string')
+exportContextSymbols = ('InstObjParams', 'makeList', 're', 'string')
 
 exportContext = {}
 
@@ -1003,27 +1002,80 @@
 # Template objects are format strings that allow substitution from
 # the attribute spaces of other objects (e.g. InstObjParams instances).
 
+labelRE = re.compile(r'(?<!%)%\(([^\)]+)\)[sd]')  # (?<!%) also matches at offset 0
+
 class Template:
     def __init__(self, t):
         self.template = t
 
     def subst(self, d):
-        # Start with the template namespace.  Make a copy since we're
-        # going to modify it.
-        myDict = templateMap.copy()
-        # if the argument is a dictionary, we just use it.
-        if isinstance(d, dict):
-            myDict.update(d)
-        # if the argument is an object, we use its attribute map.
-        elif hasattr(d, '__dict__'):
-            myDict.update(d.__dict__)
-        else:
-            raise TypeError, "Template.subst() arg must be or have dictionary"
+        myDict = None
+
         # Protect non-Python-dict substitutions (e.g. if there's a printf
         # in the templated C++ code)
         template = protect_non_subst_percents(self.template)
         # CPU-model-specific substitutions are handled later (in GenCode).
         template = protect_cpu_symbols(template)
+
+        # Build a dict ('myDict') to use for the template substitution.
+        # Start with the template namespace.  Make a copy since we're
+        # going to modify it.
+        myDict = templateMap.copy()
+
+        if isinstance(d, InstObjParams):
+            # If we're dealing with an InstObjParams object, we need
+            # to be a little more sophisticated.  The instruction-wide
+            # parameters are already formed, but the parameters which
+            # are only function wide still need to be generated.
+            compositeCode = ''
+
+            myDict.update(d.__dict__)
+            # The "operands" and "snippets" attributes of the InstObjParams
+            # objects are for internal use and not substitution.
+            del myDict['operands']
+            del myDict['snippets']
+
+            snippetLabels = [l for l in labelRE.findall(template)
+                             if d.snippets.has_key(l)]
+
+            snippets = dict([(s, mungeSnippet(d.snippets[s]))
+                             for s in snippetLabels])
+
+            myDict.update(snippets)
+
+            compositeCode = ' '.join(map(str, snippets.values()))
+
+            # Add in template itself in case it references any
+            # operands explicitly (like Mem)
+            compositeCode += ' ' + template
+
+            operands = SubOperandList(compositeCode, d.operands)
+
+            myDict['op_decl'] = operands.concatAttrStrings('op_decl')
+
+            is_src = lambda op: op.is_src
+            is_dest = lambda op: op.is_dest
+
+            myDict['op_src_decl'] = \
+                      operands.concatSomeAttrStrings(is_src, 'op_src_decl')
+            myDict['op_dest_decl'] = \
+                      operands.concatSomeAttrStrings(is_dest, 'op_dest_decl')
+
+            myDict['op_rd'] = operands.concatAttrStrings('op_rd')
+            myDict['op_wb'] = operands.concatAttrStrings('op_wb')
+
+            if d.operands.memOperand:
+                myDict['mem_acc_size'] = d.operands.memOperand.mem_acc_size
+                myDict['mem_acc_type'] = d.operands.memOperand.mem_acc_type
+
+        elif isinstance(d, dict):
+            # if the argument is a dictionary, we just use it.
+            myDict.update(d)
+        elif hasattr(d, '__dict__'):
+            # if the argument is an object, we use its attribute map.
+            myDict.update(d.__dict__)
+        else:
+            raise TypeError, "Template.subst() arg must be or have dictionary"
         return template % myDict
 
     # Convert to string.  This handles the case when a template with a
@@ -1296,10 +1348,10 @@
     def makeConstructor(self):
         c = ''
         if self.is_src:
-            c += '\n\t_srcRegIdx[%d] = %s;' % \
+            c += '\n\t_srcRegIdx[%d] = %s + Ctrl_Base_DepTag;' % \
                  (self.src_reg_idx, self.reg_spec)
         if self.is_dest:
-            c += '\n\t_destRegIdx[%d] = %s;' % \
+            c += '\n\t_destRegIdx[%d] = %s + Ctrl_Base_DepTag;' % \
                  (self.dest_reg_idx, self.reg_spec)
         return c
 
@@ -1307,7 +1359,7 @@
         bit_select = 0
         if (self.ctype == 'float' or self.ctype == 'double'):
             error(0, 'Attempt to read control register as FP')
-        base = 'xc->readMiscRegWithEffect(%s)' % self.reg_spec
+        base = 'xc->readMiscRegOperandWithEffect(this, %s)' % self.src_reg_idx
         if self.size == self.dflt_size:
             return '%s = %s;\n' % (self.base_name, base)
         else:
@@ -1317,7 +1369,8 @@
     def makeWrite(self):
         if (self.ctype == 'float' or self.ctype == 'double'):
             error(0, 'Attempt to write control register as FP')
-        wb = 'xc->setMiscRegWithEffect(%s, %s);\n' % (self.reg_spec, self.base_name)
+        wb = 'xc->setMiscRegOperandWithEffect(this, %s, %s);\n' % \
+             (self.dest_reg_idx, self.base_name)
         wb += 'if (traceData) { traceData->setData(%s); }' % \
               self.base_name
         return wb
@@ -1550,6 +1603,48 @@
     def sort(self):
         self.items.sort(lambda a, b: a.sort_pri - b.sort_pri)
 
+class SubOperandList(OperandList):
+
+    # Collect the operands of master_list that are referenced in 'code'.
+    # Entries are references to master_list's descriptors, not copies.
+    def __init__(self, code, master_list):
+        self.items = []
+        self.bases = {}
+        # delete comments so we don't match on reg specifiers inside
+        code = commentRE.sub('', code)
+        # search for operands
+        next_pos = 0
+        while 1:
+            match = operandsRE.search(code, next_pos)
+            if not match:
+                # no more matches: we're done
+                break
+            op = match.groups()
+            # regexp groups are operand full name, base, and extension
+            (op_full, op_base, op_ext) = op
+            # find this op in the master list
+            op_desc = master_list.find_base(op_base)
+            if not op_desc:
+                error(0, 'Found operand %s which is not in the master list!' \
+                        ' This is an internal error' % \
+                          op_base)
+            else:
+                # See if we've already found this operand
+                op_desc = self.find_base(op_base)
+                if not op_desc:
+                    # if not, add a reference to it to this sub list
+                    self.append(master_list.bases[op_base])
+
+            # start next search after end of current match
+            next_pos = match.end()
+        self.sort()
+        self.memOperand = None
+        for op_desc in self.items:
+            if op_desc.isMem():
+                if self.memOperand:
+                    error(0, "Code block has more than one memory operand.")
+                self.memOperand = op_desc
+
 # Regular expression object to match C++ comments
 # (used in findOperands())
 commentRE = re.compile(r'//.*\n')
@@ -1564,8 +1659,12 @@
 def substMungedOpNames(code):
     return operandsWithExtRE.sub(r'\1', code)
 
-def joinLists(t):
-    return map(string.join, t)
+# Fix up code snippets for final substitution in templates.
+def mungeSnippet(s):
+    if isinstance(s, str):
+        return substMungedOpNames(substBitOps(s))
+    else:
+        return s
 
 def makeFlagConstructor(flag_list):
     if len(flag_list) == 0:
@@ -1583,11 +1682,24 @@
     code = pre + string.join(flag_list, post + pre) + post
     return code
 
-class CodeBlock:
-    def __init__(self, code):
-        self.orig_code = code
-        self.operands = OperandList(code)
-        self.code = substMungedOpNames(substBitOps(code))
+# Assume all instruction flags are of the form 'IsFoo'
+instFlagRE = re.compile(r'Is.*')
+
+# OpClass constants end in 'Op' except No_OpClass
+opClassRE = re.compile(r'.*Op|No_OpClass')
+
+class InstObjParams:
+    def __init__(self, mnem, class_name, base_class = '',
+                 snippets = {}, opt_args = []):
+        self.mnemonic = mnem
+        self.class_name = class_name
+        self.base_class = base_class
+        if not isinstance(snippets, dict):
+            snippets = {'code' : snippets}
+        compositeCode = ' '.join(map(str, snippets.values()))
+        self.snippets = snippets
+
+        self.operands = OperandList(compositeCode)
         self.constructor = self.operands.concatAttrStrings('constructor')
         self.constructor += \
                  '\n\t_numSrcRegs = %d;' % self.operands.numSrcRegs
@@ -1597,28 +1709,10 @@
                  '\n\t_numFPDestRegs = %d;' % self.operands.numFPDestRegs
         self.constructor += \
                  '\n\t_numIntDestRegs = %d;' % self.operands.numIntDestRegs
-
-        self.op_decl = self.operands.concatAttrStrings('op_decl')
-
-        is_src = lambda op: op.is_src
-        is_dest = lambda op: op.is_dest
-
-        self.op_src_decl = \
-                  self.operands.concatSomeAttrStrings(is_src, 'op_src_decl')
-        self.op_dest_decl = \
-                  self.operands.concatSomeAttrStrings(is_dest, 'op_dest_decl')
-
-        self.op_rd = self.operands.concatAttrStrings('op_rd')
-        self.op_wb = self.operands.concatAttrStrings('op_wb')
-
         self.flags = self.operands.concatAttrLists('flags')
 
-        if self.operands.memOperand:
-            self.mem_acc_size = self.operands.memOperand.mem_acc_size
-            self.mem_acc_type = self.operands.memOperand.mem_acc_type
-
         # Make a basic guess on the operand class (function unit type).
-        # These are good enough for most cases, and will be overridden
+        # These are good enough for most cases, and can be overridden
         # later otherwise.
         if 'IsStore' in self.flags:
             self.op_class = 'MemWriteOp'
@@ -1629,48 +1723,6 @@
         else:
             self.op_class = 'IntAluOp'
 
-# Assume all instruction flags are of the form 'IsFoo'
-instFlagRE = re.compile(r'Is.*')
-
-# OpClass constants end in 'Op' except No_OpClass
-opClassRE = re.compile(r'.*Op|No_OpClass')
-
-class InstObjParams:
-    def __init__(self, mnem, class_name, base_class = '',
-                 code = None, opt_args = [], extras = {}):
-        self.mnemonic = mnem
-        self.class_name = class_name
-        self.base_class = base_class
-        if code:
-            #If the user already made a CodeBlock, pick the parts from it
-            if isinstance(code, CodeBlock):
-                origCode = code.orig_code
-                codeBlock = code
-            else:
-                origCode = code
-                codeBlock = CodeBlock(code)
-            stringExtras = {}
-            otherExtras = {}
-            for (k, v) in extras.items():
-                if type(v) == str:
-                    stringExtras[k] = v
-                else:
-                    otherExtras[k] = v
-            compositeCode = "\n".join([origCode] + stringExtras.values())
-            # compositeCode = '\n'.join([origCode] +
-            #	    [pair[1] for pair in extras])
-            compositeBlock = CodeBlock(compositeCode)
-            for code_attr in compositeBlock.__dict__.keys():
-                setattr(self, code_attr, getattr(compositeBlock, code_attr))
-            for (key, snippet) in stringExtras.items():
-                setattr(self, key, CodeBlock(snippet).code)
-            for (key, item) in otherExtras.items():
-                setattr(self, key, item)
-            self.code = codeBlock.code
-            self.orig_code = origCode
-        else:
-            self.constructor = ''
-            self.flags = []
         # Optional arguments are assumed to be either StaticInst flags
         # or an OpClass value.  To avoid having to import a complete
         # list of these values to match against, we do it ad-hoc
diff --git a/src/arch/mips/isa/decoder.isa b/src/arch/mips/isa/decoder.isa
index d65e3eb..99c9e16 100644
--- a/src/arch/mips/isa/decoder.isa
+++ b/src/arch/mips/isa/decoder.isa
@@ -154,19 +154,22 @@
 
             0x3: decode FUNCTION_LO {
                 format HiLoOp {
-                    0x0: mult({{ val = Rs.sd * Rt.sd; }});
-                    0x1: multu({{ val = Rs.ud * Rt.ud; }});
-                }
-
-                format HiLoMiscOp {
-                    0x2: div({{ if (Rt.sd != 0) {
-                                    HI = Rs.sd % Rt.sd;
-                                    LO = Rs.sd / Rt.sd;
+                    0x0: mult({{ int64_t val = Rs.sd * Rt.sd; }});
+                    0x1: multu({{ uint64_t val = Rs.ud * Rt.ud; }});
+                    0x2: div({{ //Initialized to placate g++
+                                int64_t val = 0;
+                                if (Rt.sd != 0) {
+                                    int64_t hi = Rs.sd % Rt.sd;
+                                    int64_t lo = Rs.sd / Rt.sd;
+                                    val = (hi << 32) | lo;
                                 }
                              }});
-                    0x3: divu({{ if (Rt.ud != 0) {
-                                     HI = Rs.ud % Rt.ud;
-                                     LO = Rs.ud / Rt.ud;
+                    0x3: divu({{ //Initialized to placate g++
+                                 uint64_t val = 0;
+                                 if (Rt.ud != 0) {
+                                     uint64_t hi = Rs.ud % Rt.ud;
+                                     uint64_t lo = Rs.ud / Rt.ud;
+                                     val = (hi << 32) | lo;
                                  }
                               }});
                 }
@@ -950,17 +953,17 @@
                                 }});
 
                 format HiLoOp {
-                    0x0: madd({{ val = ((int64_t) HI << 32 | LO) +
-                                       (Rs.sd * Rt.sd);
+                    0x0: madd({{ int64_t val = ((int64_t) HI << 32 | LO) +
+                                          (Rs.sd * Rt.sd);
                               }});
-                    0x1: maddu({{ val = ((uint64_t) HI << 32 | LO) +
-                                        (Rs.ud * Rt.ud);
+                    0x1: maddu({{ uint64_t val = ((uint64_t) HI << 32 | LO) +
+                                           (Rs.ud * Rt.ud);
                                }});
-                    0x4: msub({{ val = ((int64_t) HI << 32 | LO) -
-                                       (Rs.sd * Rt.sd);
+                    0x4: msub({{ int64_t val = ((int64_t) HI << 32 | LO) -
+                                          (Rs.sd * Rt.sd);
                               }});
-                    0x5: msubu({{ val = ((uint64_t) HI << 32 | LO) -
-                                        (Rs.ud * Rt.ud);
+                    0x5: msubu({{ uint64_t val = ((uint64_t) HI << 32 | LO) -
+                                           (Rs.ud * Rt.ud);
                                }});
                 }
             }
diff --git a/src/arch/mips/isa/formats/basic.isa b/src/arch/mips/isa/formats/basic.isa
index 29a445b..ec065b8 100644
--- a/src/arch/mips/isa/formats/basic.isa
+++ b/src/arch/mips/isa/formats/basic.isa
@@ -88,7 +88,7 @@
 
 // The most basic instruction format...
 def format BasicOp(code, *flags) {{
-        iop = InstObjParams(name, Name, 'MipsStaticInst', CodeBlock(code), flags)
+        iop = InstObjParams(name, Name, 'MipsStaticInst', code, flags)
         header_output = BasicDeclare.subst(iop)
         decoder_output = BasicConstructor.subst(iop)
         decode_block = BasicDecode.subst(iop)
diff --git a/src/arch/mips/isa/formats/branch.isa b/src/arch/mips/isa/formats/branch.isa
index 8c89fbf..a67f04d 100644
--- a/src/arch/mips/isa/formats/branch.isa
+++ b/src/arch/mips/isa/formats/branch.isa
@@ -247,7 +247,7 @@
     code += '} else {\n'
     code += not_taken_code
 
-    iop = InstObjParams(name, Name, 'Branch', CodeBlock(code), inst_flags)
+    iop = InstObjParams(name, Name, 'Branch', code, inst_flags)
     header_output = BasicDeclare.subst(iop)
     decoder_output = BasicConstructor.subst(iop)
     decode_block = BasicDecode.subst(iop)
@@ -266,7 +266,7 @@
         else:
             inst_flags += (x, )
 
-    iop = InstObjParams(name, Name, 'Jump', CodeBlock(code), inst_flags)
+    iop = InstObjParams(name, Name, 'Jump', code, inst_flags)
     header_output = BasicDeclare.subst(iop)
     decoder_output = BasicConstructor.subst(iop)
     decode_block = BasicDecode.subst(iop)
diff --git a/src/arch/mips/isa/formats/control.isa b/src/arch/mips/isa/formats/control.isa
index 6c7d396..1c63a6e 100644
--- a/src/arch/mips/isa/formats/control.isa
+++ b/src/arch/mips/isa/formats/control.isa
@@ -130,7 +130,7 @@
 }};
 
 def format System(code, *flags) {{
-        iop = InstObjParams(name, Name, 'Control', CodeBlock(code), flags)
+        iop = InstObjParams(name, Name, 'Control', code, flags)
         header_output = BasicDeclare.subst(iop)
         decoder_output = BasicConstructor.subst(iop)
         decode_block = BasicDecode.subst(iop)
@@ -138,7 +138,7 @@
 }};
 
 def format CP0Control(code, *flags) {{
-        iop = InstObjParams(name, Name, 'CP0Control', CodeBlock(code), flags)
+        iop = InstObjParams(name, Name, 'CP0Control', code, flags)
         header_output = BasicDeclare.subst(iop)
         decoder_output = BasicConstructor.subst(iop)
         decode_block = BasicDecode.subst(iop)
@@ -146,7 +146,7 @@
 }};
 
 def format CP1Control(code, *flags) {{
-        iop = InstObjParams(name, Name, 'CP1Control', CodeBlock(code), flags)
+        iop = InstObjParams(name, Name, 'CP1Control', code, flags)
         header_output = BasicDeclare.subst(iop)
         decoder_output = BasicConstructor.subst(iop)
         decode_block = BasicDecode.subst(iop)
diff --git a/src/arch/mips/isa/formats/fp.isa b/src/arch/mips/isa/formats/fp.isa
index 153f3f9..ac6805c 100644
--- a/src/arch/mips/isa/formats/fp.isa
+++ b/src/arch/mips/isa/formats/fp.isa
@@ -209,7 +209,7 @@
 
 // Primary format for float point operate instructions:
 def format FloatOp(code, *flags) {{
-        iop = InstObjParams(name, Name, 'FPOp', CodeBlock(code), flags)
+        iop = InstObjParams(name, Name, 'FPOp', code, flags)
         header_output = BasicDeclare.subst(iop)
         decoder_output = BasicConstructor.subst(iop)
         decode_block = BasicDecode.subst(iop)
@@ -248,7 +248,7 @@
     code +=  cond_code + '}'
     code += 'FCSR = genCCVector(FCSR, CC, cond);\n'
 
-    iop = InstObjParams(name, Name, 'FPCompareOp', CodeBlock(code))
+    iop = InstObjParams(name, Name, 'FPCompareOp', code)
     header_output = BasicDeclare.subst(iop)
     decoder_output = BasicConstructor.subst(iop)
     decode_block = BasicDecode.subst(iop)
@@ -299,7 +299,7 @@
     else:
         code += 'val); '
 
-    iop = InstObjParams(name, Name, 'FPOp', CodeBlock(code))
+    iop = InstObjParams(name, Name, 'FPOp', code)
     header_output = BasicDeclare.subst(iop)
     decoder_output = BasicConstructor.subst(iop)
     decode_block = BasicDecode.subst(iop)
@@ -307,7 +307,7 @@
 }};
 
 def format FloatAccOp(code, *flags) {{
-        iop = InstObjParams(name, Name, 'FPOp', CodeBlock(code), flags)
+        iop = InstObjParams(name, Name, 'FPOp', code, flags)
         header_output = BasicDeclare.subst(iop)
         decoder_output = BasicConstructor.subst(iop)
         decode_block = BasicDecode.subst(iop)
@@ -316,7 +316,7 @@
 
 // Primary format for float64 operate instructions:
 def format Float64Op(code, *flags) {{
-        iop = InstObjParams(name, Name, 'MipsStaticInst', CodeBlock(code), flags)
+        iop = InstObjParams(name, Name, 'MipsStaticInst', code, flags)
         header_output = BasicDeclare.subst(iop)
         decoder_output = BasicConstructor.subst(iop)
         decode_block = BasicDecode.subst(iop)
@@ -364,7 +364,7 @@
     code +=  cond_code2
     code += 'FCSR = genCCVector(FCSR, CC, cond2);}\n}'
 
-    iop = InstObjParams(name, Name, 'FPCompareOp', CodeBlock(code))
+    iop = InstObjParams(name, Name, 'FPCompareOp', code)
     header_output = BasicDeclare.subst(iop)
     decoder_output = BasicConstructor.subst(iop)
     decode_block = BasicDecode.subst(iop)
diff --git a/src/arch/mips/isa/formats/int.isa b/src/arch/mips/isa/formats/int.isa
index 654dd89..2f131f6 100644
--- a/src/arch/mips/isa/formats/int.isa
+++ b/src/arch/mips/isa/formats/int.isa
@@ -224,7 +224,7 @@
 }};
 
 def format IntOp(code, *opt_flags) {{
-    iop = InstObjParams(name, Name, 'IntOp', CodeBlock(code), opt_flags)
+    iop = InstObjParams(name, Name, 'IntOp', code, opt_flags)
     header_output = BasicDeclare.subst(iop)
     decoder_output = BasicConstructor.subst(iop)
     decode_block = RegNopCheckDecode.subst(iop)
@@ -232,7 +232,7 @@
 }};
 
 def format IntImmOp(code, *opt_flags) {{
-    iop = InstObjParams(name, Name, 'IntImmOp', CodeBlock(code), opt_flags)
+    iop = InstObjParams(name, Name, 'IntImmOp', code, opt_flags)
     header_output = BasicDeclare.subst(iop)
     decoder_output = BasicConstructor.subst(iop)
     decode_block = ImmNopCheckDecode.subst(iop)
@@ -240,15 +240,10 @@
 }};
 
 def format HiLoOp(code, *opt_flags) {{
-    if '.sd' in code:
-        code = 'int64_t ' + code
-    elif '.ud' in code:
-        code = 'uint64_t ' + code
-
     code += 'HI = val<63:32>;\n'
     code += 'LO = val<31:0>;\n'
 
-    iop = InstObjParams(name, Name, 'HiLoOp', CodeBlock(code), opt_flags)
+    iop = InstObjParams(name, Name, 'HiLoOp', code, opt_flags)
     header_output = BasicDeclare.subst(iop)
     decoder_output = BasicConstructor.subst(iop)
     decode_block = BasicDecode.subst(iop)
@@ -256,11 +251,11 @@
 }};
 
 def format HiLoMiscOp(code, *opt_flags) {{
-    iop = InstObjParams(name, Name, 'HiLoMiscOp', CodeBlock(code), opt_flags)
+    iop = InstObjParams(name, Name, 'HiLoMiscOp', code, opt_flags)
     header_output = BasicDeclare.subst(iop)
     decoder_output = BasicConstructor.subst(iop)
     decode_block = BasicDecode.subst(iop)
-    exec_output = HiLoExecute.subst(iop)
+    exec_output = BasicExecute.subst(iop)
 }};
 
 
diff --git a/src/arch/mips/isa/formats/mem.isa b/src/arch/mips/isa/formats/mem.isa
index e786cfb..fccda27 100644
--- a/src/arch/mips/isa/formats/mem.isa
+++ b/src/arch/mips/isa/formats/mem.isa
@@ -166,22 +166,28 @@
 }};
 
 
-def template LoadStoreConstructor {{
+def template EACompConstructor {{
     /** TODO: change op_class to AddrGenOp or something (requires
      * creating new member of OpClass enum in op_class.hh, updating
      * config files, etc.). */
     inline %(class_name)s::EAComp::EAComp(MachInst machInst)
         : %(base_class)s("%(mnemonic)s (EAComp)", machInst, IntAluOp)
     {
-        %(ea_constructor)s;
+        %(constructor)s;
     }
+}};
 
+
+def template MemAccConstructor {{
     inline %(class_name)s::MemAcc::MemAcc(MachInst machInst)
         : %(base_class)s("%(mnemonic)s (MemAcc)", machInst, %(op_class)s)
     {
-        %(memacc_constructor)s;
+        %(constructor)s;
     }
+}};
 
+
+def template LoadStoreConstructor {{
     inline %(class_name)s::%(class_name)s(MachInst machInst)
          : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
                           new EAComp(machInst), new MemAcc(machInst))
@@ -202,7 +208,7 @@
         %(fp_enable_check)s;
         %(op_decl)s;
         %(op_rd)s;
-        %(code)s;
+        %(ea_code)s;
 
         if (fault == NoFault) {
             %(op_wb)s;
@@ -228,7 +234,7 @@
 
         if (fault == NoFault) {
             fault = xc->read(EA, (uint%(mem_acc_size)d_t&)Mem, memAccessFlags);
-            %(code)s;
+            %(memacc_code)s;
         }
 
         if (fault == NoFault) {
@@ -327,7 +333,7 @@
         EA = xc->getEA();
 
         if (fault == NoFault) {
-            %(code)s;
+            %(memacc_code)s;
         }
 
         if (fault == NoFault) {
@@ -471,7 +477,7 @@
         EA = xc->getEA();
 
         if (fault == NoFault) {
-            %(code)s;
+            %(memacc_code)s;
         }
 
         return NoFault;
diff --git a/src/arch/mips/isa/formats/mt.isa b/src/arch/mips/isa/formats/mt.isa
index 96435f8..d724033 100644
--- a/src/arch/mips/isa/formats/mt.isa
+++ b/src/arch/mips/isa/formats/mt.isa
@@ -74,7 +74,7 @@
 // Primary format for integer operate instructions:
 def format MipsMT() {{
         code = 'panic(\"Mips MT Is Currently Unimplemented.\");\n'
-        iop = InstObjParams(name, Name, 'MT', CodeBlock(code))
+        iop = InstObjParams(name, Name, 'MT', code)
         header_output = BasicDeclare.subst(iop)
         decoder_output = BasicConstructor.subst(iop)
         decode_block = BasicDecode.subst(iop)
diff --git a/src/arch/mips/isa/formats/noop.isa b/src/arch/mips/isa/formats/noop.isa
index 7f3d313..a8995d6 100644
--- a/src/arch/mips/isa/formats/noop.isa
+++ b/src/arch/mips/isa/formats/noop.isa
@@ -125,7 +125,7 @@
 
 // Like BasicOperate format, but generates NOP if RC/FC == 31
 def format BasicOperateWithNopCheck(code, *opt_args) {{
-    iop = InstObjParams(name, Name, 'MipsStaticInst', CodeBlock(code),
+    iop = InstObjParams(name, Name, 'MipsStaticInst', code,
                         opt_args)
     header_output = BasicDeclare.subst(iop)
     decoder_output = BasicConstructor.subst(iop)
diff --git a/src/arch/mips/isa/formats/tlbop.isa b/src/arch/mips/isa/formats/tlbop.isa
index b974ccb..dbfbb8c 100644
--- a/src/arch/mips/isa/formats/tlbop.isa
+++ b/src/arch/mips/isa/formats/tlbop.isa
@@ -70,9 +70,7 @@
 
 // Primary format for integer operate instructions:
 def format TlbOp(code, *opt_flags) {{
-        orig_code = code
-        cblk = CodeBlock(code)
-        iop = InstObjParams(name, Name, 'MipsStaticInst', cblk, opt_flags)
+        iop = InstObjParams(name, Name, 'MipsStaticInst', code, opt_flags)
         header_output = BasicDeclare.subst(iop)
         decoder_output = BasicConstructor.subst(iop)
         decode_block = BasicDecodeWithMnemonic.subst(iop)
diff --git a/src/arch/mips/isa/formats/trap.isa b/src/arch/mips/isa/formats/trap.isa
index b9066f3..96d1167 100644
--- a/src/arch/mips/isa/formats/trap.isa
+++ b/src/arch/mips/isa/formats/trap.isa
@@ -73,7 +73,7 @@
         code = 'warn(\"'
         code += 'Trap Exception Handler Is Currently Not Implemented.'
         code += '\");'
-        iop = InstObjParams(name, Name, 'MipsStaticInst', CodeBlock(code), flags)
+        iop = InstObjParams(name, Name, 'MipsStaticInst', code, flags)
         header_output = BasicDeclare.subst(iop)
         decoder_output = BasicConstructor.subst(iop)
         decode_block = BasicDecode.subst(iop)
diff --git a/src/arch/mips/isa/formats/util.isa b/src/arch/mips/isa/formats/util.isa
index 73164bc..ec52411 100644
--- a/src/arch/mips/isa/formats/util.isa
+++ b/src/arch/mips/isa/formats/util.isa
@@ -40,63 +40,24 @@
     # add hook to get effective addresses into execution trace output.
     ea_code += '\nif (traceData) { traceData->setAddr(EA); }\n'
 
-    # generate code block objects
-    ea_cblk = CodeBlock(ea_code)
-    memacc_cblk = CodeBlock(memacc_code)
-    postacc_cblk = CodeBlock(postacc_code)
-
     # Some CPU models execute the memory operation as an atomic unit,
     # while others want to separate them into an effective address
     # computation and a memory access operation.  As a result, we need
     # to generate three StaticInst objects.  Note that the latter two
     # are nested inside the larger "atomic" one.
 
-    # generate InstObjParams for EAComp object
-    ea_iop = InstObjParams(name, Name, base_class, ea_cblk, inst_flags)
-
-    # generate InstObjParams for MemAcc object
-    memacc_iop = InstObjParams(name, Name, base_class, memacc_cblk, inst_flags)
-    # in the split execution model, the MemAcc portion is responsible
-    # for the post-access code.
-    memacc_iop.postacc_code = postacc_cblk.code
-
-    # generate InstObjParams for InitiateAcc, CompleteAcc object
-    # The code used depends on the template being used
-    if (exec_template_base == 'Load'):
-        initiateacc_cblk = CodeBlock(ea_code + memacc_code)
-        completeacc_cblk = CodeBlock(memacc_code + postacc_code)
-    elif (exec_template_base.startswith('Store')):
-        initiateacc_cblk = CodeBlock(ea_code + memacc_code)
-        completeacc_cblk = CodeBlock(postacc_code)
-    else:
-        initiateacc_cblk = ''
-        completeacc_cblk = ''
-
-    initiateacc_iop = InstObjParams(name, Name, base_class, initiateacc_cblk,
-                                    inst_flags)
-
-    completeacc_iop = InstObjParams(name, Name, base_class, completeacc_cblk,
-                                    inst_flags)
-
-    if (exec_template_base == 'Load'):
-        initiateacc_iop.ea_code = ea_cblk.code
-        initiateacc_iop.memacc_code = memacc_cblk.code
-        completeacc_iop.memacc_code = memacc_cblk.code
-        completeacc_iop.postacc_code = postacc_cblk.code
-    elif (exec_template_base.startswith('Store')):
-        initiateacc_iop.ea_code = ea_cblk.code
-        initiateacc_iop.memacc_code = memacc_cblk.code
-        completeacc_iop.postacc_code = postacc_cblk.code
-
-    # generate InstObjParams for unified execution
-    cblk = CodeBlock(ea_code + memacc_code + postacc_code)
-    iop = InstObjParams(name, Name, base_class, cblk, inst_flags)
-
-    iop.ea_constructor = ea_cblk.constructor
-    iop.ea_code = ea_cblk.code
-    iop.memacc_constructor = memacc_cblk.constructor
-    iop.memacc_code = memacc_cblk.code
-    iop.postacc_code = postacc_cblk.code
+    # Generate InstObjParams for each of the three objects.  Note that
+    # they differ only in the set of code objects contained (which in
+    # turn affects the object's overall operand list).
+    iop = InstObjParams(name, Name, base_class,
+                        { 'ea_code':ea_code, 'memacc_code':memacc_code, 'postacc_code':postacc_code },
+                        inst_flags)
+    ea_iop = InstObjParams(name, Name, base_class,
+                        { 'ea_code':ea_code },
+                        inst_flags)
+    memacc_iop = InstObjParams(name, Name, base_class,
+                        { 'memacc_code':memacc_code, 'postacc_code':postacc_code },
+                        inst_flags)
 
     if mem_flags:
         s = '\n\tmemAccessFlags = ' + string.join(mem_flags, '|') + ';'
@@ -117,14 +78,19 @@
     completeAccTemplate = eval(exec_template_base + 'CompleteAcc')
 
     # (header_output, decoder_output, decode_block, exec_output)
-    return (LoadStoreDeclare.subst(iop), LoadStoreConstructor.subst(iop),
+    return (LoadStoreDeclare.subst(iop),
+            EACompConstructor.subst(ea_iop)
+            + MemAccConstructor.subst(memacc_iop)
+            + LoadStoreConstructor.subst(iop),
             decode_template.subst(iop),
             EACompExecute.subst(ea_iop)
             + memAccExecTemplate.subst(memacc_iop)
             + fullExecTemplate.subst(iop)
-            + initiateAccTemplate.subst(initiateacc_iop)
-            + completeAccTemplate.subst(completeacc_iop))
+            + initiateAccTemplate.subst(iop)
+            + completeAccTemplate.subst(iop))
 }};
+
+
 output header {{
         std::string inst2string(MachInst machInst);
 }};
diff --git a/src/arch/mips/regfile/regfile.hh b/src/arch/mips/regfile/regfile.hh
index dee883c..7b57b31 100644
--- a/src/arch/mips/regfile/regfile.hh
+++ b/src/arch/mips/regfile/regfile.hh
@@ -173,6 +173,11 @@
         }
     };
 
+    static inline int flattenIntIndex(ThreadContext * tc, int reg)
+    {
+        return reg;
+    }
+
     void copyRegs(ThreadContext *src, ThreadContext *dest);
 
     void copyMiscRegs(ThreadContext *src, ThreadContext *dest);
diff --git a/src/arch/mips/remote_gdb.hh b/src/arch/mips/remote_gdb.hh
new file mode 100644
index 0000000..5886ab8
--- /dev/null
+++ b/src/arch/mips/remote_gdb.hh
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Nathan Binkert
+ */
+
+#ifndef __ARCH_MIPS_REMOTE_GDB_HH__
+#define __ARCH_MIPS_REMOTE_GDB_HH__
+
+#include "base/remote_gdb.hh"
+
+namespace MipsISA
+{
+    class RemoteGDB : public BaseRemoteGDB
+    {
+      public:
+        //These need to be written to suit MIPS
+
+        RemoteGDB(System *system, ThreadContext *context)
+            : BaseRemoteGDB(system, context, 1)
+        {}
+
+        bool acc(Addr, size_t)
+        { panic("acc not implemented for MIPS!"); }
+
+        void getregs()
+        { panic("getregs not implemented for MIPS!"); }
+
+        void setregs()
+        { panic("setregs not implemented for MIPS!"); }
+
+        void clearSingleStep()
+        { panic("clearSingleStep not implemented for MIPS!"); }
+
+        void setSingleStep()
+        { panic("setSingleStep not implemented for MIPS!"); }
+    };
+}
+
+#endif /* __ARCH_MIPS_REMOTE_GDB_HH__ */
diff --git a/src/arch/mips/syscallreturn.hh b/src/arch/mips/syscallreturn.hh
index ef1093c..47290b6 100644
--- a/src/arch/mips/syscallreturn.hh
+++ b/src/arch/mips/syscallreturn.hh
@@ -32,51 +32,22 @@
 #ifndef __ARCH_MIPS_SYSCALLRETURN_HH__
 #define __ARCH_MIPS_SYSCALLRETURN_HH__
 
-class SyscallReturn {
-        public:
-           template <class T>
-           SyscallReturn(T v, bool s)
-           {
-               retval = (uint32_t)v;
-               success = s;
-           }
-
-           template <class T>
-           SyscallReturn(T v)
-           {
-               success = (v >= 0);
-               retval = (uint32_t)v;
-           }
-
-           ~SyscallReturn() {}
-
-           SyscallReturn& operator=(const SyscallReturn& s) {
-               retval = s.retval;
-               success = s.success;
-               return *this;
-           }
-
-           bool successful() { return success; }
-           uint64_t value() { return retval; }
-
-
-       private:
-           uint64_t retval;
-           bool success;
-};
+#include "sim/syscallreturn.hh"
+#include "cpu/thread_context.hh"
 
 namespace MipsISA
 {
-    static inline void setSyscallReturn(SyscallReturn return_value, RegFile *regs)
+    static inline void setSyscallReturn(SyscallReturn return_value,
+            ThreadContext *tc)
     {
         if (return_value.successful()) {
             // no error
-            regs->setIntReg(SyscallSuccessReg, 0);
-            regs->setIntReg(ReturnValueReg1, return_value.value());
+            tc->setIntReg(SyscallSuccessReg, 0);
+            tc->setIntReg(ReturnValueReg1, return_value.value());
         } else {
             // got an error, return details
-            regs->setIntReg(SyscallSuccessReg, (IntReg) -1);
-            regs->setIntReg(ReturnValueReg1, -return_value.value());
+            tc->setIntReg(SyscallSuccessReg, (IntReg) -1);
+            tc->setIntReg(ReturnValueReg1, -return_value.value());
         }
     }
 }
diff --git a/src/arch/sparc/SConscript b/src/arch/sparc/SConscript
index c2ef97b..555bfba 100644
--- a/src/arch/sparc/SConscript
+++ b/src/arch/sparc/SConscript
@@ -50,12 +50,12 @@
 	intregfile.cc
 	miscregfile.cc
 	regfile.cc
+	remote_gdb.cc
 	''')
 
 # Full-system sources
 full_system_sources = Split('''
 	arguments.cc
-	remote_gdb.cc
         pagetable.cc
 	stacktrace.cc
 	system.cc
diff --git a/src/arch/sparc/faults.cc b/src/arch/sparc/faults.cc
index 825ff40..a6f4343 100644
--- a/src/arch/sparc/faults.cc
+++ b/src/arch/sparc/faults.cc
@@ -302,10 +302,12 @@
     MiscReg TSTATE = tc->readMiscReg(MISCREG_TSTATE);
     MiscReg PSTATE = tc->readMiscReg(MISCREG_PSTATE);
     MiscReg HPSTATE = tc->readMiscReg(MISCREG_HPSTATE);
-    MiscReg CCR = tc->readMiscReg(MISCREG_CCR);
+    //MiscReg CCR = tc->readMiscReg(MISCREG_CCR);
+    MiscReg CCR = tc->readIntReg(NumIntArchRegs + 2);
     MiscReg ASI = tc->readMiscReg(MISCREG_ASI);
     MiscReg CWP = tc->readMiscReg(MISCREG_CWP);
-    MiscReg CANSAVE = tc->readMiscReg(MISCREG_CANSAVE);
+    //MiscReg CANSAVE = tc->readMiscReg(MISCREG_CANSAVE);
+    MiscReg CANSAVE = tc->readIntReg(NumIntArchRegs + 3);
     MiscReg GL = tc->readMiscReg(MISCREG_GL);
     MiscReg PC = tc->readPC();
     MiscReg NPC = tc->readNextPC();
@@ -387,10 +389,12 @@
     MiscReg TSTATE = tc->readMiscReg(MISCREG_TSTATE);
     MiscReg PSTATE = tc->readMiscReg(MISCREG_PSTATE);
     MiscReg HPSTATE = tc->readMiscReg(MISCREG_HPSTATE);
-    MiscReg CCR = tc->readMiscReg(MISCREG_CCR);
+    //MiscReg CCR = tc->readMiscReg(MISCREG_CCR);
+    MiscReg CCR = tc->readIntReg(NumIntArchRegs + 2);
     MiscReg ASI = tc->readMiscReg(MISCREG_ASI);
     MiscReg CWP = tc->readMiscReg(MISCREG_CWP);
-    MiscReg CANSAVE = tc->readMiscReg(MISCREG_CANSAVE);
+    //MiscReg CANSAVE = tc->readMiscReg(MISCREG_CANSAVE);
+    MiscReg CANSAVE = tc->readIntReg(NumIntArchRegs + 3);
     MiscReg GL = tc->readMiscReg(MISCREG_GL);
     MiscReg PC = tc->readPC();
     MiscReg NPC = tc->readNextPC();
@@ -656,19 +660,21 @@
 {
     Process *p = tc->getProcessPtr();
 
-    // address is higher than the stack region or in the current stack region
-    if (vaddr > p->stack_base || vaddr > p->stack_min)
-        FaultBase::invoke(tc);
-
-    // We've accessed the next page
-    if (vaddr > p->stack_min - PageBytes) {
+    // We've accessed the next page of the stack, so extend the stack
+    // to cover it.
+    if(vaddr < p->stack_min && vaddr >= p->stack_min - PageBytes)
+    {
         p->stack_min -= PageBytes;
-        if (p->stack_base - p->stack_min > 8*1024*1024)
+        if(p->stack_base - p->stack_min > 8*1024*1024)
             fatal("Over max stack size for one thread\n");
         p->pTable->allocate(p->stack_min, PageBytes);
         warn("Increasing stack size by one page.");
-    } else {
-        FaultBase::invoke(tc);
+    }
+    // Otherwise, we have an unexpected page fault. Report that fact,
+    // and what address was accessed to cause the fault.
+    else
+    {
+        panic("Page table fault when accessing virtual address %#x\n", vaddr);
     }
 }
 
diff --git a/src/arch/sparc/intregfile.cc b/src/arch/sparc/intregfile.cc
index 60856d3..2c9d9b1 100644
--- a/src/arch/sparc/intregfile.cc
+++ b/src/arch/sparc/intregfile.cc
@@ -66,6 +66,7 @@
         memset(regGlobals[x], 0, sizeof(IntReg) * RegsPerFrame);
     for(int x = 0; x < 2 * NWindows; x++)
         memset(regSegments[x], 0, sizeof(IntReg) * RegsPerFrame);
+    memset(regs, 0, sizeof(IntReg) * NumIntRegs);
 }
 
 IntRegFile::IntRegFile()
@@ -78,6 +79,8 @@
 
 IntReg IntRegFile::readReg(int intReg)
 {
+    DPRINTF(Sparc, "Read register %d = 0x%x\n", intReg, regs[intReg]);
+    return regs[intReg];
     IntReg val;
     if(intReg < NumIntArchRegs)
         val = regView[intReg >> FrameOffsetBits][intReg & FrameOffsetMask];
@@ -96,6 +99,12 @@
     if(intReg)
     {
         DPRINTF(Sparc, "Wrote register %d = 0x%x\n", intReg, val);
+        regs[intReg] = val;
+    }
+    return;
+    if(intReg)
+    {
+        DPRINTF(Sparc, "Wrote register %d = 0x%x\n", intReg, val);
         if(intReg < NumIntArchRegs)
             regView[intReg >> FrameOffsetBits][intReg & FrameOffsetMask] = val;
         else if((intReg -= NumIntArchRegs) < NumMicroIntRegs)
diff --git a/src/arch/sparc/intregfile.hh b/src/arch/sparc/intregfile.hh
index d66d0fc..665c7aa 100644
--- a/src/arch/sparc/intregfile.hh
+++ b/src/arch/sparc/intregfile.hh
@@ -34,6 +34,7 @@
 
 #include "arch/sparc/isa_traits.hh"
 #include "arch/sparc/types.hh"
+#include "base/bitfield.hh"
 
 #include <string>
 
@@ -47,22 +48,26 @@
     std::string getIntRegName(RegIndex);
 
     const int NumIntArchRegs = 32;
-    const int NumIntRegs = MaxGL * 8 + NWindows * 16 + NumMicroIntRegs;
+    const int NumIntRegs = (MaxGL + 1) * 8 + NWindows * 16 + NumMicroIntRegs;
 
     class IntRegFile
     {
       private:
         friend class RegFile;
       protected:
+        //The number of bits needed to index into each 8 register frame
         static const int FrameOffsetBits = 3;
+        //The number of bits to choose between the 4 sets of 8 registers
         static const int FrameNumBits = 2;
 
+        //The number of registers per "frame" (8)
         static const int RegsPerFrame = 1 << FrameOffsetBits;
-        static const int FrameNumMask =
+        //A mask to get the frame number
+        static const uint64_t FrameNumMask =
                 (FrameNumBits == sizeof(int)) ?
                 (unsigned int)(-1) :
                 (1 << FrameNumBits) - 1;
-        static const int FrameOffsetMask =
+        static const uint64_t FrameOffsetMask =
                 (FrameOffsetBits == sizeof(int)) ?
                 (unsigned int)(-1) :
                 (1 << FrameOffsetBits) - 1;
@@ -70,6 +75,7 @@
         IntReg regGlobals[MaxGL+1][RegsPerFrame];
         IntReg regSegments[2 * NWindows][RegsPerFrame];
         IntReg microRegs[NumMicroIntRegs];
+        IntReg regs[NumIntRegs];
 
         enum regFrame {Globals, Outputs, Locals, Inputs, NumFrames};
 
diff --git a/src/arch/sparc/isa/base.isa b/src/arch/sparc/isa/base.isa
index 5b65ec2..58a17f2 100644
--- a/src/arch/sparc/isa/base.isa
+++ b/src/arch/sparc/isa/base.isa
@@ -189,23 +189,149 @@
             const int MaxOutput = 16;
             const int MaxLocal = 24;
             const int MaxInput = 32;
-            const int MaxMicroReg = 33;
-            if (reg == FramePointerReg)
-                ccprintf(os, "%%fp");
-            else if (reg == StackPointerReg)
-                ccprintf(os, "%%sp");
-            else if(reg < MaxGlobal)
-                ccprintf(os, "%%g%d", reg);
-            else if(reg < MaxOutput)
-                ccprintf(os, "%%o%d", reg - MaxGlobal);
-            else if(reg < MaxLocal)
-                ccprintf(os, "%%l%d", reg - MaxOutput);
-            else if(reg < MaxInput)
-                ccprintf(os, "%%i%d", reg - MaxLocal);
-            else if(reg < MaxMicroReg)
-                ccprintf(os, "%%u%d", reg - MaxInput);
-            else {
-                ccprintf(os, "%%f%d", reg - MaxMicroReg);
+            const int MaxMicroReg = 40;
+            if (reg < FP_Base_DepTag) {
+                //If we used a register from the next or previous window,
+                //take out the offset.
+                while (reg >= MaxMicroReg)
+                    reg -= MaxMicroReg;
+                if (reg == FramePointerReg)
+                    ccprintf(os, "%%fp");
+                else if (reg == StackPointerReg)
+                    ccprintf(os, "%%sp");
+                else if(reg < MaxGlobal)
+                    ccprintf(os, "%%g%d", reg);
+                else if(reg < MaxOutput)
+                    ccprintf(os, "%%o%d", reg - MaxGlobal);
+                else if(reg < MaxLocal)
+                    ccprintf(os, "%%l%d", reg - MaxOutput);
+                else if(reg < MaxInput)
+                    ccprintf(os, "%%i%d", reg - MaxLocal);
+                else if(reg < MaxMicroReg)
+                    ccprintf(os, "%%u%d", reg - MaxInput);
+                //The fake int regs that are really control regs
+                else {
+                    switch (reg - MaxMicroReg) {
+                      case 1:
+                        ccprintf(os, "%%y");
+                        break;
+                      case 2:
+                        ccprintf(os, "%%ccr");
+                        break;
+                      case 3:
+                        ccprintf(os, "%%cansave");
+                        break;
+                      case 4:
+                        ccprintf(os, "%%canrestore");
+                        break;
+                      case 5:
+                        ccprintf(os, "%%cleanwin");
+                        break;
+                      case 6:
+                        ccprintf(os, "%%otherwin");
+                        break;
+                      case 7:
+                        ccprintf(os, "%%wstate");
+                        break;
+                    }
+                }
+            } else if (reg < Ctrl_Base_DepTag) {
+                ccprintf(os, "%%f%d", reg - FP_Base_DepTag);
+            } else {
+                switch (reg - Ctrl_Base_DepTag) {
+                  case MISCREG_ASI:
+                    ccprintf(os, "%%asi");
+                    break;
+                  case MISCREG_FPRS:
+                    ccprintf(os, "%%fprs");
+                    break;
+                  case MISCREG_PCR:
+                    ccprintf(os, "%%pcr");
+                    break;
+                  case MISCREG_PIC:
+                    ccprintf(os, "%%pic");
+                    break;
+                  case MISCREG_GSR:
+                    ccprintf(os, "%%gsr");
+                    break;
+                  case MISCREG_SOFTINT:
+                    ccprintf(os, "%%softint");
+                    break;
+                  case MISCREG_SOFTINT_SET:
+                    ccprintf(os, "%%softint_set");
+                    break;
+                  case MISCREG_SOFTINT_CLR:
+                    ccprintf(os, "%%softint_clr");
+                    break;
+                  case MISCREG_TICK_CMPR:
+                    ccprintf(os, "%%tick_cmpr");
+                    break;
+                  case MISCREG_STICK:
+                    ccprintf(os, "%%stick");
+                    break;
+                  case MISCREG_STICK_CMPR:
+                    ccprintf(os, "%%stick_cmpr");
+                    break;
+                  case MISCREG_TPC:
+                    ccprintf(os, "%%tpc");
+                    break;
+                  case MISCREG_TNPC:
+                    ccprintf(os, "%%tnpc");
+                    break;
+                  case MISCREG_TSTATE:
+                    ccprintf(os, "%%tstate");
+                    break;
+                  case MISCREG_TT:
+                    ccprintf(os, "%%tt");
+                    break;
+                  case MISCREG_TICK:
+                    ccprintf(os, "%%tick");
+                    break;
+                  case MISCREG_TBA:
+                    ccprintf(os, "%%tba");
+                    break;
+                  case MISCREG_PSTATE:
+                    ccprintf(os, "%%pstate");
+                    break;
+                  case MISCREG_TL:
+                    ccprintf(os, "%%tl");
+                    break;
+                  case MISCREG_PIL:
+                    ccprintf(os, "%%pil");
+                    break;
+                  case MISCREG_CWP:
+                    ccprintf(os, "%%cwp");
+                    break;
+                  case MISCREG_GL:
+                    ccprintf(os, "%%gl");
+                    break;
+                  case MISCREG_HPSTATE:
+                    ccprintf(os, "%%hpstate");
+                    break;
+                  case MISCREG_HTSTATE:
+                    ccprintf(os, "%%htstate");
+                    break;
+                  case MISCREG_HINTP:
+                    ccprintf(os, "%%hintp");
+                    break;
+                  case MISCREG_HTBA:
+                    ccprintf(os, "%%htba");
+                    break;
+                  case MISCREG_HSTICK_CMPR:
+                    ccprintf(os, "%%hstick_cmpr");
+                    break;
+                  case MISCREG_HVER:
+                    ccprintf(os, "%%hver");
+                    break;
+                  case MISCREG_STRAND_STS_REG:
+                    ccprintf(os, "%%strand_sts_reg");
+                    break;
+                  case MISCREG_FSR:
+                    ccprintf(os, "%%fsr");
+                    break;
+                  default:
+                    ccprintf(os, "%%ctrl%d", reg - Ctrl_Base_DepTag);
+                }
             }
         }
 
diff --git a/src/arch/sparc/isa/decoder.isa b/src/arch/sparc/isa/decoder.isa
index 32256a0..cc6eded 100644
--- a/src/arch/sparc/isa/decoder.isa
+++ b/src/arch/sparc/isa/decoder.isa
@@ -76,9 +76,15 @@
                     }});
                     0x2: bpccx(19, {{
                         if(passesCondition(Ccr<7:4>, COND2))
+                        {
+                            //warn("Took branch!\n");
                             NNPC = xc->readPC() + disp;
+                        }
                         else
+                        {
+                            //warn("Didn't take branch!\n");
                             handle_annul
+                        }
                     }});
                 }
             }
@@ -249,16 +255,14 @@
                 {{((Rs1 & val2) | (~resTemp & (Rs1 | val2)))<63:>}},
                 {{Rs1<63:> == val2<63:> && val2<63:> != resTemp<63:>}}
             );
-            0x1A: umulcc({{
+            0x1A: IntOpCcRes::umulcc({{
                 uint64_t resTemp;
                 Rd = resTemp = Rs1.udw<31:0> * Rs2_or_imm13.udw<31:0>;
-                Y = resTemp<63:32>;}},
-                {{0}},{{0}},{{0}},{{0}});
-            0x1B: smulcc({{
+                Y = resTemp<63:32>;}});
+            0x1B: IntOpCcRes::smulcc({{
                 int64_t resTemp;
                 Rd = resTemp = sext<32>(Rs1.sdw<31:0>) * sext<32>(Rs2_or_imm13<31:0>);
-                Y = resTemp<63:32>;}},
-                {{0}},{{0}},{{0}},{{0}});
+                Y = resTemp<63:32>;}});
             0x1C: subccc({{
                 int64_t resTemp, val2 = Rs2_or_imm13;
                 int64_t carryin = Ccr<0:0>;
@@ -268,10 +272,9 @@
                 {{((~Rs1 & val2) | (resTemp & (~Rs1 | val2)))<63:>}},
                 {{Rs1<63:> != val2<63:> && Rs1<63:> != resTemp<63:>}}
             );
-            0x1D: udivxcc({{
+            0x1D: IntOpCcRes::udivxcc({{
                 if(Rs2_or_imm13.udw == 0) fault = new DivisionByZero;
-                else Rd = Rs1.udw / Rs2_or_imm13.udw;}}
-                ,{{0}},{{0}},{{0}},{{0}});
+                else Rd = Rs1.udw / Rs2_or_imm13.udw;}});
             0x1E: udivcc({{
                 uint32_t resTemp, val2 = Rs2_or_imm13.udw;
                 int32_t overflow = 0;
@@ -888,16 +891,11 @@
                 }
             }});
             0x39: Branch::return({{
-                //If both MemAddressNotAligned and
-                //a fill trap happen, it's not clear
-                //which one should be returned.
                 Addr target = Rs1 + Rs2_or_imm13;
-                if(target & 0x3)
-                    fault = new MemAddressNotAligned;
-                else
-                    NNPC = target;
                 if(fault == NoFault)
                 {
+                    //Check for fills which are higher priority than alignment
+                    //faults.
                     if(Canrestore == 0)
                     {
                         if(Otherwin)
@@ -905,18 +903,15 @@
                         else
                             fault = new FillNNormal(4*Wstate<2:0>);
                     }
+                    //Check for alignment faults
+                    else if(target & 0x3)
+                        fault = new MemAddressNotAligned;
                     else
                     {
-                        //CWP should be set directly so that it always happens
-                        //Also, this will allow writing to the new window and
-                        //reading from the old one
+                        NNPC = target;
                         Cwp = (Cwp - 1 + NWindows) % NWindows;
                         Cansave = Cansave + 1;
                         Canrestore = Canrestore - 1;
-                        //This is here to make sure the CWP is written
-                        //no matter what. This ensures that the results
-                        //are written in the new window as well.
-                        xc->setMiscRegWithEffect(MISCREG_CWP, Cwp);
                     }
                 }
             }});
@@ -934,7 +929,7 @@
                         xc->syscall(R1);
 #endif
                     }
-                }});
+                }}, IsSerializeAfter, IsNonSpeculative);
                 0x2: Trap::tccx({{
                     if(passesCondition(Ccr<7:4>, COND2))
                     {
@@ -947,36 +942,27 @@
                         xc->syscall(R1);
 #endif
                     }
-                }});
+                }}, IsSerializeAfter, IsNonSpeculative);
             }
             0x3B: Nop::flush({{/*Instruction memory flush*/}});
             0x3C: save({{
-                //CWP should be set directly so that it always happens
-                //Also, this will allow writing to the new window and
-                //reading from the old one
                 if(Cansave == 0)
                 {
                     if(Otherwin)
                         fault = new SpillNOther(4*Wstate<5:3>);
                     else
                         fault = new SpillNNormal(4*Wstate<2:0>);
-                    //Cwp = (Cwp + 2) % NWindows;
                 }
                 else if(Cleanwin - Canrestore == 0)
                 {
-                    //Cwp = (Cwp + 1) % NWindows;
                     fault = new CleanWindow;
                 }
                 else
                 {
                     Cwp = (Cwp + 1) % NWindows;
-                    Rd = Rs1 + Rs2_or_imm13;
+                    Rd_next = Rs1 + Rs2_or_imm13;
                     Cansave = Cansave - 1;
                     Canrestore = Canrestore + 1;
-                    //This is here to make sure the CWP is written
-                    //no matter what. This ensures that the results
-                    //are written in the new window as well.
-                    xc->setMiscRegWithEffect(MISCREG_CWP, Cwp);
                 }
             }});
             0x3D: restore({{
@@ -989,17 +975,10 @@
                 }
                 else
                 {
-                    //CWP should be set directly so that it always happens
-                    //Also, this will allow writing to the new window and
-                    //reading from the old one
                     Cwp = (Cwp - 1 + NWindows) % NWindows;
-                    Rd = Rs1 + Rs2_or_imm13;
+                    Rd_prev = Rs1 + Rs2_or_imm13;
                     Cansave = Cansave + 1;
                     Canrestore = Canrestore - 1;
-                    //This is here to make sure the CWP is written
-                    //no matter what. This ensures that the results
-                    //are written in the new window as well.
-                    xc->setMiscRegWithEffect(MISCREG_CWP, Cwp);
                 }
             }});
             0x3E: decode FCN {
diff --git a/src/arch/sparc/isa/formats/basic.isa b/src/arch/sparc/isa/formats/basic.isa
index 56b201b..e8762a2 100644
--- a/src/arch/sparc/isa/formats/basic.isa
+++ b/src/arch/sparc/isa/formats/basic.isa
@@ -97,8 +97,7 @@
 
 // The most basic instruction format... used only for a few misc. insts
 def format BasicOperate(code, *flags) {{
-        iop = InstObjParams(name, Name, 'SparcStaticInst',
-                CodeBlock(code), flags)
+        iop = InstObjParams(name, Name, 'SparcStaticInst', code, flags)
         header_output = BasicDeclare.subst(iop)
         decoder_output = BasicConstructor.subst(iop)
         decode_block = BasicDecode.subst(iop)
diff --git a/src/arch/sparc/isa/formats/branch.isa b/src/arch/sparc/isa/formats/branch.isa
index 5fb7ade..2db7563 100644
--- a/src/arch/sparc/isa/formats/branch.isa
+++ b/src/arch/sparc/isa/formats/branch.isa
@@ -170,7 +170,7 @@
             printMnemonic(response, mnemonic);
             ccprintf(response, "0x%x", target);
 
-            if(symtab->findNearestSymbol(target, symbol, symbolAddr))
+            if(symtab && symtab->findNearestSymbol(target, symbol, symbolAddr))
             {
                 ccprintf(response, " <%s", symbol);
                 if(symbolAddr != target)
@@ -178,6 +178,10 @@
                 else
                     ccprintf(response, ">");
             }
+            else
+            {
+                ccprintf(response, "<%d>", target);
+            }
 
             return response.str();
         }
@@ -244,7 +248,6 @@
 // Primary format for branch instructions:
 def format BranchN(bits, code, *opt_flags) {{
         code = re.sub(r'handle_annul', handle_annul, code)
-        codeBlk = CodeBlock(code)
         new_opt_flags = []
         for flag in opt_flags:
             if flag == ',a':
@@ -252,7 +255,7 @@
                 Name += 'Annul'
             else:
                 new_opt_flags += flag
-        iop = InstObjParams(name, Name, "BranchNBits<%d>" % bits, codeBlk, new_opt_flags)
+        iop = InstObjParams(name, Name, "BranchNBits<%d>" % bits, code, new_opt_flags)
         header_output = BasicDeclare.subst(iop)
         decoder_output = BasicConstructor.subst(iop)
         exec_output = BranchExecute.subst(iop)
@@ -262,8 +265,7 @@
 // Primary format for branch instructions:
 def format BranchSplit(code, *opt_flags) {{
         code = re.sub(r'handle_annul', handle_annul, code)
-        codeBlk = CodeBlock(code)
-        iop = InstObjParams(name, Name, 'BranchSplit', codeBlk, opt_flags)
+        iop = InstObjParams(name, Name, 'BranchSplit', code, opt_flags)
         header_output = BasicDeclare.subst(iop)
         decoder_output = BasicConstructor.subst(iop)
         exec_output = BranchExecute.subst(iop)
diff --git a/src/arch/sparc/isa/formats/integerop.isa b/src/arch/sparc/isa/formats/integerop.isa
index 9470fc5..f877b87 100644
--- a/src/arch/sparc/isa/formats/integerop.isa
+++ b/src/arch/sparc/isa/formats/integerop.isa
@@ -263,14 +263,15 @@
     def doIntFormat(code, ccCode, name, Name, opt_flags):
         (usesImm, code, immCode,
          rString, iString) = splitOutImm(code)
-        iop = InstObjParams(name, Name,	'IntOp', code,
-                opt_flags, {"cc_code": ccCode})
+        iop = InstObjParams(name, Name,	'IntOp',
+                {"code": code, "cc_code": ccCode},
+                opt_flags)
         header_output = BasicDeclare.subst(iop)
         decoder_output = BasicConstructor.subst(iop)
         exec_output = IntOpExecute.subst(iop)
         if usesImm:
             imm_iop = InstObjParams(name, Name + 'Imm', 'IntOpImm' + iString,
-                    immCode, opt_flags, {"cc_code": ccCode})
+                    {"code": immCode, "cc_code": ccCode}, opt_flags)
             header_output += BasicDeclare.subst(imm_iop)
             decoder_output += BasicConstructor.subst(imm_iop)
             exec_output += IntOpExecute.subst(imm_iop)
@@ -341,7 +342,7 @@
 
 def format SetHi(code, *opt_flags) {{
     iop = InstObjParams(name, Name, 'SetHi',
-            code, opt_flags, {"cc_code": ''})
+            {"code": code, "cc_code": ''}, opt_flags)
     header_output = BasicDeclare.subst(iop)
     decoder_output = BasicConstructor.subst(iop)
     exec_output = IntOpExecute.subst(iop)
diff --git a/src/arch/sparc/isa/formats/mem/basicmem.isa b/src/arch/sparc/isa/formats/mem/basicmem.isa
index 55e9fba..1d9075a 100644
--- a/src/arch/sparc/isa/formats/mem/basicmem.isa
+++ b/src/arch/sparc/isa/formats/mem/basicmem.isa
@@ -55,16 +55,20 @@
     def doMemFormat(code, execute, faultCode, name, Name, asi, opt_flags):
         addrCalcReg = 'EA = Rs1 + Rs2;'
         addrCalcImm = 'EA = Rs1 + imm;'
-        iop = InstObjParams(name, Name, 'Mem', code,
-                opt_flags, {"fault_check": faultCode, "ea_code": addrCalcReg})
-        iop_imm = InstObjParams(name, Name + "Imm", 'MemImm', code,
-                opt_flags, {"fault_check": faultCode, "ea_code": addrCalcImm})
+        iop = InstObjParams(name, Name, 'Mem',
+                {"code": code, "fault_check": faultCode,
+                 "ea_code": addrCalcReg},
+                opt_flags)
+        iop_imm = InstObjParams(name, Name + "Imm", 'MemImm',
+                {"code": code, "fault_check": faultCode,
+                 "ea_code": addrCalcImm},
+                opt_flags)
         header_output = MemDeclare.subst(iop) + MemDeclare.subst(iop_imm)
         decoder_output = BasicConstructor.subst(iop) + BasicConstructor.subst(iop_imm)
         decode_block = ROrImmDecode.subst(iop)
         exec_output = doDualSplitExecute(code, addrCalcReg, addrCalcImm,
-                execute, faultCode, name, name + "Imm", Name, Name + "Imm",
-                asi, opt_flags)
+                execute, faultCode, name, name + "Imm",
+                Name, Name + "Imm", asi, opt_flags)
         return (header_output, decoder_output, exec_output, decode_block)
 }};
 
@@ -72,7 +76,7 @@
         (header_output,
          decoder_output,
          exec_output,
-         decode_block) = doMemFormat(code, LoadExecute,
+         decode_block) = doMemFormat(code, LoadFuncs,
             AlternateASIPrivFaultCheck, name, Name, asi, opt_flags)
 }};
 
@@ -80,7 +84,7 @@
         (header_output,
          decoder_output,
          exec_output,
-         decode_block) = doMemFormat(code, StoreExecute,
+         decode_block) = doMemFormat(code, StoreFuncs,
             AlternateASIPrivFaultCheck, name, Name, asi, opt_flags)
 }};
 
@@ -89,7 +93,7 @@
          decoder_output,
          exec_output,
          decode_block) = doMemFormat(code,
-             LoadExecute, '', name, Name, 0, opt_flags)
+             LoadFuncs, '', name, Name, 0, opt_flags)
 }};
 
 def format Store(code, *opt_flags) {{
@@ -97,5 +101,5 @@
          decoder_output,
          exec_output,
          decode_block) = doMemFormat(code,
-             StoreExecute, '', name, Name, 0, opt_flags)
+             StoreFuncs, '', name, Name, 0, opt_flags)
 }};
diff --git a/src/arch/sparc/isa/formats/mem/blockmem.isa b/src/arch/sparc/isa/formats/mem/blockmem.isa
index c36fede..9795d23 100644
--- a/src/arch/sparc/isa/formats/mem/blockmem.isa
+++ b/src/arch/sparc/isa/formats/mem/blockmem.isa
@@ -456,14 +456,14 @@
             else:
                 flag_code = "flags[IsDelayedCommit] = true;"
             pcedCode = matcher.sub("Frd_%d" % microPc, code)
-            iop = InstObjParams(name, Name, 'BlockMem', pcedCode,
-                    opt_flags, {"ea_code": addrCalcReg,
+            iop = InstObjParams(name, Name, 'BlockMem',
+                    {"code": pcedCode, "ea_code": addrCalcReg,
                     "fault_check": faultCode, "micro_pc": microPc,
-                    "set_flags": flag_code})
-            iop_imm = InstObjParams(name, Name + 'Imm', 'BlockMemImm', pcedCode,
-                    opt_flags, {"ea_code": addrCalcImm,
+                    "set_flags": flag_code}, opt_flags)
+            iop_imm = InstObjParams(name, Name + 'Imm', 'BlockMemImm',
+                    {"code": pcedCode, "ea_code": addrCalcImm,
                     "fault_check": faultCode, "micro_pc": microPc,
-                    "set_flags": flag_code})
+                    "set_flags": flag_code}, opt_flags)
             decoder_output += BlockMemMicroConstructor.subst(iop)
             decoder_output += BlockMemMicroConstructor.subst(iop_imm)
             exec_output += doDualSplitExecute(
@@ -496,18 +496,18 @@
             else:
                 flag_code = "flags[IsDelayedCommit] = true; flags[IsFirstMicroOp] = true;"
                 pcedCode = matcher.sub("uReg0", code)
-            iop = InstObjParams(name, Name, 'TwinMem', pcedCode,
-                    opt_flags, {"ea_code": addrCalcReg,
+            iop = InstObjParams(name, Name, 'TwinMem',
+                    {"code": pcedCode, "ea_code": addrCalcReg,
                     "fault_check": faultCode, "micro_pc": microPc,
-                    "set_flags": flag_code})
-            iop_imm = InstObjParams(name, Name + 'Imm', 'TwinMemImm', pcedCode,
-                    opt_flags, {"ea_code": addrCalcImm,
+                    "set_flags": flag_code}, opt_flags)
+            iop_imm = InstObjParams(name, Name + 'Imm', 'TwinMemImm',
+                    {"code": pcedCode, "ea_code": addrCalcImm,
                     "fault_check": faultCode, "micro_pc": microPc,
-                    "set_flags": flag_code})
+                    "set_flags": flag_code}, opt_flags)
             decoder_output += BlockMemMicroConstructor.subst(iop)
             decoder_output += BlockMemMicroConstructor.subst(iop_imm)
             exec_output += doDualSplitExecute(
-                    pcedCode, addrCalcReg, addrCalcImm, LoadExecute, faultCode,
+                    pcedCode, addrCalcReg, addrCalcImm, LoadFuncs, faultCode,
                     makeMicroName(name, microPc),
                     makeMicroName(name + "Imm", microPc),
                     makeMicroName(Name, microPc),
@@ -527,7 +527,7 @@
          decoder_output,
          exec_output,
          decode_block) = doBlockMemFormat(code, faultCode,
-             LoadExecute, name, Name, asi, opt_flags)
+             LoadFuncs, name, Name, asi, opt_flags)
 }};
 
 def format BlockStore(code, asi, *opt_flags) {{
@@ -539,7 +539,7 @@
          decoder_output,
          exec_output,
          decode_block) = doBlockMemFormat(code, faultCode,
-             StoreExecute, name, Name, asi, opt_flags)
+             StoreFuncs, name, Name, asi, opt_flags)
 }};
 
 def format TwinLoad(code, asi, *opt_flags) {{
diff --git a/src/arch/sparc/isa/formats/mem/util.isa b/src/arch/sparc/isa/formats/mem/util.isa
index 3b02f58..dbaabdc 100644
--- a/src/arch/sparc/isa/formats/mem/util.isa
+++ b/src/arch/sparc/isa/formats/mem/util.isa
@@ -145,7 +145,7 @@
             %(op_decl)s;
             %(op_rd)s;
             %(ea_code)s;
-            DPRINTF(Sparc, "The address is 0x%x\n", EA);
+            DPRINTF(Sparc, "%s: The address is 0x%x\n", mnemonic, EA);
             %(fault_check)s;
             if(fault == NoFault)
             {
@@ -163,17 +163,19 @@
 
             return fault;
         }
+}};
 
+def template LoadInitiateAcc {{
         Fault %(class_name)s::initiateAcc(%(CPU_exec_context)s * xc,
                 Trace::InstRecord * traceData) const
         {
             Fault fault = NoFault;
             Addr EA;
-            uint%(mem_acc_size)s_t Mem;
             %(fp_enable_check)s;
-            %(ea_decl)s;
-            %(ea_rd)s;
+            %(op_decl)s;
+            %(op_rd)s;
             %(ea_code)s;
+            DPRINTF(Sparc, "%s: The address is 0x%x\n", mnemonic, EA);
             %(fault_check)s;
             if(fault == NoFault)
             {
@@ -181,18 +183,20 @@
             }
             return fault;
         }
+}};
 
+def template LoadCompleteAcc {{
         Fault %(class_name)s::completeAcc(PacketPtr pkt, %(CPU_exec_context)s * xc,
                 Trace::InstRecord * traceData) const
         {
             Fault fault = NoFault;
-            %(code_decl)s;
-            %(code_rd)s;
+            %(op_decl)s;
+            %(op_rd)s;
             Mem = pkt->get<typeof(Mem)>();
             %(code)s;
             if(fault == NoFault)
             {
-                %(code_wb)s;
+                %(op_wb)s;
             }
             return fault;
         }
@@ -212,7 +216,7 @@
             %(op_decl)s;
             %(op_rd)s;
             %(ea_code)s;
-            DPRINTF(Sparc, "The address is 0x%x\n", EA);
+            DPRINTF(Sparc, "%s: The address is 0x%x\n", mnemonic, EA);
             %(fault_check)s;
             if(fault == NoFault)
             {
@@ -231,7 +235,9 @@
 
             return fault;
         }
+}};
 
+def template StoreInitiateAcc {{
         Fault %(class_name)s::initiateAcc(%(CPU_exec_context)s * xc,
                 Trace::InstRecord * traceData) const
         {
@@ -242,7 +248,7 @@
             %(op_decl)s;
             %(op_rd)s;
             %(ea_code)s;
-            DPRINTF(Sparc, "The address is 0x%x\n", EA);
+            DPRINTF(Sparc, "%s: The address is 0x%x\n", mnemonic, EA);
             %(fault_check)s;
             if(fault == NoFault)
             {
@@ -260,7 +266,9 @@
             }
             return fault;
         }
+}};
 
+def template StoreCompleteAcc {{
         Fault %(class_name)s::completeAcc(PacketPtr, %(CPU_exec_context)s * xc,
                 Trace::InstRecord * traceData) const
         {
@@ -280,6 +288,8 @@
 
 //Here are some code snippets which check for various fault conditions
 let {{
+    LoadFuncs = [LoadExecute, LoadInitiateAcc, LoadCompleteAcc]
+    StoreFuncs = [StoreExecute, StoreInitiateAcc, StoreCompleteAcc]
     # The LSB can be zero, since it's really the MSB in doubles and quads
     # and we're dealing with doubles
     BlockAlignmentFaultCheck = '''
@@ -320,21 +330,11 @@
 //and in the other they're distributed across two. Also note that for
 //execute functions, the name of the base class doesn't matter.
 let {{
-    def doSplitExecute(code, execute, name, Name, asi, opt_flags, microParam):
+    def doSplitExecute(execute, name, Name, asi, opt_flags, microParam):
         microParam["asi_val"] = asi;
-        codeParam = microParam.copy()
-        codeParam["ea_code"] = ''
-        codeIop = InstObjParams(name, Name, '', code, opt_flags, codeParam)
-        eaIop = InstObjParams(name, Name, '', microParam["ea_code"],
-                opt_flags, microParam)
-        iop = InstObjParams(name, Name, '', code, opt_flags, microParam)
-        (iop.ea_decl,
-         iop.ea_rd,
-         iop.ea_wb) = (eaIop.op_decl, eaIop.op_rd, eaIop.op_wb)
-        (iop.code_decl,
-         iop.code_rd,
-         iop.code_wb) = (codeIop.op_decl, codeIop.op_rd, codeIop.op_wb)
-        return execute.subst(iop)
+        iop = InstObjParams(name, Name, '', microParam, opt_flags)
+        (execf, initf, compf) = execute
+        return execf.subst(iop) + initf.subst(iop) + compf.subst(iop)
 
 
     def doDualSplitExecute(code, eaRegCode, eaImmCode, execute,
@@ -343,8 +343,9 @@
         for (eaCode, name, Name) in (
                 (eaRegCode, nameReg, NameReg),
                 (eaImmCode, nameImm, NameImm)):
-            microParams = {"ea_code" : eaCode, "fault_check": faultCode}
-            executeCode += doSplitExecute(code, execute, name, Name,
+            microParams = {"code": code, "ea_code": eaCode,
+                "fault_check": faultCode}
+            executeCode += doSplitExecute(execute, name, Name,
                     asi, opt_flags, microParams)
         return executeCode
 }};
diff --git a/src/arch/sparc/isa/formats/nop.isa b/src/arch/sparc/isa/formats/nop.isa
index 37ef2e8..de2ba2f 100644
--- a/src/arch/sparc/isa/formats/nop.isa
+++ b/src/arch/sparc/isa/formats/nop.isa
@@ -88,9 +88,7 @@
 
 // Primary format for integer operate instructions:
 def format Nop(code, *opt_flags) {{
-        orig_code = code
-        cblk = CodeBlock(code)
-        iop = InstObjParams(name, Name, 'Nop', cblk, opt_flags)
+        iop = InstObjParams(name, Name, 'Nop', code, opt_flags)
         header_output = BasicDeclare.subst(iop)
         decoder_output = BasicConstructor.subst(iop)
         decode_block = BasicDecode.subst(iop)
diff --git a/src/arch/sparc/isa/formats/priv.isa b/src/arch/sparc/isa/formats/priv.isa
index 3d47ca0..36403af 100644
--- a/src/arch/sparc/isa/formats/priv.isa
+++ b/src/arch/sparc/isa/formats/priv.isa
@@ -235,8 +235,9 @@
                 name = mnem
                 regBase = 'WrPriv'
                 break
-        iop = InstObjParams(name, Name, regBase, code,
-                opt_flags, {"check": checkCode, "reg_name": regName})
+        iop = InstObjParams(name, Name, regBase,
+                {"code": code, "check": checkCode, "reg_name": regName},
+                opt_flags)
         header_output = BasicDeclare.subst(iop)
         if regName == '':
             decoder_output = BasicConstructor.subst(iop)
@@ -245,7 +246,8 @@
         exec_output = PrivExecute.subst(iop)
         if usesImm:
             imm_iop = InstObjParams(name, Name + 'Imm', regBase + 'Imm',
-                    immCode, opt_flags, {"check": checkCode, "reg_name": regName})
+                    {"code": immCode, "check": checkCode, "reg_name": regName},
+                    opt_flags)
             header_output += BasicDeclare.subst(imm_iop)
             if regName == '':
                 decoder_output += BasicConstructor.subst(imm_iop)
diff --git a/src/arch/sparc/isa/formats/trap.isa b/src/arch/sparc/isa/formats/trap.isa
index 04d467c..9c118b2 100644
--- a/src/arch/sparc/isa/formats/trap.isa
+++ b/src/arch/sparc/isa/formats/trap.isa
@@ -83,9 +83,7 @@
 }};
 
 def format Trap(code, *opt_flags) {{
-        orig_code = code
-        cblk = CodeBlock(code)
-        iop = InstObjParams(name, Name, 'Trap', cblk, opt_flags)
+        iop = InstObjParams(name, Name, 'Trap', code, opt_flags)
         header_output = BasicDeclare.subst(iop)
         decoder_output = BasicConstructor.subst(iop)
         decode_block = BasicDecode.subst(iop)
diff --git a/src/arch/sparc/isa/includes.isa b/src/arch/sparc/isa/includes.isa
index 5d90fbd..474af3a 100644
--- a/src/arch/sparc/isa/includes.isa
+++ b/src/arch/sparc/isa/includes.isa
@@ -38,12 +38,13 @@
 #include <sstream>
 #include <iostream>
 
-#include "cpu/static_inst.hh"
 #include "arch/sparc/faults.hh"
-#include "mem/request.hh"  // some constructors use MemReq flags
-#include "mem/packet.hh"
 #include "arch/sparc/isa_traits.hh"
 #include "arch/sparc/regfile.hh"
+#include "base/misc.hh"
+#include "cpu/static_inst.hh"
+#include "mem/packet.hh"
+#include "mem/request.hh"  // some constructors use MemReq flags
 }};
 
 output decoder {{
diff --git a/src/arch/sparc/isa/operands.isa b/src/arch/sparc/isa/operands.isa
index abb82f8..2d1c3d3 100644
--- a/src/arch/sparc/isa/operands.isa
+++ b/src/arch/sparc/isa/operands.isa
@@ -56,15 +56,26 @@
     # Int regs default to unsigned, but code should not count on this.
     # For clarity, descriptions that depend on unsigned behavior should
     # explicitly specify '.uq'.
+
     'Rd': 		('IntReg', 'udw', 'RD', 'IsInteger', 1),
+    # The Rd from the previous window
+    'Rd_prev':		('IntReg', 'udw', 'RD + NumIntArchRegs + NumMicroIntRegs', 'IsInteger', 2),
+    # The Rd from the next window
+    'Rd_next':		('IntReg', 'udw', 'RD + 2 * NumIntArchRegs + NumMicroIntRegs', 'IsInteger', 3),
     # For microcoded twin load instructions, RdTwin appears in the "code"
-    # for the instruction and is replaced by RdLow or RdHigh by the format
+    # for the instruction and is replaced by RdLow or RdHigh by the format
     # before it's processed by the iop.
-    'RdLow': 		('IntReg', 'udw', 'RD & (~1)', 'IsInteger', 2),
-    'RdHigh':		('IntReg', 'udw', 'RD | 1', 'IsInteger', 3),
-    'Rs1': 		('IntReg', 'udw', 'RS1', 'IsInteger', 4),
-    'Rs2': 		('IntReg', 'udw', 'RS2', 'IsInteger', 5),
-    'uReg0':		('IntReg', 'udw', 'NumIntArchRegs', 'IsInteger', 6),
+    # The low (even) register of a two register pair
+    'RdLow': 		('IntReg', 'udw', 'RD & (~1)', 'IsInteger', 4),
+    # The high (odd) register of a two register pair
+    'RdHigh':		('IntReg', 'udw', 'RD | 1', 'IsInteger', 5),
+    'Rs1': 		('IntReg', 'udw', 'RS1', 'IsInteger', 6),
+    'Rs2': 		('IntReg', 'udw', 'RS2', 'IsInteger', 7),
+    # A microcode register. Right now, this is the only one.
+    'uReg0':		('IntReg', 'udw', 'NumIntArchRegs', 'IsInteger', 8),
+    # Because double and quad precision register numbers are decoded
+    # differently, they get different operands. The single precision versions
+    # have an 's' appended to their name.
     'Frds':		('FloatReg', 'sf', 'RD', 'IsFloating', 10),
     'Frd':		('FloatReg', 'df', 'dfpr(RD)', 'IsFloating', 10),
     # Each Frd_N refers to the Nth double precision register from Frd.
@@ -83,14 +94,17 @@
     'Frs2':		('FloatReg', 'df', 'dfpr(RS2)', 'IsFloating', 12),
     'NPC': 		('NPC', 'udw', None, ( None, None, 'IsControl' ), 31),
     'NNPC':		('NNPC', 'udw', None, (None, None, 'IsControl' ), 32),
+    # Registers which are used explicitly in instructions
     'R0':  		('IntReg', 'udw', '0', None, 6),
     'R1':  		('IntReg', 'udw', '1', None, 7),
     'R15': 		('IntReg', 'udw', '15', 'IsInteger', 8),
     'R16': 		('IntReg', 'udw', '16', None, 9),
 
     # Control registers
-    'Y':		('ControlReg', 'udw', 'MISCREG_Y', None, 40),
-    'Ccr':		('ControlReg', 'udw', 'MISCREG_CCR', None, 41),
+#   'Y':		('ControlReg', 'udw', 'MISCREG_Y', None, 40),
+#   'Ccr':		('ControlReg', 'udw', 'MISCREG_CCR', None, 41),
+    'Y':		('IntReg', 'udw', 'NumIntArchRegs + 1', None, 40),
+    'Ccr':		('IntReg', 'udw', 'NumIntArchRegs + 2', None, 41),
     'Asi':		('ControlReg', 'udw', 'MISCREG_ASI', None, 42),
     'Fprs':		('ControlReg', 'udw', 'MISCREG_FPRS', None, 43),
     'Pcr':		('ControlReg', 'udw', 'MISCREG_PCR', None, 44),
@@ -112,12 +126,17 @@
     'Pstate':		('ControlReg', 'udw', 'MISCREG_PSTATE', None, 59),
     'Tl':		('ControlReg', 'udw', 'MISCREG_TL', None, 60),
     'Pil':		('ControlReg', 'udw', 'MISCREG_PIL', None, 61),
-    'Cwp':		('ControlReg', 'udw', 'MISCREG_CWP', None, 62),
-    'Cansave':		('ControlReg', 'udw', 'MISCREG_CANSAVE', None, 63),
-    'Canrestore':	('ControlReg', 'udw', 'MISCREG_CANRESTORE', None, 64),
-    'Cleanwin':		('ControlReg', 'udw', 'MISCREG_CLEANWIN', None, 65),
-    'Otherwin':		('ControlReg', 'udw', 'MISCREG_OTHERWIN', None, 66),
-    'Wstate':		('ControlReg', 'udw', 'MISCREG_WSTATE', None, 67),
+    'Cwp':		('ControlReg', 'udw', 'MISCREG_CWP', (None, None, ['IsSerializeAfter','IsSerializing','IsNonSpeculative']), 62),
+#   'Cansave':		('ControlReg', 'udw', 'MISCREG_CANSAVE', None, 63),
+#   'Canrestore':	('ControlReg', 'udw', 'MISCREG_CANRESTORE', None, 64),
+#   'Cleanwin':		('ControlReg', 'udw', 'MISCREG_CLEANWIN', None, 65),
+#   'Otherwin':		('ControlReg', 'udw', 'MISCREG_OTHERWIN', None, 66),
+#   'Wstate':		('ControlReg', 'udw', 'MISCREG_WSTATE', None, 67),
+    'Cansave':		('IntReg', 'udw', 'NumIntArchRegs + 3', None, 63),
+    'Canrestore':	('IntReg', 'udw', 'NumIntArchRegs + 4', None, 64),
+    'Cleanwin':		('IntReg', 'udw', 'NumIntArchRegs + 5', None, 65),
+    'Otherwin':		('IntReg', 'udw', 'NumIntArchRegs + 6', None, 66),
+    'Wstate':		('IntReg', 'udw', 'NumIntArchRegs + 7', None, 67),
     'Gl':               ('ControlReg', 'udw', 'MISCREG_GL', None, 68),
 
     'Hpstate':		('ControlReg', 'udw', 'MISCREG_HPSTATE', None, 69),
diff --git a/src/arch/sparc/isa_traits.hh b/src/arch/sparc/isa_traits.hh
index 062cc8d..64ae6ab 100644
--- a/src/arch/sparc/isa_traits.hh
+++ b/src/arch/sparc/isa_traits.hh
@@ -58,8 +58,8 @@
 
     // These enumerate all the registers for dependence tracking.
     enum DependenceTags {
-        FP_Base_DepTag = 33,
-        Ctrl_Base_DepTag = 97
+        FP_Base_DepTag = 32*3+8,
+        Ctrl_Base_DepTag = FP_Base_DepTag + 64
     };
 
     // semantically meaningful register indices
diff --git a/src/arch/sparc/miscregfile.cc b/src/arch/sparc/miscregfile.cc
index d9fcb02..422bc2f 100644
--- a/src/arch/sparc/miscregfile.cc
+++ b/src/arch/sparc/miscregfile.cc
@@ -46,15 +46,16 @@
 string SparcISA::getMiscRegName(RegIndex index)
 {
     static::string miscRegName[NumMiscRegs] =
-       {"y", "ccr", "asi", "tick", "fprs", "pcr", "pic",
-        "gsr", "softint_set", "softint_clr", "softint", "tick_cmpr",
-        "stick", "stick_cmpr",
-        "tpc", "tnpc", "tstate", "tt", "privtick", "tba", "pstate", "tl",
-        "pil", "cwp", "cansave", "canrestore", "cleanwin", "otherwin",
-        "wstate", "gl",
-        "hpstate", "htstate", "hintp", "htba", "hver", "strand_sts_reg",
-        "hstick_cmpr",
-        "fsr"};
+        {/*"y", "ccr",*/ "asi", "tick", "fprs", "pcr", "pic",
+         "gsr", "softint_set", "softint_clr", "softint", "tick_cmpr",
+         "stick", "stick_cmpr",
+         "tpc", "tnpc", "tstate", "tt", "privtick", "tba", "pstate", "tl",
+         "pil", "cwp", /*"cansave", "canrestore", "cleanwin", "otherwin",
+         "wstate",*/ "gl",
+         "hpstate", "htstate", "hintp", "htba", "hver", "strand_sts_reg",
+         "hstick_cmpr",
+         "fsr"};
+
     return miscRegName[index];
 }
 
@@ -65,8 +66,8 @@
 
 void MiscRegFile::clear()
 {
-    y = 0;
-    ccr = 0;
+    //y = 0;
+    //ccr = 0;
     asi = 0;
     tick = ULL(1) << 63;
     fprs = 0;
@@ -83,11 +84,11 @@
     tl = 0;
     pil = 0;
     cwp = 0;
-    cansave = 0;
-    canrestore = 0;
-    cleanwin = 0;
-    otherwin = 0;
-    wstate = 0;
+    //cansave = 0;
+    //canrestore = 0;
+    //cleanwin = 0;
+    //otherwin = 0;
+    //wstate = 0;
     gl = 0;
     //In a T1, bit 11 is apparently always 1
     hpstate = (1 << 11);
@@ -149,10 +150,10 @@
                (uint64_t)priContext << 32 |
                (uint64_t)secContext << 48;
 
-      case MISCREG_Y:
-        return y;
-      case MISCREG_CCR:
-        return ccr;
+      //case MISCREG_Y:
+      //  return y;
+      //case MISCREG_CCR:
+      //  return ccr;
       case MISCREG_ASI:
         return asi;
       case MISCREG_FPRS:
@@ -195,16 +196,16 @@
         return pil;
       case MISCREG_CWP:
         return cwp;
-      case MISCREG_CANSAVE:
-        return cansave;
-      case MISCREG_CANRESTORE:
-        return canrestore;
-      case MISCREG_CLEANWIN:
-        return cleanwin;
-      case MISCREG_OTHERWIN:
-        return otherwin;
-      case MISCREG_WSTATE:
-        return wstate;
+      //case MISCREG_CANSAVE:
+      //  return cansave;
+      //case MISCREG_CANRESTORE:
+      //  return canrestore;
+      //case MISCREG_CLEANWIN:
+      //  return cleanwin;
+      //case MISCREG_OTHERWIN:
+      //  return otherwin;
+      //case MISCREG_WSTATE:
+      //  return wstate;
       case MISCREG_GL:
         return gl;
 
@@ -375,12 +376,12 @@
 void MiscRegFile::setReg(int miscReg, const MiscReg &val)
 {
     switch (miscReg) {
-      case MISCREG_Y:
-        y = val;
-        break;
-      case MISCREG_CCR:
-        ccr = val;
-        break;
+//      case MISCREG_Y:
+//        y = val;
+//        break;
+//      case MISCREG_CCR:
+//        ccr = val;
+//        break;
       case MISCREG_ASI:
         asi = val;
         break;
@@ -441,21 +442,21 @@
       case MISCREG_CWP:
         cwp = val;
         break;
-      case MISCREG_CANSAVE:
-        cansave = val;
-        break;
-      case MISCREG_CANRESTORE:
-        canrestore = val;
-        break;
-      case MISCREG_CLEANWIN:
-        cleanwin = val;
-        break;
-      case MISCREG_OTHERWIN:
-        otherwin = val;
-        break;
-      case MISCREG_WSTATE:
-        wstate = val;
-        break;
+//      case MISCREG_CANSAVE:
+//        cansave = val;
+//        break;
+//      case MISCREG_CANRESTORE:
+//        canrestore = val;
+//        break;
+//      case MISCREG_CLEANWIN:
+//        cleanwin = val;
+//        break;
+//      case MISCREG_OTHERWIN:
+//        otherwin = val;
+//        break;
+//      case MISCREG_WSTATE:
+//        wstate = val;
+//        break;
       case MISCREG_GL:
         gl = val;
         break;
@@ -676,23 +677,23 @@
 {
     SERIALIZE_SCALAR(pstate);
     SERIALIZE_SCALAR(tba);
-    SERIALIZE_SCALAR(y);
+//    SERIALIZE_SCALAR(y);
     SERIALIZE_SCALAR(pil);
     SERIALIZE_SCALAR(gl);
     SERIALIZE_SCALAR(cwp);
     SERIALIZE_ARRAY(tt, MaxTL);
-    SERIALIZE_SCALAR(ccr);
+//    SERIALIZE_SCALAR(ccr);
     SERIALIZE_SCALAR(asi);
     SERIALIZE_SCALAR(tl);
     SERIALIZE_ARRAY(tpc, MaxTL);
     SERIALIZE_ARRAY(tnpc, MaxTL);
     SERIALIZE_ARRAY(tstate, MaxTL);
     SERIALIZE_SCALAR(tick);
-    SERIALIZE_SCALAR(cansave);
-    SERIALIZE_SCALAR(canrestore);
-    SERIALIZE_SCALAR(otherwin);
-    SERIALIZE_SCALAR(cleanwin);
-    SERIALIZE_SCALAR(wstate);
+//    SERIALIZE_SCALAR(cansave);
+//    SERIALIZE_SCALAR(canrestore);
+//    SERIALIZE_SCALAR(otherwin);
+//    SERIALIZE_SCALAR(cleanwin);
+//    SERIALIZE_SCALAR(wstate);
     SERIALIZE_SCALAR(fsr);
     SERIALIZE_SCALAR(fprs);
     SERIALIZE_SCALAR(hpstate);
@@ -735,23 +736,23 @@
 {
     UNSERIALIZE_SCALAR(pstate);
     UNSERIALIZE_SCALAR(tba);
-    UNSERIALIZE_SCALAR(y);
+//    UNSERIALIZE_SCALAR(y);
     UNSERIALIZE_SCALAR(pil);
     UNSERIALIZE_SCALAR(gl);
     UNSERIALIZE_SCALAR(cwp);
     UNSERIALIZE_ARRAY(tt, MaxTL);
-    UNSERIALIZE_SCALAR(ccr);
+//    UNSERIALIZE_SCALAR(ccr);
     UNSERIALIZE_SCALAR(asi);
     UNSERIALIZE_SCALAR(tl);
     UNSERIALIZE_ARRAY(tpc, MaxTL);
     UNSERIALIZE_ARRAY(tnpc, MaxTL);
     UNSERIALIZE_ARRAY(tstate, MaxTL);
     UNSERIALIZE_SCALAR(tick);
-    UNSERIALIZE_SCALAR(cansave);
-    UNSERIALIZE_SCALAR(canrestore);
-    UNSERIALIZE_SCALAR(otherwin);
-    UNSERIALIZE_SCALAR(cleanwin);
-    UNSERIALIZE_SCALAR(wstate);
+//    UNSERIALIZE_SCALAR(cansave);
+//    UNSERIALIZE_SCALAR(canrestore);
+//    UNSERIALIZE_SCALAR(otherwin);
+//    UNSERIALIZE_SCALAR(cleanwin);
+//    UNSERIALIZE_SCALAR(wstate);
     UNSERIALIZE_SCALAR(fsr);
     UNSERIALIZE_SCALAR(fprs);
     UNSERIALIZE_SCALAR(hpstate);
diff --git a/src/arch/sparc/miscregfile.hh b/src/arch/sparc/miscregfile.hh
index 8a2e8e8..66c9f17 100644
--- a/src/arch/sparc/miscregfile.hh
+++ b/src/arch/sparc/miscregfile.hh
@@ -47,8 +47,8 @@
     enum MiscRegIndex
     {
         /** Ancillary State Registers */
-        MISCREG_Y, /* 0 */
-        MISCREG_CCR,
+//        MISCREG_Y,
+//        MISCREG_CCR,
         MISCREG_ASI,
         MISCREG_TICK,
         MISCREG_FPRS,
@@ -73,11 +73,11 @@
         MISCREG_TL,
         MISCREG_PIL,
         MISCREG_CWP,
-        MISCREG_CANSAVE,
-        MISCREG_CANRESTORE,
-        MISCREG_CLEANWIN,
-        MISCREG_OTHERWIN,
-        MISCREG_WSTATE,
+//        MISCREG_CANSAVE,
+//        MISCREG_CANRESTORE,
+//        MISCREG_CLEANWIN,
+//        MISCREG_OTHERWIN,
+//        MISCREG_WSTATE,
         MISCREG_GL,
 
         /** Hyper privileged registers */
@@ -171,8 +171,8 @@
       private:
 
         /* ASR Registers */
-        uint64_t y;		// Y (used in obsolete multiplication)
-        uint8_t	ccr;		// Condition Code Register
+        //uint64_t y;		// Y (used in obsolete multiplication)
+        //uint8_t ccr;		// Condition Code Register
         uint8_t asi;		// Address Space Identifier
         uint64_t tick;		// Hardware clock-tick counter
         uint8_t	fprs;		// Floating-Point Register State
@@ -197,11 +197,11 @@
         uint8_t tl;		// Trap Level
         uint8_t pil;		// Process Interrupt Register
         uint8_t cwp;		// Current Window Pointer
-        uint8_t cansave;	// Savable windows
-        uint8_t canrestore;	// Restorable windows
-        uint8_t cleanwin;	// Clean windows
-        uint8_t otherwin;	// Other windows
-        uint8_t wstate;		// Window State
+        //uint8_t cansave;	// Savable windows
+        //uint8_t canrestore;	// Restorable windows
+        //uint8_t cleanwin;	// Clean windows
+        //uint8_t otherwin;	// Other windows
+        //uint8_t wstate;		// Window State
         uint8_t gl;             // Global level register
 
         /** Hyperprivileged Registers */
diff --git a/src/arch/sparc/process.cc b/src/arch/sparc/process.cc
index 405e408..1e639b9 100644
--- a/src/arch/sparc/process.cc
+++ b/src/arch/sparc/process.cc
@@ -95,17 +95,22 @@
      */
 
     //No windows contain info from other programs
-    threadContexts[0]->setMiscReg(MISCREG_OTHERWIN, 0);
+    //threadContexts[0]->setMiscReg(MISCREG_OTHERWIN, 0);
+    threadContexts[0]->setIntReg(NumIntArchRegs + 6, 0);
     //There are no windows to pop
-    threadContexts[0]->setMiscReg(MISCREG_CANRESTORE, 0);
+    //threadContexts[0]->setMiscReg(MISCREG_CANRESTORE, 0);
+    threadContexts[0]->setIntReg(NumIntArchRegs + 4, 0);
     //All windows are available to save into
-    threadContexts[0]->setMiscReg(MISCREG_CANSAVE, NWindows - 2);
+    //threadContexts[0]->setMiscReg(MISCREG_CANSAVE, NWindows - 2);
+    threadContexts[0]->setIntReg(NumIntArchRegs + 3, NWindows - 2);
     //All windows are "clean"
-    threadContexts[0]->setMiscReg(MISCREG_CLEANWIN, NWindows);
+    //threadContexts[0]->setMiscReg(MISCREG_CLEANWIN, NWindows);
+    threadContexts[0]->setIntReg(NumIntArchRegs + 5, NWindows);
     //Start with register window 0
     threadContexts[0]->setMiscReg(MISCREG_CWP, 0);
     //Always use spill and fill traps 0
-    threadContexts[0]->setMiscReg(MISCREG_WSTATE, 0);
+    //threadContexts[0]->setMiscReg(MISCREG_WSTATE, 0);
+    threadContexts[0]->setIntReg(NumIntArchRegs + 7, 0);
     //Set the trap level to 0
     threadContexts[0]->setMiscReg(MISCREG_TL, 0);
     //Set the ASI register to something fixed
@@ -427,5 +432,8 @@
     threadContexts[0]->setNextPC(prog_entry + sizeof(MachInst));
     threadContexts[0]->setNextNPC(prog_entry + (2 * sizeof(MachInst)));
 
+    //Align the "stack_min" to a page boundary.
+    stack_min = roundDown(stack_min, pageSize);
+
 //    num_processes++;
 }
diff --git a/src/arch/sparc/regfile.cc b/src/arch/sparc/regfile.cc
index b361335..d39892e 100644
--- a/src/arch/sparc/regfile.cc
+++ b/src/arch/sparc/regfile.cc
@@ -151,6 +151,74 @@
     intRegFile.setReg(intReg, val);
 }
 
+int SparcISA::flattenIntIndex(ThreadContext * tc, int reg)
+{
+    int gl = tc->readMiscReg(MISCREG_GL);
+    int cwp = tc->readMiscReg(MISCREG_CWP);
+    //DPRINTF(Sparc, "Global Level = %d, Current Window Pointer = %d\n", gl, cwp);
+    int newReg;
+    //The total number of global registers
+    int numGlobals = (MaxGL + 1) * 8;
+    if(reg < 8)
+    {
+        //Global register
+        //Put it in the appropriate set of globals
+        newReg = reg + gl * 8;
+    }
+    else if(reg < NumIntArchRegs)
+    {
+        //Regular windowed register
+        //Put it in the window pointed to by cwp
+        newReg = numGlobals +
+            ((reg - 8 - cwp * 16 + NWindows * 16) % (NWindows * 16));
+    }
+    else if(reg < NumIntArchRegs + NumMicroIntRegs)
+    {
+        //Microcode register
+        //Displace from the end of the regular registers
+        newReg = reg - NumIntArchRegs + numGlobals + NWindows * 16;
+    }
+    else if(reg < 2 * NumIntArchRegs + NumMicroIntRegs)
+    {
+        reg -= (NumIntArchRegs + NumMicroIntRegs);
+        if(reg < 8)
+        {
+            //Global register from the previous window
+            //Put it in the appropriate set of globals
+            newReg = reg + gl * 8;
+        }
+        else
+        {
+            //Windowed register from the previous window
+            //Put it in the window before the one pointed to by cwp
+            newReg = numGlobals +
+                ((reg - 8 - (cwp - 1) * 16 + NWindows * 16) % (NWindows * 16));
+        }
+    }
+    else if(reg < 3 * NumIntArchRegs + NumMicroIntRegs)
+    {
+        reg -= (2 * NumIntArchRegs + NumMicroIntRegs);
+        if(reg < 8)
+        {
+            //Global register from the next window
+            //Put it in the appropriate set of globals
+            newReg = reg + gl * 8;
+        }
+        else
+        {
+            //Windowed register from the next window
+            //Put it in the window after the one pointed to by cwp
+            newReg = numGlobals +
+                ((reg - 8 - (cwp + 1) * 16 + NWindows * 16) % (NWindows * 16));
+        }
+    }
+    else
+        panic("Tried to flatten invalid register index %d!\n", reg);
+    DPRINTF(Sparc, "Flattened register %d to %d.\n", reg, newReg);
+    return newReg;
+    //return intRegFile.flattenIndex(reg);
+}
+
 void RegFile::serialize(std::ostream &os)
 {
     intRegFile.serialize(os);
@@ -220,8 +288,8 @@
 
 
     // ASRs
-    dest->setMiscReg(MISCREG_Y, src->readMiscReg(MISCREG_Y));
-    dest->setMiscReg(MISCREG_CCR, src->readMiscReg(MISCREG_CCR));
+//    dest->setMiscReg(MISCREG_Y, src->readMiscReg(MISCREG_Y));
+//    dest->setMiscReg(MISCREG_CCR, src->readMiscReg(MISCREG_CCR));
     dest->setMiscReg(MISCREG_ASI, src->readMiscReg(MISCREG_ASI));
     dest->setMiscReg(MISCREG_TICK, src->readMiscReg(MISCREG_TICK));
     dest->setMiscReg(MISCREG_FPRS, src->readMiscReg(MISCREG_FPRS));
@@ -236,11 +304,11 @@
     dest->setMiscReg(MISCREG_PSTATE, src->readMiscReg(MISCREG_PSTATE));
     dest->setMiscReg(MISCREG_PIL, src->readMiscReg(MISCREG_PIL));
     dest->setMiscReg(MISCREG_CWP, src->readMiscReg(MISCREG_CWP));
-    dest->setMiscReg(MISCREG_CANSAVE, src->readMiscReg(MISCREG_CANSAVE));
-    dest->setMiscReg(MISCREG_CANRESTORE, src->readMiscReg(MISCREG_CANRESTORE));
-    dest->setMiscReg(MISCREG_OTHERWIN, src->readMiscReg(MISCREG_OTHERWIN));
-    dest->setMiscReg(MISCREG_CLEANWIN, src->readMiscReg(MISCREG_CLEANWIN));
-    dest->setMiscReg(MISCREG_WSTATE, src->readMiscReg(MISCREG_WSTATE));
+//    dest->setMiscReg(MISCREG_CANSAVE, src->readMiscReg(MISCREG_CANSAVE));
+//    dest->setMiscReg(MISCREG_CANRESTORE, src->readMiscReg(MISCREG_CANRESTORE));
+//    dest->setMiscReg(MISCREG_OTHERWIN, src->readMiscReg(MISCREG_OTHERWIN));
+//    dest->setMiscReg(MISCREG_CLEANWIN, src->readMiscReg(MISCREG_CLEANWIN));
+//    dest->setMiscReg(MISCREG_WSTATE, src->readMiscReg(MISCREG_WSTATE));
     dest->setMiscReg(MISCREG_GL, src->readMiscReg(MISCREG_GL));
 
     // Hyperprivilged registers
diff --git a/src/arch/sparc/regfile.hh b/src/arch/sparc/regfile.hh
index 0a09d0f..d9af075 100644
--- a/src/arch/sparc/regfile.hh
+++ b/src/arch/sparc/regfile.hh
@@ -120,6 +120,8 @@
         void changeContext(RegContextParam param, RegContextVal val);
     };
 
+    int flattenIntIndex(ThreadContext * tc, int reg);
+
     void copyRegs(ThreadContext *src, ThreadContext *dest);
 
     void copyMiscRegs(ThreadContext *src, ThreadContext *dest);
diff --git a/src/arch/sparc/sparc_traits.hh b/src/arch/sparc/sparc_traits.hh
index a3d29ea..d89ec11 100644
--- a/src/arch/sparc/sparc_traits.hh
+++ b/src/arch/sparc/sparc_traits.hh
@@ -41,7 +41,8 @@
 
     // Number of register windows, can legally be 3 to 32
     const int NWindows = 8;
-    const int NumMicroIntRegs = 1;
+    //const int NumMicroIntRegs = 1;
+    const int NumMicroIntRegs = 8;
 
 //    const int NumRegularIntRegs = MaxGL * 8 + NWindows * 16;
 //    const int NumMicroIntRegs = 1;
diff --git a/src/arch/sparc/syscallreturn.hh b/src/arch/sparc/syscallreturn.hh
index 75a063d..d92b127 100644
--- a/src/arch/sparc/syscallreturn.hh
+++ b/src/arch/sparc/syscallreturn.hh
@@ -33,58 +33,30 @@
 
 #include <inttypes.h>
 
+#include "sim/syscallreturn.hh"
 #include "arch/sparc/regfile.hh"
-
-class SyscallReturn
-{
-  public:
-    template <class T>
-    SyscallReturn(T v, bool s)
-    {
-        retval = (uint64_t)v;
-        success = s;
-    }
-
-    template <class T>
-    SyscallReturn(T v)
-    {
-        success = (v >= 0);
-        retval = (uint64_t)v;
-    }
-
-    ~SyscallReturn() {}
-
-    SyscallReturn& operator=(const SyscallReturn& s)
-    {
-        retval = s.retval;
-        success = s.success;
-        return *this;
-    }
-
-    bool successful() { return success; }
-    uint64_t value() { return retval; }
-
-    private:
-    uint64_t retval;
-    bool success;
-};
+#include "cpu/thread_context.hh"
 
 namespace SparcISA
 {
     static inline void setSyscallReturn(SyscallReturn return_value,
-            RegFile *regs)
+            ThreadContext * tc)
     {
         // check for error condition.  SPARC syscall convention is to
         // indicate success/failure in reg the carry bit of the ccr
         // and put the return value itself in the standard return value reg ().
         if (return_value.successful()) {
             // no error, clear XCC.C
-            regs->setMiscReg(MISCREG_CCR, regs->readMiscReg(MISCREG_CCR) & 0xEE);
-            regs->setIntReg(ReturnValueReg, return_value.value());
+            tc->setIntReg(NumIntArchRegs + 2,
+                    tc->readIntReg(NumIntArchRegs + 2) & 0xEE);
+            //tc->setMiscReg(MISCREG_CCR, tc->readMiscReg(MISCREG_CCR) & 0xEE);
+            tc->setIntReg(ReturnValueReg, return_value.value());
         } else {
             // got an error, set XCC.C
-            regs->setMiscReg(MISCREG_CCR, regs->readMiscReg(MISCREG_CCR) | 0x11);
-            regs->setIntReg(ReturnValueReg, return_value.value());
+            tc->setIntReg(NumIntArchRegs + 2,
+                    tc->readIntReg(NumIntArchRegs + 2) | 0x11);
+            //tc->setMiscReg(MISCREG_CCR, tc->readMiscReg(MISCREG_CCR) | 0x11);
+            tc->setIntReg(ReturnValueReg, -return_value.value());
         }
     }
 };
diff --git a/src/arch/sparc/system.cc b/src/arch/sparc/system.cc
index 800d47c..2600213 100644
--- a/src/arch/sparc/system.cc
+++ b/src/arch/sparc/system.cc
@@ -191,13 +191,6 @@
     delete partition_desc;
 }
 
-bool
-SparcSystem::breakpoint()
-{
-    panic("Need to implement");
-    M5_DUMMY_RETURN
-}
-
 void
 SparcSystem::serialize(std::ostream &os)
 {
diff --git a/src/arch/sparc/system.hh b/src/arch/sparc/system.hh
index c81b093..ac4d342 100644
--- a/src/arch/sparc/system.hh
+++ b/src/arch/sparc/system.hh
@@ -68,8 +68,6 @@
 
     ~SparcSystem();
 
-    virtual bool breakpoint();
-
 /**
  * Serialization stuff
  */
diff --git a/src/arch/sparc/tlb.cc b/src/arch/sparc/tlb.cc
index 6ed6f59..bc3c392 100644
--- a/src/arch/sparc/tlb.cc
+++ b/src/arch/sparc/tlb.cc
@@ -176,8 +176,6 @@
         lookupTable.erase(new_entry->range);
 
 
-    DPRINTF(TLB, "Using entry: %#X\n", new_entry);
-
     assert(PTE.valid());
     new_entry->range.va = va;
     new_entry->range.size = PTE.size() - 1;
@@ -287,7 +285,6 @@
             usedEntries--;
         }
         freeList.push_front(i->second);
-        DPRINTF(TLB, "Freeing TLB entry : %#X\n", i->second);
         lookupTable.erase(i);
     }
 }
@@ -304,7 +301,6 @@
             tlb[x].range.partitionId == partition_id) {
             if (tlb[x].valid == true) {
                 freeList.push_front(&tlb[x]);
-                DPRINTF(TLB, "Freeing TLB entry : %#X\n", &tlb[x]);
             }
             tlb[x].valid = false;
             if (tlb[x].used) {
@@ -326,7 +322,6 @@
         if (!tlb[x].pte.locked() && tlb[x].range.partitionId == partition_id) {
             if (tlb[x].valid == true){
                 freeList.push_front(&tlb[x]);
-                DPRINTF(TLB, "Freeing TLB entry : %#X\n", &tlb[x]);
             }
             tlb[x].valid = false;
             if (tlb[x].used) {
diff --git a/src/base/remote_gdb.cc b/src/base/remote_gdb.cc
index 2c0da48..988b50c 100644
--- a/src/base/remote_gdb.cc
+++ b/src/base/remote_gdb.cc
@@ -121,16 +121,21 @@
 #include <string>
 #include <unistd.h>
 
+#include "config/full_system.hh"
+
+#if FULL_SYSTEM
 #include "arch/vtophys.hh"
+#endif
+
 #include "base/intmath.hh"
 #include "base/remote_gdb.hh"
 #include "base/socket.hh"
 #include "base/trace.hh"
-#include "config/full_system.hh"
 #include "cpu/thread_context.hh"
 #include "cpu/static_inst.hh"
-#include "mem/physical.hh"
+//#include "mem/physical.hh"
 #include "mem/port.hh"
+#include "mem/translating_port.hh"
 #include "sim/system.hh"
 
 using namespace std;
@@ -448,9 +453,17 @@
 
     DPRINTF(GDBRead, "read:  addr=%#x, size=%d", vaddr, size);
 
-    VirtualPort *vp = context->getVirtPort(context);
-    vp->readBlob(vaddr, (uint8_t*)data, size);
-    context->delVirtPort(vp);
+#if FULL_SYSTEM
+    VirtualPort *port = context->getVirtPort(context);
+#else
+    TranslatingPort *port = context->getMemPort();
+#endif
+    port->readBlob(vaddr, (uint8_t*)data, size);
+#if FULL_SYSTEM
+    context->delVirtPort(port);
+#else
+    delete port;
+#endif
 
 #if TRACING_ON
     if (DTRACE(GDBRead)) {
@@ -487,9 +500,17 @@
         } else
             DPRINTFNR("\n");
     }
-    VirtualPort *vp = context->getVirtPort(context);
-    vp->writeBlob(vaddr, (uint8_t*)data, size);
-    context->delVirtPort(vp);
+#if FULL_SYSTEM
+    VirtualPort *port = context->getVirtPort(context);
+#else
+    TranslatingPort *port = context->getMemPort();
+#endif
+    port->writeBlob(vaddr, (uint8_t*)data, size);
+#if FULL_SYSTEM
+    context->delVirtPort(port);
+#else
+    delete port;
+#endif
 
     return true;
 }
diff --git a/src/base/remote_gdb.hh b/src/base/remote_gdb.hh
index 9a3201c..92e5995 100644
--- a/src/base/remote_gdb.hh
+++ b/src/base/remote_gdb.hh
@@ -32,6 +32,7 @@
 #define __REMOTE_GDB_HH__
 
 #include <map>
+#include <sys/signal.h>
 
 #include "arch/types.hh"
 #include "cpu/pc_event.hh"
@@ -177,6 +178,10 @@
 
     virtual bool acc(Addr addr, size_t len) = 0;
     bool trap(int type);
+    virtual bool breakpoint()
+    {
+        return trap(SIGTRAP);
+    }
 
   protected:
     virtual void getregs() = 0;
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index 9037c96..515cd08 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -39,6 +39,7 @@
 #include "base/fast_alloc.hh"
 #include "base/trace.hh"
 #include "config/full_system.hh"
+#include "cpu/o3/comm.hh"
 #include "cpu/exetrace.hh"
 #include "cpu/inst_seq.hh"
 #include "cpu/op_class.hh"
@@ -62,10 +63,6 @@
     typedef typename Impl::CPUType ImplCPU;
     typedef typename ImplCPU::ImplState ImplState;
 
-    // Binary machine instruction type.
-    typedef TheISA::MachInst MachInst;
-    // Extended machine instruction type
-    typedef TheISA::ExtMachInst ExtMachInst;
     // Logical register index type.
     typedef TheISA::RegIndex RegIndex;
     // Integer register type.
@@ -212,6 +209,7 @@
     /** PC of this instruction. */
     Addr PC;
 
+  protected:
     /** Next non-speculative PC.  It is not filled in at fetch, but rather
      *  once the target of the branch is truly known (either decode or
      *  execute).
@@ -224,6 +222,14 @@
     /** Predicted next PC. */
     Addr predPC;
 
+    /** Predicted next NPC. */
+    Addr predNPC;
+
+    /** If this is a branch that was predicted taken */
+    bool predTaken;
+
+  public:
+
     /** Count of total number of dynamic instructions. */
     static int instcount;
 
@@ -236,16 +242,116 @@
      */
     bool _readySrcRegIdx[MaxInstSrcRegs];
 
+  protected:
+    /** Flattened register index of the destination registers of this
+     *  instruction.
+     */
+    TheISA::RegIndex _flatDestRegIdx[TheISA::MaxInstDestRegs];
+
+    /** Flattened register index of the source registers of this
+     *  instruction.
+     */
+    TheISA::RegIndex _flatSrcRegIdx[TheISA::MaxInstSrcRegs];
+
+    /** Physical register index of the destination registers of this
+     *  instruction.
+     */
+    PhysRegIndex _destRegIdx[TheISA::MaxInstDestRegs];
+
+    /** Physical register index of the source registers of this
+     *  instruction.
+     */
+    PhysRegIndex _srcRegIdx[TheISA::MaxInstSrcRegs];
+
+    /** Physical register index of the previous producers of the
+     *  architected destinations.
+     */
+    PhysRegIndex _prevDestRegIdx[TheISA::MaxInstDestRegs];
+
   public:
+
+    /** Returns the physical register index of the i'th destination
+     *  register.
+     */
+    PhysRegIndex renamedDestRegIdx(int idx) const
+    {
+        return _destRegIdx[idx];
+    }
+
+    /** Returns the physical register index of the i'th source register. */
+    PhysRegIndex renamedSrcRegIdx(int idx) const
+    {
+        return _srcRegIdx[idx];
+    }
+
+    /** Returns the flattened register index of the i'th destination
+     *  register.
+     */
+    TheISA::RegIndex flattenedDestRegIdx(int idx) const
+    {
+        return _flatDestRegIdx[idx];
+    }
+
+    /** Returns the flattened register index of the i'th source register */
+    TheISA::RegIndex flattenedSrcRegIdx(int idx) const
+    {
+        return _flatSrcRegIdx[idx];
+    }
+
+    /** Returns the physical register index of the previous physical register
+     *  that remapped to the same logical register index.
+     */
+    PhysRegIndex prevDestRegIdx(int idx) const
+    {
+        return _prevDestRegIdx[idx];
+    }
+
+    /** Renames a destination register to a physical register.  Also records
+     *  the previous physical register that the logical register mapped to.
+     */
+    void renameDestReg(int idx,
+                       PhysRegIndex renamed_dest,
+                       PhysRegIndex previous_rename)
+    {
+        _destRegIdx[idx] = renamed_dest;
+        _prevDestRegIdx[idx] = previous_rename;
+    }
+
+    /** Renames a source logical register to the physical register which
+     *  has/will produce that logical register's result.
+     *  @todo: add in whether or not the source register is ready.
+     */
+    void renameSrcReg(int idx, PhysRegIndex renamed_src)
+    {
+        _srcRegIdx[idx] = renamed_src;
+    }
+
+    /** Flattens a source architectural register index into a logical index.
+     */
+    void flattenSrcReg(int idx, TheISA::RegIndex flattened_src)
+    {
+        _flatSrcRegIdx[idx] = flattened_src;
+    }
+
+    /** Flattens a destination architectural register index into a logical
+     * index.
+     */
+    void flattenDestReg(int idx, TheISA::RegIndex flattened_dest)
+    {
+        _flatDestRegIdx[idx] = flattened_dest;
+    }
+
     /** BaseDynInst constructor given a binary instruction.
      *  @param inst The binary instruction.
      *  @param PC The PC of the instruction.
      *  @param pred_PC The predicted next PC.
+     *  @param pred_NPC The predicted next NPC.
      *  @param seq_num The sequence number of the instruction.
      *  @param cpu Pointer to the instruction's CPU.
      */
-    BaseDynInst(ExtMachInst inst, Addr PC, Addr pred_PC, InstSeqNum seq_num,
-                ImplCPU *cpu);
+    BaseDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC,
+            Addr pred_PC, Addr pred_NPC,
+            InstSeqNum seq_num, ImplCPU *cpu);
 
     /** BaseDynInst constructor given a StaticInst pointer.
      *  @param _staticInst The StaticInst for this BaseDynInst.
@@ -287,29 +393,46 @@
     /** Returns the next NPC.  This could be the speculative next NPC if it is
      *  called prior to the actual branch target being calculated.
      */
-    Addr readNextNPC() { return nextNPC; }
+    Addr readNextNPC()
+    {
+#if ISA_HAS_DELAY_SLOT
+        return nextNPC;
+#else
+        return nextPC + sizeof(TheISA::MachInst);
+#endif
+    }
 
     /** Set the predicted target of this current instruction. */
-    void setPredTarg(Addr predicted_PC) { predPC = predicted_PC; }
+    void setPredTarg(Addr predicted_PC, Addr predicted_NPC)
+    {
+        predPC = predicted_PC;
+        predNPC = predicted_NPC;
+    }
 
-    /** Returns the predicted target of the branch. */
-    Addr readPredTarg() { return predPC; }
+    /** Returns the predicted PC immediately after the branch. */
+    Addr readPredPC() { return predPC; }
+
+    /** Returns the predicted NPC, the PC two instructions after the branch. */
+    Addr readPredNPC() { return predNPC; }
 
     /** Returns whether the instruction was predicted taken or not. */
-    bool predTaken()
-#if ISA_HAS_DELAY_SLOT
-    { return predPC != (nextPC + sizeof(MachInst)); }
-#else
-    { return predPC != (PC + sizeof(MachInst)); }
-#endif
+    bool readPredTaken()
+    {
+        return predTaken;
+    }
+
+    void setPredTaken(bool predicted_taken)
+    {
+        predTaken = predicted_taken;
+    }
 
     /** Returns whether the instruction mispredicted. */
     bool mispredicted()
-#if ISA_HAS_DELAY_SLOT
-    { return predPC != nextNPC; }
-#else
-    { return predPC != nextPC; }
-#endif
+    {
+        return readPredPC() != readNextPC() ||
+            readPredNPC() != readNextNPC();
+    }
+
     //
     //  Instruction types.  Forward checks to StaticInst object.
     //
diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh
index 2f6859d..c3d71e4 100644
--- a/src/cpu/base_dyn_inst_impl.hh
+++ b/src/cpu/base_dyn_inst_impl.hh
@@ -62,17 +62,20 @@
 #endif
 
 template <class Impl>
-BaseDynInst<Impl>::BaseDynInst(TheISA::ExtMachInst machInst, Addr inst_PC,
-                               Addr pred_PC, InstSeqNum seq_num,
-                               ImplCPU *cpu)
+BaseDynInst<Impl>::BaseDynInst(TheISA::ExtMachInst machInst,
+                               Addr inst_PC, Addr inst_NPC,
+                               Addr pred_PC, Addr pred_NPC,
+                               InstSeqNum seq_num, ImplCPU *cpu)
   : staticInst(machInst), traceData(NULL), cpu(cpu)
 {
     seqNum = seq_num;
 
     PC = inst_PC;
-    nextPC = PC + sizeof(TheISA::MachInst);
+    nextPC = inst_NPC;
     nextNPC = nextPC + sizeof(TheISA::MachInst);
     predPC = pred_PC;
+    predNPC = pred_NPC;
+    predTaken = false;
 
     initVars();
 }
diff --git a/src/cpu/exetrace.cc b/src/cpu/exetrace.cc
index 9ea9068..bfd7012 100644
--- a/src/cpu/exetrace.cc
+++ b/src/cpu/exetrace.cc
@@ -147,13 +147,15 @@
             outs << hex;
             outs << "PC = " << thread->readNextPC();
             outs << " NPC = " << thread->readNextNPC();
-            newVal = thread->readMiscReg(SparcISA::MISCREG_CCR);
+            newVal = thread->readIntReg(SparcISA::NumIntArchRegs + 2);
+            //newVal = thread->readMiscReg(SparcISA::MISCREG_CCR);
             if(newVal != ccr)
             {
                 outs << " CCR = " << newVal;
                 ccr = newVal;
             }
-            newVal = thread->readMiscReg(SparcISA::MISCREG_Y);
+            newVal = thread->readIntReg(SparcISA::NumIntArchRegs + 1);
+            //newVal = thread->readMiscReg(SparcISA::MISCREG_Y);
             if(newVal != y)
             {
                 outs << " Y = " << newVal;
@@ -403,9 +405,13 @@
                         diffHtba = true;
                     if(shared_data->pstate != thread->readMiscReg(MISCREG_PSTATE))
                         diffPstate = true;
-                    if(shared_data->y != thread->readMiscReg(MISCREG_Y))
+                    //if(shared_data->y != thread->readMiscReg(MISCREG_Y))
+                    if(shared_data->y !=
+                            thread->readIntReg(NumIntArchRegs + 1))
                         diffY = true;
-                    if(shared_data->ccr != thread->readMiscReg(MISCREG_CCR))
+                    //if(shared_data->ccr != thread->readMiscReg(MISCREG_CCR))
+                    if(shared_data->ccr !=
+                            thread->readIntReg(NumIntArchRegs + 2))
                         diffCcr = true;
                     if(shared_data->gl != thread->readMiscReg(MISCREG_GL))
                         diffGl = true;
@@ -415,14 +421,22 @@
                         diffPil = true;
                     if(shared_data->cwp != thread->readMiscReg(MISCREG_CWP))
                         diffCwp = true;
-                    if(shared_data->cansave != thread->readMiscReg(MISCREG_CANSAVE))
+                    //if(shared_data->cansave != thread->readMiscReg(MISCREG_CANSAVE))
+                    if(shared_data->cansave !=
+                            thread->readIntReg(NumIntArchRegs + 3))
                         diffCansave = true;
+                    // Canrestore is read from the integer register file,
+                    // matching the value printed for it below.
                     if(shared_data->canrestore !=
-                            thread->readMiscReg(MISCREG_CANRESTORE))
+                            thread->readIntReg(NumIntArchRegs + 4))
                         diffCanrestore = true;
-                    if(shared_data->otherwin != thread->readMiscReg(MISCREG_OTHERWIN))
+                    //if(shared_data->otherwin != thread->readMiscReg(MISCREG_OTHERWIN))
+                    if(shared_data->otherwin !=
+                            thread->readIntReg(NumIntArchRegs + 5))
                         diffOtherwin = true;
-                    if(shared_data->cleanwin != thread->readMiscReg(MISCREG_CLEANWIN))
+                    // Cleanwin is read from the integer register file.
+                    if(shared_data->cleanwin !=
+                            thread->readIntReg(NumIntArchRegs + 6))
                         diffCleanwin = true;
 
                     for (int i = 0; i < 64; i++) {
@@ -538,10 +552,12 @@
                                 thread->readMiscReg(MISCREG_PSTATE),
                                 shared_data->pstate);
                         printRegPair(outs, "Y",
-                                thread->readMiscReg(MISCREG_Y),
+                                // Same int-reg slot as the diffY check.
+                                thread->readIntReg(NumIntArchRegs + 1),
                                 shared_data->y);
                         printRegPair(outs, "Ccr",
-                                thread->readMiscReg(MISCREG_CCR),
+                                // Same int-reg slot as the diffCcr check.
+                                thread->readIntReg(NumIntArchRegs + 2),
                                 shared_data->ccr);
                         printRegPair(outs, "Tl",
                                 thread->readMiscReg(MISCREG_TL),
@@ -559,16 +575,20 @@
                                 thread->readMiscReg(MISCREG_CWP),
                                 shared_data->cwp);
                         printRegPair(outs, "Cansave",
-                                thread->readMiscReg(MISCREG_CANSAVE),
+                                //thread->readMiscReg(MISCREG_CANSAVE),
+                                thread->readIntReg(NumIntArchRegs + 3),
                                 shared_data->cansave);
                         printRegPair(outs, "Canrestore",
-                                thread->readMiscReg(MISCREG_CANRESTORE),
+                                //thread->readMiscReg(MISCREG_CANRESTORE),
+                                thread->readIntReg(NumIntArchRegs + 4),
                                 shared_data->canrestore);
                         printRegPair(outs, "Otherwin",
-                                thread->readMiscReg(MISCREG_OTHERWIN),
+                                //thread->readMiscReg(MISCREG_OTHERWIN),
+                                thread->readIntReg(NumIntArchRegs + 5),
                                 shared_data->otherwin);
                         printRegPair(outs, "Cleanwin",
-                                thread->readMiscReg(MISCREG_CLEANWIN),
+                                //thread->readMiscReg(MISCREG_CLEANWIN),
+                                thread->readIntReg(NumIntArchRegs + 6),
                                 shared_data->cleanwin);
                         outs << endl;
                         for (int i = 1; i <= MaxTL; i++) {
diff --git a/src/cpu/o3/alpha/cpu.hh b/src/cpu/o3/alpha/cpu.hh
index 0078db6..4a20862 100644
--- a/src/cpu/o3/alpha/cpu.hh
+++ b/src/cpu/o3/alpha/cpu.hh
@@ -37,12 +37,6 @@
 #include "cpu/o3/cpu.hh"
 #include "sim/byteswap.hh"
 
-namespace TheISA
-{
-    class ITB;
-    class DTB;
-}
-
 class EndQuiesceEvent;
 namespace Kernel {
     class Statistics;
@@ -61,14 +55,6 @@
 template <class Impl>
 class AlphaO3CPU : public FullO3CPU<Impl>
 {
-  protected:
-    typedef TheISA::IntReg IntReg;
-    typedef TheISA::FloatReg FloatReg;
-    typedef TheISA::FloatRegBits FloatRegBits;
-    typedef TheISA::MiscReg MiscReg;
-    typedef TheISA::RegFile RegFile;
-    typedef TheISA::MiscRegFile MiscRegFile;
-
   public:
     typedef O3ThreadState<Impl> ImplState;
     typedef O3ThreadState<Impl> Thread;
@@ -77,13 +63,6 @@
     /** Constructs an AlphaO3CPU with the given parameters. */
     AlphaO3CPU(Params *params);
 
-#if FULL_SYSTEM
-    /** ITB pointer. */
-    AlphaISA::ITB *itb;
-    /** DTB pointer. */
-    AlphaISA::DTB *dtb;
-#endif
-
     /** Registers statistics. */
     void regStats();
 
@@ -91,19 +70,19 @@
     /** Translates instruction requestion. */
     Fault translateInstReq(RequestPtr &req, Thread *thread)
     {
-        return itb->translate(req, thread->getTC());
+        return this->itb->translate(req, thread->getTC());
     }
 
     /** Translates data read request. */
     Fault translateDataReadReq(RequestPtr &req, Thread *thread)
     {
-        return dtb->translate(req, thread->getTC(), false);
+        return this->dtb->translate(req, thread->getTC(), false);
     }
 
     /** Translates data write request. */
     Fault translateDataWriteReq(RequestPtr &req, Thread *thread)
     {
-        return dtb->translate(req, thread->getTC(), true);
+        return this->dtb->translate(req, thread->getTC(), true);
     }
 
 #else
@@ -127,20 +106,22 @@
 
 #endif
     /** Reads a miscellaneous register. */
-    MiscReg readMiscReg(int misc_reg, unsigned tid);
+    TheISA::MiscReg readMiscReg(int misc_reg, unsigned tid);
 
     /** Reads a misc. register, including any side effects the read
      * might have as defined by the architecture.
      */
-    MiscReg readMiscRegWithEffect(int misc_reg, unsigned tid);
+    TheISA::MiscReg readMiscRegWithEffect(int misc_reg, unsigned tid);
 
     /** Sets a miscellaneous register. */
-    void setMiscReg(int misc_reg, const MiscReg &val, unsigned tid);
+    void setMiscReg(int misc_reg, const TheISA::MiscReg &val,
+            unsigned tid);
 
     /** Sets a misc. register, including any side effects the write
      * might have as defined by the architecture.
      */
-    void setMiscRegWithEffect(int misc_reg, const MiscReg &val, unsigned tid);
+    void setMiscRegWithEffect(int misc_reg, const TheISA::MiscReg &val,
+            unsigned tid);
 
     /** Initiates a squash of all in-flight instructions for a given
      * thread.  The source of the squash is an external update of
@@ -175,10 +156,10 @@
      */
     void syscall(int64_t callnum, int tid);
     /** Gets a syscall argument. */
-    IntReg getSyscallArg(int i, int tid);
+    TheISA::IntReg getSyscallArg(int i, int tid);
 
     /** Used to shift args for indirect syscall. */
-    void setSyscallArg(int i, IntReg val, int tid);
+    void setSyscallArg(int i, TheISA::IntReg val, int tid);
 
     /** Sets the return value of a syscall. */
     void setSyscallReturn(SyscallReturn return_value, int tid);
diff --git a/src/cpu/o3/alpha/cpu_impl.hh b/src/cpu/o3/alpha/cpu_impl.hh
index 980e70f..41f1499 100644
--- a/src/cpu/o3/alpha/cpu_impl.hh
+++ b/src/cpu/o3/alpha/cpu_impl.hh
@@ -55,12 +55,7 @@
 #endif
 
 template <class Impl>
-AlphaO3CPU<Impl>::AlphaO3CPU(Params *params)
-#if FULL_SYSTEM
-    : FullO3CPU<Impl>(params), itb(params->itb), dtb(params->dtb)
-#else
-    : FullO3CPU<Impl>(params)
-#endif
+AlphaO3CPU<Impl>::AlphaO3CPU(Params *params) : FullO3CPU<Impl>(params)
 {
     DPRINTF(O3CPU, "Creating AlphaO3CPU object.\n");
 
@@ -173,15 +168,16 @@
 
 template <class Impl>
 void
-AlphaO3CPU<Impl>::setMiscReg(int misc_reg, const MiscReg &val, unsigned tid)
+AlphaO3CPU<Impl>::setMiscReg(int misc_reg, const TheISA::MiscReg &val,
+        unsigned tid)
 {
     this->regFile.setMiscReg(misc_reg, val, tid);
 }
 
 template <class Impl>
 void
-AlphaO3CPU<Impl>::setMiscRegWithEffect(int misc_reg, const MiscReg &val,
-                                       unsigned tid)
+AlphaO3CPU<Impl>::setMiscRegWithEffect(int misc_reg,
+        const TheISA::MiscReg &val, unsigned tid)
 {
     this->regFile.setMiscRegWithEffect(misc_reg, val, tid);
 }
@@ -312,7 +308,7 @@
 
 template <class Impl>
 void
-AlphaO3CPU<Impl>::setSyscallArg(int i, IntReg val, int tid)
+AlphaO3CPU<Impl>::setSyscallArg(int i, TheISA::IntReg val, int tid)
 {
     this->setArchIntReg(AlphaISA::ArgumentReg0 + i, val, tid);
 }
@@ -321,17 +317,6 @@
 void
 AlphaO3CPU<Impl>::setSyscallReturn(SyscallReturn return_value, int tid)
 {
-    // check for error condition.  Alpha syscall convention is to
-    // indicate success/failure in reg a3 (r19) and put the
-    // return value itself in the standard return value reg (v0).
-    if (return_value.successful()) {
-        // no error
-        this->setArchIntReg(TheISA::SyscallSuccessReg, 0, tid);
-        this->setArchIntReg(TheISA::ReturnValueReg, return_value.value(), tid);
-    } else {
-        // got an error, return details
-        this->setArchIntReg(TheISA::SyscallSuccessReg, (IntReg) -1, tid);
-        this->setArchIntReg(TheISA::ReturnValueReg, -return_value.value(), tid);
-    }
+    TheISA::setSyscallReturn(return_value, this->tcBase(tid));
 }
 #endif
diff --git a/src/cpu/o3/alpha/dyn_inst.hh b/src/cpu/o3/alpha/dyn_inst.hh
index 49cc5a2..603a1b5 100644
--- a/src/cpu/o3/alpha/dyn_inst.hh
+++ b/src/cpu/o3/alpha/dyn_inst.hh
@@ -73,8 +73,9 @@
 
   public:
     /** BaseDynInst constructor given a binary instruction. */
-    AlphaDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num,
-                 O3CPU *cpu);
+    AlphaDynInst(ExtMachInst inst, Addr PC, Addr NPC,
+                 Addr Pred_PC, Addr Pred_NPC,
+                 InstSeqNum seq_num, O3CPU *cpu);
 
     /** BaseDynInst constructor given a static inst pointer. */
     AlphaDynInst(StaticInstPtr &_staticInst);
@@ -123,6 +124,44 @@
                                                this->threadNumber);
     }
 
+    /** Reads a miscellaneous register. */
+    TheISA::MiscReg readMiscRegOperand(const StaticInst *si, int idx)
+    {
+        return this->cpu->readMiscReg(
+                si->srcRegIdx(idx) - TheISA::Ctrl_Base_DepTag,
+                this->threadNumber);
+    }
+
+    /** Reads a misc. register, including any side-effects the read
+     * might have as defined by the architecture.
+     */
+    TheISA::MiscReg readMiscRegOperandWithEffect(const StaticInst *si, int idx)
+    {
+        return this->cpu->readMiscRegWithEffect(
+                si->srcRegIdx(idx) - TheISA::Ctrl_Base_DepTag,
+                this->threadNumber);
+    }
+
+    /** Sets a misc. register. */
+    void setMiscRegOperand(const StaticInst * si, int idx, const MiscReg &val)
+    {
+        this->instResult.integer = val;
+        return this->cpu->setMiscReg(
+                si->destRegIdx(idx) - TheISA::Ctrl_Base_DepTag,
+                val, this->threadNumber);
+    }
+
+    /** Sets a misc. register, including any side-effects the write
+     * might have as defined by the architecture.
+     */
+    void setMiscRegOperandWithEffect(const StaticInst *si, int idx,
+                                     const MiscReg &val)
+    {
+        return this->cpu->setMiscRegWithEffect(
+                si->destRegIdx(idx) - TheISA::Ctrl_Base_DepTag,
+                val, this->threadNumber);
+    }
+
 #if FULL_SYSTEM
     /** Calls hardware return from error interrupt. */
     Fault hwrei();
@@ -134,22 +173,6 @@
     void syscall(int64_t callnum);
 #endif
 
-  private:
-    /** Physical register index of the destination registers of this
-     *  instruction.
-     */
-    PhysRegIndex _destRegIdx[MaxInstDestRegs];
-
-    /** Physical register index of the source registers of this
-     *  instruction.
-     */
-    PhysRegIndex _srcRegIdx[MaxInstSrcRegs];
-
-    /** Physical register index of the previous producers of the
-     *  architected destinations.
-     */
-    PhysRegIndex _prevDestRegIdx[MaxInstDestRegs];
-
   public:
 
     // The register accessor methods provide the index of the
@@ -165,28 +188,28 @@
 
     uint64_t readIntRegOperand(const StaticInst *si, int idx)
     {
-        return this->cpu->readIntReg(_srcRegIdx[idx]);
+        return this->cpu->readIntReg(this->_srcRegIdx[idx]);
     }
 
     FloatReg readFloatRegOperand(const StaticInst *si, int idx, int width)
     {
-        return this->cpu->readFloatReg(_srcRegIdx[idx], width);
+        return this->cpu->readFloatReg(this->_srcRegIdx[idx], width);
     }
 
     FloatReg readFloatRegOperand(const StaticInst *si, int idx)
     {
-        return this->cpu->readFloatReg(_srcRegIdx[idx]);
+        return this->cpu->readFloatReg(this->_srcRegIdx[idx]);
     }
 
     FloatRegBits readFloatRegOperandBits(const StaticInst *si, int idx,
                                          int width)
     {
-        return this->cpu->readFloatRegBits(_srcRegIdx[idx], width);
+        return this->cpu->readFloatRegBits(this->_srcRegIdx[idx], width);
     }
 
     FloatRegBits readFloatRegOperandBits(const StaticInst *si, int idx)
     {
-        return this->cpu->readFloatRegBits(_srcRegIdx[idx]);
+        return this->cpu->readFloatRegBits(this->_srcRegIdx[idx]);
     }
 
     /** @todo: Make results into arrays so they can handle multiple dest
@@ -194,79 +217,37 @@
      */
     void setIntRegOperand(const StaticInst *si, int idx, uint64_t val)
     {
-        this->cpu->setIntReg(_destRegIdx[idx], val);
+        this->cpu->setIntReg(this->_destRegIdx[idx], val);
         BaseDynInst<Impl>::setIntRegOperand(si, idx, val);
     }
 
     void setFloatRegOperand(const StaticInst *si, int idx, FloatReg val,
                             int width)
     {
-        this->cpu->setFloatReg(_destRegIdx[idx], val, width);
+        this->cpu->setFloatReg(this->_destRegIdx[idx], val, width);
         BaseDynInst<Impl>::setFloatRegOperand(si, idx, val, width);
     }
 
     void setFloatRegOperand(const StaticInst *si, int idx, FloatReg val)
     {
-        this->cpu->setFloatReg(_destRegIdx[idx], val);
+        this->cpu->setFloatReg(this->_destRegIdx[idx], val);
         BaseDynInst<Impl>::setFloatRegOperand(si, idx, val);
     }
 
     void setFloatRegOperandBits(const StaticInst *si, int idx,
                                 FloatRegBits val, int width)
     {
-        this->cpu->setFloatRegBits(_destRegIdx[idx], val, width);
+        this->cpu->setFloatRegBits(this->_destRegIdx[idx], val, width);
         BaseDynInst<Impl>::setFloatRegOperandBits(si, idx, val);
     }
 
     void setFloatRegOperandBits(const StaticInst *si, int idx,
                                 FloatRegBits val)
     {
-        this->cpu->setFloatRegBits(_destRegIdx[idx], val);
+        this->cpu->setFloatRegBits(this->_destRegIdx[idx], val);
         BaseDynInst<Impl>::setFloatRegOperandBits(si, idx, val);
     }
 
-    /** Returns the physical register index of the i'th destination
-     *  register.
-     */
-    PhysRegIndex renamedDestRegIdx(int idx) const
-    {
-        return _destRegIdx[idx];
-    }
-
-    /** Returns the physical register index of the i'th source register. */
-    PhysRegIndex renamedSrcRegIdx(int idx) const
-    {
-        return _srcRegIdx[idx];
-    }
-
-    /** Returns the physical register index of the previous physical register
-     *  that remapped to the same logical register index.
-     */
-    PhysRegIndex prevDestRegIdx(int idx) const
-    {
-        return _prevDestRegIdx[idx];
-    }
-
-    /** Renames a destination register to a physical register.  Also records
-     *  the previous physical register that the logical register mapped to.
-     */
-    void renameDestReg(int idx,
-                       PhysRegIndex renamed_dest,
-                       PhysRegIndex previous_rename)
-    {
-        _destRegIdx[idx] = renamed_dest;
-        _prevDestRegIdx[idx] = previous_rename;
-    }
-
-    /** Renames a source logical register to the physical register which
-     *  has/will produce that logical register's result.
-     *  @todo: add in whether or not the source register is ready.
-     */
-    void renameSrcReg(int idx, PhysRegIndex renamed_src)
-    {
-        _srcRegIdx[idx] = renamed_src;
-    }
-
   public:
     /** Calculates EA part of a memory instruction. Currently unused,
      * though it may be useful in the future if we want to split
diff --git a/src/cpu/o3/alpha/dyn_inst_impl.hh b/src/cpu/o3/alpha/dyn_inst_impl.hh
index 6fc548a..50cdec4 100644
--- a/src/cpu/o3/alpha/dyn_inst_impl.hh
+++ b/src/cpu/o3/alpha/dyn_inst_impl.hh
@@ -31,9 +31,10 @@
 #include "cpu/o3/alpha/dyn_inst.hh"
 
 template <class Impl>
-AlphaDynInst<Impl>::AlphaDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC,
+AlphaDynInst<Impl>::AlphaDynInst(ExtMachInst inst, Addr PC, Addr NPC,
+                                 Addr Pred_PC, Addr Pred_NPC,
                                  InstSeqNum seq_num, O3CPU *cpu)
-    : BaseDynInst<Impl>(inst, PC, Pred_PC, seq_num, cpu)
+    : BaseDynInst<Impl>(inst, PC, NPC, Pred_PC, Pred_NPC, seq_num, cpu)
 {
     initVars();
 }
@@ -53,11 +54,11 @@
     // as the normal register entries.  It will allow the IQ to work
     // without any modifications.
     for (int i = 0; i < this->staticInst->numDestRegs(); i++) {
-        _destRegIdx[i] = this->staticInst->destRegIdx(i);
+        this->_destRegIdx[i] = this->staticInst->destRegIdx(i);
     }
 
     for (int i = 0; i < this->staticInst->numSrcRegs(); i++) {
-        _srcRegIdx[i] = this->staticInst->srcRegIdx(i);
+        this->_srcRegIdx[i] = this->staticInst->srcRegIdx(i);
         this->_readySrcRegIdx[i] = 0;
     }
 }
diff --git a/src/cpu/o3/alpha/thread_context.hh b/src/cpu/o3/alpha/thread_context.hh
index bcecb70..6d61501 100644
--- a/src/cpu/o3/alpha/thread_context.hh
+++ b/src/cpu/o3/alpha/thread_context.hh
@@ -36,12 +36,6 @@
 {
   public:
 #if FULL_SYSTEM
-    /** Returns a pointer to the ITB. */
-    virtual AlphaISA::ITB *getITBPtr() { return this->cpu->itb; }
-
-    /** Returns a pointer to the DTB. */
-    virtual AlphaISA::DTB *getDTBPtr() { return this->cpu->dtb; }
-
     /** Returns pointer to the quiesce event. */
     virtual EndQuiesceEvent *getQuiesceEvent()
     {
@@ -51,8 +45,7 @@
 
     virtual uint64_t readNextNPC()
     {
-        panic("Alpha has no NextNPC!");
-        return 0;
+        return this->readNextPC() + sizeof(TheISA::MachInst);
     }
 
     virtual void setNextNPC(uint64_t val)
diff --git a/src/cpu/o3/bpred_unit_impl.hh b/src/cpu/o3/bpred_unit_impl.hh
index 477c8e4..84c50b4 100644
--- a/src/cpu/o3/bpred_unit_impl.hh
+++ b/src/cpu/o3/bpred_unit_impl.hh
@@ -149,7 +149,7 @@
     using TheISA::MachInst;
 
     bool pred_taken = false;
-    Addr target;
+    Addr target = PC;
 
     ++lookups;
 
@@ -233,14 +233,7 @@
         }
     }
 
-    if (pred_taken) {
-        // Set the PC and the instruction's predicted target.
-        PC = target;
-        inst->setPredTarg(target);
-    } else {
-        PC = PC + sizeof(MachInst);
-        inst->setPredTarg(PC);
-    }
+    PC = target;
 
     predHist[tid].push_front(predict_record);
 
diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh
index aa58fc2..d969190 100644
--- a/src/cpu/o3/comm.hh
+++ b/src/cpu/o3/comm.hh
@@ -87,9 +87,10 @@
     bool squash[Impl::MaxThreads];
     bool branchMispredict[Impl::MaxThreads];
     bool branchTaken[Impl::MaxThreads];
-    bool condDelaySlotBranch[Impl::MaxThreads];
+    bool squashDelaySlot[Impl::MaxThreads];
     uint64_t mispredPC[Impl::MaxThreads];
     uint64_t nextPC[Impl::MaxThreads];
+    uint64_t nextNPC[Impl::MaxThreads];
     InstSeqNum squashedSeqNum[Impl::MaxThreads];
 
     bool includeSquashInst[Impl::MaxThreads];
@@ -121,6 +122,7 @@
         bool branchTaken;
         uint64_t mispredPC;
         uint64_t nextPC;
+        uint64_t nextNPC;
 
         unsigned branchCount;
     };
@@ -160,6 +162,7 @@
         bool branchTaken;
         uint64_t mispredPC;
         uint64_t nextPC;
+        uint64_t nextNPC;
 
         // Represents the instruction that has either been retired or
         // squashed.  Similar to having a single bus that broadcasts the
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index 483c2f7..f145792 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -519,6 +519,7 @@
     toIEW->commitInfo[tid].branchMispredict = false;
 
     toIEW->commitInfo[tid].nextPC = PC[tid];
+    toIEW->commitInfo[tid].nextNPC = nextPC[tid];
 }
 
 template <class Impl>
@@ -734,27 +735,15 @@
             InstSeqNum squashed_inst = fromIEW->squashedSeqNum[tid];
 
 #if ISA_HAS_DELAY_SLOT
-            InstSeqNum bdelay_done_seq_num;
-            bool squash_bdelay_slot;
+            InstSeqNum bdelay_done_seq_num = squashed_inst;
+            bool squash_bdelay_slot = fromIEW->squashDelaySlot[tid];
+            bool branchMispredict = fromIEW->branchMispredict[tid];
 
-            if (fromIEW->branchMispredict[tid]) {
-                if (fromIEW->branchTaken[tid] &&
-                    fromIEW->condDelaySlotBranch[tid]) {
-                    DPRINTF(Commit, "[tid:%i]: Cond. delay slot branch"
-                            "mispredicted as taken. Squashing after previous "
-                            "inst, [sn:%i]\n",
-                            tid, squashed_inst);
-                     bdelay_done_seq_num = squashed_inst;
-                     squash_bdelay_slot = true;
-                } else {
-                    DPRINTF(Commit, "[tid:%i]: Branch Mispredict. Squashing "
-                            "after delay slot [sn:%i]\n", tid, squashed_inst+1);
-                    bdelay_done_seq_num = squashed_inst + 1;
-                    squash_bdelay_slot = false;
-                }
-            } else {
-                bdelay_done_seq_num = squashed_inst;
-                squash_bdelay_slot = true;
+            // Squashing/not squashing the branch delay slot only makes
+            // sense when you're squashing from a branch, i.e. from a branch
+            // mispredict.
+            if (branchMispredict && !squash_bdelay_slot) {
+                bdelay_done_seq_num++;
             }
 #endif
 
@@ -793,6 +782,7 @@
                 fromIEW->branchTaken[tid];
 
             toIEW->commitInfo[tid].nextPC = fromIEW->nextPC[tid];
+            toIEW->commitInfo[tid].nextNPC = fromIEW->nextNPC[tid];
 
             toIEW->commitInfo[tid].mispredPC = fromIEW->mispredPC[tid];
 
@@ -1122,7 +1112,7 @@
 
     // Update the commit rename map
     for (int i = 0; i < head_inst->numDestRegs(); i++) {
-        renameMap[tid]->setEntry(head_inst->destRegIdx(i),
+        renameMap[tid]->setEntry(head_inst->flattenedDestRegIdx(i),
                                  head_inst->renamedDestRegIdx(i));
     }
 
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 18cc87c..66c75a1 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -150,6 +150,10 @@
 template <class Impl>
 FullO3CPU<Impl>::FullO3CPU(Params *params)
     : BaseO3CPU(params),
+#if FULL_SYSTEM
+      itb(params->itb),
+      dtb(params->dtb),
+#endif
       tickEvent(this),
       removeInstsThisCycle(false),
       fetch(params),
@@ -657,9 +661,7 @@
     //Set PC/NPC/NNPC
     setPC(src_tc->readPC(), tid);
     setNextPC(src_tc->readNextPC(), tid);
-#if ISA_HAS_DELAY_SLOT
     setNextNPC(src_tc->readNextNPC(), tid);
-#endif
 
     src_tc->setStatus(ThreadContext::Active);
 
@@ -698,7 +700,7 @@
 
     // Squash Throughout Pipeline
     InstSeqNum squash_seq_num = commit.rob->readHeadInst(tid)->seqNum;
-    fetch.squash(0, squash_seq_num, true, tid);
+    fetch.squash(0, sizeof(TheISA::MachInst), squash_seq_num, true, tid);
     decode.squash(tid);
     rename.squash(squash_seq_num, tid);
     iew.squash(tid);
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index 2bf9cb2..d217a3e 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -91,9 +91,6 @@
 class FullO3CPU : public BaseO3CPU
 {
   public:
-    typedef TheISA::FloatReg FloatReg;
-    typedef TheISA::FloatRegBits FloatRegBits;
-
     // Typedefs from the Impl here.
     typedef typename Impl::CPUPol CPUPolicy;
     typedef typename Impl::Params Params;
@@ -114,6 +111,11 @@
         SwitchedOut
     };
 
+#if FULL_SYSTEM
+    TheISA::ITB * itb;
+    TheISA::DTB * dtb;
+#endif
+
     /** Overall CPU status. */
     Status _status;
 
@@ -382,23 +384,23 @@
     /** Register accessors.  Index refers to the physical register index. */
     uint64_t readIntReg(int reg_idx);
 
-    FloatReg readFloatReg(int reg_idx);
+    TheISA::FloatReg readFloatReg(int reg_idx);
 
-    FloatReg readFloatReg(int reg_idx, int width);
+    TheISA::FloatReg readFloatReg(int reg_idx, int width);
 
-    FloatRegBits readFloatRegBits(int reg_idx);
+    TheISA::FloatRegBits readFloatRegBits(int reg_idx);
 
-    FloatRegBits readFloatRegBits(int reg_idx, int width);
+    TheISA::FloatRegBits readFloatRegBits(int reg_idx, int width);
 
     void setIntReg(int reg_idx, uint64_t val);
 
-    void setFloatReg(int reg_idx, FloatReg val);
+    void setFloatReg(int reg_idx, TheISA::FloatReg val);
 
-    void setFloatReg(int reg_idx, FloatReg val, int width);
+    void setFloatReg(int reg_idx, TheISA::FloatReg val, int width);
 
-    void setFloatRegBits(int reg_idx, FloatRegBits val);
+    void setFloatRegBits(int reg_idx, TheISA::FloatRegBits val);
 
-    void setFloatRegBits(int reg_idx, FloatRegBits val, int width);
+    void setFloatRegBits(int reg_idx, TheISA::FloatRegBits val, int width);
 
     uint64_t readArchIntReg(int reg_idx, unsigned tid);
 
diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh
index 26ed40c..79a0bfd 100644
--- a/src/cpu/o3/decode_impl.hh
+++ b/src/cpu/o3/decode_impl.hh
@@ -282,6 +282,10 @@
     toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum;
     toFetch->decodeInfo[tid].squash = true;
     toFetch->decodeInfo[tid].nextPC = inst->branchTarget();
+    ///FIXME There needs to be a way to set the nextPC and nextNPC
+    ///explicitly for ISAs with delay slots.
+    toFetch->decodeInfo[tid].nextNPC =
+        inst->branchTarget() + sizeof(TheISA::MachInst);
 #if ISA_HAS_DELAY_SLOT
     toFetch->decodeInfo[tid].branchTaken = inst->readNextNPC() !=
         (inst->readNextPC() + sizeof(TheISA::MachInst));
@@ -743,9 +747,9 @@
 
         // Ensure that if it was predicted as a branch, it really is a
         // branch.
-        if (inst->predTaken() && !inst->isControl()) {
-            DPRINTF(Decode, "PredPC : %#x != NextPC: %#x\n",inst->predPC,
-                    inst->nextPC + 4);
+        if (inst->readPredTaken() && !inst->isControl()) {
+            DPRINTF(Decode, "PredPC : %#x != NextPC: %#x\n",
+                    inst->readPredPC(), inst->readNextPC() + 4);
 
             panic("Instruction predicted as a branch!");
 
@@ -762,26 +766,29 @@
         if (inst->isDirectCtrl() && inst->isUncondCtrl()) {
             ++decodeBranchResolved;
 
-            if (inst->branchTarget() != inst->readPredTarg()) {
+            if (inst->branchTarget() != inst->readPredPC()) {
                 ++decodeBranchMispred;
 
                 // Might want to set some sort of boolean and just do
                 // a check at the end
 #if !ISA_HAS_DELAY_SLOT
                 squash(inst, inst->threadNumber);
-                inst->setPredTarg(inst->branchTarget());
+                Addr target = inst->branchTarget();
+                inst->setPredTarg(target, target + sizeof(TheISA::MachInst));
                 break;
 #else
                 // If mispredicted as taken, then ignore delay slot
                 // instruction... else keep delay slot and squash
                 // after it is sent to rename
-                if (inst->predTaken() && inst->isCondDelaySlot()) {
+                if (inst->readPredTaken() && inst->isCondDelaySlot()) {
                     DPRINTF(Decode, "[tid:%i]: Conditional delay slot inst."
                             "[sn:%i] PC %#x mispredicted as taken.\n", tid,
                             inst->seqNum, inst->PC);
                     bdelayDoneSeqNum[tid] = inst->seqNum;
                     squash(inst, inst->threadNumber);
-                    inst->setPredTarg(inst->branchTarget());
+                    Addr target = inst->branchTarget();
+                    inst->setPredTarg(target,
+                            target + sizeof(TheISA::MachInst));
                     break;
                 } else {
                     DPRINTF(Decode, "[tid:%i]: Misprediction detected at "
@@ -800,7 +807,9 @@
         if (squashAfterDelaySlot[tid]) {
             assert(!inst->isSquashed());
             squash(squashInst[tid], squashInst[tid]->threadNumber);
-            squashInst[tid]->setPredTarg(squashInst[tid]->branchTarget());
+            Addr target = squashInst[tid]->branchTarget();
+            squashInst[tid]->setPredTarg(target,
+                    target + sizeof(TheISA::MachInst));
             assert(!inst->isSquashed());
             break;
         }
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index e880e14..8347ed7 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -239,13 +239,13 @@
     bool fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid);
 
     /** Squashes a specific thread and resets the PC. */
-    inline void doSquash(const Addr &new_PC, unsigned tid);
+    inline void doSquash(const Addr &new_PC, const Addr &new_NPC, unsigned tid);
 
     /** Squashes a specific thread and resets the PC. Also tells the CPU to
      * remove any instructions between fetch and decode that should be sqaushed.
      */
-    void squashFromDecode(const Addr &new_PC, const InstSeqNum &seq_num,
-                          unsigned tid);
+    void squashFromDecode(const Addr &new_PC, const Addr &new_NPC,
+                          const InstSeqNum &seq_num, unsigned tid);
 
     /** Checks if a thread is stalled. */
     bool checkStall(unsigned tid) const;
@@ -259,7 +259,8 @@
      * remove any instructions that are not in the ROB. The source of this
      * squash should be the commit stage.
      */
-    void squash(const Addr &new_PC, const InstSeqNum &seq_num,
+    void squash(const Addr &new_PC, const Addr &new_NPC,
+                const InstSeqNum &seq_num,
                 bool squash_delay_slot, unsigned tid);
 
     /** Ticks the fetch stage, processing all inputs signals and fetching
@@ -361,19 +362,6 @@
     /** Tracks how many instructions has been fetched this cycle. */
     int numInst;
 
-    /** Tracks delay slot information for threads in ISAs which use
-     *	delay slots;
-     */
-    struct DelaySlotInfo {
-        InstSeqNum delaySlotSeqNum;
-        InstSeqNum branchSeqNum;
-        int numInsts;
-        Addr targetAddr;
-        bool targetReady;
-    };
-
-    DelaySlotInfo delaySlotInfo[Impl::MaxThreads];
-
     /** Source of possible stalls. */
     struct Stalls {
         bool decode;
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index fe320fa..90d3868 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -319,9 +319,7 @@
     for (int tid = 0; tid < numThreads; tid++) {
         PC[tid] = cpu->readPC(tid);
         nextPC[tid] = cpu->readNextPC(tid);
-#if ISA_HAS_DELAY_SLOT
         nextNPC[tid] = cpu->readNextNPC(tid);
-#endif
     }
 
     // Size of cache block.
@@ -343,11 +341,6 @@
         cacheDataPC[tid] = 0;
         cacheDataValid[tid] = false;
 
-        delaySlotInfo[tid].branchSeqNum = -1;
-        delaySlotInfo[tid].numInsts = 0;
-        delaySlotInfo[tid].targetAddr = 0;
-        delaySlotInfo[tid].targetReady = false;
-
         stalls[tid].decode = false;
         stalls[tid].rename = false;
         stalls[tid].iew = false;
@@ -441,10 +434,8 @@
         nextPC[i] = cpu->readNextPC(i);
 #if ISA_HAS_DELAY_SLOT
         nextNPC[i] = cpu->readNextNPC(i);
-        delaySlotInfo[i].branchSeqNum = -1;
-        delaySlotInfo[i].numInsts = 0;
-        delaySlotInfo[i].targetAddr = 0;
-        delaySlotInfo[i].targetReady = false;
+#else
+        nextNPC[i] = nextPC[i] + sizeof(TheISA::MachInst);
 #endif
         fetchStatus[i] = Running;
     }
@@ -503,54 +494,41 @@
     bool predict_taken;
 
     if (!inst->isControl()) {
-#if ISA_HAS_DELAY_SLOT
-        Addr cur_PC = next_PC;
-        next_PC  = cur_PC + instSize;      //next_NPC;
-        next_NPC = cur_PC + (2 * instSize);//next_NPC + instSize;
-        inst->setPredTarg(next_NPC);
-#else
-        next_PC = next_PC + instSize;
-        inst->setPredTarg(next_PC);
-#endif
+        next_PC  = next_NPC;
+        next_NPC = next_NPC + instSize;
+        inst->setPredTarg(next_PC, next_NPC);
+        inst->setPredTaken(false);
         return false;
     }
 
     int tid = inst->threadNumber;
-#if ISA_HAS_DELAY_SLOT
     Addr pred_PC = next_PC;
     predict_taken = branchPred.predict(inst, pred_PC, tid);
 
-    if (predict_taken) {
-        DPRINTF(Fetch, "[tid:%i]: Branch predicted to be true.\n", tid);
+/*    if (predict_taken) {
+        DPRINTF(Fetch, "[tid:%i]: Branch predicted to be taken to %#x.\n",
+                tid, pred_PC);
     } else {
-        DPRINTF(Fetch, "[tid:%i]: Branch predicted to be false.\n", tid);
-    }
+        DPRINTF(Fetch, "[tid:%i]: Branch predicted to be not taken.\n", tid);
+    }*/
 
-    if (predict_taken) {
-        next_PC = next_NPC;
+#if ISA_HAS_DELAY_SLOT
+    next_PC = next_NPC;
+    if (predict_taken)
         next_NPC = pred_PC;
-
-        // Update delay slot info
-        ++delaySlotInfo[tid].numInsts;
-        delaySlotInfo[tid].targetAddr = pred_PC;
-        DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) to process.\n", tid,
-                delaySlotInfo[tid].numInsts);
-    } else { // !predict_taken
-        if (inst->isCondDelaySlot()) {
-            next_PC = pred_PC;
-            // The delay slot is skipped here if there is on
-            // prediction
-        } else {
-            next_PC = next_NPC;
-            // No need to declare a delay slot here since
-            // there is no for the pred. target to jump
-        }
-
-        next_NPC = next_NPC + instSize;
-    }
+    else
+        next_NPC += instSize;
 #else
-    predict_taken = branchPred.predict(inst, next_PC, tid);
+    if (predict_taken)
+        next_PC = pred_PC;
+    else
+        next_PC += instSize;
+    next_NPC = next_PC + instSize;
 #endif
+/*    DPRINTF(Fetch, "[tid:%i]: Branch predicted to go to %#x and then %#x.\n",
+            tid, next_PC, next_NPC);*/
+    inst->setPredTarg(next_PC, next_NPC);
+    inst->setPredTaken(predict_taken);
 
     ++fetchedBranches;
 
@@ -671,14 +649,15 @@
 
 template <class Impl>
 inline void
-DefaultFetch<Impl>::doSquash(const Addr &new_PC, unsigned tid)
+DefaultFetch<Impl>::doSquash(const Addr &new_PC,
+        const Addr &new_NPC, unsigned tid)
 {
-    DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x.\n",
-            tid, new_PC);
+    DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x, NPC to: %#x.\n",
+            tid, new_PC, new_NPC);
 
     PC[tid] = new_PC;
-    nextPC[tid] = new_PC + instSize;
-    nextNPC[tid] = new_PC + (2 * instSize);
+    nextPC[tid] = new_NPC;
+    nextNPC[tid] = new_NPC + instSize;
 
     // Clear the icache miss if it's outstanding.
     if (fetchStatus[tid] == IcacheWaitResponse) {
@@ -704,21 +683,13 @@
 
 template<class Impl>
 void
-DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC,
+DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC, const Addr &new_NPC,
                                      const InstSeqNum &seq_num,
                                      unsigned tid)
 {
     DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n",tid);
 
-    doSquash(new_PC, tid);
-
-#if ISA_HAS_DELAY_SLOT
-    if (seq_num <=  delaySlotInfo[tid].branchSeqNum) {
-        delaySlotInfo[tid].numInsts = 0;
-        delaySlotInfo[tid].targetAddr = 0;
-        delaySlotInfo[tid].targetReady = false;
-    }
-#endif
+    doSquash(new_PC, new_NPC, tid);
 
     // Tell the CPU to remove any instructions that are in flight between
     // fetch and decode.
@@ -793,20 +764,15 @@
 
 template <class Impl>
 void
-DefaultFetch<Impl>::squash(const Addr &new_PC, const InstSeqNum &seq_num,
+DefaultFetch<Impl>::squash(const Addr &new_PC, const Addr &new_NPC,
+                           const InstSeqNum &seq_num,
                            bool squash_delay_slot, unsigned tid)
 {
     DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid);
 
-    doSquash(new_PC, tid);
+    doSquash(new_PC, new_NPC, tid);
 
 #if ISA_HAS_DELAY_SLOT
-    if (seq_num <=  delaySlotInfo[tid].branchSeqNum) {
-        delaySlotInfo[tid].numInsts = 0;
-        delaySlotInfo[tid].targetAddr = 0;
-        delaySlotInfo[tid].targetReady = false;
-    }
-
     // Tell the CPU to remove any instructions that are not in the ROB.
     cpu->removeInstsNotInROB(tid, squash_delay_slot, seq_num);
 #else
@@ -929,6 +895,7 @@
 #endif
         // In any case, squash.
         squash(fromCommit->commitInfo[tid].nextPC,
+               fromCommit->commitInfo[tid].nextNPC,
                doneSeqNum,
                fromCommit->commitInfo[tid].squashDelaySlot,
                tid);
@@ -984,8 +951,12 @@
 #else
             InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].doneSeqNum;
 #endif
+            DPRINTF(Fetch, "Squashing from decode with PC = %#x, NPC = %#x\n",
+                    fromDecode->decodeInfo[tid].nextPC,
+                    fromDecode->decodeInfo[tid].nextNPC);
             // Squash unless we're already squashing
             squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
+                             fromDecode->decodeInfo[tid].nextNPC,
                              doneSeqNum,
                              tid);
 
@@ -1042,6 +1013,8 @@
     // The current PC.
     Addr &fetch_PC = PC[tid];
 
+    Addr &fetch_NPC = nextPC[tid];
+
     // Fault code for memory access.
     Fault fault = NoFault;
 
@@ -1098,7 +1071,8 @@
     }
 
     Addr next_PC = fetch_PC;
-    Addr next_NPC = next_PC + instSize;
+    Addr next_NPC = fetch_NPC;
+
     InstSeqNum inst_seq;
     MachInst inst;
     ExtMachInst ext_inst;
@@ -1117,15 +1091,22 @@
         // ended this fetch block.
         bool predicted_branch = false;
 
-        // Need to keep track of whether or not a delay slot
-        // instruction has been fetched
-
         for (;
              offset < cacheBlkSize &&
                  numInst < fetchWidth &&
-                 (!predicted_branch || delaySlotInfo[tid].numInsts > 0);
+                 !predicted_branch;
              ++numInst) {
 
+            // If we're branching after this instruction, quit fetching
+            // from the same block then.
+            predicted_branch =
+                (fetch_PC + sizeof(TheISA::MachInst) != fetch_NPC);
+            if (predicted_branch) {
+                DPRINTF(Fetch, "Branch detected with PC = %#x, NPC = %#x\n",
+                        fetch_PC, fetch_NPC);
+            }
+
+
             // Get a sequence number.
             inst_seq = cpu->getAndIncrementInstSeq();
 
@@ -1145,8 +1126,9 @@
 #endif
 
             // Create a new DynInst from the instruction fetched.
-            DynInstPtr instruction = new DynInst(ext_inst, fetch_PC,
-                                                 next_PC,
+            DynInstPtr instruction = new DynInst(ext_inst,
+                                                 fetch_PC, fetch_NPC,
+                                                 next_PC, next_NPC,
                                                  inst_seq, cpu);
             instruction->setTid(tid);
 
@@ -1158,6 +1140,8 @@
                     "[sn:%lli]\n",
                     tid, instruction->readPC(), inst_seq);
 
+            //DPRINTF(Fetch, "[tid:%i]: MachInst is %#x\n", tid, ext_inst);
+
             DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n",
                     tid, instruction->staticInst->disassemble(fetch_PC));
 
@@ -1166,8 +1150,8 @@
                                      instruction->staticInst,
                                      instruction->readPC());
 
-            predicted_branch = lookupAndUpdateNextPC(instruction, next_PC,
-                                                     next_NPC);
+            lookupAndUpdateNextPC(instruction, next_PC, next_NPC);
+            predicted_branch |= (next_PC != fetch_NPC);
 
             // Add instruction to the CPU's list of instructions.
             instruction->setInstListIt(cpu->addInst(instruction));
@@ -1183,6 +1167,7 @@
 
             // Move to the next instruction, unless we have a branch.
             fetch_PC = next_PC;
+            fetch_NPC = next_NPC;
 
             if (instruction->isQuiesce()) {
                 DPRINTF(Fetch, "Quiesce instruction encountered, halting fetch!",
@@ -1194,29 +1179,6 @@
             }
 
             offset += instSize;
-
-#if ISA_HAS_DELAY_SLOT
-            if (predicted_branch) {
-                delaySlotInfo[tid].branchSeqNum = inst_seq;
-
-                DPRINTF(Fetch, "[tid:%i]: Delay slot branch set to [sn:%i]\n",
-                        tid, inst_seq);
-                continue;
-            } else if (delaySlotInfo[tid].numInsts > 0) {
-                --delaySlotInfo[tid].numInsts;
-
-                // It's OK to set PC to target of branch
-                if (delaySlotInfo[tid].numInsts == 0) {
-                    delaySlotInfo[tid].targetReady = true;
-
-                    // Break the looping condition
-                    predicted_branch = true;
-                }
-
-                DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) left to"
-                        " process.\n", tid, delaySlotInfo[tid].numInsts);
-            }
-#endif
         }
 
         if (offset >= cacheBlkSize) {
@@ -1225,7 +1187,7 @@
         } else if (numInst >= fetchWidth) {
             DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth "
                     "for this cycle.\n", tid);
-        } else if (predicted_branch && delaySlotInfo[tid].numInsts <= 0) {
+        } else if (predicted_branch) {
             DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch "
                     "instruction encountered.\n", tid);
         }
@@ -1238,26 +1200,13 @@
     // Now that fetching is completed, update the PC to signify what the next
     // cycle will be.
     if (fault == NoFault) {
+        PC[tid] = next_PC;
+        nextPC[tid] = next_NPC;
+        nextNPC[tid] = next_NPC + instSize;
 #if ISA_HAS_DELAY_SLOT
-        if (delaySlotInfo[tid].targetReady &&
-            delaySlotInfo[tid].numInsts == 0) {
-            // Set PC to target
-            PC[tid] = delaySlotInfo[tid].targetAddr; //next_PC
-            nextPC[tid] = next_PC + instSize;        //next_NPC
-            nextNPC[tid] = next_PC + (2 * instSize);
-
-            delaySlotInfo[tid].targetReady = false;
-        } else {
-            PC[tid] = next_PC;
-            nextPC[tid] = next_NPC;
-            nextNPC[tid] = next_NPC + instSize;
-        }
-
         DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, PC[tid]);
 #else
-        DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n",tid, next_PC);
-        PC[tid] = next_PC;
-        nextPC[tid] = next_PC + instSize;
+        DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, next_PC);
 #endif
     } else {
         // We shouldn't be in an icache miss and also have a fault (an ITB
@@ -1277,10 +1226,11 @@
         ext_inst = TheISA::NoopMachInst;
 
         // Create a new DynInst from the dummy nop.
-        DynInstPtr instruction = new DynInst(ext_inst, fetch_PC,
-                                             next_PC,
+        DynInstPtr instruction = new DynInst(ext_inst,
+                                             fetch_PC, fetch_NPC,
+                                             next_PC, next_NPC,
                                              inst_seq, cpu);
-        instruction->setPredTarg(next_PC + instSize);
+        instruction->setPredTarg(next_PC, next_NPC);
         instruction->setTid(tid);
 
         instruction->setASID(tid);
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index a8962f2..f24eaf2 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -480,23 +480,37 @@
     toCommit->mispredPC[tid] = inst->readPC();
     toCommit->branchMispredict[tid] = true;
 
+    int instSize = sizeof(TheISA::MachInst);
 #if ISA_HAS_DELAY_SLOT
-    bool branch_taken = inst->readNextNPC() !=
-        (inst->readNextPC() + sizeof(TheISA::MachInst));
+    bool branch_taken =
+        !(inst->readNextPC() + instSize == inst->readNextNPC() &&
+          (inst->readNextPC() == inst->readPC() + instSize ||
+           inst->readNextPC() == inst->readPC() + 2 * instSize));
+    DPRINTF(Sparc, "Branch taken = %s [sn:%i]\n",
+            branch_taken ? "true": "false", inst->seqNum);
 
     toCommit->branchTaken[tid] = branch_taken;
 
-    toCommit->condDelaySlotBranch[tid] = inst->isCondDelaySlot();
-
-    if (inst->isCondDelaySlot() && branch_taken) {
+    bool squashDelaySlot = true;
+//	(inst->readNextPC() != inst->readPC() + sizeof(TheISA::MachInst));
+    DPRINTF(Sparc, "Squash delay slot = %s [sn:%i]\n",
+            squashDelaySlot ? "true": "false", inst->seqNum);
+    toCommit->squashDelaySlot[tid] = squashDelaySlot;
+    //If we're squashing the delay slot, we need to pick back up at NextPC.
+    //Otherwise, NextPC isn't being squashed, so we should pick back up at
+    //NextNPC.
+    if (squashDelaySlot) {
         toCommit->nextPC[tid] = inst->readNextPC();
+        toCommit->nextNPC[tid] = inst->readNextNPC();
     } else {
         toCommit->nextPC[tid] = inst->readNextNPC();
+        toCommit->nextNPC[tid] = inst->readNextNPC() + instSize;
     }
 #else
     toCommit->branchTaken[tid] = inst->readNextPC() !=
         (inst->readPC() + sizeof(TheISA::MachInst));
     toCommit->nextPC[tid] = inst->readNextPC();
+    toCommit->nextNPC[tid] = inst->readNextPC() + instSize;
 #endif
 
     toCommit->includeSquashInst[tid] = false;
@@ -514,6 +528,11 @@
     toCommit->squash[tid] = true;
     toCommit->squashedSeqNum[tid] = inst->seqNum;
     toCommit->nextPC[tid] = inst->readNextPC();
+#if ISA_HAS_DELAY_SLOT
+    toCommit->nextNPC[tid] = inst->readNextNPC();
+#else
+    toCommit->nextNPC[tid] = inst->readNextPC() + sizeof(TheISA::MachInst);
+#endif
     toCommit->branchMispredict[tid] = false;
 
     toCommit->includeSquashInst[tid] = false;
@@ -531,6 +550,11 @@
     toCommit->squash[tid] = true;
     toCommit->squashedSeqNum[tid] = inst->seqNum;
     toCommit->nextPC[tid] = inst->readPC();
+#if ISA_HAS_DELAY_SLOT
+    toCommit->nextNPC[tid] = inst->readNextPC();
+#else
+    toCommit->nextNPC[tid] = inst->readPC() + sizeof(TheISA::MachInst);
+#endif
     toCommit->branchMispredict[tid] = false;
 
     // Must include the broadcasted SN in the squash.
@@ -1341,17 +1365,15 @@
                 fetchRedirect[tid] = true;
 
                 DPRINTF(IEW, "Execute: Branch mispredict detected.\n");
-#if ISA_HAS_DELAY_SLOT
-                DPRINTF(IEW, "Execute: Redirecting fetch to PC: %#x.\n",
-                        inst->nextNPC);
-#else
-                DPRINTF(IEW, "Execute: Redirecting fetch to PC: %#x.\n",
-                        inst->nextPC);
-#endif
+                DPRINTF(IEW, "Predicted target was %#x, %#x.\n",
+                        inst->readPredPC(), inst->readPredNPC());
+                DPRINTF(IEW, "Execute: Redirecting fetch to PC: %#x,"
+                        " NPC: %#x.\n", inst->readNextPC(),
+                        inst->readNextNPC());
                 // If incorrect, then signal the ROB that it must be squashed.
                 squashDueToBranch(inst, tid);
 
-                if (inst->predTaken()) {
+                if (inst->readPredTaken()) {
                     predictedTakenIncorrect++;
                 } else {
                     predictedNotTakenIncorrect++;
@@ -1421,7 +1443,7 @@
     // mark scoreboard that this instruction is finally complete.
     // Either have IEW have direct access to scoreboard, or have this
     // as part of backwards communication.
-    for (int inst_num = 0; inst_num < issueWidth &&
+    for (int inst_num = 0; inst_num < wbWidth &&
              toCommit->insts[inst_num]; inst_num++) {
         DynInstPtr inst = toCommit->insts[inst_num];
         int tid = inst->threadNumber;
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 14f9d50..9c7eb77 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -559,6 +559,12 @@
             // Cast this to type T?
             data = storeQueue[store_idx].data >> shift_amt;
 
+            // When the data comes from the store queue entry, it's in host
+            // order. When it gets sent to the load, it needs to be in guest
+            // order so when the load converts it again, it ends up back
+            // in host order like the inst expects.
+            data = TheISA::htog(data);
+
             assert(!load_inst->memData);
             load_inst->memData = new uint8_t[64];
 
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 3b84d34..ebd9301 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -597,8 +597,21 @@
 
         assert(!inst->memData);
         inst->memData = new uint8_t[64];
-        memcpy(inst->memData, (uint8_t *)&storeQueue[storeWBIdx].data,
-               req->getSize());
+
+        TheISA::IntReg convertedData =
+            TheISA::htog(storeQueue[storeWBIdx].data);
+
+        //FIXME This is a hack to get SPARC working. It, along with endianness
+        //in the memory system in general, needs to be straightened out more
+        //formally. The problem is that the data's endianness is swapped when
+        //it's in the 64 bit data field in the store queue. The data that you
+        //want won't start at the beginning of the field anymore unless it was
+        //a 64 bit access.
+        memcpy(inst->memData,
+                (uint8_t *)&convertedData +
+                (TheISA::ByteOrderDiffers ?
+                 (sizeof(TheISA::IntReg) - req->getSize()) : 0),
+                req->getSize());
 
         PacketPtr data_pkt = new Packet(req, Packet::WriteReq, Packet::Broadcast);
         data_pkt->dataStatic(inst->memData);
@@ -612,7 +625,7 @@
         DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x "
                 "to Addr:%#x, data:%#x [sn:%lli]\n",
                 storeWBIdx, inst->readPC(),
-                req->getPaddr(), *(inst->memData),
+                req->getPaddr(), (int)*(inst->memData),
                 inst->seqNum);
 
         // @todo: Remove this SC hack once the memory system handles it.
diff --git a/src/cpu/o3/mips/cpu_impl.hh b/src/cpu/o3/mips/cpu_impl.hh
index 08e9ba4..e7dbd3a 100644
--- a/src/cpu/o3/mips/cpu_impl.hh
+++ b/src/cpu/o3/mips/cpu_impl.hh
@@ -220,16 +220,6 @@
 void
 MipsO3CPU<Impl>::setSyscallReturn(SyscallReturn return_value, int tid)
 {
-    // check for error condition.
-    if (return_value.successful()) {
-        // no error
-        this->setArchIntReg(TheISA::SyscallSuccessReg, 0, tid);
-        this->setArchIntReg(TheISA::ReturnValueReg, return_value.value(), tid);
-    } else {
-        // got an error, return details
-        this->setArchIntReg(TheISA::SyscallSuccessReg,
-                (TheISA::IntReg) -1, tid);
-        this->setArchIntReg(TheISA::ReturnValueReg, -return_value.value(), tid);
-    }
+    TheISA::setSyscallReturn(return_value, this->tcBase(tid));
 }
 #endif
diff --git a/src/cpu/o3/mips/dyn_inst.hh b/src/cpu/o3/mips/dyn_inst.hh
index 833371e..f535309 100755
--- a/src/cpu/o3/mips/dyn_inst.hh
+++ b/src/cpu/o3/mips/dyn_inst.hh
@@ -54,10 +54,6 @@
     /** Typedef for the CPU. */
     typedef typename Impl::O3CPU O3CPU;
 
-    /** Binary machine instruction type. */
-    typedef TheISA::MachInst MachInst;
-    /** Extended machine instruction type. */
-    typedef TheISA::ExtMachInst ExtMachInst;
     /** Logical register index type. */
     typedef TheISA::RegIndex RegIndex;
     /** Integer register index type. */
@@ -74,8 +70,10 @@
 
   public:
     /** BaseDynInst constructor given a binary instruction. */
-    MipsDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num,
-                 O3CPU *cpu);
+    MipsDynInst(ExtMachInst inst,
+                Addr PC, Addr NPC,
+                Addr Pred_PC, Addr Pred_NPC,
+                InstSeqNum seq_num, O3CPU *cpu);
 
     /** BaseDynInst constructor given a static inst pointer. */
     MipsDynInst(StaticInstPtr &_staticInst);
@@ -127,22 +125,6 @@
     /** Calls a syscall. */
     void syscall(int64_t callnum);
 
-  private:
-    /** Physical register index of the destination registers of this
-     *  instruction.
-     */
-    PhysRegIndex _destRegIdx[MaxInstDestRegs];
-
-    /** Physical register index of the source registers of this
-     *  instruction.
-     */
-    PhysRegIndex _srcRegIdx[MaxInstSrcRegs];
-
-    /** Physical register index of the previous producers of the
-     *  architected destinations.
-     */
-    PhysRegIndex _prevDestRegIdx[MaxInstDestRegs];
-
   public:
 
     // The register accessor methods provide the index of the
@@ -158,28 +140,28 @@
 
     uint64_t readIntRegOperand(const StaticInst *si, int idx)
     {
-        return this->cpu->readIntReg(_srcRegIdx[idx]);
+        return this->cpu->readIntReg(this->_srcRegIdx[idx]);
     }
 
     FloatReg readFloatRegOperand(const StaticInst *si, int idx, int width)
     {
-        return this->cpu->readFloatReg(_srcRegIdx[idx], width);
+        return this->cpu->readFloatReg(this->_srcRegIdx[idx], width);
     }
 
     FloatReg readFloatRegOperand(const StaticInst *si, int idx)
     {
-        return this->cpu->readFloatReg(_srcRegIdx[idx]);
+        return this->cpu->readFloatReg(this->_srcRegIdx[idx]);
     }
 
     FloatRegBits readFloatRegOperandBits(const StaticInst *si, int idx,
                                          int width)
     {
-        return this->cpu->readFloatRegBits(_srcRegIdx[idx], width);
+        return this->cpu->readFloatRegBits(this->_srcRegIdx[idx], width);
     }
 
     FloatRegBits readFloatRegOperandBits(const StaticInst *si, int idx)
     {
-        return this->cpu->readFloatRegBits(_srcRegIdx[idx]);
+        return this->cpu->readFloatRegBits(this->_srcRegIdx[idx]);
     }
 
     /** @todo: Make results into arrays so they can handle multiple dest
@@ -187,79 +169,37 @@
      */
     void setIntRegOperand(const StaticInst *si, int idx, uint64_t val)
     {
-        this->cpu->setIntReg(_destRegIdx[idx], val);
+        this->cpu->setIntReg(this->_destRegIdx[idx], val);
         BaseDynInst<Impl>::setIntRegOperand(si, idx, val);
     }
 
     void setFloatRegOperand(const StaticInst *si, int idx, FloatReg val,
                             int width)
     {
-        this->cpu->setFloatReg(_destRegIdx[idx], val, width);
+        this->cpu->setFloatReg(this->_destRegIdx[idx], val, width);
         BaseDynInst<Impl>::setFloatRegOperand(si, idx, val, width);
     }
 
     void setFloatRegOperand(const StaticInst *si, int idx, FloatReg val)
     {
-        this->cpu->setFloatReg(_destRegIdx[idx], val);
+        this->cpu->setFloatReg(this->_destRegIdx[idx], val);
         BaseDynInst<Impl>::setFloatRegOperand(si, idx, val);
     }
 
     void setFloatRegOperandBits(const StaticInst *si, int idx,
                                 FloatRegBits val, int width)
     {
-        this->cpu->setFloatRegBits(_destRegIdx[idx], val, width);
+        this->cpu->setFloatRegBits(this->_destRegIdx[idx], val, width);
         BaseDynInst<Impl>::setFloatRegOperandBits(si, idx, val);
     }
 
     void setFloatRegOperandBits(const StaticInst *si, int idx,
                                 FloatRegBits val)
     {
-        this->cpu->setFloatRegBits(_destRegIdx[idx], val);
+        this->cpu->setFloatRegBits(this->_destRegIdx[idx], val);
         BaseDynInst<Impl>::setFloatRegOperandBits(si, idx, val);
     }
 
-    /** Returns the physical register index of the i'th destination
-     *  register.
-     */
-    PhysRegIndex renamedDestRegIdx(int idx) const
-    {
-        return _destRegIdx[idx];
-    }
-
-    /** Returns the physical register index of the i'th source register. */
-    PhysRegIndex renamedSrcRegIdx(int idx) const
-    {
-        return _srcRegIdx[idx];
-    }
-
-    /** Returns the physical register index of the previous physical register
-     *  that remapped to the same logical register index.
-     */
-    PhysRegIndex prevDestRegIdx(int idx) const
-    {
-        return _prevDestRegIdx[idx];
-    }
-
-    /** Renames a destination register to a physical register.  Also records
-     *  the previous physical register that the logical register mapped to.
-     */
-    void renameDestReg(int idx,
-                       PhysRegIndex renamed_dest,
-                       PhysRegIndex previous_rename)
-    {
-        _destRegIdx[idx] = renamed_dest;
-        _prevDestRegIdx[idx] = previous_rename;
-    }
-
-    /** Renames a source logical register to the physical register which
-     *  has/will produce that logical register's result.
-     *  @todo: add in whether or not the source register is ready.
-     */
-    void renameSrcReg(int idx, PhysRegIndex renamed_src)
-    {
-        _srcRegIdx[idx] = renamed_src;
-    }
-
   public:
     /** Calculates EA part of a memory instruction. Currently unused,
      * though it may be useful in the future if we want to split
diff --git a/src/cpu/o3/mips/dyn_inst_impl.hh b/src/cpu/o3/mips/dyn_inst_impl.hh
index 5bc01b9..c0f9ae7 100755
--- a/src/cpu/o3/mips/dyn_inst_impl.hh
+++ b/src/cpu/o3/mips/dyn_inst_impl.hh
@@ -31,9 +31,11 @@
 #include "cpu/o3/mips/dyn_inst.hh"
 
 template <class Impl>
-MipsDynInst<Impl>::MipsDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC,
-                                 InstSeqNum seq_num, O3CPU *cpu)
-    : BaseDynInst<Impl>(inst, PC, Pred_PC, seq_num, cpu)
+MipsDynInst<Impl>::MipsDynInst(ExtMachInst inst,
+                               Addr PC, Addr NPC,
+                               Addr Pred_PC, Addr Pred_NPC,
+                               InstSeqNum seq_num, O3CPU *cpu)
+    : BaseDynInst<Impl>(inst, PC, NPC, Pred_PC, Pred_NPC, seq_num, cpu)
 {
     initVars();
 }
@@ -53,11 +55,11 @@
     // as the normal register entries.  It will allow the IQ to work
     // without any modifications.
     for (int i = 0; i < this->staticInst->numDestRegs(); i++) {
-        _destRegIdx[i] = this->staticInst->destRegIdx(i);
+        this->_destRegIdx[i] = this->staticInst->destRegIdx(i);
     }
 
     for (int i = 0; i < this->staticInst->numSrcRegs(); i++) {
-        _srcRegIdx[i] = this->staticInst->srcRegIdx(i);
+        this->_srcRegIdx[i] = this->staticInst->srcRegIdx(i);
         this->_readySrcRegIdx[i] = 0;
     }
 }
diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh
index 177b9cb..6b4628f 100644
--- a/src/cpu/o3/rename.hh
+++ b/src/cpu/o3/rename.hh
@@ -411,6 +411,14 @@
     /** Whether or not rename needs to block this cycle. */
     bool blockThisCycle;
 
+    /** Whether or not rename needs to resume a serialize instruction
+     * after squashing. */
+    bool resumeSerialize;
+
+    /** Whether or not rename needs to resume clearing out the skidbuffer
+     * after squashing. */
+    bool resumeUnblocking;
+
     /** The number of threads active in rename. */
     unsigned numThreads;
 
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh
index 3a8e503..e303f1c 100644
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -31,6 +31,8 @@
 
 #include <list>
 
+#include "arch/isa_traits.hh"
+#include "arch/regfile.hh"
 #include "config/full_system.hh"
 #include "cpu/o3/rename.hh"
 
@@ -41,6 +43,8 @@
       commitToRenameDelay(params->commitToRenameDelay),
       renameWidth(params->renameWidth),
       commitWidth(params->commitWidth),
+      resumeSerialize(false),
+      resumeUnblocking(false),
       numThreads(params->numberOfThreads),
       maxPhysicalRegs(params->numPhysIntRegs + params->numPhysFloatRegs)
 {
@@ -332,12 +336,22 @@
     // If it still needs to block, the blocking should happen the next
     // cycle and there should be space to hold everything due to the squash.
     if (renameStatus[tid] == Blocked ||
-        renameStatus[tid] == Unblocking ||
-        renameStatus[tid] == SerializeStall) {
-
+        renameStatus[tid] == Unblocking) {
         toDecode->renameUnblock[tid] = 1;
 
+        resumeSerialize = false;
         serializeInst[tid] = NULL;
+    } else if (renameStatus[tid] == SerializeStall) {
+        assert(serializeInst[tid]); // must hold before the dereference below
+        if (serializeInst[tid]->seqNum <= squash_seq_num) {
+            DPRINTF(Rename, "Rename will resume serializing after squash\n");
+            resumeSerialize = true;
+        } else {
+            resumeSerialize = false;
+            toDecode->renameUnblock[tid] = 1;
+
+            serializeInst[tid] = NULL;
+        }
     }
 
     // Set the status to Squashing.
@@ -392,6 +406,9 @@
         }
         slist_it++;
     }
+    resumeUnblocking = (skidBuffer[tid].size() != 0);
+    DPRINTF(Rename, "Resume unblocking set to %s\n",
+            resumeUnblocking ? "true" : "false");
 #else
     skidBuffer[tid].clear();
 #endif
@@ -476,6 +493,20 @@
         ++renameSquashCycles;
     } else if (renameStatus[tid] == SerializeStall) {
         ++renameSerializeStallCycles;
+        // If we are currently in SerializeStall and resumeSerialize
+        // was set, then that means that we are resuming serializing
+        // this cycle.  Tell the previous stages to block.
+        if (resumeSerialize) {
+            resumeSerialize = false;
+            block(tid);
+            toDecode->renameUnblock[tid] = false;
+        }
+    } else if (renameStatus[tid] == Unblocking) {
+        if (resumeUnblocking) {
+            block(tid);
+            resumeUnblocking = false;
+            toDecode->renameUnblock[tid] = false;
+        }
     }
 
     if (renameStatus[tid] == Running ||
@@ -741,7 +772,17 @@
     }
 
     if (skidBuffer[tid].size() > skidBufferMax)
+    {
+        typename InstQueue::iterator it;
+        warn("Skidbuffer contents:\n");
+        for(it = skidBuffer[tid].begin(); it != skidBuffer[tid].end(); it++)
+        {   // disassemble every entry against its own PC
+            warn("[tid:%u]: %s [sn:%i].\n", tid,
+                    (*it)->staticInst->disassemble((*it)->readPC()),
+                    (*it)->seqNum);
+        }
         panic("Skidbuffer Exceeded Max Size");
+    }
 }
 
 template <class Impl>
@@ -830,7 +871,10 @@
     // Only signal backwards to block if the previous stages do not think
     // rename is already blocked.
     if (renameStatus[tid] != Blocked) {
-        if (renameStatus[tid] != Unblocking) {
+        // If resumeUnblocking is set, we unblocked during the squash,
+        // but now we have unblocking status. We need to tell earlier
+        // stages to block.
+        if (resumeUnblocking || renameStatus[tid] != Unblocking) {
             toDecode->renameBlock[tid] = true;
             toDecode->renameUnblock[tid] = false;
             wroteToTimeBuffer = true;
@@ -963,13 +1007,19 @@
     // Will need to mark dependencies though.
     for (int src_idx = 0; src_idx < num_src_regs; src_idx++) {
         RegIndex src_reg = inst->srcRegIdx(src_idx);
+        RegIndex flat_src_reg = src_reg;
+        if (src_reg < TheISA::FP_Base_DepTag) {
+            flat_src_reg = TheISA::flattenIntIndex(inst->tcBase(), src_reg);
+            DPRINTF(Rename, "Flattening index %d to %d.\n", (int)src_reg, (int)flat_src_reg);
+        }
+        inst->flattenSrcReg(src_idx, flat_src_reg);
 
         // Look up the source registers to get the phys. register they've
         // been renamed to, and set the sources to those registers.
-        PhysRegIndex renamed_reg = renameMap[tid]->lookup(src_reg);
+        PhysRegIndex renamed_reg = renameMap[tid]->lookup(flat_src_reg);
 
         DPRINTF(Rename, "[tid:%u]: Looking up arch reg %i, got "
-                "physical reg %i.\n", tid, (int)src_reg,
+                "physical reg %i.\n", tid, (int)flat_src_reg,
                 (int)renamed_reg);
 
         inst->renameSrcReg(src_idx, renamed_reg);
@@ -996,20 +1046,27 @@
     // Rename the destination registers.
     for (int dest_idx = 0; dest_idx < num_dest_regs; dest_idx++) {
         RegIndex dest_reg = inst->destRegIdx(dest_idx);
+        RegIndex flat_dest_reg = dest_reg;
+        if (dest_reg < TheISA::FP_Base_DepTag) {
+            flat_dest_reg = TheISA::flattenIntIndex(inst->tcBase(), dest_reg);
+            DPRINTF(Rename, "Flattening index %d to %d.\n", (int)dest_reg, (int)flat_dest_reg);
+        }
+
+        inst->flattenDestReg(dest_idx, flat_dest_reg);
 
         // Get the physical register that the destination will be
         // renamed to.
-        rename_result = renameMap[tid]->rename(dest_reg);
+        rename_result = renameMap[tid]->rename(flat_dest_reg);
 
         //Mark Scoreboard entry as not ready
         scoreboard->unsetReg(rename_result.first);
 
         DPRINTF(Rename, "[tid:%u]: Renaming arch reg %i to physical "
-                "reg %i.\n", tid, (int)dest_reg,
+                "reg %i.\n", tid, (int)flat_dest_reg,
                 (int)rename_result.first);
 
         // Record the rename information so that a history can be kept.
-        RenameHistory hb_entry(inst->seqNum, dest_reg,
+        RenameHistory hb_entry(inst->seqNum, flat_dest_reg,
                                rename_result.first,
                                rename_result.second);
 
@@ -1233,12 +1290,24 @@
     if (renameStatus[tid] == Squashing) {
         // Switch status to running if rename isn't being told to block or
         // squash this cycle.
-        DPRINTF(Rename, "[tid:%u]: Done squashing, switching to running.\n",
-                tid);
+        if (resumeSerialize) {
+            DPRINTF(Rename, "[tid:%u]: Done squashing, switching to serialize.\n",
+                    tid);
 
-        renameStatus[tid] = Running;
+            renameStatus[tid] = SerializeStall;
+            return true;
+        } else if (resumeUnblocking) {
+            DPRINTF(Rename, "[tid:%u]: Done squashing, switching to unblocking.\n",
+                    tid);
+            renameStatus[tid] = Unblocking;
+            return true;
+        } else {
+            DPRINTF(Rename, "[tid:%u]: Done squashing, switching to running.\n",
+                    tid);
 
-        return false;
+            renameStatus[tid] = Running;
+            return false;
+        }
     }
 
     if (renameStatus[tid] == SerializeStall) {
diff --git a/src/cpu/o3/sparc/cpu.hh b/src/cpu/o3/sparc/cpu.hh
index c4df798..08ebd27 100644
--- a/src/cpu/o3/sparc/cpu.hh
+++ b/src/cpu/o3/sparc/cpu.hh
@@ -37,12 +37,6 @@
 #include "cpu/o3/cpu.hh"
 #include "sim/byteswap.hh"
 
-namespace TheISA
-{
-    class ITB;
-    class DTB;
-}
-
 class EndQuiesceEvent;
 namespace Kernel {
     class Statistics;
@@ -61,14 +55,6 @@
 template <class Impl>
 class SparcO3CPU : public FullO3CPU<Impl>
 {
-  protected:
-    typedef TheISA::IntReg IntReg;
-    typedef TheISA::FloatReg FloatReg;
-    typedef TheISA::FloatRegBits FloatRegBits;
-    typedef TheISA::MiscReg MiscReg;
-    typedef TheISA::RegFile RegFile;
-    typedef TheISA::MiscRegFile MiscRegFile;
-
   public:
     typedef O3ThreadState<Impl> ImplState;
     typedef O3ThreadState<Impl> Thread;
@@ -77,13 +63,6 @@
     /** Constructs an AlphaO3CPU with the given parameters. */
     SparcO3CPU(Params *params);
 
-#if FULL_SYSTEM
-    /** ITB pointer. */
-    SparcISA::ITB *itb;
-    /** DTB pointer. */
-    SparcISA::DTB *dtb;
-#endif
-
     /** Registers statistics. */
     void regStats();
 
@@ -91,19 +70,19 @@
     /** Translates instruction requestion. */
     Fault translateInstReq(RequestPtr &req, Thread *thread)
     {
-        return itb->translate(req, thread->getTC());
+        return this->itb->translate(req, thread->getTC());
     }
 
     /** Translates data read request. */
     Fault translateDataReadReq(RequestPtr &req, Thread *thread)
     {
-        return dtb->translate(req, thread->getTC(), false);
+        return this->dtb->translate(req, thread->getTC(), false);
     }
 
     /** Translates data write request. */
     Fault translateDataWriteReq(RequestPtr &req, Thread *thread)
     {
-        return dtb->translate(req, thread->getTC(), true);
+        return this->dtb->translate(req, thread->getTC(), true);
     }
 
 #else
@@ -127,20 +106,21 @@
 
 #endif
     /** Reads a miscellaneous register. */
-    MiscReg readMiscReg(int misc_reg, unsigned tid);
+    TheISA::MiscReg readMiscReg(int misc_reg, unsigned tid);
 
     /** Reads a misc. register, including any side effects the read
      * might have as defined by the architecture.
      */
-    MiscReg readMiscRegWithEffect(int misc_reg, unsigned tid);
+    TheISA::MiscReg readMiscRegWithEffect(int misc_reg, unsigned tid);
 
     /** Sets a miscellaneous register. */
-    void setMiscReg(int misc_reg, const MiscReg &val, unsigned tid);
+    void setMiscReg(int misc_reg, const TheISA::MiscReg &val, unsigned tid);
 
     /** Sets a misc. register, including any side effects the write
      * might have as defined by the architecture.
      */
-    void setMiscRegWithEffect(int misc_reg, const MiscReg &val, unsigned tid);
+    void setMiscRegWithEffect(int misc_reg, const TheISA::MiscReg &val,
+            unsigned tid);
 
     /** Initiates a squash of all in-flight instructions for a given
      * thread.  The source of the squash is an external update of
@@ -175,10 +155,10 @@
      */
     void syscall(int64_t callnum, int tid);
     /** Gets a syscall argument. */
-    IntReg getSyscallArg(int i, int tid);
+    TheISA::IntReg getSyscallArg(int i, int tid);
 
     /** Used to shift args for indirect syscall. */
-    void setSyscallArg(int i, IntReg val, int tid);
+    void setSyscallArg(int i, TheISA::IntReg val, int tid);
 
     /** Sets the return value of a syscall. */
     void setSyscallReturn(SyscallReturn return_value, int tid);
@@ -204,4 +184,4 @@
     bool lockFlag;
 };
 
-#endif // __CPU_O3_ALPHA_CPU_HH__
+#endif // __CPU_O3_SPARC_CPU_HH__
diff --git a/src/cpu/o3/sparc/cpu_builder.cc b/src/cpu/o3/sparc/cpu_builder.cc
index 81f419e..3cac89b 100644
--- a/src/cpu/o3/sparc/cpu_builder.cc
+++ b/src/cpu/o3/sparc/cpu_builder.cc
@@ -55,8 +55,8 @@
 #if FULL_SYSTEM
     SimObjectParam<System *> system;
     Param<int> cpu_id;
-    SimObjectParam<AlphaISA::ITB *> itb;
-    SimObjectParam<AlphaISA::DTB *> dtb;
+    SimObjectParam<SparcISA::ITB *> itb;
+    SimObjectParam<SparcISA::DTB *> dtb;
     Param<Tick> profile;
 
     Param<bool> do_quiesce;
diff --git a/src/cpu/o3/sparc/cpu_impl.hh b/src/cpu/o3/sparc/cpu_impl.hh
index 66bf7d1..c039a8f 100644
--- a/src/cpu/o3/sparc/cpu_impl.hh
+++ b/src/cpu/o3/sparc/cpu_impl.hh
@@ -55,12 +55,7 @@
 #endif
 
 template <class Impl>
-SparcO3CPU<Impl>::SparcO3CPU(Params *params)
-#if FULL_SYSTEM
-    : FullO3CPU<Impl>(params), itb(params->itb), dtb(params->dtb)
-#else
-    : FullO3CPU<Impl>(params)
-#endif
+SparcO3CPU<Impl>::SparcO3CPU(Params *params) : FullO3CPU<Impl>(params)
 {
     DPRINTF(O3CPU, "Creating SparcO3CPU object.\n");
 
@@ -172,15 +167,16 @@
 
 template <class Impl>
 void
-SparcO3CPU<Impl>::setMiscReg(int misc_reg, const MiscReg &val, unsigned tid)
+SparcO3CPU<Impl>::setMiscReg(int misc_reg,
+        const SparcISA::MiscReg &val, unsigned tid)
 {
     this->regFile.setMiscReg(misc_reg, val, tid);
 }
 
 template <class Impl>
 void
-SparcO3CPU<Impl>::setMiscRegWithEffect(int misc_reg, const MiscReg &val,
-                                       unsigned tid)
+SparcO3CPU<Impl>::setMiscRegWithEffect(int misc_reg,
+        const SparcISA::MiscReg &val, unsigned tid)
 {
     this->regFile.setMiscRegWithEffect(misc_reg, val, tid);
 }
@@ -284,35 +280,24 @@
 TheISA::IntReg
 SparcO3CPU<Impl>::getSyscallArg(int i, int tid)
 {
-    return this->readArchIntReg(SparcISA::ArgumentReg0 + i, tid);
+    TheISA::IntReg idx = TheISA::flattenIntIndex(this->tcBase(tid),
+            SparcISA::ArgumentReg0 + i);
+    return this->readArchIntReg(idx, tid);
 }
 
 template <class Impl>
 void
-SparcO3CPU<Impl>::setSyscallArg(int i, IntReg val, int tid)
+SparcO3CPU<Impl>::setSyscallArg(int i, TheISA::IntReg val, int tid)
 {
-    this->setArchIntReg(SparcISA::ArgumentReg0 + i, val, tid);
+    TheISA::IntReg idx = TheISA::flattenIntIndex(this->tcBase(tid),
+            SparcISA::ArgumentReg0 + i);
+    this->setArchIntReg(idx, val, tid);
 }
 
 template <class Impl>
 void
 SparcO3CPU<Impl>::setSyscallReturn(SyscallReturn return_value, int tid)
 {
-    // check for error condition.  SPARC syscall convention is to
-    // indicate success/failure in reg the carry bit of the ccr
-    // and put the return value itself in the standard return value reg ().
-    if (return_value.successful()) {
-        // no error, clear XCC.C
-        this->setMiscReg(SparcISA::MISCREG_CCR,
-                this->readMiscReg(SparcISA::MISCREG_CCR, tid) & 0xEE, tid);
-        this->setArchIntReg(SparcISA::ReturnValueReg,
-                return_value.value(), tid);
-    } else {
-        // got an error, set XCC.C
-        this->setMiscReg(SparcISA::MISCREG_CCR,
-                this->readMiscReg(SparcISA::MISCREG_CCR, tid) | 0x11, tid);
-        this->setArchIntReg(SparcISA::ReturnValueReg,
-                return_value.value(), tid);
-    }
+    TheISA::setSyscallReturn(return_value, this->tcBase(tid));
 }
 #endif
diff --git a/src/cpu/o3/sparc/dyn_inst.hh b/src/cpu/o3/sparc/dyn_inst.hh
index f8d6bb6..e95ae2f 100644
--- a/src/cpu/o3/sparc/dyn_inst.hh
+++ b/src/cpu/o3/sparc/dyn_inst.hh
@@ -32,6 +32,7 @@
 #define __CPU_O3_SPARC_DYN_INST_HH__
 
 #include "arch/sparc/isa_traits.hh"
+#include "arch/sparc/types.hh"
 #include "cpu/base_dyn_inst.hh"
 #include "cpu/inst_seq.hh"
 #include "cpu/o3/sparc/cpu.hh"
@@ -55,8 +56,8 @@
 
   public:
     /** BaseDynInst constructor given a binary instruction. */
-    SparcDynInst(TheISA::ExtMachInst inst, Addr PC,
-            Addr Pred_PC, InstSeqNum seq_num, O3CPU *cpu);
+    SparcDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC,
+            Addr Pred_PC, Addr Pred_NPC, InstSeqNum seq_num, O3CPU *cpu);
 
     /** BaseDynInst constructor given a static inst pointer. */
     SparcDynInst(StaticInstPtr &_staticInst);
@@ -105,6 +106,45 @@
                                                this->threadNumber);
     }
 
+    /** Reads a miscellaneous register. */
+    TheISA::MiscReg readMiscRegOperand(const StaticInst *si, int idx)
+    {
+        return this->cpu->readMiscReg(
+                si->srcRegIdx(idx) - TheISA::Ctrl_Base_DepTag,
+                this->threadNumber);
+    }
+
+    /** Reads a misc. register, including any side-effects the read
+     * might have as defined by the architecture.
+     */
+    TheISA::MiscReg readMiscRegOperandWithEffect(const StaticInst *si, int idx)
+    {
+        return this->cpu->readMiscRegWithEffect(
+                si->srcRegIdx(idx) - TheISA::Ctrl_Base_DepTag,
+                this->threadNumber);
+    }
+
+    /** Sets a misc. register. */
+    void setMiscRegOperand(const StaticInst * si,
+            int idx, const TheISA::MiscReg &val)
+    {
+        this->instResult.integer = val;
+        return this->cpu->setMiscReg(
+                si->destRegIdx(idx) - TheISA::Ctrl_Base_DepTag,
+                val, this->threadNumber);
+    }
+
+    /** Sets a misc. register, including any side-effects the write
+     * might have as defined by the architecture.
+     */
+    void setMiscRegOperandWithEffect(
+            const StaticInst *si, int idx, const TheISA::MiscReg &val)
+    {
+        return this->cpu->setMiscRegWithEffect(
+                si->destRegIdx(idx) - TheISA::Ctrl_Base_DepTag,
+                val, this->threadNumber);
+    }
+
 #if FULL_SYSTEM
     /** Calls hardware return from error interrupt. */
     Fault hwrei();
@@ -116,22 +156,6 @@
     void syscall(int64_t callnum);
 #endif
 
-  private:
-    /** Physical register index of the destination registers of this
-     *  instruction.
-     */
-    PhysRegIndex _destRegIdx[TheISA::MaxInstDestRegs];
-
-    /** Physical register index of the source registers of this
-     *  instruction.
-     */
-    PhysRegIndex _srcRegIdx[TheISA::MaxInstSrcRegs];
-
-    /** Physical register index of the previous producers of the
-     *  architected destinations.
-     */
-    PhysRegIndex _prevDestRegIdx[TheISA::MaxInstDestRegs];
-
   public:
 
     // The register accessor methods provide the index of the
@@ -145,108 +169,70 @@
     // storage (which is pretty hard to imagine they would have reason
     // to do).
 
-    uint64_t readIntReg(const StaticInst *si, int idx)
+    uint64_t readIntRegOperand(const StaticInst *si, int idx)
     {
-        return this->cpu->readIntReg(_srcRegIdx[idx]);
+        uint64_t val = this->cpu->readIntReg(this->_srcRegIdx[idx]);
+        DPRINTF(Sparc, "Reading int reg %d (%d, %d) as %x\n", (int)this->_flatSrcRegIdx[idx], (int)this->_srcRegIdx[idx], idx, val);
+        return val; // reuse the value already read and traced above
     }
 
-    TheISA::FloatReg readFloatReg(const StaticInst *si, int idx, int width)
-    {
-        return this->cpu->readFloatReg(_srcRegIdx[idx], width);
-    }
-
-    TheISA::FloatReg readFloatReg(const StaticInst *si, int idx)
-    {
-        return this->cpu->readFloatReg(_srcRegIdx[idx]);
-    }
-
-    TheISA::FloatRegBits readFloatRegBits(const StaticInst *si,
+    TheISA::FloatReg readFloatRegOperand(const StaticInst *si,
             int idx, int width)
     {
-        return this->cpu->readFloatRegBits(_srcRegIdx[idx], width);
+        return this->cpu->readFloatReg(this->_srcRegIdx[idx], width);
     }
 
-    TheISA::FloatRegBits readFloatRegBits(const StaticInst *si, int idx)
+    TheISA::FloatReg readFloatRegOperand(const StaticInst *si, int idx)
     {
-        return this->cpu->readFloatRegBits(_srcRegIdx[idx]);
+        return this->cpu->readFloatReg(this->_srcRegIdx[idx]);
+    }
+
+    TheISA::FloatRegBits readFloatRegOperandBits(const StaticInst *si,
+            int idx, int width)
+    {
+        return this->cpu->readFloatRegBits(this->_srcRegIdx[idx], width);
+    }
+
+    TheISA::FloatRegBits readFloatRegOperandBits(const StaticInst *si, int idx)
+    {
+        return this->cpu->readFloatRegBits(this->_srcRegIdx[idx]);
     }
 
     /** @todo: Make results into arrays so they can handle multiple dest
      *  registers.
      */
-    void setIntReg(const StaticInst *si, int idx, uint64_t val)
+    void setIntRegOperand(const StaticInst *si, int idx, uint64_t val)
     {
-        this->cpu->setIntReg(_destRegIdx[idx], val);
-        BaseDynInst<Impl>::setIntReg(si, idx, val);
+        DPRINTF(Sparc, "Setting int reg %d (%d, %d) to %x\n", (int)this->_flatDestRegIdx[idx], (int)this->_destRegIdx[idx], idx, val);
+        this->cpu->setIntReg(this->_destRegIdx[idx], val);
+        BaseDynInst<Impl>::setIntRegOperand(si, idx, val);
     }
 
-    void setFloatReg(const StaticInst *si, int idx,
+    void setFloatRegOperand(const StaticInst *si, int idx,
             TheISA::FloatReg val, int width)
     {
-        this->cpu->setFloatReg(_destRegIdx[idx], val, width);
-        BaseDynInst<Impl>::setFloatReg(si, idx, val, width);
+        this->cpu->setFloatReg(this->_destRegIdx[idx], val, width);
+        BaseDynInst<Impl>::setFloatRegOperand(si, idx, val, width);
     }
 
-    void setFloatReg(const StaticInst *si, int idx, TheISA::FloatReg val)
+    void setFloatRegOperand(const StaticInst *si, int idx, TheISA::FloatReg val)
     {
-        this->cpu->setFloatReg(_destRegIdx[idx], val);
-        BaseDynInst<Impl>::setFloatReg(si, idx, val);
+        this->cpu->setFloatReg(this->_destRegIdx[idx], val);
+        BaseDynInst<Impl>::setFloatRegOperand(si, idx, val);
     }
 
-    void setFloatRegBits(const StaticInst *si, int idx,
+    void setFloatRegOperandBits(const StaticInst *si, int idx,
             TheISA::FloatRegBits val, int width)
     {
-        this->cpu->setFloatRegBits(_destRegIdx[idx], val, width);
-        BaseDynInst<Impl>::setFloatRegBits(si, idx, val);
+        this->cpu->setFloatRegBits(this->_destRegIdx[idx], val, width);
+        BaseDynInst<Impl>::setFloatRegOperandBits(si, idx, val);
     }
 
-    void setFloatRegBits(const StaticInst *si,
+    void setFloatRegOperandBits(const StaticInst *si,
             int idx, TheISA::FloatRegBits val)
     {
-        this->cpu->setFloatRegBits(_destRegIdx[idx], val);
-        BaseDynInst<Impl>::setFloatRegBits(si, idx, val);
-    }
-
-    /** Returns the physical register index of the i'th destination
-     *  register.
-     */
-    PhysRegIndex renamedDestRegIdx(int idx) const
-    {
-        return _destRegIdx[idx];
-    }
-
-    /** Returns the physical register index of the i'th source register. */
-    PhysRegIndex renamedSrcRegIdx(int idx) const
-    {
-        return _srcRegIdx[idx];
-    }
-
-    /** Returns the physical register index of the previous physical register
-     *  that remapped to the same logical register index.
-     */
-    PhysRegIndex prevDestRegIdx(int idx) const
-    {
-        return _prevDestRegIdx[idx];
-    }
-
-    /** Renames a destination register to a physical register.  Also records
-     *  the previous physical register that the logical register mapped to.
-     */
-    void renameDestReg(int idx,
-                       PhysRegIndex renamed_dest,
-                       PhysRegIndex previous_rename)
-    {
-        _destRegIdx[idx] = renamed_dest;
-        _prevDestRegIdx[idx] = previous_rename;
-    }
-
-    /** Renames a source logical register to the physical register which
-     *  has/will produce that logical register's result.
-     *  @todo: add in whether or not the source register is ready.
-     */
-    void renameSrcReg(int idx, PhysRegIndex renamed_src)
-    {
-        _srcRegIdx[idx] = renamed_src;
+        this->cpu->setFloatRegBits(this->_destRegIdx[idx], val);
+        BaseDynInst<Impl>::setFloatRegOperandBits(si, idx, val);
     }
 
   public:
diff --git a/src/cpu/o3/sparc/dyn_inst_impl.hh b/src/cpu/o3/sparc/dyn_inst_impl.hh
index 210daac..c4d30b6 100644
--- a/src/cpu/o3/sparc/dyn_inst_impl.hh
+++ b/src/cpu/o3/sparc/dyn_inst_impl.hh
@@ -31,9 +31,10 @@
 #include "cpu/o3/sparc/dyn_inst.hh"
 
 template <class Impl>
-SparcDynInst<Impl>::SparcDynInst(TheISA::ExtMachInst inst, Addr PC,
-        Addr Pred_PC, InstSeqNum seq_num, O3CPU *cpu)
-    : BaseDynInst<Impl>(inst, PC, Pred_PC, seq_num, cpu)
+SparcDynInst<Impl>::SparcDynInst(TheISA::ExtMachInst inst,
+        Addr PC, Addr NPC, Addr Pred_PC, Addr Pred_NPC,
+        InstSeqNum seq_num, O3CPU *cpu)
+    : BaseDynInst<Impl>(inst, PC, NPC, Pred_PC, Pred_NPC, seq_num, cpu)
 {
     initVars();
 }
@@ -53,11 +54,11 @@
     // as the normal register entries.  It will allow the IQ to work
     // without any modifications.
     for (int i = 0; i < this->staticInst->numDestRegs(); i++) {
-        _destRegIdx[i] = this->staticInst->destRegIdx(i);
+        this->_destRegIdx[i] = this->staticInst->destRegIdx(i);
     }
 
     for (int i = 0; i < this->staticInst->numSrcRegs(); i++) {
-        _srcRegIdx[i] = this->staticInst->srcRegIdx(i);
+        this->_srcRegIdx[i] = this->staticInst->srcRegIdx(i);
         this->_readySrcRegIdx[i] = 0;
     }
 }
@@ -126,7 +127,8 @@
 bool
 SparcDynInst<Impl>::simPalCheck(int palFunc)
 {
-    return this->cpu->simPalCheck(palFunc, this->threadNumber);
+    panic("simPalCheck called, but there's no PAL in SPARC!\n");
+    return false;
 }
 #else
 template <class Impl>
diff --git a/src/cpu/o3/sparc/thread_context.hh b/src/cpu/o3/sparc/thread_context.hh
index 69d1e2d..7497959 100644
--- a/src/cpu/o3/sparc/thread_context.hh
+++ b/src/cpu/o3/sparc/thread_context.hh
@@ -36,12 +36,6 @@
 {
   public:
 #if FULL_SYSTEM
-    /** Returns a pointer to the ITB. */
-    virtual SparcISA::ITB *getITBPtr() { return this->cpu->itb; }
-
-    /** Returns a pointer to the DTB. */
-    virtual SparcISA::DTB *getDTBPtr() { return this->cpu->dtb; }
-
     /** Returns pointer to the quiesce event. */
     virtual EndQuiesceEvent *getQuiesceEvent()
     {
@@ -62,7 +56,7 @@
     virtual void changeRegFileContext(TheISA::RegContextParam param,
                                       TheISA::RegContextVal val)
     {
-        panic("This doesn't make sense!\n");
+        //XXX Ignore this for now. This -really- needs to get fixed.
     }
 
 
diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh
index 390569c..4987d6e 100755
--- a/src/cpu/o3/thread_context.hh
+++ b/src/cpu/o3/thread_context.hh
@@ -66,6 +66,14 @@
     /** Pointer to the thread state that this TC corrseponds to. */
     O3ThreadState<Impl> *thread;
 
+#if FULL_SYSTEM
+    /** Returns a pointer to the ITB. */
+    TheISA::ITB *getITBPtr() { return cpu->itb; }
+
+    /** Returns a pointer to the DTB. */
+    TheISA::DTB *getDTBPtr() { return cpu->dtb; }
+#endif
+
     /** Returns a pointer to this CPU. */
     virtual BaseCPU *getCpuPtr() { return cpu; }
 
diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh
index afebf29..af98fa1 100755
--- a/src/cpu/o3/thread_context_impl.hh
+++ b/src/cpu/o3/thread_context_impl.hh
@@ -29,6 +29,7 @@
  *          Korey Sewell
  */
 
+#include "arch/regfile.hh"
 #include "cpu/o3/thread_context.hh"
 #include "cpu/quiesce_event.hh"
 
@@ -305,6 +306,7 @@
 uint64_t
 O3ThreadContext<Impl>::readIntReg(int reg_idx)
 {
+    reg_idx = TheISA::flattenIntIndex(this, reg_idx);
     return cpu->readArchIntReg(reg_idx, thread->readTid());
 }
 
@@ -349,6 +351,7 @@
 void
 O3ThreadContext<Impl>::setIntReg(int reg_idx, uint64_t val)
 {
+    reg_idx = TheISA::flattenIntIndex(this, reg_idx);
     cpu->setArchIntReg(reg_idx, val, thread->readTid());
 
     // Squash if we're not already in a state update mode.
diff --git a/src/cpu/ozone/cpu.hh b/src/cpu/ozone/cpu.hh
index 0da446c..baea7a5 100644
--- a/src/cpu/ozone/cpu.hh
+++ b/src/cpu/ozone/cpu.hh
@@ -219,11 +219,19 @@
 
         uint64_t readNextNPC()
         {
-            return 0;
+#if ISA_HAS_DELAY_SLOT
+            panic("Ozone needs to support nextNPC");
+#else
+            return thread->nextPC + sizeof(TheISA::MachInst);
+#endif
         }
 
         void setNextNPC(uint64_t val)
-        { }
+        {
+#if ISA_HAS_DELAY_SLOT
+            panic("Ozone needs to support nextNPC");
+#endif
+        }
 
       public:
         // ISA stuff:
diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh
index 31fd009..c4853b9 100644
--- a/src/cpu/simple/base.hh
+++ b/src/cpu/simple/base.hh
@@ -54,15 +54,15 @@
 }
 class MemObject;
 
-class RemoteGDB;
-class GDBListener;
-
 #else
 
 class Process;
 
 #endif // FULL_SYSTEM
 
+class RemoteGDB;
+class GDBListener;
+
 class ThreadContext;
 class Checkpoint;
 
@@ -304,6 +304,31 @@
         return thread->setMiscRegWithEffect(misc_reg, val);
     }
 
+    MiscReg readMiscRegOperand(const StaticInst *si, int idx)
+    {
+        int reg_idx = si->srcRegIdx(idx) - TheISA::Ctrl_Base_DepTag;
+        return thread->readMiscReg(reg_idx);
+    }
+
+    MiscReg readMiscRegOperandWithEffect(const StaticInst *si, int idx)
+    {
+        int reg_idx = si->srcRegIdx(idx) - TheISA::Ctrl_Base_DepTag;
+        return thread->readMiscRegWithEffect(reg_idx);
+    }
+
+    void setMiscRegOperand(const StaticInst *si, int idx, const MiscReg &val)
+    {
+        int reg_idx = si->destRegIdx(idx) - TheISA::Ctrl_Base_DepTag;
+        return thread->setMiscReg(reg_idx, val);
+    }
+
+    void setMiscRegOperandWithEffect(
+            const StaticInst *si, int idx, const MiscReg &val)
+    {
+        int reg_idx = si->destRegIdx(idx) - TheISA::Ctrl_Base_DepTag;
+        return thread->setMiscRegWithEffect(reg_idx, val);
+    }
+
 #if FULL_SYSTEM
     Fault hwrei() { return thread->hwrei(); }
     void ev5_trap(Fault fault) { fault->invoke(tc); }
diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh
index 10bbe29..f2f79c0 100644
--- a/src/cpu/simple_thread.hh
+++ b/src/cpu/simple_thread.hh
@@ -33,6 +33,8 @@
 #define __CPU_SIMPLE_THREAD_HH__
 
 #include "arch/isa_traits.hh"
+#include "arch/regfile.hh"
+#include "arch/syscallreturn.hh"
 #include "config/full_system.hh"
 #include "cpu/thread_context.hh"
 #include "cpu/thread_state.hh"
@@ -250,7 +252,7 @@
     //
     uint64_t readIntReg(int reg_idx)
     {
-        return regs.readIntReg(reg_idx);
+        return regs.readIntReg(TheISA::flattenIntIndex(getTC(), reg_idx));
     }
 
     FloatReg readFloatReg(int reg_idx, int width)
@@ -275,7 +277,7 @@
 
     void setIntReg(int reg_idx, uint64_t val)
     {
-        regs.setIntReg(reg_idx, val);
+        regs.setIntReg(TheISA::flattenIntIndex(getTC(), reg_idx), val);
     }
 
     void setFloatReg(int reg_idx, FloatReg val, int width)
@@ -376,18 +378,20 @@
 #if !FULL_SYSTEM
     TheISA::IntReg getSyscallArg(int i)
     {
-        return regs.readIntReg(TheISA::ArgumentReg0 + i);
+        return regs.readIntReg(TheISA::flattenIntIndex(getTC(),
+                    TheISA::ArgumentReg0 + i));
     }
 
     // used to shift args for indirect syscall
     void setSyscallArg(int i, TheISA::IntReg val)
     {
-        regs.setIntReg(TheISA::ArgumentReg0 + i, val);
+        regs.setIntReg(TheISA::flattenIntIndex(getTC(),
+                    TheISA::ArgumentReg0 + i), val);
     }
 
     void setSyscallReturn(SyscallReturn return_value)
     {
-        TheISA::setSyscallReturn(return_value, &regs);
+        TheISA::setSyscallReturn(return_value, getTC());
     }
 
     void syscall(int64_t callnum)
diff --git a/src/cpu/thread_context.hh b/src/cpu/thread_context.hh
index bb9cc9e..16e491f 100644
--- a/src/cpu/thread_context.hh
+++ b/src/cpu/thread_context.hh
@@ -32,13 +32,13 @@
 #define __CPU_THREAD_CONTEXT_HH__
 
 #include "arch/regfile.hh"
-#include "arch/syscallreturn.hh"
 #include "arch/types.hh"
 #include "config/full_system.hh"
 #include "mem/request.hh"
 #include "sim/faults.hh"
 #include "sim/host.hh"
 #include "sim/serialize.hh"
+#include "sim/syscallreturn.hh"
 #include "sim/byteswap.hh"
 
 // @todo: Figure out a more architecture independent way to obtain the ITB and
diff --git a/src/sim/byteswap.hh b/src/sim/byteswap.hh
index da427f1..cbc0b50 100644
--- a/src/sim/byteswap.hh
+++ b/src/sim/byteswap.hh
@@ -57,6 +57,8 @@
 #include <libkern/OSByteOrder.h>
 #endif
 
+enum ByteOrder {BigEndianByteOrder, LittleEndianByteOrder};
+
 //These functions actually perform the swapping for parameters
 //of various bit lengths
 static inline uint64_t
@@ -131,11 +133,13 @@
 //For conversions not involving the guest system, we can define the functions
 //conditionally based on the BYTE_ORDER macro and outside of the namespaces
 #if defined(_BIG_ENDIAN) || !defined(_LITTLE_ENDIAN) && BYTE_ORDER == BIG_ENDIAN
+const ByteOrder HostByteOrder = BigEndianByteOrder;
 template <typename T> static inline T htole(T value) {return swap_byte(value);}
 template <typename T> static inline T letoh(T value) {return swap_byte(value);}
 template <typename T> static inline T htobe(T value) {return value;}
 template <typename T> static inline T betoh(T value) {return value;}
 #elif defined(_LITTLE_ENDIAN) || BYTE_ORDER == LITTLE_ENDIAN
+const ByteOrder HostByteOrder = LittleEndianByteOrder;
 template <typename T> static inline T htole(T value) {return value;}
 template <typename T> static inline T letoh(T value) {return value;}
 template <typename T> static inline T htobe(T value) {return swap_byte(value);}
@@ -146,33 +150,35 @@
 
 namespace BigEndianGuest
 {
-        template <typename T>
-        static inline T gtole(T value) {return betole(value);}
-        template <typename T>
-        static inline T letog(T value) {return letobe(value);}
-        template <typename T>
-        static inline T gtobe(T value) {return value;}
-        template <typename T>
-        static inline T betog(T value) {return value;}
-        template <typename T>
-        static inline T htog(T value) {return htobe(value);}
-        template <typename T>
-        static inline T gtoh(T value) {return betoh(value);}
+    const bool ByteOrderDiffers = (HostByteOrder != BigEndianByteOrder);
+    template <typename T>
+    static inline T gtole(T value) {return betole(value);}
+    template <typename T>
+    static inline T letog(T value) {return letobe(value);}
+    template <typename T>
+    static inline T gtobe(T value) {return value;}
+    template <typename T>
+    static inline T betog(T value) {return value;}
+    template <typename T>
+    static inline T htog(T value) {return htobe(value);}
+    template <typename T>
+    static inline T gtoh(T value) {return betoh(value);}
 }
 
 namespace LittleEndianGuest
 {
-        template <typename T>
-        static inline T gtole(T value) {return value;}
-        template <typename T>
-        static inline T letog(T value) {return value;}
-        template <typename T>
-        static inline T gtobe(T value) {return letobe(value);}
-        template <typename T>
-        static inline T betog(T value) {return betole(value);}
-        template <typename T>
-        static inline T htog(T value) {return htole(value);}
-        template <typename T>
-        static inline T gtoh(T value) {return letoh(value);}
+    const bool ByteOrderDiffers = (HostByteOrder != LittleEndianByteOrder);
+    template <typename T>
+    static inline T gtole(T value) {return value;}
+    template <typename T>
+    static inline T letog(T value) {return value;}
+    template <typename T>
+    static inline T gtobe(T value) {return letobe(value);}
+    template <typename T>
+    static inline T betog(T value) {return betole(value);}
+    template <typename T>
+    static inline T htog(T value) {return htole(value);}
+    template <typename T>
+    static inline T gtoh(T value) {return letoh(value);}
 }
 #endif // __SIM_BYTE_SWAP_HH__
diff --git a/src/sim/process.cc b/src/sim/process.cc
index 63ff339..e5d8681 100644
--- a/src/sim/process.cc
+++ b/src/sim/process.cc
@@ -35,6 +35,7 @@
 
 #include <string>
 
+#include "arch/remote_gdb.hh"
 #include "base/intmath.hh"
 #include "base/loader/object_file.hh"
 #include "base/loader/symtab.hh"
@@ -154,6 +155,13 @@
     int myIndex = threadContexts.size();
     threadContexts.push_back(tc);
 
+    RemoteGDB *rgdb = new RemoteGDB(system, tc);
+    GDBListener *gdbl = new GDBListener(rgdb, 7000 + myIndex);
+    gdbl->listen();
+    //gdbl->accept();
+
+    remoteGDB.push_back(rgdb);
+
     // return CPU number to caller
     return myIndex;
 }
diff --git a/src/sim/process.hh b/src/sim/process.hh
index 616c02c..bf65c6e 100644
--- a/src/sim/process.hh
+++ b/src/sim/process.hh
@@ -51,6 +51,11 @@
 class PageTable;
 class TranslatingPort;
 class System;
+class GDBListener;
+namespace TheISA
+{
+    class RemoteGDB;
+}
 
 void
 copyStringArray(std::vector<std::string> &strings, Addr array_ptr,
@@ -72,6 +77,11 @@
     // thread contexts associated with this process
     std::vector<ThreadContext *> threadContexts;
 
+    // remote gdb objects
+    std::vector<TheISA::RemoteGDB *> remoteGDB;
+    std::vector<GDBListener *> gdbListen;
+    bool breakpoint();
+
     // number of CPUs (esxec contexts, really) assigned to this process.
     unsigned int numCpus() { return threadContexts.size(); }
 
diff --git a/src/sim/syscallreturn.hh b/src/sim/syscallreturn.hh
new file mode 100644
index 0000000..d1c43f5
--- /dev/null
+++ b/src/sim/syscallreturn.hh
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Gabe Black
+ */
+
+#ifndef __SIM_SYSCALLRETURN_HH__
+#define __SIM_SYSCALLRETURN_HH__
+
+#include <inttypes.h>
+
+class SyscallReturn
+{
+  public:
+    template <class T>
+    SyscallReturn(T v, bool s)
+    {
+        retval = (uint64_t)v;
+        success = s;
+    }
+
+    template <class T>
+    SyscallReturn(T v)
+    {
+        success = (v >= 0);
+        retval = (uint64_t)v;
+    }
+
+    ~SyscallReturn() {}
+
+    SyscallReturn& operator=(const SyscallReturn& s)
+    {
+        retval = s.retval;
+        success = s.success;
+        return *this;
+    }
+
+    bool successful() { return success; }
+    uint64_t value() { return retval; }
+
+    private:
+    uint64_t retval;
+    bool success;
+};
+
+#endif
diff --git a/src/sim/system.cc b/src/sim/system.cc
index b3ba1b8..f6febe4 100644
--- a/src/sim/system.cc
+++ b/src/sim/system.cc
@@ -32,6 +32,7 @@
  */
 
 #include "arch/isa_traits.hh"
+#include "arch/remote_gdb.hh"
 #include "base/loader/object_file.hh"
 #include "base/loader/symtab.hh"
 #include "base/trace.hh"
@@ -43,7 +44,6 @@
 #include "sim/system.hh"
 #if FULL_SYSTEM
 #include "arch/vtophys.hh"
-#include "arch/remote_gdb.hh"
 #include "kern/kernel_stats.hh"
 #endif
 
@@ -141,14 +141,8 @@
 #endif // FULL_SYSTEM}
 }
 
-#if FULL_SYSTEM
-
-
 int rgdb_wait = -1;
 
-#endif // FULL_SYSTEM
-
-
 void
 System::setMemoryMode(MemoryMode mode)
 {
@@ -156,6 +150,11 @@
     memoryMode = mode;
 }
 
+bool System::breakpoint()
+{
+    return remoteGDB[0]->breakpoint();
+}
+
 int
 System::registerThreadContext(ThreadContext *tc, int id)
 {
@@ -175,7 +174,6 @@
     threadContexts[id] = tc;
     numcpus++;
 
-#if FULL_SYSTEM
     RemoteGDB *rgdb = new RemoteGDB(this, tc);
     GDBListener *gdbl = new GDBListener(rgdb, 7000 + id);
     gdbl->listen();
@@ -191,7 +189,6 @@
     }
 
     remoteGDB[id] = rgdb;
-#endif // FULL_SYSTEM
 
     return id;
 }
@@ -213,9 +210,7 @@
     }
 
     threadContexts[id] = tc;
-#if FULL_SYSTEM
     remoteGDB[id]->replaceThreadContext(tc);
-#endif // FULL_SYSTEM
 }
 
 #if !FULL_SYSTEM
diff --git a/src/sim/system.hh b/src/sim/system.hh
index b3a67bf..758da70 100644
--- a/src/sim/system.hh
+++ b/src/sim/system.hh
@@ -55,12 +55,12 @@
 
 #if FULL_SYSTEM
 class Platform;
+#endif
 class GDBListener;
 namespace TheISA
 {
     class RemoteGDB;
 }
-#endif
 
 class System : public SimObject
 {
@@ -159,11 +159,9 @@
 
 #endif
   public:
-#if FULL_SYSTEM
     std::vector<TheISA::RemoteGDB *> remoteGDB;
     std::vector<GDBListener *> gdbListen;
-    virtual bool breakpoint() = 0;
-#endif // FULL_SYSTEM
+    bool breakpoint();
 
   public:
     struct Params
diff --git a/src/unittest/Makefile b/src/unittest/Makefile
index 0c11b95..e22b80b 100644
--- a/src/unittest/Makefile
+++ b/src/unittest/Makefile
@@ -1,4 +1,4 @@
-# Copyright (c) 2006 The Regents of The University of Michigan
+# Copyright (c) 2006-2007 The Regents of The University of Michigan
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini
index 4b06871..2296e25 100644
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini
@@ -7,9 +7,6 @@
 output_file=cout
 progress_interval=0
 
-[debug]
-break_cycles=
-
 [exetrace]
 intel_format=false
 legion_lockstep=false
@@ -109,6 +106,7 @@
 numROBEntries=192
 numRobs=1
 numThreads=1
+phase=0
 predType=tournament
 progress_interval=0
 renameToDecodeDelay=1
@@ -390,6 +388,7 @@
 [system.cpu.workload]
 type=LiveProcess
 cmd=hello
+cwd=
 egid=100
 env=
 euid=100
@@ -415,6 +414,7 @@
 file=
 latency=1
 range=0:134217727
+zero=false
 port=system.membus.port[0]
 
 [trace]
diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out
index e593f63..1b1b58f 100644
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out
@@ -10,6 +10,7 @@
 file=
 range=[0,134217727]
 latency=1
+zero=false
 
 [system]
 type=System
@@ -30,6 +31,7 @@
 input=cin
 output=cout
 env=
+cwd=
 system=system
 uid=100
 euid=100
@@ -169,6 +171,7 @@
 [system.cpu]
 type=DerivO3CPU
 clock=1
+phase=0
 numThreads=1
 activity=0
 workload=system.cpu.workload
@@ -409,9 +412,6 @@
 legion_lockstep=false
 trace_system=client
 
-[debug]
-break_cycles=
-
 [statsreset]
 reset_cycle=0
 
diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt b/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt
index 8b8a254..4e3fdbc 100644
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt
@@ -8,10 +8,10 @@
 global.BPredUnit.condPredicted                   1563                       # Number of conditional branches predicted
 global.BPredUnit.lookups                         5229                       # Number of BP lookups
 global.BPredUnit.usedRAS                         2821                       # Number of times the RAS was used to get a target.
-host_inst_rate                                  15743                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 180184                       # Number of bytes of host memory used
-host_seconds                                     0.36                       # Real time elapsed on the host
-host_tick_rate                                3916768                       # Simulator tick rate (ticks/s)
+host_inst_rate                                  11609                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 177052                       # Number of bytes of host memory used
+host_seconds                                     0.48                       # Real time elapsed on the host
+host_tick_rate                                2887871                       # Simulator tick rate (ticks/s)
 memdepunit.memDep.conflictingLoads                 23                       # Number of conflicting loads.
 memdepunit.memDep.conflictingStores               117                       # Number of conflicting stores.
 memdepunit.memDep.insertedLoads                  3775                       # Number of loads inserted to the mem dependence unit.
@@ -73,7 +73,7 @@
 system.cpu.dcache.WriteReq_mshr_miss_latency       375299                       # number of WriteReq MSHR miss cycles
 system.cpu.dcache.WriteReq_mshr_miss_rate     0.089901                       # mshr miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_mshr_misses             73                       # number of WriteReq MSHR misses
-system.cpu.dcache.avg_blocked_cycles_no_mshrs     no value                       # average number of cycles each access was blocked
+system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.dcache.avg_blocked_cycles_no_targets  3366.651163                       # average number of cycles each access was blocked
 system.cpu.dcache.avg_refs                  11.587209                       # Average number of references to valid blocks.
 system.cpu.dcache.blocked_no_mshrs                  0                       # number of cycles access was blocked
@@ -263,8 +263,8 @@
 system.cpu.iew.lsq.thread.0.squashedLoads         2796                       # Number of loads squashed
 system.cpu.iew.lsq.thread.0.squashedStores         2922                       # Number of stores squashed
 system.cpu.iew.memOrderViolationEvents             40                       # Number of memory order violations
-system.cpu.iew.predictedNotTakenIncorrect          281                       # Number of branches that were predicted not taken incorrectly
-system.cpu.iew.predictedTakenIncorrect            120                       # Number of branches that were predicted taken incorrectly
+system.cpu.iew.predictedNotTakenIncorrect          279                       # Number of branches that were predicted not taken incorrectly
+system.cpu.iew.predictedTakenIncorrect            122                       # Number of branches that were predicted taken incorrectly
 system.cpu.ipc                               0.004016                       # IPC: Instructions Per Cycle
 system.cpu.ipc_total                         0.004016                       # IPC: Total IPC of All Threads
 system.cpu.iq.ISSUE:FU_type_0                   13840                       # Type of FU issued
diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stderr b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stderr
index 87866a2..eb1796e 100644
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stderr
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stderr
@@ -1 +1,2 @@
+0: system.remote_gdb.listener: listening for remote gdb on port 7000
 warn: Entering event queue @ 0.  Starting simulation...
diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout
index 4c2593f..511bc59 100644
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout
@@ -6,8 +6,8 @@
 All Rights Reserved
 
 
-M5 compiled Nov 12 2006 23:25:38
-M5 started Sun Nov 12 23:25:46 2006
-M5 executing on zizzer.eecs.umich.edu
-command line: build/ALPHA_SE/m5.opt -d build/ALPHA_SE/tests/opt/quick/00.hello/alpha/linux/o3-timing tests/run.py quick/00.hello/alpha/linux/o3-timing
+M5 compiled Jan 22 2007 23:06:52
+M5 started Mon Jan 22 23:06:54 2007
+M5 executing on ewok
+command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/linux/o3-timing tests/run.py quick/00.hello/alpha/linux/o3-timing
 Exiting @ tick 1400135 because target called exit()
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini
index 724e282..db88e76 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini
@@ -7,9 +7,6 @@
 output_file=cout
 progress_interval=0
 
-[debug]
-break_cycles=
-
 [exetrace]
 intel_format=false
 legion_lockstep=false
@@ -109,6 +106,7 @@
 numROBEntries=192
 numRobs=1
 numThreads=1
+phase=0
 predType=tournament
 progress_interval=0
 renameToDecodeDelay=1
@@ -390,6 +388,7 @@
 [system.cpu.workload]
 type=LiveProcess
 cmd=hello
+cwd=
 egid=100
 env=
 euid=100
@@ -415,6 +414,7 @@
 file=
 latency=1
 range=0:134217727
+zero=false
 port=system.membus.port[0]
 
 [trace]
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out
index 83eecc3..9ee1931 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out
@@ -10,6 +10,7 @@
 file=
 range=[0,134217727]
 latency=1
+zero=false
 
 [system]
 type=System
@@ -30,6 +31,7 @@
 input=cin
 output=cout
 env=
+cwd=
 system=system
 uid=100
 euid=100
@@ -169,6 +171,7 @@
 [system.cpu]
 type=DerivO3CPU
 clock=1
+phase=0
 numThreads=1
 activity=0
 workload=system.cpu.workload
@@ -409,9 +412,6 @@
 legion_lockstep=false
 trace_system=client
 
-[debug]
-break_cycles=
-
 [statsreset]
 reset_cycle=0
 
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt
index ce44cab..3aae57d 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt
@@ -8,10 +8,10 @@
 global.BPredUnit.condPredicted                    459                       # Number of conditional branches predicted
 global.BPredUnit.lookups                          898                       # Number of BP lookups
 global.BPredUnit.usedRAS                          171                       # Number of times the RAS was used to get a target.
-host_inst_rate                                  19676                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 179796                       # Number of bytes of host memory used
-host_seconds                                     0.12                       # Real time elapsed on the host
-host_tick_rate                                6183068                       # Simulator tick rate (ticks/s)
+host_inst_rate                                  22132                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 176684                       # Number of bytes of host memory used
+host_seconds                                     0.11                       # Real time elapsed on the host
+host_tick_rate                                6945216                       # Simulator tick rate (ticks/s)
 memdepunit.memDep.conflictingLoads                 10                       # Number of conflicting loads.
 memdepunit.memDep.conflictingStores                 8                       # Number of conflicting stores.
 memdepunit.memDep.insertedLoads                   783                       # Number of loads inserted to the mem dependence unit.
@@ -263,8 +263,8 @@
 system.cpu.iew.lsq.thread.0.squashedLoads          368                       # Number of loads squashed
 system.cpu.iew.lsq.thread.0.squashedStores           87                       # Number of stores squashed
 system.cpu.iew.memOrderViolationEvents             12                       # Number of memory order violations
-system.cpu.iew.predictedNotTakenIncorrect           96                       # Number of branches that were predicted not taken incorrectly
-system.cpu.iew.predictedTakenIncorrect             56                       # Number of branches that were predicted taken incorrectly
+system.cpu.iew.predictedNotTakenIncorrect           95                       # Number of branches that were predicted not taken incorrectly
+system.cpu.iew.predictedTakenIncorrect             57                       # Number of branches that were predicted taken incorrectly
 system.cpu.ipc                               0.003174                       # IPC: Instructions Per Cycle
 system.cpu.ipc_total                         0.003174                       # IPC: Total IPC of All Threads
 system.cpu.iq.ISSUE:FU_type_0                    3491                       # Type of FU issued
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stderr b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stderr
index b3cdfe9..fb2137f 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stderr
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stderr
@@ -1,2 +1,3 @@
+0: system.remote_gdb.listener: listening for remote gdb on port 7000
 warn: Entering event queue @ 0.  Starting simulation...
 warn: ignoring syscall sigprocmask(1, 18446744073709547831, ...)
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout
index ccb6a0e..6436baf 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout
@@ -6,8 +6,8 @@
 All Rights Reserved
 
 
-M5 compiled Nov 12 2006 23:25:38
-M5 started Sun Nov 12 23:25:54 2006
-M5 executing on zizzer.eecs.umich.edu
-command line: build/ALPHA_SE/m5.opt -d build/ALPHA_SE/tests/opt/quick/00.hello/alpha/tru64/o3-timing tests/run.py quick/00.hello/alpha/tru64/o3-timing
+M5 compiled Jan 22 2007 23:06:52
+M5 started Mon Jan 22 23:07:09 2007
+M5 executing on ewok
+command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/tru64/o3-timing tests/run.py quick/00.hello/alpha/tru64/o3-timing
 Exiting @ tick 752028 because target called exit()
diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini
index a1c77c6..6eef745 100644
--- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini
+++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini
@@ -7,9 +7,6 @@
 output_file=cout
 progress_interval=0
 
-[debug]
-break_cycles=
-
 [exetrace]
 intel_format=false
 legion_lockstep=false
@@ -109,6 +106,7 @@
 numROBEntries=192
 numRobs=1
 numThreads=1
+phase=0
 predType=tournament
 progress_interval=0
 renameToDecodeDelay=1
@@ -390,6 +388,7 @@
 [system.cpu.workload0]
 type=LiveProcess
 cmd=hello
+cwd=
 egid=100
 env=
 euid=100
@@ -405,6 +404,7 @@
 [system.cpu.workload1]
 type=LiveProcess
 cmd=hello
+cwd=
 egid=100
 env=
 euid=100
@@ -430,6 +430,7 @@
 file=
 latency=1
 range=0:134217727
+zero=false
 port=system.membus.port[0]
 
 [trace]
diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out
index 186f742..f36f666 100644
--- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out
+++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out
@@ -10,6 +10,7 @@
 file=
 range=[0,134217727]
 latency=1
+zero=false
 
 [system]
 type=System
@@ -30,6 +31,7 @@
 input=cin
 output=cout
 env=
+cwd=
 system=system
 uid=100
 euid=100
@@ -45,6 +47,7 @@
 input=cin
 output=cout
 env=
+cwd=
 system=system
 uid=100
 euid=100
@@ -184,6 +187,7 @@
 [system.cpu]
 type=DerivO3CPU
 clock=1
+phase=0
 numThreads=1
 activity=0
 workload=system.cpu.workload0 system.cpu.workload1
@@ -424,9 +428,6 @@
 legion_lockstep=false
 trace_system=client
 
-[debug]
-break_cycles=
-
 [statsreset]
 reset_cycle=0
 
diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt
index 365f828..bb9e936 100644
--- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt
+++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt
@@ -8,10 +8,10 @@
 global.BPredUnit.condPredicted                   4031                       # Number of conditional branches predicted
 global.BPredUnit.lookups                        12370                       # Number of BP lookups
 global.BPredUnit.usedRAS                         6337                       # Number of times the RAS was used to get a target.
-host_inst_rate                                   9475                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 181200                       # Number of bytes of host memory used
-host_seconds                                     1.19                       # Real time elapsed on the host
-host_tick_rate                                1884343                       # Simulator tick rate (ticks/s)
+host_inst_rate                                  11366                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 178064                       # Number of bytes of host memory used
+host_seconds                                     0.99                       # Real time elapsed on the host
+host_tick_rate                                2259917                       # Simulator tick rate (ticks/s)
 memdepunit.memDep.conflictingLoads                 27                       # Number of conflicting loads.
 memdepunit.memDep.conflictingLoads                 20                       # Number of conflicting loads.
 memdepunit.memDep.conflictingStores                97                       # Number of conflicting stores.
@@ -470,8 +470,8 @@
 system.cpu.iew.lsq.thread.1.squashedLoads         1843                       # Number of loads squashed
 system.cpu.iew.lsq.thread.1.squashedStores          935                       # Number of stores squashed
 system.cpu.iew.memOrderViolationEvents             63                       # Number of memory order violations
-system.cpu.iew.predictedNotTakenIncorrect          802                       # Number of branches that were predicted not taken incorrectly
-system.cpu.iew.predictedTakenIncorrect            228                       # Number of branches that were predicted taken incorrectly
+system.cpu.iew.predictedNotTakenIncorrect          798                       # Number of branches that were predicted not taken incorrectly
+system.cpu.iew.predictedTakenIncorrect            232                       # Number of branches that were predicted taken incorrectly
 system.cpu.ipc_0                             0.002514                       # IPC: Instructions Per Cycle
 system.cpu.ipc_1                             0.002513                       # IPC: Instructions Per Cycle
 system.cpu.ipc_total                         0.005027                       # IPC: Total IPC of All Threads
diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stderr b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stderr
index 87866a2..c36de0b 100644
--- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stderr
+++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stderr
@@ -1 +1,3 @@
+0: system.remote_gdb.listener: listening for remote gdb on port 7000
+0: system.remote_gdb.listener: listening for remote gdb on port 7001
 warn: Entering event queue @ 0.  Starting simulation...
diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout
index 0ea937e..f07a960 100644
--- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout
+++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout
@@ -7,8 +7,8 @@
 All Rights Reserved
 
 
-M5 compiled Nov 12 2006 23:25:38
-M5 started Sun Nov 12 23:26:01 2006
-M5 executing on zizzer.eecs.umich.edu
-command line: build/ALPHA_SE/m5.opt -d build/ALPHA_SE/tests/opt/quick/01.hello-2T-smt/alpha/linux/o3-timing tests/run.py quick/01.hello-2T-smt/alpha/linux/o3-timing
+M5 compiled Jan 22 2007 23:06:52
+M5 started Mon Jan 22 23:07:23 2007
+M5 executing on ewok
+command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/01.hello-2T-smt/alpha/linux/o3-timing tests/run.py quick/01.hello-2T-smt/alpha/linux/o3-timing
 Exiting @ tick 2237162 because target called exit()