Add context IDs to the simulator.  From now on, cpuId is almost never used;
the primary identifier for a hardware context should be contextId().  The
concept of threads within a CPU remains in the form of threadId(), because
sometimes you need to know which context within a CPU to manipulate.
diff --git a/src/arch/alpha/locked_mem.hh b/src/arch/alpha/locked_mem.hh
index 6f4f5a7..e8928ba 100644
--- a/src/arch/alpha/locked_mem.hh
+++ b/src/arch/alpha/locked_mem.hh
@@ -85,9 +85,9 @@
             stCondFailures++;
             xc->setStCondFailures(stCondFailures);
             if (stCondFailures % 100000 == 0) {
-                warn("cpu %d: %d consecutive "
+                warn("context %d: %d consecutive "
                      "store conditional failures\n",
-                     xc->cpuId(), stCondFailures);
+                     xc->contextId(), stCondFailures);
             }
 
             // store conditional failed already, so don't issue it to mem
diff --git a/src/arch/mips/locked_mem.hh b/src/arch/mips/locked_mem.hh
index 5877b14..9f41ba0 100644
--- a/src/arch/mips/locked_mem.hh
+++ b/src/arch/mips/locked_mem.hh
@@ -83,9 +83,9 @@
             stCondFailures++;
             xc->setStCondFailures(stCondFailures);
             if (stCondFailures % 10 == 0) {
-                warn("%i: cpu %d: %d consecutive "
+                warn("%i: context %d: %d consecutive "
                      "store conditional failures\n",
-                     curTick, xc->cpuId(), stCondFailures);
+                     curTick, xc->contextId(), stCondFailures);
             }
 
             if (stCondFailures == 5000) {
diff --git a/src/arch/sparc/ua2005.cc b/src/arch/sparc/ua2005.cc
index 6961a24..502033d 100644
--- a/src/arch/sparc/ua2005.cc
+++ b/src/arch/sparc/ua2005.cc
@@ -257,11 +257,11 @@
         temp = readRegNoEffect(miscReg) & (STS::active | STS::speculative);
         // Check that the CPU array is fully populated
         // (by calling getNumCPus())
-        assert(sys->getNumCPUs() > tc->cpuId());
+        assert(sys->numContexts() > tc->contextId());
 
-        temp |= tc->cpuId()  << STS::shft_id;
+        temp |= tc->contextId()  << STS::shft_id;
 
-        for (x = tc->cpuId() & ~3; x < sys->threadContexts.size(); x++) {
+        for (x = tc->contextId() & ~3; x < sys->threadContexts.size(); x++) {
             switch (sys->threadContexts[x]->status()) {
               case ThreadContext::Active:
                 temp |= STS::st_run << (STS::shft_fsm0 -
diff --git a/src/arch/x86/tlb.cc b/src/arch/x86/tlb.cc
index 17374fa..4980c5f 100644
--- a/src/arch/x86/tlb.cc
+++ b/src/arch/x86/tlb.cc
@@ -654,7 +654,7 @@
         */
         // Force the access to be uncacheable.
         req->setFlags(req->getFlags() | UNCACHEABLE);
-        req->setPaddr(x86LocalAPICAddress(tc->cpuId(), paddr - baseAddr));
+        req->setPaddr(x86LocalAPICAddress(tc->contextId(), paddr - baseAddr));
     }
 #endif
     return NoFault;
diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index 4845cbf..6409255 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -285,9 +285,9 @@
     for (int i = 0; i < threadContexts.size(); ++i) {
         ThreadContext *tc = threadContexts[i];
 
-        system->registerThreadContext(tc);
+        tc->setContextId(system->registerThreadContext(tc));
 #if !FULL_SYSTEM
-        tc->getProcessPtr()->assignThreadContext(tc->cpuId());
+        tc->getProcessPtr()->assignThreadContext(tc->contextId());
 #endif
     }
 }
@@ -328,8 +328,8 @@
 
         CpuEvent::replaceThreadContext(oldTC, newTC);
 
-        assert(newTC->cpuId() == oldTC->cpuId());
-        system->replaceThreadContext(newTC, newTC->cpuId());
+        assert(newTC->contextId() == oldTC->contextId());
+        system->replaceThreadContext(newTC, newTC->contextId());
 
         if (DTRACE(Context))
             ThreadContext::compare(oldTC, newTC);
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index e0de3a3..3520faf 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -414,6 +414,9 @@
     /** Read this CPU's ID. */
     int cpuId() { return cpu->cpuId(); }
 
+    /** Read this context's system-wide ID **/
+    int contextId() { return thread->contextId(); }
+
     /** Returns the fault type. */
     Fault getFault() { return fault; }
 
@@ -868,7 +871,7 @@
     reqMade = true;
     Request *req = new Request();
     req->setVirt(asid, vaddr, size, flags, PC);
-    req->setThreadContext(thread->cpuId(), threadNumber);
+    req->setThreadContext(thread->contextId(), threadNumber);
 
     fault = cpu->translateDataReadReq(req, thread);
 
@@ -887,7 +890,7 @@
     reqMade = true;
     Request *req = new Request();
     req->setVirt(asid, addr, sizeof(T), flags, this->PC);
-    req->setThreadContext(thread->cpuId(), threadNumber);
+    req->setThreadContext(thread->contextId(), threadNumber);
 
     fault = cpu->translateDataReadReq(req, thread);
 
@@ -942,7 +945,7 @@
     reqMade = true;
     Request *req = new Request();
     req->setVirt(asid, vaddr, size, flags, PC);
-    req->setThreadContext(thread->cpuId(), threadNumber);
+    req->setThreadContext(thread->contextId(), threadNumber);
 
     fault = cpu->translateDataWriteReq(req, thread);
 
@@ -966,7 +969,7 @@
     reqMade = true;
     Request *req = new Request();
     req->setVirt(asid, addr, sizeof(T), flags, this->PC);
-    req->setThreadContext(thread->cpuId(), threadNumber);
+    req->setThreadContext(thread->contextId(), threadNumber);
 
     fault = cpu->translateDataWriteReq(req, thread);
 
diff --git a/src/cpu/checker/cpu_impl.hh b/src/cpu/checker/cpu_impl.hh
index 9f6fa2b..0428e88 100644
--- a/src/cpu/checker/cpu_impl.hh
+++ b/src/cpu/checker/cpu_impl.hh
@@ -152,7 +152,8 @@
         memReq = new Request(inst->threadNumber, fetch_PC,
                              sizeof(uint32_t),
                              IFETCH_FLAGS(thread->readPC()),
-                             fetch_PC, thread->cpuId(), inst->threadNumber);
+                             fetch_PC, thread->contextId(),
+                             inst->threadNumber);
 
         bool succeeded = translateInstReq(memReq);
 
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index b7cf4f1..26c1552 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -589,9 +589,7 @@
 void
 FullO3CPU<Impl>::init()
 {
-    if (!deferRegistration) {
-        registerThreadContexts();
-    }
+    BaseCPU::init();
 
     // Set inSyscall so that the CPU doesn't squash when initially
     // setting up registers.
@@ -610,7 +608,7 @@
         }
 
 #if FULL_SYSTEM
-        TheISA::initCPU(src_tc, src_tc->cpuId());
+        TheISA::initCPU(src_tc, src_tc->contextId());
 #endif
     }
 
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 3503166..cff6db2 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -362,7 +362,7 @@
 void
 DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
 {
-    unsigned tid = pkt->req->getThreadNum();
+    unsigned tid = pkt->req->threadId();
 
     DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n",tid);
 
@@ -593,7 +593,8 @@
     // Set the appropriate read size and flags as well.
     // Build request here.
     RequestPtr mem_req = new Request(tid, block_PC, cacheBlkSize, 0,
-                                     fetch_PC, cpu->cpuId(), tid);
+                                     fetch_PC, cpu->thread[tid]->contextId(),
+                                     tid);
 
     memReq[tid] = mem_req;
 
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh
index f8a8257..cf27552 100644
--- a/src/cpu/o3/lsq.hh
+++ b/src/cpu/o3/lsq.hh
@@ -371,7 +371,7 @@
 Fault
 LSQ<Impl>::read(RequestPtr req, T &data, int load_idx)
 {
-    unsigned tid = req->getThreadNum();
+    unsigned tid = req->threadId();
 
     return thread[tid].read(req, data, load_idx);
 }
@@ -381,7 +381,7 @@
 Fault
 LSQ<Impl>::write(RequestPtr req, T &data, int store_idx)
 {
-    unsigned tid = req->getThreadNum();
+    unsigned tid = req->threadId();
 
     return thread[tid].write(req, data, store_idx);
 }
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index 5aea020..8f9f630 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -85,7 +85,7 @@
     if (pkt->isError())
         DPRINTF(LSQ, "Got error packet back for address: %#X\n", pkt->getAddr());
     if (pkt->isResponse()) {
-        lsq->thread[pkt->req->getThreadNum()].completeDataAccess(pkt);
+        lsq->thread[pkt->req->threadId()].completeDataAccess(pkt);
     }
     else {
         // must be a snoop
diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh
index d571d25..c237b95 100755
--- a/src/cpu/o3/thread_context.hh
+++ b/src/cpu/o3/thread_context.hh
@@ -78,6 +78,10 @@
     /** Reads this CPU's ID. */
     virtual int cpuId() { return cpu->cpuId(); }
 
+    virtual int contextId() { return thread->contextId(); }
+
+    virtual void setContextId(int id) { thread->setContextId(id); }
+
 #if FULL_SYSTEM
     /** Returns a pointer to the system. */
     virtual System *getSystemPtr() { return cpu->system; }
diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh
index 853ee2c..50f6e58 100755
--- a/src/cpu/o3/thread_context_impl.hh
+++ b/src/cpu/o3/thread_context_impl.hh
@@ -63,6 +63,7 @@
     // copy over functional state
     setStatus(old_context->status());
     copyArchRegs(old_context);
+    setContextId(old_context->contextId());
 
 #if !FULL_SYSTEM
     thread->funcExeInst = old_context->readFuncExeInst();
diff --git a/src/cpu/ozone/cpu_impl.hh b/src/cpu/ozone/cpu_impl.hh
index 52376af..eef1a7b 100644
--- a/src/cpu/ozone/cpu_impl.hh
+++ b/src/cpu/ozone/cpu_impl.hh
@@ -417,7 +417,7 @@
         ThreadContext *tc = threadContexts[i];
 
         // initialize CPU, including PC
-        TheISA::initCPU(tc, tc->cpuId());
+        TheISA::initCPU(tc, tc->contextId());
     }
 #endif
     frontEnd->renameTable.copyFrom(thread.renameTable);
@@ -736,14 +736,6 @@
 
 template <class Impl>
 void
-OzoneCPU<Impl>::OzoneTC::setCpuId(int id)
-{
-    cpu->cpuId = id;
-    thread->setCpuId(id);
-}
-
-template <class Impl>
-void
 OzoneCPU<Impl>::OzoneTC::setStatus(Status new_status)
 {
     thread->setStatus(new_status);
@@ -804,6 +796,7 @@
     setStatus(old_context->status());
     copyArchRegs(old_context);
     setCpuId(old_context->cpuId());
+    setContextId(old_context->contextId());
 
     thread->setInst(old_context->getInst());
 #if !FULL_SYSTEM
diff --git a/src/cpu/ozone/front_end_impl.hh b/src/cpu/ozone/front_end_impl.hh
index df3609e..b1e1311 100644
--- a/src/cpu/ozone/front_end_impl.hh
+++ b/src/cpu/ozone/front_end_impl.hh
@@ -477,7 +477,7 @@
     // Setup the memReq to do a read of the first isntruction's address.
     // Set the appropriate read size and flags as well.
     memReq = new Request(0, fetch_PC, cacheBlkSize, 0,
-                         PC, cpu->cpuId(), 0);
+                         PC, cpu->thread->contextId(), 0);
 
     // Translate the instruction request.
     fault = cpu->translateInstReq(memReq, thread);
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index 5e8ab94..feb8a7f 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -84,7 +84,7 @@
         ThreadContext *tc = threadContexts[i];
 
         // initialize CPU, including PC
-        TheISA::initCPU(tc, _cpuId);
+        TheISA::initCPU(tc, tc->contextId());
     }
 #endif
     if (hasPhysMemPort) {
@@ -93,6 +93,7 @@
         physmemPort.getPeerAddressRanges(pmAddrList, snoop);
         physMemAddr = *pmAddrList.begin();
     }
+    // Atomic doesn't do MT right now, so contextId == threadId
     ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
     data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
     data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index 247899c..ca1f028 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -202,7 +202,6 @@
         _status = Idle;
     }
     assert(threadContexts.size() == 1);
-    _cpuId = tc->cpuId();
     previousTick = curTick;
 }
 
diff --git a/src/cpu/simple_thread.cc b/src/cpu/simple_thread.cc
index 6034ca1..5c6b729 100644
--- a/src/cpu/simple_thread.cc
+++ b/src/cpu/simple_thread.cc
@@ -182,6 +182,8 @@
     funcExeInst = oldContext->readFuncExeInst();
 #endif
     inst = oldContext->getInst();
+
+    _contextId = oldContext->contextId();
 }
 
 void
diff --git a/src/cpu/thread_context.cc b/src/cpu/thread_context.cc
index 58912c5..ab105a4 100644
--- a/src/cpu/thread_context.cc
+++ b/src/cpu/thread_context.cc
@@ -78,4 +78,11 @@
     int id2 = two->cpuId();
     if (id1 != id2)
         panic("CPU ids don't match, one: %d, two: %d", id1, id2);
+
+    id1 = one->contextId();
+    id2 = two->contextId();
+    if (id1 != id2)
+        panic("Context ids don't match, one: %d, two: %d", id1, id2);
+
+
 }
diff --git a/src/cpu/thread_context.hh b/src/cpu/thread_context.hh
index d061940..a94be70 100644
--- a/src/cpu/thread_context.hh
+++ b/src/cpu/thread_context.hh
@@ -117,6 +117,12 @@
 
     virtual int cpuId() = 0;
 
+    virtual int getThreadNum() = 0;
+
+    virtual int contextId() = 0;
+
+    virtual void setContextId(int id) = 0;
+
     virtual TheISA::ITB *getITBPtr() = 0;
 
     virtual TheISA::DTB *getDTBPtr() = 0;
@@ -177,8 +183,6 @@
     virtual void profileSample() = 0;
 #endif
 
-    virtual int getThreadNum() = 0;
-
     // Also somewhat obnoxious.  Really only used for the TLB fault.
     // However, may be quite useful in SPARC.
     virtual TheISA::MachInst getInst() = 0;
@@ -300,6 +304,12 @@
 
     int cpuId() { return actualTC->cpuId(); }
 
+    int getThreadNum() { return actualTC->getThreadNum(); }
+
+    int contextId() { return actualTC->contextId(); }
+
+    void setContextId(int id) { actualTC->setContextId(id); }
+
     TheISA::ITB *getITBPtr() { return actualTC->getITBPtr(); }
 
     TheISA::DTB *getDTBPtr() { return actualTC->getDTBPtr(); }
@@ -360,9 +370,6 @@
     void profileClear() { return actualTC->profileClear(); }
     void profileSample() { return actualTC->profileSample(); }
 #endif
-
-    int getThreadNum() { return actualTC->getThreadNum(); }
-
     // @todo: Do I need this?
     MachInst getInst() { return actualTC->getInst(); }
 
diff --git a/src/cpu/thread_state.hh b/src/cpu/thread_state.hh
index f3f154d..fdb2ab0 100644
--- a/src/cpu/thread_state.hh
+++ b/src/cpu/thread_state.hh
@@ -80,6 +80,10 @@
 
     int cpuId() { return baseCpu->cpuId(); }
 
+    int contextId() { return _contextId; }
+
+    void setContextId(int id) { _contextId = id; }
+
     void setTid(int id) { tid = id; }
 
     int readTid() { return tid; }
@@ -169,6 +173,9 @@
     // Pointer to the base CPU.
     BaseCPU *baseCpu;
 
+    // system wide HW context id
+    int _contextId;
+
     // Index of hardware thread context on the CPU that this represents.
     int tid;
 
diff --git a/src/dev/alpha/backdoor.cc b/src/dev/alpha/backdoor.cc
index 3ba6cbd..66f682e 100644
--- a/src/dev/alpha/backdoor.cc
+++ b/src/dev/alpha/backdoor.cc
@@ -84,7 +84,7 @@
 AlphaBackdoor::startup()
 {
     system->setAlphaAccess(pioAddr);
-    alphaAccess->numCPUs = system->getNumCPUs();
+    alphaAccess->numCPUs = system->numContexts();
     alphaAccess->kernStart = system->getKernelStart();
     alphaAccess->kernEnd = system->getKernelEnd();
     alphaAccess->entryPoint = system->getKernelEntry();
diff --git a/src/dev/alpha/tsunami_cchip.cc b/src/dev/alpha/tsunami_cchip.cc
index 4477b5a..52a2aea 100644
--- a/src/dev/alpha/tsunami_cchip.cc
+++ b/src/dev/alpha/tsunami_cchip.cc
@@ -110,7 +110,13 @@
                    break;
               case TSDEV_CC_MISC:
                   pkt->set(((ipint << 8) & 0xF) | ((itint << 4) & 0xF) |
-                                     (pkt->req->getCpuNum() & 0x3));
+                                     (pkt->req->contextId() & 0x3));
+                  // currently, FS cannot handle MT so contextId and
+                  // cpuId are effectively the same, don't know if it will
+                  // matter if FS becomes MT enabled.  I suspect no because
+                  // we are currently able to boot up to 64 procs anyway
+                  // which would render the CPUID of this register useless
+                  // anyway
                   break;
               case TSDEV_CC_AAR0:
               case TSDEV_CC_AAR1:
diff --git a/src/dev/mips/malta_cchip.cc b/src/dev/mips/malta_cchip.cc
index 5a4ea45..2659776 100755
--- a/src/dev/mips/malta_cchip.cc
+++ b/src/dev/mips/malta_cchip.cc
@@ -103,7 +103,7 @@
                    break;
               case TSDEV_CC_MISC:
                   pkt->set((ipint << 8) & 0xF | (itint << 4) & 0xF |
-                                     (pkt->req->getCpuNum() & 0x3));
+                                     (pkt->req->contextId() & 0x3));
                   break;
               case TSDEV_CC_AAR0:
               case TSDEV_CC_AAR1:
diff --git a/src/dev/sinic.cc b/src/dev/sinic.cc
index 49806d3..bf7398e 100644
--- a/src/dev/sinic.cc
+++ b/src/dev/sinic.cc
@@ -379,7 +379,7 @@
     assert(config.command & PCI_CMD_MSE);
     assert(pkt->getAddr() >= BARAddrs[0] && pkt->getSize() < BARSize[0]);
 
-    int cpu = pkt->req->getCpuNum();
+    int cpu = pkt->req->contextId();
     Addr daddr = pkt->getAddr() - BARAddrs[0];
     Addr index = daddr >> Regs::VirtualShift;
     Addr raddr = daddr & Regs::VirtualMask;
@@ -466,7 +466,7 @@
     assert(config.command & PCI_CMD_MSE);
     assert(pkt->getAddr() >= BARAddrs[0] && pkt->getSize() < BARSize[0]);
 
-    int cpu = pkt->req->getCpuNum();
+    int cpu = pkt->req->contextId();
     Addr daddr = pkt->getAddr() - BARAddrs[0];
     Addr index = daddr >> Regs::VirtualShift;
     Addr raddr = daddr & Regs::VirtualMask;
diff --git a/src/dev/sparc/iob.cc b/src/dev/sparc/iob.cc
index 6608fc6..f99ee58 100644
--- a/src/dev/sparc/iob.cc
+++ b/src/dev/sparc/iob.cc
@@ -120,7 +120,7 @@
 Iob::readJBus(PacketPtr pkt)
 {
         Addr accessAddr = pkt->getAddr() - iobJBusAddr;
-        int cpuid = pkt->req->getCpuNum();
+        int cpuid = pkt->req->contextId();
         int index;
         uint64_t data;
 
@@ -235,7 +235,7 @@
 Iob::writeJBus(PacketPtr pkt)
 {
         Addr accessAddr = pkt->getAddr() - iobJBusAddr;
-        int cpuid = pkt->req->getCpuNum();
+        int cpuid = pkt->req->contextId();
         int index;
         uint64_t data;
 
diff --git a/src/mem/cache/blk.hh b/src/mem/cache/blk.hh
index 9bfbd64..bdf323d 100644
--- a/src/mem/cache/blk.hh
+++ b/src/mem/cache/blk.hh
@@ -108,18 +108,16 @@
      */
     class Lock {
       public:
-        int cpuNum;     // locking CPU
-        int threadNum;  // locking thread ID within CPU
+        int contextId;     // locking context
 
         // check for matching execution context
         bool matchesContext(Request *req)
         {
-            return (cpuNum == req->getCpuNum() &&
-                    threadNum == req->getThreadNum());
+            return (contextId == req->contextId());
         }
 
         Lock(Request *req)
-            : cpuNum(req->getCpuNum()), threadNum(req->getThreadNum())
+            : contextId(req->contextId())
         {
         }
     };
diff --git a/src/mem/cache/prefetch/base.cc b/src/mem/cache/prefetch/base.cc
index fcc02ff..a7e6cfd 100644
--- a/src/mem/cache/prefetch/base.cc
+++ b/src/mem/cache/prefetch/base.cc
@@ -203,8 +203,8 @@
             PacketPtr prefetch;
             prefetch = new Packet(prefetchReq, MemCmd::HardPFReq, -1);
             prefetch->allocate();
-            prefetch->req->setThreadContext(pkt->req->getCpuNum(),
-                                            pkt->req->getThreadNum());
+            prefetch->req->setThreadContext(pkt->req->contextId(),
+                                            pkt->req->threadId());
 
             prefetch->time = time + (*delay); //@todo ADD LATENCY HERE
             //... initialize
diff --git a/src/mem/cache/prefetch/ghb.cc b/src/mem/cache/prefetch/ghb.cc
index f5b88e1..c8b87e9 100644
--- a/src/mem/cache/prefetch/ghb.cc
+++ b/src/mem/cache/prefetch/ghb.cc
@@ -42,16 +42,16 @@
                                  std::list<Tick> &delays)
 {
     Addr blkAddr = pkt->getAddr() & ~(Addr)(this->blkSize-1);
-    int cpuID = pkt->req->getCpuNum();
-    if (!useCPUId) cpuID = 0;
+    int contextId = pkt->req->contextId();
+    if (!useContextId) contextId = 0;
 
 
-    int new_stride = blkAddr - last_miss_addr[cpuID];
-    int old_stride = last_miss_addr[cpuID] -
-        second_last_miss_addr[cpuID];
+    int new_stride = blkAddr - last_miss_addr[contextId];
+    int old_stride = last_miss_addr[contextId] -
+        second_last_miss_addr[contextId];
 
-    second_last_miss_addr[cpuID] = last_miss_addr[cpuID];
-    last_miss_addr[cpuID] = blkAddr;
+    second_last_miss_addr[contextId] = last_miss_addr[contextId];
+    last_miss_addr[contextId] = blkAddr;
 
     if (new_stride == old_stride) {
         for (int d=1; d <= degree; d++) {
diff --git a/src/mem/cache/prefetch/ghb.hh b/src/mem/cache/prefetch/ghb.hh
index 4fb6920..156a74a 100644
--- a/src/mem/cache/prefetch/ghb.hh
+++ b/src/mem/cache/prefetch/ghb.hh
@@ -47,13 +47,13 @@
 
     Tick latency;
     int degree;
-    bool useCPUId;
+    bool useContextId;
 
   public:
 
     GHBPrefetcher(const BaseCacheParams *p)
         : BasePrefetcher(p), latency(p->prefetch_latency),
-          degree(p->prefetch_degree), useCPUId(p->prefetch_use_cpu_id)
+          degree(p->prefetch_degree), useContextId(p->prefetch_use_cpu_id)
     {
     }
 
diff --git a/src/mem/cache/prefetch/stride.cc b/src/mem/cache/prefetch/stride.cc
index e93058d..ad5846d 100644
--- a/src/mem/cache/prefetch/stride.cc
+++ b/src/mem/cache/prefetch/stride.cc
@@ -41,18 +41,18 @@
                                     std::list<Tick> &delays)
 {
 //      Addr blkAddr = pkt->paddr & ~(Addr)(this->blkSize-1);
-    int cpuID = pkt->req->getCpuNum();
-    if (!useCPUId) cpuID = 0;
+    int contextId = pkt->req->contextId();
+    if (!useContextId) contextId = 0;
 
     /* Scan Table for IAddr Match */
 /*      std::list<strideEntry*>::iterator iter;
-  for (iter=table[cpuID].begin();
-  iter !=table[cpuID].end();
+  for (iter=table[contextId].begin();
+  iter !=table[contextId].end();
   iter++) {
   if ((*iter)->IAddr == pkt->pc) break;
   }
 
-  if (iter != table[cpuID].end()) {
+  if (iter != table[contextId].end()) {
   //Hit in table
 
   int newStride = blkAddr - (*iter)->MAddr;
diff --git a/src/mem/cache/prefetch/stride.hh b/src/mem/cache/prefetch/stride.hh
index ca17327..4738fd9 100644
--- a/src/mem/cache/prefetch/stride.hh
+++ b/src/mem/cache/prefetch/stride.hh
@@ -63,14 +63,14 @@
     std::list<strideEntry*> table[64/*MAX_CPUS*/];
     Tick latency;
     int degree;
-    bool useCPUId;
+    bool useContextId;
 
 
   public:
 
     StridePrefetcher(const BaseCacheParams *p)
         : BasePrefetcher(p), latency(p->prefetch_latency),
-          degree(p->prefetch_degree), useCPUId(p->prefetch_use_cpu_id)
+          degree(p->prefetch_degree), useContextId(p->prefetch_use_cpu_id)
     {
     }
 
diff --git a/src/mem/physical.cc b/src/mem/physical.cc
index 20e1966..16ff3de 100644
--- a/src/mem/physical.cc
+++ b/src/mem/physical.cc
@@ -139,16 +139,16 @@
 
     for (i = lockedAddrList.begin(); i != lockedAddrList.end(); ++i) {
         if (i->matchesContext(req)) {
-            DPRINTF(LLSC, "Modifying lock record: cpu %d thread %d addr %#x\n",
-                    req->getCpuNum(), req->getThreadNum(), paddr);
+            DPRINTF(LLSC, "Modifying lock record: context %d addr %#x\n",
+                    req->contextId(), paddr);
             i->addr = paddr;
             return;
         }
     }
 
     // no record for this xc: need to allocate a new one
-    DPRINTF(LLSC, "Adding lock record: cpu %d thread %d addr %#x\n",
-            req->getCpuNum(), req->getThreadNum(), paddr);
+    DPRINTF(LLSC, "Adding lock record: context %d addr %#x\n",
+            req->contextId(), paddr);
     lockedAddrList.push_front(LockedAddr(req));
 }
 
@@ -183,14 +183,14 @@
                 // it's a store conditional, and as far as the memory
                 // system can tell, the requesting context's lock is
                 // still valid.
-                DPRINTF(LLSC, "StCond success: cpu %d thread %d addr %#x\n",
-                        req->getCpuNum(), req->getThreadNum(), paddr);
+                DPRINTF(LLSC, "StCond success: context %d addr %#x\n",
+                        req->contextId(), paddr);
                 success = true;
             }
 
             // Get rid of our record of this lock and advance to next
-            DPRINTF(LLSC, "Erasing lock record: cpu %d thread %d addr %#x\n",
-                    i->cpuNum, i->threadNum, paddr);
+            DPRINTF(LLSC, "Erasing lock record: context %d addr %#x\n",
+                    i->contextId, paddr);
             i = lockedAddrList.erase(i);
         }
         else {
diff --git a/src/mem/physical.hh b/src/mem/physical.hh
index 2a0086b..d18138e 100644
--- a/src/mem/physical.hh
+++ b/src/mem/physical.hh
@@ -90,20 +90,17 @@
         static Addr mask(Addr paddr) { return (paddr & ~Addr_Mask); }
 
         Addr addr;      // locked address
-        int cpuNum;     // locking CPU
-        int threadNum;  // locking thread ID within CPU
+        int contextId;     // locking hw context
 
         // check for matching execution context
         bool matchesContext(Request *req)
         {
-            return (cpuNum == req->getCpuNum() &&
-                    threadNum == req->getThreadNum());
+            return (contextId == req->contextId());
         }
 
         LockedAddr(Request *req)
             : addr(mask(req->getPaddr())),
-              cpuNum(req->getCpuNum()),
-              threadNum(req->getThreadNum())
+              contextId(req->contextId())
         {
         }
     };
diff --git a/src/mem/request.hh b/src/mem/request.hh
index 613655e..da0d9c7 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -115,10 +115,10 @@
      * store conditional or the compare value for a CAS. */
     uint64_t extraData;
 
-    /** The cpu number (for statistics, typically). */
-    int cpuNum;
-    /** The requesting thread id (for statistics, typically). */
-    int  threadNum;
+    /** The context ID (for statistics, typically). */
+    int _contextId;
+    /** The thread ID (id within this CPU) */
+    int _threadId;
 
     /** program counter of initiating access; for tracing/debugging */
     Addr pc;
@@ -129,8 +129,8 @@
     bool validAsidVaddr;
     /** Whether or not the sc result is valid. */
     bool validExData;
-    /** Whether or not the cpu number & thread ID are valid. */
-    bool validCpuAndThreadNums;
+    /** Whether or not the context ID is valid. */
+    bool validContextAndThreadIds;
     /** Whether or not the pc is valid. */
     bool validPC;
 
@@ -138,7 +138,7 @@
     /** Minimal constructor.  No fields are initialized. */
     Request()
         : validPaddr(false), validAsidVaddr(false),
-          validExData(false), validCpuAndThreadNums(false), validPC(false)
+          validExData(false), validContextAndThreadIds(false), validPC(false)
     {}
 
     /**
@@ -146,13 +146,13 @@
      * just physical address, size, flags, and timestamp (to curTick).
      * These fields are adequate to perform a request.  */
     Request(Addr _paddr, int _size, int _flags)
-        : validCpuAndThreadNums(false)
+        : validContextAndThreadIds(false)
     { setPhys(_paddr, _size, _flags); }
 
     Request(int _asid, Addr _vaddr, int _size, int _flags, Addr _pc,
-            int _cpuNum, int _threadNum)
+            int _context_id, int _thread_id)
     {
-        setThreadContext(_cpuNum, _threadNum);
+        setThreadContext(_context_id, _thread_id);
         setVirt(_asid, _vaddr, _size, _flags, _pc);
     }
 
@@ -160,11 +160,11 @@
 
     /**
      * Set up CPU and thread numbers. */
-    void setThreadContext(int _cpuNum, int _threadNum)
+    void setThreadContext(int _context_id, int _thread_id)
     {
-        cpuNum = _cpuNum;
-        threadNum = _threadNum;
-        validCpuAndThreadNums = true;
+        _contextId = _context_id;
+        _threadId = _thread_id;
+        validContextAndThreadIds = true;
     }
 
     /**
@@ -261,10 +261,10 @@
     void setExtraData(uint64_t _extraData)
     { extraData = _extraData; validExData = true; }
 
-    /** Accessor function for cpu number.*/
-    int getCpuNum() { assert(validCpuAndThreadNums); return cpuNum; }
-    /** Accessor function for thread number.*/
-    int getThreadNum()  { assert(validCpuAndThreadNums); return threadNum; }
+    /** Accessor function for context ID.*/
+    int contextId() { assert(validContextAndThreadIds); return _contextId; }
+    /** Accessor function for thread ID. */
+    int threadId() { assert(validContextAndThreadIds); return _threadId; }
 
     /** Accessor function for pc.*/
     Addr getPC() { assert(validPC); return pc; }
diff --git a/src/sim/system.cc b/src/sim/system.cc
index 8f25b4b..9704c83 100644
--- a/src/sim/system.cc
+++ b/src/sim/system.cc
@@ -58,7 +58,7 @@
 int System::numSystemsRunning = 0;
 
 System::System(Params *p)
-    : SimObject(p), physmem(p->physmem), numcpus(0),
+    : SimObject(p), physmem(p->physmem), _numContexts(0),
 #if FULL_SYSTEM
       init_param(p->init_param),
       functionalPort(p->name + "-fport"),
@@ -181,7 +181,7 @@
         panic("Cannot have two CPUs with the same id (%d)\n", id);
 
     threadContexts[id] = tc;
-    numcpus++;
+    _numContexts++;
 
     int port = getRemoteGDBPort();
     if (rgdb_enable && port) {
diff --git a/src/sim/system.hh b/src/sim/system.hh
index 26cac71..e993a7a 100644
--- a/src/sim/system.hh
+++ b/src/sim/system.hh
@@ -87,19 +87,19 @@
     PCEventQueue pcEventQueue;
 
     std::vector<ThreadContext *> threadContexts;
-    int numcpus;
+    int _numContexts;
 
     ThreadContext * getThreadContext(int tid)
     {
         return threadContexts[tid];
     }
 
-    int getNumCPUs()
+    int numContexts()
     {
-        if (numcpus != threadContexts.size())
+        if (_numContexts != threadContexts.size())
             panic("cpu array not fully populated!");
 
-        return numcpus;
+        return _numContexts;
     }
 
 #if FULL_SYSTEM