| /* |
| * Copyright (c) 2013 - 2016 ARM Limited |
| * All rights reserved |
| * |
| * The license below extends only to copyright in the software and shall |
| * not be construed as granting a license to any other intellectual |
| * property including but not limited to intellectual property relating |
| * to a hardware implementation of the functionality of the software |
| * licensed hereunder. You may use the software subject to the license |
| * terms below provided that you ensure that this notice is replicated |
| * unmodified and in its entirety in all distributions of the software, |
| * modified or unmodified, in source code or in binary form. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are |
| * met: redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer; |
| * redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution; |
| * neither the name of the copyright holders nor the names of its |
| * contributors may be used to endorse or promote products derived from |
| * this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include "cpu/trace/trace_cpu.hh" |
| |
| #include "sim/sim_exit.hh" |
| |
| // Declare and initialize the static counter for number of trace CPUs. |
| int TraceCPU::numTraceCPUs = 0; |
| |
| TraceCPU::TraceCPU(TraceCPUParams *params) |
| : BaseCPU(params), |
| icachePort(this), |
| dcachePort(this), |
| instMasterID(params->system->getMasterId(this, "inst")), |
| dataMasterID(params->system->getMasterId(this, "data")), |
| instTraceFile(params->instTraceFile), |
| dataTraceFile(params->dataTraceFile), |
| icacheGen(*this, ".iside", icachePort, instMasterID, instTraceFile), |
| dcacheGen(*this, ".dside", dcachePort, dataMasterID, dataTraceFile, |
| params), |
| icacheNextEvent([this]{ schedIcacheNext(); }, name()), |
| dcacheNextEvent([this]{ schedDcacheNext(); }, name()), |
| oneTraceComplete(false), |
| traceOffset(0), |
| execCompleteEvent(nullptr), |
| enableEarlyExit(params->enableEarlyExit), |
| progressMsgInterval(params->progressMsgInterval), |
| progressMsgThreshold(params->progressMsgInterval) |
| { |
| // Increment static counter for number of Trace CPUs. |
| ++TraceCPU::numTraceCPUs; |
| |
| // Check that the python parameters for sizes of ROB, store buffer and |
| // load buffer do not overflow the corresponding C++ variables. |
| fatal_if(params->sizeROB > UINT16_MAX, "ROB size set to %d exceeds the " |
| "max. value of %d.\n", params->sizeROB, UINT16_MAX); |
    fatal_if(params->sizeStoreBuffer > UINT16_MAX, "Store buffer size set "
             "to %d exceeds the max. value of %d.\n",
             params->sizeStoreBuffer, UINT16_MAX);
| fatal_if(params->sizeLoadBuffer > UINT16_MAX, "Load buffer size set to" |
| " %d exceeds the max. value of %d.\n", |
| params->sizeLoadBuffer, UINT16_MAX); |
| } |
| |
TraceCPU::~TraceCPU()
{
}
| |
| TraceCPU* |
| TraceCPUParams::create() |
| { |
| return new TraceCPU(this); |
| } |
| |
| void |
| TraceCPU::updateNumOps(uint64_t rob_num) |
| { |
| numOps = rob_num; |
| if (progressMsgInterval != 0 && numOps.value() >= progressMsgThreshold) { |
| inform("%s: %i insts committed\n", name(), progressMsgThreshold); |
| progressMsgThreshold += progressMsgInterval; |
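        // As an illustrative example, with progressMsgInterval set to
        // 1000000 a message is printed after every additional million
        // committed micro-ops.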
| } |
| } |
| |
| void |
| TraceCPU::takeOverFrom(BaseCPU *oldCPU) |
| { |
| // Unbind the ports of the old CPU and bind the ports of the TraceCPU. |
| getInstPort().takeOverFrom(&oldCPU->getInstPort()); |
| getDataPort().takeOverFrom(&oldCPU->getDataPort()); |
| } |
| |
| void |
| TraceCPU::init() |
| { |
| DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\"." |
| "\n", instTraceFile); |
| DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n", |
| dataTraceFile); |
| |
| BaseCPU::init(); |
| |
| // Get the send tick of the first instruction read request |
| Tick first_icache_tick = icacheGen.init(); |
| |
| // Get the send tick of the first data read/write request |
| Tick first_dcache_tick = dcacheGen.init(); |
| |
| // Set the trace offset as the minimum of that in both traces |
| traceOffset = std::min(first_icache_tick, first_dcache_tick); |
    inform("%s: Time offset (tick), taken as the min of both traces, is "
           "%lli.\n", name(), traceOffset);
| |
| // Schedule next icache and dcache event by subtracting the offset |
| schedule(icacheNextEvent, first_icache_tick - traceOffset); |
| schedule(dcacheNextEvent, first_dcache_tick - traceOffset); |
| |
| // Adjust the trace offset for the dcache generator's ready nodes |
| // We don't need to do this for the icache generator as it will |
| // send its first request at the first event and schedule subsequent |
| // events using a relative tick delta |
| dcacheGen.adjustInitTraceOffset(traceOffset); |
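    // As an illustrative example: if the first icache request is at tick
    // 1000 and the first dcache request at tick 1500, traceOffset becomes
    // 1000, the first icache event is scheduled at tick 0, the first
    // dcache event at tick 500, and the dcache generator's ready nodes are
    // shifted back by 1000 ticks.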
| |
| // If the Trace CPU simulation is configured to exit on any one trace |
| // completion then we don't need a counted event to count down all Trace |
| // CPUs in the system. If not then instantiate a counted event. |
| if (!enableEarlyExit) { |
| // The static counter for number of Trace CPUs is correctly set at |
| // this point so create an event and pass it. |
| execCompleteEvent = new CountedExitEvent("end of all traces reached.", |
| numTraceCPUs); |
| } |
| |
| } |
| |
| void |
| TraceCPU::schedIcacheNext() |
| { |
| DPRINTF(TraceCPUInst, "IcacheGen event.\n"); |
| |
| // Try to send the current packet or a retry packet if there is one |
| bool sched_next = icacheGen.tryNext(); |
| // If packet sent successfully, schedule next event |
| if (sched_next) { |
| DPRINTF(TraceCPUInst, "Scheduling next icacheGen event " |
| "at %d.\n", curTick() + icacheGen.tickDelta()); |
| schedule(icacheNextEvent, curTick() + icacheGen.tickDelta()); |
| ++numSchedIcacheEvent; |
| } else { |
        // Check if the trace is complete. If not, do nothing: the send
        // failed and the next event will be scheduled via recvReqRetry().
| if (icacheGen.isTraceComplete()) { |
            // If this is the first trace to complete, set the flag. If the
            // flag is already set, both traces are complete and the
            // simulation can exit.
| checkAndSchedExitEvent(); |
| } |
| } |
| return; |
| } |
| |
| void |
| TraceCPU::schedDcacheNext() |
| { |
| DPRINTF(TraceCPUData, "DcacheGen event.\n"); |
| |
| // Update stat for numCycles |
| numCycles = clockEdge() / clockPeriod(); |
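    // For example, at a clock edge of tick 2000 with a clock period of 500
    // ticks, numCycles is updated to 4.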
| |
| dcacheGen.execute(); |
| if (dcacheGen.isExecComplete()) { |
| checkAndSchedExitEvent(); |
| } |
| } |
| |
| void |
| TraceCPU::checkAndSchedExitEvent() |
| { |
| if (!oneTraceComplete) { |
| oneTraceComplete = true; |
| } else { |
| // Schedule event to indicate execution is complete as both |
| // instruction and data access traces have been played back. |
| inform("%s: Execution complete.\n", name()); |
| // If the replay is configured to exit early, that is when any one |
| // execution is complete then exit immediately and return. Otherwise, |
| // schedule the counted exit that counts down completion of each Trace |
| // CPU. |
| if (enableEarlyExit) { |
| exitSimLoop("End of trace reached"); |
| } else { |
| schedule(*execCompleteEvent, curTick()); |
| } |
| } |
| } |
| |
| void |
| TraceCPU::regStats() |
| { |
| |
| BaseCPU::regStats(); |
| |
| numSchedDcacheEvent |
| .name(name() + ".numSchedDcacheEvent") |
| .desc("Number of events scheduled to trigger data request generator") |
| ; |
| |
| numSchedIcacheEvent |
| .name(name() + ".numSchedIcacheEvent") |
| .desc("Number of events scheduled to trigger instruction request generator") |
| ; |
| |
| numOps |
| .name(name() + ".numOps") |
| .desc("Number of micro-ops simulated by the Trace CPU") |
| ; |
| |
| cpi |
| .name(name() + ".cpi") |
| .desc("Cycles per micro-op used as a proxy for CPI") |
| .precision(6) |
| ; |
| cpi = numCycles/numOps; |
| |
| icacheGen.regStats(); |
| dcacheGen.regStats(); |
| } |
| |
| void |
| TraceCPU::ElasticDataGen::regStats() |
| { |
| using namespace Stats; |
| |
| maxDependents |
| .name(name() + ".maxDependents") |
| .desc("Max number of dependents observed on a node") |
| ; |
| |
| maxReadyListSize |
| .name(name() + ".maxReadyListSize") |
| .desc("Max size of the ready list observed") |
| ; |
| |
| numSendAttempted |
| .name(name() + ".numSendAttempted") |
| .desc("Number of first attempts to send a request") |
| ; |
| |
| numSendSucceeded |
| .name(name() + ".numSendSucceeded") |
| .desc("Number of successful first attempts") |
| ; |
| |
| numSendFailed |
| .name(name() + ".numSendFailed") |
| .desc("Number of failed first attempts") |
| ; |
| |
| numRetrySucceeded |
| .name(name() + ".numRetrySucceeded") |
| .desc("Number of successful retries") |
| ; |
| |
| numSplitReqs |
| .name(name() + ".numSplitReqs") |
| .desc("Number of split requests") |
| ; |
| |
| numSOLoads |
| .name(name() + ".numSOLoads") |
| .desc("Number of strictly ordered loads") |
| ; |
| |
| numSOStores |
| .name(name() + ".numSOStores") |
| .desc("Number of strictly ordered stores") |
| ; |
| |
| dataLastTick |
| .name(name() + ".dataLastTick") |
| .desc("Last tick simulated from the elastic data trace") |
| ; |
| } |
| |
| Tick |
| TraceCPU::ElasticDataGen::init() |
| { |
| DPRINTF(TraceCPUData, "Initializing data memory request generator " |
| "DcacheGen: elastic issue with retry.\n"); |
| |
| if (!readNextWindow()) |
| panic("Trace has %d elements. It must have at least %d elements.\n", |
| depGraph.size(), 2 * windowSize); |
| DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n", |
| depGraph.size()); |
| |
| if (!readNextWindow()) |
| panic("Trace has %d elements. It must have at least %d elements.\n", |
| depGraph.size(), 2 * windowSize); |
| DPRINTF(TraceCPUData, "After 2st read, depGraph size:%d.\n", |
| depGraph.size()); |
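    // At this point the graph holds 2 * windowSize nodes: one window of
    // nodes that may issue and one window of look-ahead, so that the
    // dependents of all dependency-free nodes are present in the graph.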
| |
| // Print readyList |
| if (DTRACE(TraceCPUData)) { |
| printReadyList(); |
| } |
| auto free_itr = readyList.begin(); |
| DPRINTF(TraceCPUData, "Execute tick of the first dependency free node %lli" |
| " is %d.\n", free_itr->seqNum, free_itr->execTick); |
| // Return the execute tick of the earliest ready node so that an event |
| // can be scheduled to call execute() |
| return (free_itr->execTick); |
| } |
| |
| void |
TraceCPU::ElasticDataGen::adjustInitTraceOffset(Tick& offset)
{
| for (auto& free_node : readyList) { |
| free_node.execTick -= offset; |
| } |
| } |
| |
| void |
| TraceCPU::ElasticDataGen::exit() |
| { |
| trace.reset(); |
| } |
| |
| bool |
| TraceCPU::ElasticDataGen::readNextWindow() |
| { |
| |
| // Read and add next window |
| DPRINTF(TraceCPUData, "Reading next window from file.\n"); |
| |
| if (traceComplete) { |
| // We are at the end of the file, thus we have no more records. |
| // Return false. |
| return false; |
| } |
| |
| DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n", |
| depGraph.size()); |
| |
| uint32_t num_read = 0; |
| while (num_read != windowSize) { |
| |
| // Create a new graph node |
| GraphNode* new_node = new GraphNode; |
| |
| // Read the next line to get the next record. If that fails then end of |
| // trace has been reached and traceComplete needs to be set in addition |
| // to returning false. |
| if (!trace.read(new_node)) { |
| DPRINTF(TraceCPUData, "\tTrace complete!\n"); |
| traceComplete = true; |
| return false; |
| } |
| |
| // Annotate the ROB dependencies of the new node onto the parent nodes. |
| addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep); |
| // Annotate the register dependencies of the new node onto the parent |
| // nodes. |
| addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep); |
| |
| num_read++; |
| // Add to map |
| depGraph[new_node->seqNum] = new_node; |
| if (new_node->numRobDep == 0 && new_node->numRegDep == 0) { |
| // Source dependencies are already complete, check if resources |
| // are available and issue. The execution time is approximated |
| // to current time plus the computational delay. |
| checkAndIssue(new_node); |
| } |
| } |
| |
| DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n", |
| depGraph.size()); |
| return true; |
| } |
| |
| template<typename T> void |
| TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node, |
| T& dep_array, uint8_t& num_dep) |
| { |
| for (auto& a_dep : dep_array) { |
        // The convention is to set the dependencies starting with the first
        // index in the ROB and register dependency arrays. Thus, when we
        // reach a dependency equal to the initialisation value of zero, we
        // know we have iterated over all dependencies and can break.
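        // For example, a robDep array of {12, 15, 0, 0, ...} encodes ROB
        // dependencies on the nodes with sequence numbers 12 and 15 only.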
| if (a_dep == 0) |
| break; |
| // We look up the valid dependency, i.e. the parent of this node |
| auto parent_itr = depGraph.find(a_dep); |
| if (parent_itr != depGraph.end()) { |
| // If the parent is found, it is yet to be executed. Append a |
| // pointer to the new node to the dependents list of the parent |
| // node. |
| parent_itr->second->dependents.push_back(new_node); |
| auto num_depts = parent_itr->second->dependents.size(); |
| maxDependents = std::max<double>(num_depts, maxDependents.value()); |
| } else { |
| // The dependency is not found in the graph. So consider |
| // the execution of the parent is complete, i.e. remove this |
| // dependency. |
| a_dep = 0; |
| num_dep--; |
| } |
| } |
| } |
| |
| void |
| TraceCPU::ElasticDataGen::execute() |
| { |
| DPRINTF(TraceCPUData, "Execute start occupancy:\n"); |
    DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
            "depFreeQueue = %d, ", depGraph.size(), readyList.size(),
            depFreeQueue.size());
| hwResource.printOccupancy(); |
| |
| // Read next window to make sure that dependents of all dep-free nodes |
| // are in the depGraph |
| if (nextRead) { |
| readNextWindow(); |
| nextRead = false; |
| } |
| |
| // First attempt to issue the pending dependency-free nodes held |
| // in depFreeQueue. If resources have become available for a node, |
| // then issue it, i.e. add the node to readyList. |
| while (!depFreeQueue.empty()) { |
| if (checkAndIssue(depFreeQueue.front(), false)) { |
| DPRINTF(TraceCPUData, "Removing from depFreeQueue: seq. num " |
| "%lli.\n", (depFreeQueue.front())->seqNum); |
| depFreeQueue.pop(); |
| } else { |
| break; |
| } |
| } |
| // Proceed to execute from readyList |
| auto graph_itr = depGraph.begin(); |
| auto free_itr = readyList.begin(); |
| // Iterate through readyList until the next free node has its execute |
| // tick later than curTick or the end of readyList is reached |
    while (free_itr != readyList.end() && free_itr->execTick <= curTick()) {
| |
| // Get pointer to the node to be executed |
| graph_itr = depGraph.find(free_itr->seqNum); |
| assert(graph_itr != depGraph.end()); |
| GraphNode* node_ptr = graph_itr->second; |
| |
| // If there is a retryPkt send that else execute the load |
| if (retryPkt) { |
| // The retryPkt must be the request that was created by the |
| // first node in the readyList. |
| if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) { |
| panic("Retry packet's seqence number does not match " |
| "the first node in the readyList.\n"); |
| } |
| if (port.sendTimingReq(retryPkt)) { |
| ++numRetrySucceeded; |
| retryPkt = nullptr; |
| } |
| } else if (node_ptr->isLoad() || node_ptr->isStore()) { |
| // If there is no retryPkt, attempt to send a memory request in |
| // case of a load or store node. If the send fails, executeMemReq() |
| // returns a packet pointer, which we save in retryPkt. In case of |
| // a comp node we don't do anything and simply continue as if the |
            // execution of the comp node succeeded.
| retryPkt = executeMemReq(node_ptr); |
| } |
        // If the retryPkt or a new load/store node failed to send, we exit
        // from here, as a retry from the cache will bring control back to
        // execute(). The first node in readyList will then be the failed
        // node.
| if (retryPkt) { |
| break; |
| } |
| |
| // Proceed to remove dependencies for the successfully executed node. |
| // If it is a load which is not strictly ordered and we sent a |
| // request for it successfully, we do not yet mark any register |
| // dependencies complete. But as per dependency modelling we need |
| // to mark ROB dependencies of load and non load/store nodes which |
| // are based on successful sending of the load as complete. |
| if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) { |
| // If execute succeeded mark its dependents as complete |
| DPRINTF(TraceCPUData, "Node seq. num %lli sent. Waking up " |
| "dependents..\n", node_ptr->seqNum); |
| |
| auto child_itr = (node_ptr->dependents).begin(); |
| while (child_itr != (node_ptr->dependents).end()) { |
| // ROB dependency of a store on a load must not be removed |
| // after load is sent but after response is received |
| if (!(*child_itr)->isStore() && |
| (*child_itr)->removeRobDep(node_ptr->seqNum)) { |
| |
| // Check if the child node has become dependency free |
| if ((*child_itr)->numRobDep == 0 && |
| (*child_itr)->numRegDep == 0) { |
| |
| // Source dependencies are complete, check if |
| // resources are available and issue |
| checkAndIssue(*child_itr); |
| } |
| // Remove this child for the sent load and point to new |
| // location of the element following the erased element |
| child_itr = node_ptr->dependents.erase(child_itr); |
| } else { |
| // This child is not dependency-free, point to the next |
| // child |
| child_itr++; |
| } |
| } |
| } else { |
| // If it is a strictly ordered load mark its dependents as complete |
| // as we do not send a request for this case. If it is a store or a |
| // comp node we also mark all its dependents complete. |
| DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking" |
| " up dependents..\n", node_ptr->seqNum); |
| |
| for (auto child : node_ptr->dependents) { |
| // If the child node is dependency free removeDepOnInst() |
| // returns true. |
| if (child->removeDepOnInst(node_ptr->seqNum)) { |
| // Source dependencies are complete, check if resources |
| // are available and issue |
| checkAndIssue(child); |
| } |
| } |
| } |
| |
| // After executing the node, remove from readyList and delete node. |
| readyList.erase(free_itr); |
| // If it is a cacheable load which was sent, don't delete |
| // just yet. Delete it in completeMemAccess() after the |
        // response is received. If it is a strictly ordered
| // load, it was not sent and all dependencies were simply |
| // marked complete. Thus it is safe to delete it. For |
| // stores and non load/store nodes all dependencies were |
| // marked complete so it is safe to delete it. |
| if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) { |
| // Release all resources occupied by the completed node |
| hwResource.release(node_ptr); |
| // clear the dynamically allocated set of dependents |
| (node_ptr->dependents).clear(); |
| // Update the stat for numOps simulated |
| owner.updateNumOps(node_ptr->robNum); |
| // delete node |
| delete node_ptr; |
| // remove from graph |
| depGraph.erase(graph_itr); |
| } |
| // Point to first node to continue to next iteration of while loop |
| free_itr = readyList.begin(); |
| } // end of while loop |
| |
| // Print readyList, sizes of queues and resource status after updating |
| if (DTRACE(TraceCPUData)) { |
| printReadyList(); |
| DPRINTF(TraceCPUData, "Execute end occupancy:\n"); |
        DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
                "depFreeQueue = %d, ", depGraph.size(), readyList.size(),
                depFreeQueue.size());
| hwResource.printOccupancy(); |
| } |
| |
| if (retryPkt) { |
| DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry" |
| "event from the cache for seq. num %lli.\n", |
| retryPkt->req->getReqInstSeqNum()); |
| return; |
| } |
| // If the size of the dependency graph is less than the dependency window |
| // then read from the trace file to populate the graph next time we are in |
| // execute. |
| if (depGraph.size() < windowSize && !traceComplete) |
| nextRead = true; |
| |
| // If cache is not blocked, schedule an event for the first execTick in |
| // readyList else retry from cache will schedule the event. If the ready |
| // list is empty then check if the next pending node has resources |
| // available to issue. If yes, then schedule an event for the next cycle. |
| if (!readyList.empty()) { |
| Tick next_event_tick = std::max(readyList.begin()->execTick, |
| curTick()); |
| DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n", |
| next_event_tick); |
| owner.schedDcacheNextEvent(next_event_tick); |
| } else if (readyList.empty() && !depFreeQueue.empty() && |
| hwResource.isAvailable(depFreeQueue.front())) { |
| DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n", |
| owner.clockEdge(Cycles(1))); |
| owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1))); |
| } |
| |
| // If trace is completely read, readyList is empty and depGraph is empty, |
| // set execComplete to true |
| if (depGraph.empty() && readyList.empty() && traceComplete && |
| !hwResource.awaitingResponse()) { |
| DPRINTF(TraceCPUData, "\tExecution Complete!\n"); |
| execComplete = true; |
| dataLastTick = curTick(); |
| } |
| } |
| |
| PacketPtr |
| TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr) |
| { |
| |
| DPRINTF(TraceCPUData, "Executing memory request %lli (phys addr %d, " |
| "virt addr %d, pc %#x, size %d, flags %d).\n", |
| node_ptr->seqNum, node_ptr->physAddr, node_ptr->virtAddr, |
| node_ptr->pc, node_ptr->size, node_ptr->flags); |
| |
| // If the request is strictly ordered, do not send it. Just return nullptr |
    // as if it was successfully sent.
| if (node_ptr->isStrictlyOrdered()) { |
| node_ptr->isLoad() ? ++numSOLoads : ++numSOStores; |
| DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n", |
| node_ptr->seqNum); |
| return nullptr; |
| } |
| |
| // Check if the request spans two cache lines as this condition triggers |
| // an assert fail in the L1 cache. If it does then truncate the size to |
| // access only until the end of that line and ignore the remainder. The |
| // stat counting this is useful to keep a check on how frequently this |
    // happens. If required the code could be revised to mimic splitting such
| // a request into two. |
| unsigned blk_size = owner.cacheLineSize(); |
| Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1)); |
| if (!(blk_offset + node_ptr->size <= blk_size)) { |
| node_ptr->size = blk_size - blk_offset; |
| ++numSplitReqs; |
| } |
| |
| // Create a request and the packet containing request |
| auto req = std::make_shared<Request>( |
| node_ptr->physAddr, node_ptr->size, node_ptr->flags, masterID); |
| req->setReqInstSeqNum(node_ptr->seqNum); |
| |
| req->setPC(node_ptr->pc); |
| // If virtual address is valid, set the virtual address field |
| // of the request. |
| if (node_ptr->virtAddr != 0) { |
| req->setVirt(node_ptr->virtAddr, node_ptr->size, |
| node_ptr->flags, masterID, node_ptr->pc); |
| req->setPaddr(node_ptr->physAddr); |
| req->setReqInstSeqNum(node_ptr->seqNum); |
| } |
| |
| PacketPtr pkt; |
| uint8_t* pkt_data = new uint8_t[req->getSize()]; |
| if (node_ptr->isLoad()) { |
| pkt = Packet::createRead(req); |
| } else { |
| pkt = Packet::createWrite(req); |
| memset(pkt_data, 0xA, req->getSize()); |
| } |
| pkt->dataDynamic(pkt_data); |
| |
| // Call MasterPort method to send a timing request for this packet |
| bool success = port.sendTimingReq(pkt); |
| ++numSendAttempted; |
| |
| if (!success) { |
| // If it fails, return the packet to retry when a retry is signalled by |
| // the cache |
| ++numSendFailed; |
| DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n"); |
| return pkt; |
| } else { |
        // If it succeeds, return nullptr
| ++numSendSucceeded; |
| return nullptr; |
| } |
| } |
| |
| bool |
| TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr, bool first) |
| { |
| // Assert the node is dependency-free |
| assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0); |
| |
| // If this is the first attempt, print a debug message to indicate this. |
| if (first) { |
| DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is now" |
| " dependency free.\n", node_ptr->seqNum, node_ptr->typeToStr(), |
| node_ptr->robNum); |
| } |
| |
| // Check if resources are available to issue the specific node |
| if (hwResource.isAvailable(node_ptr)) { |
| // If resources are free only then add to readyList |
| DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. Adding" |
| " to readyList, occupying resources.\n", node_ptr->seqNum); |
| // Compute the execute tick by adding the compute delay for the node |
| // and add the ready node to the ready list |
| addToSortedReadyList(node_ptr->seqNum, |
| owner.clockEdge() + node_ptr->compDelay); |
| // Account for the resources taken up by this issued node. |
| hwResource.occupy(node_ptr); |
| return true; |
| |
| } else { |
| if (first) { |
| // Although dependencies are complete, resources are not available. |
| DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli." |
| " Adding to depFreeQueue.\n", node_ptr->seqNum); |
| depFreeQueue.push(node_ptr); |
| } else { |
| DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli. " |
| "Still pending issue.\n", node_ptr->seqNum); |
| } |
| return false; |
| } |
| } |
| |
| void |
| TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt) |
| { |
| // Release the resources for this completed node. |
| if (pkt->isWrite()) { |
| // Consider store complete. |
| hwResource.releaseStoreBuffer(); |
| // If it is a store response then do nothing since we do not model |
| // dependencies on store completion in the trace. But if we were |
| // blocking execution due to store buffer fullness, we need to schedule |
| // an event and attempt to progress. |
| } else { |
| // If it is a load response then release the dependents waiting on it. |
| // Get pointer to the completed load |
| auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum()); |
| assert(graph_itr != depGraph.end()); |
| GraphNode* node_ptr = graph_itr->second; |
| |
| // Release resources occupied by the load |
| hwResource.release(node_ptr); |
| |
| DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up" |
| " dependents..\n", node_ptr->seqNum); |
| |
| for (auto child : node_ptr->dependents) { |
| if (child->removeDepOnInst(node_ptr->seqNum)) { |
| checkAndIssue(child); |
| } |
| } |
| |
| // clear the dynamically allocated set of dependents |
| (node_ptr->dependents).clear(); |
| // Update the stat for numOps completed |
| owner.updateNumOps(node_ptr->robNum); |
| // delete node |
| delete node_ptr; |
| // remove from graph |
| depGraph.erase(graph_itr); |
| } |
| |
| if (DTRACE(TraceCPUData)) { |
| printReadyList(); |
| } |
| |
| // If the size of the dependency graph is less than the dependency window |
| // then read from the trace file to populate the graph next time we are in |
| // execute. |
| if (depGraph.size() < windowSize && !traceComplete) |
| nextRead = true; |
| |
| // If not waiting for retry, attempt to schedule next event |
| if (!retryPkt) { |
| // We might have new dep-free nodes in the list which will have execute |
| // tick greater than or equal to curTick. But a new dep-free node might |
| // have its execute tick earlier. Therefore, attempt to reschedule. It |
| // could happen that the readyList is empty and we got here via a |
| // last remaining response. So, either the trace is complete or there |
| // are pending nodes in the depFreeQueue. The checking is done in the |
| // execute() control flow, so schedule an event to go via that flow. |
| Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) : |
| std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1))); |
| DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n", |
| next_event_tick); |
| owner.schedDcacheNextEvent(next_event_tick); |
| } |
| } |
| |
| void |
| TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num, |
| Tick exec_tick) |
| { |
| ReadyNode ready_node; |
| ready_node.seqNum = seq_num; |
| ready_node.execTick = exec_tick; |
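    // The readyList is kept sorted by execTick, with ties broken by
    // ascending sequence number. For example, inserting (seq 12, tick 100)
    // into [(seq 10, tick 100), (seq 11, tick 105)] yields
    // [(seq 10, tick 100), (seq 12, tick 100), (seq 11, tick 105)].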
| |
| // Iterator to readyList |
| auto itr = readyList.begin(); |
| |
| // If the readyList is empty, simply insert the new node at the beginning |
| // and return |
| if (itr == readyList.end()) { |
| readyList.insert(itr, ready_node); |
| maxReadyListSize = std::max<double>(readyList.size(), |
| maxReadyListSize.value()); |
| return; |
| } |
| |
    // If the first node in the list is the one whose packet failed to send
    // and is awaiting a retry, skip past it so that it maintains its
    // position at the head of the list.
| if (retryPkt) |
| if (retryPkt->req->getReqInstSeqNum() == itr->seqNum) |
| itr++; |
| |
| // Increment the iterator and compare the node pointed to by it to the new |
| // node till the position to insert the new node is found. |
| bool found = false; |
| while (!found && itr != readyList.end()) { |
| // If the execution tick of the new node is less than the node then |
| // this is the position to insert |
| if (exec_tick < itr->execTick) |
| found = true; |
| // If the execution tick of the new node is equal to the node then |
| // sort in ascending order of sequence numbers |
| else if (exec_tick == itr->execTick) { |
| // If the sequence number of the new node is less than the node |
| // then this is the position to insert |
| if (seq_num < itr->seqNum) |
| found = true; |
| // Else go to next node |
| else |
| itr++; |
| } |
| // If the execution tick of the new node is greater than the node then |
| // go to the next node |
| else |
| itr++; |
| } |
| readyList.insert(itr, ready_node); |
| // Update the stat for max size reached of the readyList |
| maxReadyListSize = std::max<double>(readyList.size(), |
| maxReadyListSize.value()); |
| } |
| |
| void |
TraceCPU::ElasticDataGen::printReadyList()
{
| auto itr = readyList.begin(); |
| if (itr == readyList.end()) { |
| DPRINTF(TraceCPUData, "readyList is empty.\n"); |
| return; |
| } |
| DPRINTF(TraceCPUData, "Printing readyList:\n"); |
| while (itr != readyList.end()) { |
| auto graph_itr = depGraph.find(itr->seqNum); |
| GraphNode* node_ptr M5_VAR_USED = graph_itr->second; |
| DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum, |
| node_ptr->typeToStr(), itr->execTick); |
| itr++; |
| } |
| } |
| |
| TraceCPU::ElasticDataGen::HardwareResource::HardwareResource( |
| uint16_t max_rob, uint16_t max_stores, uint16_t max_loads) |
| : sizeROB(max_rob), |
| sizeStoreBuffer(max_stores), |
| sizeLoadBuffer(max_loads), |
| oldestInFlightRobNum(UINT64_MAX), |
| numInFlightLoads(0), |
| numInFlightStores(0) |
| {} |
| |
| void |
| TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node) |
| { |
    // Occupy a ROB entry for the issued node. Merely maintain the oldest
    // node, i.e. the numerically least robNum, by saving it in the
    // variable oldestInFlightRobNum.
| inFlightNodes[new_node->seqNum] = new_node->robNum; |
| oldestInFlightRobNum = inFlightNodes.begin()->second; |
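    // Note that inFlightNodes is a map keyed by sequence number, so
    // begin() refers to the oldest issued node still in flight and its
    // mapped robNum is the numerically least among in-flight nodes.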
| |
| // Occupy Load/Store Buffer entry for the issued node if applicable |
| if (new_node->isLoad()) { |
| ++numInFlightLoads; |
| } else if (new_node->isStore()) { |
| ++numInFlightStores; |
| } // else if it is a non load/store node, no buffer entry is occupied |
| |
| printOccupancy(); |
| } |
| |
| void |
| TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode* done_node) |
| { |
| assert(!inFlightNodes.empty()); |
| DPRINTFR(TraceCPUData, "\tClearing done seq. num %d from inFlightNodes..\n", |
| done_node->seqNum); |
| |
| assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end()); |
| inFlightNodes.erase(done_node->seqNum); |
| |
| if (inFlightNodes.empty()) { |
        // If we deleted the only in-flight node, then oldestInFlightRobNum
        // is reset to its initialised (max) value.
| oldestInFlightRobNum = UINT64_MAX; |
| } else { |
| // Set the oldest in-flight node rob number equal to the first node in |
| // the inFlightNodes since that will have the numerically least value. |
| oldestInFlightRobNum = inFlightNodes.begin()->second; |
| } |
| |
| DPRINTFR(TraceCPUData, "\tCleared. inFlightNodes.size() = %d, " |
| "oldestInFlightRobNum = %d\n", inFlightNodes.size(), |
| oldestInFlightRobNum); |
| |
| // A store is considered complete when a request is sent, thus ROB entry is |
| // freed. But it occupies an entry in the Store Buffer until its response |
| // is received. A load is considered complete when a response is received, |
| // thus both ROB and Load Buffer entries can be released. |
| if (done_node->isLoad()) { |
| assert(numInFlightLoads != 0); |
| --numInFlightLoads; |
| } |
    // For normal writes, we send the requests out and clear a store buffer
    // entry on response. For writes which are strictly ordered, e.g. writes
    // to device registers, we do that within release(), which is called
    // when the node is executed and taken off the readyList.
| if (done_node->isStore() && done_node->isStrictlyOrdered()) { |
| releaseStoreBuffer(); |
| } |
| } |
| |
| void |
| TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer() |
| { |
| assert(numInFlightStores != 0); |
| --numInFlightStores; |
| } |
| |
| bool |
| TraceCPU::ElasticDataGen::HardwareResource::isAvailable( |
| const GraphNode* new_node) const |
| { |
| uint16_t num_in_flight_nodes; |
| if (inFlightNodes.empty()) { |
| num_in_flight_nodes = 0; |
| DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:" |
| " #in-flight nodes = 0", new_node->seqNum); |
| } else if (new_node->robNum > oldestInFlightRobNum) { |
        // This is the intuitive case where the new dep-free node is a
        // younger instruction than the oldest instruction in-flight. Thus
        // we make sure num_in_flight_nodes does not overflow.
| num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum; |
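        // For example, if the new node's robNum is 150 and the oldest
        // in-flight robNum is 100, then 50 ROB entries are counted as
        // occupied.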
| DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:" |
| " #in-flight nodes = %d - %d = %d", new_node->seqNum, |
| new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes); |
| } else { |
| // This is the case where an instruction older than the oldest in- |
| // flight instruction becomes dep-free. Thus we must have already |
| // accounted for the entry in ROB for this new dep-free node. |
| // Immediately after this check returns true, oldestInFlightRobNum will |
| // be updated in occupy(). We simply let this node issue now. |
| num_in_flight_nodes = 0; |
| DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:" |
| " new oldestInFlightRobNum = %d, #in-flight nodes ignored", |
| new_node->seqNum, new_node->robNum); |
| } |
| DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ = %d/%d.\n", |
| numInFlightLoads, sizeLoadBuffer, |
| numInFlightStores, sizeStoreBuffer); |
| // Check if resources are available to issue the specific node |
| if (num_in_flight_nodes >= sizeROB) { |
| return false; |
| } |
| if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) { |
| return false; |
| } |
| if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) { |
| return false; |
| } |
| return true; |
| } |
| |
| bool |
TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const
{
| // Return true if there is at least one read or write request in flight |
| return (numInFlightStores != 0 || numInFlightLoads != 0); |
| } |
| |
| void |
TraceCPU::ElasticDataGen::HardwareResource::printOccupancy()
{
| DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, " |
| "LQ = %d/%d, SQ = %d/%d.\n", |
| oldestInFlightRobNum, |
| numInFlightLoads, sizeLoadBuffer, |
| numInFlightStores, sizeStoreBuffer); |
| } |
| |
| void |
| TraceCPU::FixedRetryGen::regStats() |
| { |
| using namespace Stats; |
| |
| numSendAttempted |
| .name(name() + ".numSendAttempted") |
| .desc("Number of first attempts to send a request") |
| ; |
| |
| numSendSucceeded |
| .name(name() + ".numSendSucceeded") |
| .desc("Number of successful first attempts") |
| ; |
| |
| numSendFailed |
| .name(name() + ".numSendFailed") |
| .desc("Number of failed first attempts") |
| ; |
| |
| numRetrySucceeded |
| .name(name() + ".numRetrySucceeded") |
| .desc("Number of successful retries") |
| ; |
| |
| instLastTick |
| .name(name() + ".instLastTick") |
| .desc("Last tick simulated from the fixed inst trace") |
| ; |
| } |
| |
| Tick |
| TraceCPU::FixedRetryGen::init() |
| { |
| DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator" |
| " IcacheGen: fixed issue with retry.\n"); |
| |
| if (nextExecute()) { |
| DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick); |
| return currElement.tick; |
| } else { |
| panic("Read of first message in the trace failed.\n"); |
| return MaxTick; |
| } |
| } |
| |
| bool |
| TraceCPU::FixedRetryGen::tryNext() |
| { |
| // If there is a retry packet, try to send it |
| if (retryPkt) { |
| |
| DPRINTF(TraceCPUInst, "Trying to send retry packet.\n"); |
| |
| if (!port.sendTimingReq(retryPkt)) { |
| // Still blocked! This should never occur. |
| DPRINTF(TraceCPUInst, "Retry packet sending failed.\n"); |
| return false; |
| } |
| ++numRetrySucceeded; |
| } else { |
| |
| DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n"); |
| |
| // try sending current element |
| assert(currElement.isValid()); |
| |
| ++numSendAttempted; |
| |
| if (!send(currElement.addr, currElement.blocksize, |
| currElement.cmd, currElement.flags, currElement.pc)) { |
| DPRINTF(TraceCPUInst, "currElement sending failed.\n"); |
| ++numSendFailed; |
| // return false to indicate not to schedule next event |
| return false; |
| } else { |
| ++numSendSucceeded; |
| } |
| } |
    // The packet, whether retryPkt or currElement, was sent successfully.
    // Return true to indicate that an event should be scheduled at the
    // current tick plus delta. If there is no next packet to send, return
    // false instead.
| DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next " |
| "element.\n"); |
| retryPkt = nullptr; |
    // Read the next element into currElement. Since currElement gets
    // cleared by the read, save the tick to calculate the delta.
| Tick last_tick = currElement.tick; |
| if (nextExecute()) { |
| assert(currElement.tick >= last_tick); |
| delta = currElement.tick - last_tick; |
| } |
| return !traceComplete; |
| } |
| |
| void |
| TraceCPU::FixedRetryGen::exit() |
| { |
| trace.reset(); |
| } |
| |
| bool |
| TraceCPU::FixedRetryGen::nextExecute() |
| { |
    if (traceComplete)
        // We are at the end of the file, thus we have no more messages.
        // Return false.
        return false;

    // Reset the currElement to the default values
    currElement.clear();
| |
| // Read the next line to get the next message. If that fails then end of |
| // trace has been reached and traceComplete needs to be set in addition |
| // to returning false. If successful then next message is in currElement. |
| if (!trace.read(&currElement)) { |
| traceComplete = true; |
| instLastTick = curTick(); |
| return false; |
| } |
| |
| DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n", |
| currElement.cmd.isRead() ? 'r' : 'w', |
| currElement.addr, |
| currElement.pc, |
| currElement.blocksize, |
| currElement.tick); |
| |
| return true; |
| } |
| |
| bool |
| TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd, |
| Request::FlagsType flags, Addr pc) |
| { |
| |
| // Create new request |
| auto req = std::make_shared<Request>(addr, size, flags, masterID); |
| req->setPC(pc); |
| |
| // If this is not done it triggers assert in L1 cache for invalid contextId |
| req->setContext(ContextID(0)); |
| |
| // Embed it in a packet |
| PacketPtr pkt = new Packet(req, cmd); |
| |
| uint8_t* pkt_data = new uint8_t[req->getSize()]; |
| pkt->dataDynamic(pkt_data); |
| |
| if (cmd.isWrite()) { |
| memset(pkt_data, 0xA, req->getSize()); |
| } |
| |
| // Call MasterPort method to send a timing request for this packet |
| bool success = port.sendTimingReq(pkt); |
| if (!success) { |
| // If it fails, save the packet to retry when a retry is signalled by |
| // the cache |
| retryPkt = pkt; |
| } |
| return success; |
| } |
| |
| void |
| TraceCPU::icacheRetryRecvd() |
| { |
| // Schedule an event to go through the control flow in the same tick as |
| // retry is received |
| DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen" |
| " event @%lli.\n", curTick()); |
| schedule(icacheNextEvent, curTick()); |
| } |
| |
| void |
| TraceCPU::dcacheRetryRecvd() |
| { |
| // Schedule an event to go through the execute flow in the same tick as |
| // retry is received |
| DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen" |
| " event @%lli.\n", curTick()); |
| schedule(dcacheNextEvent, curTick()); |
| } |
| |
| void |
| TraceCPU::schedDcacheNextEvent(Tick when) |
| { |
| if (!dcacheNextEvent.scheduled()) { |
| DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n", |
| when); |
| schedule(dcacheNextEvent, when); |
| ++numSchedDcacheEvent; |
| } else if (when < dcacheNextEvent.when()) { |
| DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli" |
| " to %lli.\n", dcacheNextEvent.when(), when); |
| reschedule(dcacheNextEvent, when); |
| } |
| |
| } |
| |
| bool |
| TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt) |
| { |
| // All responses on the instruction fetch side are ignored. Simply delete |
| // the packet to free allocated memory |
| delete pkt; |
| |
| return true; |
| } |
| |
| void |
| TraceCPU::IcachePort::recvReqRetry() |
| { |
| owner->icacheRetryRecvd(); |
| } |
| |
| void |
| TraceCPU::dcacheRecvTimingResp(PacketPtr pkt) |
| { |
| DPRINTF(TraceCPUData, "Received timing response from Dcache.\n"); |
| dcacheGen.completeMemAccess(pkt); |
| } |
| |
| bool |
| TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt) |
| { |
| // Handle the responses for data memory requests which is done inside the |
| // elastic data generator |
| owner->dcacheRecvTimingResp(pkt); |
| // After processing the response delete the packet to free |
| // memory |
| delete pkt; |
| |
| return true; |
| } |
| |
| void |
| TraceCPU::DcachePort::recvReqRetry() |
| { |
| owner->dcacheRetryRecvd(); |
| } |
| |
| TraceCPU::ElasticDataGen::InputStream::InputStream( |
| const std::string& filename, |
| const double time_multiplier) |
| : trace(filename), |
| timeMultiplier(time_multiplier), |
| microOpCount(0) |
| { |
| // Create a protobuf message for the header and read it from the stream |
| ProtoMessage::InstDepRecordHeader header_msg; |
    if (!trace.read(header_msg)) {
        panic("Failed to read packet header from %s\n", filename);
    } else if (header_msg.tick_freq() != SimClock::Frequency) {
        panic("Trace %s was recorded with a different tick frequency %d\n",
              filename, header_msg.tick_freq());
    } else {
        // Assign window size equal to the field in the trace that was
        // recorded when the data dependency trace was captured in the o3cpu
        // model
        windowSize = header_msg.window_size();
    }
| } |
| |
| void |
| TraceCPU::ElasticDataGen::InputStream::reset() |
| { |
| trace.reset(); |
| } |
| |
| bool |
| TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element) |
| { |
| ProtoMessage::InstDepRecord pkt_msg; |
| if (trace.read(pkt_msg)) { |
| // Required fields |
| element->seqNum = pkt_msg.seq_num(); |
| element->type = pkt_msg.type(); |
| // Scale the compute delay to effectively scale the Trace CPU frequency |
| element->compDelay = pkt_msg.comp_delay() * timeMultiplier; |
| |
| // Repeated field robDepList |
| element->clearRobDep(); |
| assert((pkt_msg.rob_dep()).size() <= element->maxRobDep); |
| for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) { |
| element->robDep[element->numRobDep] = pkt_msg.rob_dep(i); |
| element->numRobDep += 1; |
| } |
| |
| // Repeated field |
| element->clearRegDep(); |
| assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs); |
| for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) { |
            // There is a possibility that an instruction has both a
            // register and an order (ROB) dependency on the same
            // instruction. In such a case, the register dependency is
            // omitted.
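            // For example, if this record lists node 5 in both rob_dep and
            // reg_dep, only the ROB dependency on node 5 is kept.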
| bool duplicate = false; |
| for (int j = 0; j < element->numRobDep; j++) { |
| duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]); |
| } |
| if (!duplicate) { |
| element->regDep[element->numRegDep] = pkt_msg.reg_dep(i); |
| element->numRegDep += 1; |
| } |
| } |
| |
| // Optional fields |
| if (pkt_msg.has_p_addr()) |
| element->physAddr = pkt_msg.p_addr(); |
| else |
| element->physAddr = 0; |
| |
| if (pkt_msg.has_v_addr()) |
| element->virtAddr = pkt_msg.v_addr(); |
| else |
| element->virtAddr = 0; |
| |
| if (pkt_msg.has_size()) |
| element->size = pkt_msg.size(); |
| else |
| element->size = 0; |
| |
| if (pkt_msg.has_flags()) |
| element->flags = pkt_msg.flags(); |
| else |
| element->flags = 0; |
| |
| if (pkt_msg.has_pc()) |
| element->pc = pkt_msg.pc(); |
| else |
| element->pc = 0; |
| |
| // ROB occupancy number |
| ++microOpCount; |
| if (pkt_msg.has_weight()) { |
| microOpCount += pkt_msg.weight(); |
| } |
| element->robNum = microOpCount; |
| return true; |
| } |
| |
| // We have reached the end of the file |
| return false; |
| } |
| |
| bool |
| TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep) |
| { |
| for (auto& own_reg_dep : regDep) { |
| if (own_reg_dep == reg_dep) { |
| // If register dependency is found, make it zero and return true |
| own_reg_dep = 0; |
| assert(numRegDep > 0); |
| --numRegDep; |
| DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency %lli " |
| "done.\n", seqNum, reg_dep); |
| return true; |
| } |
| } |
| |
| // Return false if the dependency is not found |
| return false; |
| } |
| |
| bool |
| TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep) |
| { |
| for (auto& own_rob_dep : robDep) { |
| if (own_rob_dep == rob_dep) { |
| // If the rob dependency is found, make it zero and return true |
| own_rob_dep = 0; |
| assert(numRobDep > 0); |
| --numRobDep; |
| DPRINTFR(TraceCPUData, "\tFor %lli: Marking ROB dependency %lli " |
| "done.\n", seqNum, rob_dep); |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| void |
TraceCPU::ElasticDataGen::GraphNode::clearRegDep()
{
| for (auto& own_reg_dep : regDep) { |
| own_reg_dep = 0; |
| } |
| numRegDep = 0; |
| } |
| |
| void |
TraceCPU::ElasticDataGen::GraphNode::clearRobDep()
{
| for (auto& own_rob_dep : robDep) { |
| own_rob_dep = 0; |
| } |
| numRobDep = 0; |
| } |
| |
| bool |
| TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num) |
| { |
| // If it is an rob dependency then remove it |
| if (!removeRobDep(done_seq_num)) { |
| // If it is not an rob dependency then it must be a register dependency |
| // If the register dependency is not found, it violates an assumption |
| // and must be caught by assert. |
| bool regdep_found M5_VAR_USED = removeRegDep(done_seq_num); |
| assert(regdep_found); |
| } |
| // Return true if the node is dependency free |
| return (numRobDep == 0 && numRegDep == 0); |
| } |
| |
| void |
| TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const |
| { |
| DPRINTFR(TraceCPUData, "%lli", seqNum); |
| DPRINTFR(TraceCPUData, ",%s", typeToStr()); |
| if (isLoad() || isStore()) { |
| DPRINTFR(TraceCPUData, ",%i", physAddr); |
| DPRINTFR(TraceCPUData, ",%i", size); |
| DPRINTFR(TraceCPUData, ",%i", flags); |
| } |
| DPRINTFR(TraceCPUData, ",%lli", compDelay); |
| int i = 0; |
| DPRINTFR(TraceCPUData, "robDep:"); |
| while (robDep[i] != 0) { |
| DPRINTFR(TraceCPUData, ",%lli", robDep[i]); |
| i++; |
| } |
| i = 0; |
| DPRINTFR(TraceCPUData, "regDep:"); |
| while (regDep[i] != 0) { |
| DPRINTFR(TraceCPUData, ",%lli", regDep[i]); |
| i++; |
| } |
| auto child_itr = dependents.begin(); |
| DPRINTFR(TraceCPUData, "dependents:"); |
| while (child_itr != dependents.end()) { |
| DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum); |
| child_itr++; |
| } |
| |
| DPRINTFR(TraceCPUData, "\n"); |
| } |
| |
| std::string |
| TraceCPU::ElasticDataGen::GraphNode::typeToStr() const |
| { |
| return Record::RecordType_Name(type); |
| } |
| |
| TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string& filename) |
| : trace(filename) |
| { |
| // Create a protobuf message for the header and read it from the stream |
| ProtoMessage::PacketHeader header_msg; |
    if (!trace.read(header_msg)) {
        panic("Failed to read packet header from %s\n", filename);
    } else if (header_msg.tick_freq() != SimClock::Frequency) {
        panic("Trace %s was recorded with a different tick frequency %d\n",
              filename, header_msg.tick_freq());
    }
| } |
| |
| void |
| TraceCPU::FixedRetryGen::InputStream::reset() |
| { |
| trace.reset(); |
| } |
| |
| bool |
| TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element) |
| { |
| ProtoMessage::Packet pkt_msg; |
| if (trace.read(pkt_msg)) { |
| element->cmd = pkt_msg.cmd(); |
| element->addr = pkt_msg.addr(); |
| element->blocksize = pkt_msg.size(); |
| element->tick = pkt_msg.tick(); |
| element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0; |
| element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0; |
| return true; |
| } |
| |
| // We have reached the end of the file |
| return false; |
| } |