src/cpu/trace/trace_cpu.cc - public/gem5 - Git at Google

 /*
  * Copyright (c) 2013 - 2016 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
  * not be construed as granting a license to any other intellectual
  * property including but not limited to intellectual property relating
  * to a hardware implementation of the functionality of the software
  * licensed hereunder.  You may use the software subject to the license
  * terms below provided that you ensure that this notice is replicated
  * unmodified and in its entirety in all distributions of the software,
  * modified or unmodified, in source code or in binary form.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met: redistributions of source code must retain the above copyright
  * notice, this list of conditions and the following disclaimer;
  * redistributions in binary form must reproduce the above copyright
  * notice, this list of conditions and the following disclaimer in the
  * documentation and/or other materials provided with the distribution;
  * neither the name of the copyright holders nor the names of its
  * contributors may be used to endorse or promote products derived from
  * this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Authors: Radhika Jagtap
  *          Andreas Hansson
  *          Thomas Grass
  */

 #include "cpu/trace/trace_cpu.hh"

 #include "sim/sim_exit.hh"

 // Declare and initialize the static counter for number of trace CPUs.
 int TraceCPU::numTraceCPUs = 0;

 TraceCPU::TraceCPU(TraceCPUParams *params)
     :   BaseCPU(params),
         icachePort(this),
         dcachePort(this),
         instMasterID(params->system->getMasterId(this, "inst")),
         dataMasterID(params->system->getMasterId(this, "data")),
         instTraceFile(params->instTraceFile),
         dataTraceFile(params->dataTraceFile),
         icacheGen(*this, ".iside", icachePort, instMasterID, instTraceFile),
         dcacheGen(*this, ".dside", dcachePort, dataMasterID, dataTraceFile,
                   params),
         icacheNextEvent([this]{ schedIcacheNext(); }, name()),
         dcacheNextEvent([this]{ schedDcacheNext(); }, name()),
         oneTraceComplete(false),
         traceOffset(0),
         execCompleteEvent(nullptr),
         enableEarlyExit(params->enableEarlyExit),
         progressMsgInterval(params->progressMsgInterval),
         progressMsgThreshold(params->progressMsgInterval)
 {
     // Increment static counter for number of Trace CPUs.
     ++TraceCPU::numTraceCPUs;

     // Check that the python parameters for sizes of ROB, store buffer and
     // load buffer do not overflow the corresponding C++ variables.
     fatal_if(params->sizeROB > UINT16_MAX, "ROB size set to %d exceeds the "
                 "max. value of %d.\n", params->sizeROB, UINT16_MAX);
     fatal_if(params->sizeStoreBuffer > UINT16_MAX, "ROB size set to %d "
                 "exceeds the max. value of %d.\n", params->sizeROB,
                 UINT16_MAX);
     fatal_if(params->sizeLoadBuffer > UINT16_MAX, "Load buffer size set to"
                 " %d exceeds the max. value of %d.\n",
                 params->sizeLoadBuffer, UINT16_MAX);
 }

 TraceCPU::~TraceCPU()
 {

 }

 TraceCPU*
 TraceCPUParams::create()
 {
     return new TraceCPU(this);
 }

 void
 TraceCPU::updateNumOps(uint64_t rob_num)
 {
     numOps = rob_num;
     if (progressMsgInterval != 0 && numOps.value() >= progressMsgThreshold) {
         inform("%s: %i insts committed\n", name(), progressMsgThreshold);
         progressMsgThreshold += progressMsgInterval;
     }
 }

 void
 TraceCPU::takeOverFrom(BaseCPU *oldCPU)
 {
     // Unbind the ports of the old CPU and bind the ports of the TraceCPU.
     getInstPort().takeOverFrom(&oldCPU->getInstPort());
     getDataPort().takeOverFrom(&oldCPU->getDataPort());
 }

 void
 TraceCPU::init()
 {
     DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\"."
             "\n", instTraceFile);
     DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
             dataTraceFile);

     BaseCPU::init();

     // Get the send tick of the first instruction read request
     Tick first_icache_tick = icacheGen.init();

     // Get the send tick of the first data read/write request
     Tick first_dcache_tick = dcacheGen.init();

     // Set the trace offset as the minimum of that in both traces
     traceOffset = std::min(first_icache_tick, first_dcache_tick);
     inform("%s: Time offset (tick) found as min of both traces is %lli.\n",
             name(), traceOffset);

     // Schedule next icache and dcache event by subtracting the offset
     schedule(icacheNextEvent, first_icache_tick - traceOffset);
     schedule(dcacheNextEvent, first_dcache_tick - traceOffset);

     // Adjust the trace offset for the dcache generator's ready nodes
     // We don't need to do this for the icache generator as it will
     // send its first request at the first event and schedule subsequent
     // events using a relative tick delta
     dcacheGen.adjustInitTraceOffset(traceOffset);

     // If the Trace CPU simulation is configured to exit on any one trace
     // completion then we don't need a counted event to count down all Trace
     // CPUs in the system. If not then instantiate a counted event.
     if (!enableEarlyExit) {
         // The static counter for number of Trace CPUs is correctly set at
         // this point so create an event and pass it.
         execCompleteEvent = new CountedExitEvent("end of all traces reached.",
                                                  numTraceCPUs);
     }

 }

 void
 TraceCPU::schedIcacheNext()
 {
     DPRINTF(TraceCPUInst, "IcacheGen event.\n");

     // Try to send the current packet or a retry packet if there is one
     bool sched_next = icacheGen.tryNext();
     // If packet sent successfully, schedule next event
     if (sched_next) {
         DPRINTF(TraceCPUInst, "Scheduling next icacheGen event "
                 "at %d.\n", curTick() + icacheGen.tickDelta());
         schedule(icacheNextEvent, curTick() + icacheGen.tickDelta());
         ++numSchedIcacheEvent;
     } else {
         // check if traceComplete. If not, do nothing because sending failed
         // and next event will be scheduled via RecvRetry()
         if (icacheGen.isTraceComplete()) {
             // If this is the first trace to complete, set the variable. If it
             // is already set then both traces are complete to exit sim.
             checkAndSchedExitEvent();
         }
     }
     return;
 }

 void
 TraceCPU::schedDcacheNext()
 {
     DPRINTF(TraceCPUData, "DcacheGen event.\n");

     // Update stat for numCycles
     numCycles = clockEdge() / clockPeriod();

     dcacheGen.execute();
     if (dcacheGen.isExecComplete()) {
         checkAndSchedExitEvent();
     }
 }

 void
 TraceCPU::checkAndSchedExitEvent()
 {
     if (!oneTraceComplete) {
         oneTraceComplete = true;
     } else {
         // Schedule event to indicate execution is complete as both
         // instruction and data access traces have been played back.
         inform("%s: Execution complete.\n", name());
         // If the replay is configured to exit early, that is when any one
         // execution is complete then exit immediately and return. Otherwise,
         // schedule the counted exit that counts down completion of each Trace
         // CPU.
         if (enableEarlyExit) {
             exitSimLoop("End of trace reached");
         } else {
             schedule(*execCompleteEvent, curTick());
         }
     }
 }

 void
 TraceCPU::regStats()
 {

     BaseCPU::regStats();

     numSchedDcacheEvent
     .name(name() + ".numSchedDcacheEvent")
     .desc("Number of events scheduled to trigger data request generator")
     ;

     numSchedIcacheEvent
     .name(name() + ".numSchedIcacheEvent")
     .desc("Number of events scheduled to trigger instruction request generator")
     ;

     numOps
     .name(name() + ".numOps")
     .desc("Number of micro-ops simulated by the Trace CPU")
     ;

     cpi
     .name(name() + ".cpi")
     .desc("Cycles per micro-op used as a proxy for CPI")
     .precision(6)
     ;
     cpi = numCycles/numOps;

     icacheGen.regStats();
     dcacheGen.regStats();
 }

 void
 TraceCPU::ElasticDataGen::regStats()
 {
     using namespace Stats;

     maxDependents
     .name(name() + ".maxDependents")
     .desc("Max number of dependents observed on a node")
     ;

     maxReadyListSize
     .name(name() + ".maxReadyListSize")
     .desc("Max size of the ready list observed")
     ;

     numSendAttempted
     .name(name() + ".numSendAttempted")
     .desc("Number of first attempts to send a request")
     ;

     numSendSucceeded
     .name(name() + ".numSendSucceeded")
     .desc("Number of successful first attempts")
     ;

     numSendFailed
     .name(name() + ".numSendFailed")
     .desc("Number of failed first attempts")
     ;

     numRetrySucceeded
     .name(name() + ".numRetrySucceeded")
     .desc("Number of successful retries")
     ;

     numSplitReqs
     .name(name() + ".numSplitReqs")
     .desc("Number of split requests")
     ;

     numSOLoads
     .name(name() + ".numSOLoads")
     .desc("Number of strictly ordered loads")
     ;

     numSOStores
     .name(name() + ".numSOStores")
     .desc("Number of strictly ordered stores")
     ;

     dataLastTick
     .name(name() + ".dataLastTick")
     .desc("Last tick simulated from the elastic data trace")
     ;
 }

 Tick
 TraceCPU::ElasticDataGen::init()
 {
     DPRINTF(TraceCPUData, "Initializing data memory request generator "
             "DcacheGen: elastic issue with retry.\n");

     if (!readNextWindow())
         panic("Trace has %d elements. It must have at least %d elements.\n",
               depGraph.size(), 2 * windowSize);
     DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n",
             depGraph.size());

     if (!readNextWindow())
         panic("Trace has %d elements. It must have at least %d elements.\n",
               depGraph.size(), 2 * windowSize);
     DPRINTF(TraceCPUData, "After 2st read, depGraph size:%d.\n",
             depGraph.size());

     // Print readyList
     if (DTRACE(TraceCPUData)) {
         printReadyList();
     }
     auto free_itr = readyList.begin();
     DPRINTF(TraceCPUData, "Execute tick of the first dependency free node %lli"
             " is %d.\n", free_itr->seqNum, free_itr->execTick);
     // Return the execute tick of the earliest ready node so that an event
     // can be scheduled to call execute()
     return (free_itr->execTick);
 }

 void
 TraceCPU::ElasticDataGen::adjustInitTraceOffset(Tick& offset) {
     for (auto& free_node : readyList) {
         free_node.execTick -= offset;
     }
 }

 void
 TraceCPU::ElasticDataGen::exit()
 {
     trace.reset();
 }

 bool
 TraceCPU::ElasticDataGen::readNextWindow()
 {

     // Read and add next window
     DPRINTF(TraceCPUData, "Reading next window from file.\n");

     if (traceComplete) {
         // We are at the end of the file, thus we have no more records.
         // Return false.
         return false;
     }

     DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n",
             depGraph.size());

     uint32_t num_read = 0;
     while (num_read != windowSize) {

         // Create a new graph node
         GraphNode* new_node = new GraphNode;

         // Read the next line to get the next record. If that fails then end of
         // trace has been reached and traceComplete needs to be set in addition
         // to returning false.
         if (!trace.read(new_node)) {
             DPRINTF(TraceCPUData, "\tTrace complete!\n");
             traceComplete = true;
             return false;
         }

         // Annotate the ROB dependencies of the new node onto the parent nodes.
         addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep);
         // Annotate the register dependencies of the new node onto the parent
         // nodes.
         addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep);

         num_read++;
         // Add to map
         depGraph[new_node->seqNum] = new_node;
         if (new_node->numRobDep == 0 && new_node->numRegDep == 0) {
             // Source dependencies are already complete, check if resources
             // are available and issue. The execution time is approximated
             // to current time plus the computational delay.
             checkAndIssue(new_node);
         }
     }

     DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n",
             depGraph.size());
     return true;
 }

 template<typename T> void
 TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node,
                                             T& dep_array, uint8_t& num_dep)
 {
     for (auto& a_dep : dep_array) {
         // The convention is to set the dependencies starting with the first
         // index in the ROB and register dependency arrays. Thus, when we reach
         // a dependency equal to the initialisation value of zero, we know have
         // iterated over all dependencies and can break.
         if (a_dep == 0)
             break;
         // We look up the valid dependency, i.e. the parent of this node
         auto parent_itr = depGraph.find(a_dep);
         if (parent_itr != depGraph.end()) {
             // If the parent is found, it is yet to be executed. Append a
             // pointer to the new node to the dependents list of the parent
             // node.
             parent_itr->second->dependents.push_back(new_node);
             auto num_depts = parent_itr->second->dependents.size();
             maxDependents = std::max<double>(num_depts, maxDependents.value());
         } else {
             // The dependency is not found in the graph. So consider
             // the execution of the parent is complete, i.e. remove this
             // dependency.
             a_dep = 0;
             num_dep--;
         }
     }
 }

 void
 TraceCPU::ElasticDataGen::execute()
 {
     DPRINTF(TraceCPUData, "Execute start occupancy:\n");
     DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
             "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
             depFreeQueue.size());
     hwResource.printOccupancy();

     // Read next window to make sure that dependents of all dep-free nodes
     // are in the depGraph
     if (nextRead) {
         readNextWindow();
         nextRead = false;
     }

     // First attempt to issue the pending dependency-free nodes held
     // in depFreeQueue. If resources have become available for a node,
     // then issue it, i.e. add the node to readyList.
     while (!depFreeQueue.empty()) {
         if (checkAndIssue(depFreeQueue.front(), false)) {
             DPRINTF(TraceCPUData, "Removing from depFreeQueue: seq. num "
                 "%lli.\n", (depFreeQueue.front())->seqNum);
             depFreeQueue.pop();
         } else {
             break;
         }
     }
     // Proceed to execute from readyList
     auto graph_itr = depGraph.begin();
     auto free_itr = readyList.begin();
     // Iterate through readyList until the next free node has its execute
     // tick later than curTick or the end of readyList is reached
     while (free_itr->execTick <= curTick() && free_itr != readyList.end()) {

         // Get pointer to the node to be executed
         graph_itr = depGraph.find(free_itr->seqNum);
         assert(graph_itr != depGraph.end());
         GraphNode* node_ptr = graph_itr->second;

         // If there is a retryPkt send that else execute the load
         if (retryPkt) {
             // The retryPkt must be the request that was created by the
             // first node in the readyList.
             if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) {
                 panic("Retry packet's seqence number does not match "
                       "the first node in the readyList.\n");
             }
             if (port.sendTimingReq(retryPkt)) {
                 ++numRetrySucceeded;
                 retryPkt = nullptr;
             }
         } else if (node_ptr->isLoad() || node_ptr->isStore()) {
             // If there is no retryPkt, attempt to send a memory request in
             // case of a load or store node. If the send fails, executeMemReq()
             // returns a packet pointer, which we save in retryPkt. In case of
             // a comp node we don't do anything and simply continue as if the
             // execution of the comp node succedded.
             retryPkt = executeMemReq(node_ptr);
         }
         // If the retryPkt or a new load/store node failed, we exit from here
         // as a retry from cache will bring the control to execute(). The
         // first node in readyList then, will be the failed node.
         if (retryPkt) {
             break;
         }

         // Proceed to remove dependencies for the successfully executed node.
         // If it is a load which is not strictly ordered and we sent a
         // request for it successfully, we do not yet mark any register
         // dependencies complete. But as per dependency modelling we need
         // to mark ROB dependencies of load and non load/store nodes which
         // are based on successful sending of the load as complete.
         if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) {
             // If execute succeeded mark its dependents as complete
             DPRINTF(TraceCPUData, "Node seq. num %lli sent. Waking up "
                     "dependents..\n", node_ptr->seqNum);

             auto child_itr = (node_ptr->dependents).begin();
             while (child_itr != (node_ptr->dependents).end()) {
                 // ROB dependency of a store on a load must not be removed
                 // after load is sent but after response is received
                 if (!(*child_itr)->isStore() &&
                     (*child_itr)->removeRobDep(node_ptr->seqNum)) {

                     // Check if the child node has become dependency free
                     if ((*child_itr)->numRobDep == 0 &&
                         (*child_itr)->numRegDep == 0) {

                         // Source dependencies are complete, check if
                         // resources are available and issue
                         checkAndIssue(*child_itr);
                     }
                     // Remove this child for the sent load and point to new
                     // location of the element following the erased element
                     child_itr = node_ptr->dependents.erase(child_itr);
                 } else {
                     // This child is not dependency-free, point to the next
                     // child
                     child_itr++;
                 }
             }
         } else {
             // If it is a strictly ordered load mark its dependents as complete
             // as we do not send a request for this case. If it is a store or a
             // comp node we also mark all its dependents complete.
             DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking"
                     " up dependents..\n", node_ptr->seqNum);

             for (auto child : node_ptr->dependents) {
                 // If the child node is dependency free removeDepOnInst()
                 // returns true.
                 if (child->removeDepOnInst(node_ptr->seqNum)) {
                     // Source dependencies are complete, check if resources
                     // are available and issue
                     checkAndIssue(child);
                 }
             }
         }

         // After executing the node, remove from readyList and delete node.
         readyList.erase(free_itr);
         // If it is a cacheable load which was sent, don't delete
         // just yet.  Delete it in completeMemAccess() after the
         // response is received. If it is an strictly ordered
         // load, it was not sent and all dependencies were simply
         // marked complete. Thus it is safe to delete it. For
         // stores and non load/store nodes all dependencies were
         // marked complete so it is safe to delete it.
         if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) {
             // Release all resources occupied by the completed node
             hwResource.release(node_ptr);
             // clear the dynamically allocated set of dependents
             (node_ptr->dependents).clear();
             // Update the stat for numOps simulated
             owner.updateNumOps(node_ptr->robNum);
             // delete node
             delete node_ptr;
             // remove from graph
             depGraph.erase(graph_itr);
         }
         // Point to first node to continue to next iteration of while loop
         free_itr = readyList.begin();
     } // end of while loop

     // Print readyList, sizes of queues and resource status after updating
     if (DTRACE(TraceCPUData)) {
         printReadyList();
         DPRINTF(TraceCPUData, "Execute end occupancy:\n");
         DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
                 "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
                 depFreeQueue.size());
         hwResource.printOccupancy();
     }

     if (retryPkt) {
         DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry"
                 "event from the cache for seq. num %lli.\n",
                 retryPkt->req->getReqInstSeqNum());
         return;
     }
     // If the size of the dependency graph is less than the dependency window
     // then read from the trace file to populate the graph next time we are in
     // execute.
     if (depGraph.size() < windowSize && !traceComplete)
         nextRead = true;

     // If cache is not blocked, schedule an event for the first execTick in
     // readyList else retry from cache will schedule the event. If the ready
     // list is empty then check if the next pending node has resources
     // available to issue. If yes, then schedule an event for the next cycle.
     if (!readyList.empty()) {
         Tick next_event_tick = std::max(readyList.begin()->execTick,
                                         curTick());
         DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                 next_event_tick);
         owner.schedDcacheNextEvent(next_event_tick);
     } else if (readyList.empty() && !depFreeQueue.empty() &&
                 hwResource.isAvailable(depFreeQueue.front())) {
         DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                 owner.clockEdge(Cycles(1)));
         owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1)));
     }

     // If trace is completely read, readyList is empty and depGraph is empty,
     // set execComplete to true
     if (depGraph.empty() && readyList.empty() && traceComplete &&
         !hwResource.awaitingResponse()) {
         DPRINTF(TraceCPUData, "\tExecution Complete!\n");
         execComplete = true;
         dataLastTick = curTick();
     }
 }

 PacketPtr
 TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)
 {

     DPRINTF(TraceCPUData, "Executing memory request %lli (phys addr %d, "
             "virt addr %d, pc %#x, size %d, flags %d).\n",
             node_ptr->seqNum, node_ptr->physAddr, node_ptr->virtAddr,
             node_ptr->pc, node_ptr->size, node_ptr->flags);

     // If the request is strictly ordered, do not send it. Just return nullptr
     // as if it was succesfully sent.
     if (node_ptr->isStrictlyOrdered()) {
         node_ptr->isLoad() ? ++numSOLoads : ++numSOStores;
         DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n",
                 node_ptr->seqNum);
         return nullptr;
     }

     // Check if the request spans two cache lines as this condition triggers
     // an assert fail in the L1 cache. If it does then truncate the size to
     // access only until the end of that line and ignore the remainder. The
     // stat counting this is useful to keep a check on how frequently this
     // happens. If required the code could be revised to mimick splitting such
     // a request into two.
     unsigned blk_size = owner.cacheLineSize();
     Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
     if (!(blk_offset + node_ptr->size <= blk_size)) {
         node_ptr->size = blk_size - blk_offset;
         ++numSplitReqs;
     }

     // Create a request and the packet containing request
     auto req = std::make_shared<Request>(
         node_ptr->physAddr, node_ptr->size,
         node_ptr->flags, masterID, node_ptr->seqNum,
         ContextID(0));

     req->setPC(node_ptr->pc);
     // If virtual address is valid, set the asid and virtual address fields
     // of the request.
     if (node_ptr->virtAddr != 0) {
         req->setVirt(node_ptr->asid, node_ptr->virtAddr, node_ptr->size,
                         node_ptr->flags, masterID, node_ptr->pc);
         req->setPaddr(node_ptr->physAddr);
         req->setReqInstSeqNum(node_ptr->seqNum);
     }

     PacketPtr pkt;
     uint8_t* pkt_data = new uint8_t[req->getSize()];
     if (node_ptr->isLoad()) {
         pkt = Packet::createRead(req);
     } else {
         pkt = Packet::createWrite(req);
         memset(pkt_data, 0xA, req->getSize());
     }
     pkt->dataDynamic(pkt_data);

     // Call MasterPort method to send a timing request for this packet
     bool success = port.sendTimingReq(pkt);
     ++numSendAttempted;

     if (!success) {
         // If it fails, return the packet to retry when a retry is signalled by
         // the cache
         ++numSendFailed;
         DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n");
         return pkt;
     } else {
         // It is succeeds, return nullptr
         ++numSendSucceeded;
         return nullptr;
     }
 }

 bool
 TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr, bool first)
 {
     // Assert the node is dependency-free
     assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0);

     // If this is the first attempt, print a debug message to indicate this.
     if (first) {
         DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is now"
             " dependency free.\n", node_ptr->seqNum, node_ptr->typeToStr(),
             node_ptr->robNum);
     }

     // Check if resources are available to issue the specific node
     if (hwResource.isAvailable(node_ptr)) {
         // If resources are free only then add to readyList
         DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. Adding"
             " to readyList, occupying resources.\n", node_ptr->seqNum);
         // Compute the execute tick by adding the compute delay for the node
         // and add the ready node to the ready list
         addToSortedReadyList(node_ptr->seqNum,
                                 owner.clockEdge() + node_ptr->compDelay);
         // Account for the resources taken up by this issued node.
         hwResource.occupy(node_ptr);
         return true;

     } else {
         if (first) {
             // Although dependencies are complete, resources are not available.
             DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli."
                 " Adding to depFreeQueue.\n", node_ptr->seqNum);
             depFreeQueue.push(node_ptr);
         } else {
             DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli. "
                 "Still pending issue.\n", node_ptr->seqNum);
         }
         return false;
     }
 }

 void
 TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt)
 {
     // Release the resources for this completed node.
     if (pkt->isWrite()) {
         // Consider store complete.
         hwResource.releaseStoreBuffer();
         // If it is a store response then do nothing since we do not model
         // dependencies on store completion in the trace. But if we were
         // blocking execution due to store buffer fullness, we need to schedule
         // an event and attempt to progress.
     } else {
         // If it is a load response then release the dependents waiting on it.
         // Get pointer to the completed load
         auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum());
         assert(graph_itr != depGraph.end());
         GraphNode* node_ptr = graph_itr->second;

         // Release resources occupied by the load
         hwResource.release(node_ptr);

         DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up"
                 " dependents..\n", node_ptr->seqNum);

         for (auto child : node_ptr->dependents) {
             if (child->removeDepOnInst(node_ptr->seqNum)) {
                 checkAndIssue(child);
             }
         }

         // clear the dynamically allocated set of dependents
         (node_ptr->dependents).clear();
         // Update the stat for numOps completed
         owner.updateNumOps(node_ptr->robNum);
         // delete node
         delete node_ptr;
         // remove from graph
         depGraph.erase(graph_itr);
     }

     if (DTRACE(TraceCPUData)) {
         printReadyList();
     }

     // If the size of the dependency graph is less than the dependency window
     // then read from the trace file to populate the graph next time we are in
     // execute.
     if (depGraph.size() < windowSize && !traceComplete)
         nextRead = true;

     // If not waiting for retry, attempt to schedule next event
     if (!retryPkt) {
         // We might have new dep-free nodes in the list which will have execute
         // tick greater than or equal to curTick. But a new dep-free node might
         // have its execute tick earlier. Therefore, attempt to reschedule. It
         // could happen that the readyList is empty and we got here via a
         // last remaining response. So, either the trace is complete or there
         // are pending nodes in the depFreeQueue. The checking is done in the
         // execute() control flow, so schedule an event to go via that flow.
         Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) :
             std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1)));
         DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                 next_event_tick);
         owner.schedDcacheNextEvent(next_event_tick);
     }
 }

 void
 TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num,
                                                     Tick exec_tick)
 {
     ReadyNode ready_node;
     ready_node.seqNum = seq_num;
     ready_node.execTick = exec_tick;

     // Iterator to readyList
     auto itr = readyList.begin();

     // If the readyList is empty, simply insert the new node at the beginning
     // and return
     if (itr == readyList.end()) {
         readyList.insert(itr, ready_node);
         maxReadyListSize = std::max<double>(readyList.size(),
                                               maxReadyListSize.value());
         return;
     }

     // If the new node has its execution tick equal to the first node in the
     // list then go to the next node. If the first node in the list failed
     // to execute, its position as the first is thus maintained.
     if (retryPkt)
         if (retryPkt->req->getReqInstSeqNum() == itr->seqNum)
             itr++;

     // Increment the iterator and compare the node pointed to by it to the new
     // node till the position to insert the new node is found.
     bool found = false;
     while (!found && itr != readyList.end()) {
         // If the execution tick of the new node is less than the node then
         // this is the position to insert
         if (exec_tick < itr->execTick)
             found = true;
         // If the execution tick of the new node is equal to the node then
         // sort in ascending order of sequence numbers
         else if (exec_tick == itr->execTick) {
             // If the sequence number of the new node is less than the node
             // then this is the position to insert
             if (seq_num < itr->seqNum)
                 found = true;
             // Else go to next node
             else
                 itr++;
         }
         // If the execution tick of the new node is greater than the node then
         // go to the next node
         else
             itr++;
     }
     readyList.insert(itr, ready_node);
     // Update the stat for max size reached of the readyList
     maxReadyListSize = std::max<double>(readyList.size(),
                                           maxReadyListSize.value());
 }

 void
 TraceCPU::ElasticDataGen::printReadyList() {

     auto itr = readyList.begin();
     if (itr == readyList.end()) {
         DPRINTF(TraceCPUData, "readyList is empty.\n");
         return;
     }
     DPRINTF(TraceCPUData, "Printing readyList:\n");
     while (itr != readyList.end()) {
         auto graph_itr = depGraph.find(itr->seqNum);
         GraphNode* node_ptr M5_VAR_USED = graph_itr->second;
         DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum,
             node_ptr->typeToStr(), itr->execTick);
         itr++;
     }
 }

 TraceCPU::ElasticDataGen::HardwareResource::HardwareResource(
     uint16_t max_rob, uint16_t max_stores, uint16_t max_loads)
   : sizeROB(max_rob),
     sizeStoreBuffer(max_stores),
     sizeLoadBuffer(max_loads),
     oldestInFlightRobNum(UINT64_MAX),
     numInFlightLoads(0),
     numInFlightStores(0)
 {}

 void
 TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node)
 {
     // Occupy ROB entry for the issued node
     // Merely maintain the oldest node, i.e. numerically least robNum by saving
     // it in the variable oldestInFLightRobNum.
     inFlightNodes[new_node->seqNum] = new_node->robNum;
     oldestInFlightRobNum = inFlightNodes.begin()->second;

     // Occupy Load/Store Buffer entry for the issued node if applicable
     if (new_node->isLoad()) {
         ++numInFlightLoads;
     } else if (new_node->isStore()) {
         ++numInFlightStores;
     } // else if it is a non load/store node, no buffer entry is occupied

     printOccupancy();
 }

 void
 TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode* done_node)
 {
     assert(!inFlightNodes.empty());
     DPRINTFR(TraceCPUData, "\tClearing done seq. num %d from inFlightNodes..\n",
         done_node->seqNum);

     assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end());
     inFlightNodes.erase(done_node->seqNum);

     if (inFlightNodes.empty()) {
         // If we delete the only in-flight node and then the
         // oldestInFlightRobNum is set to it's initialized (max) value.
         oldestInFlightRobNum = UINT64_MAX;
     } else {
         // Set the oldest in-flight node rob number equal to the first node in
         // the inFlightNodes since that will have the numerically least value.
         oldestInFlightRobNum = inFlightNodes.begin()->second;
     }

     DPRINTFR(TraceCPUData, "\tCleared. inFlightNodes.size() = %d, "
         "oldestInFlightRobNum = %d\n", inFlightNodes.size(),
         oldestInFlightRobNum);

     // A store is considered complete when a request is sent, thus ROB entry is
     // freed. But it occupies an entry in the Store Buffer until its response
     // is received. A load is considered complete when a response is received,
     // thus both ROB and Load Buffer entries can be released.
     if (done_node->isLoad()) {
         assert(numInFlightLoads != 0);
         --numInFlightLoads;
     }
     // For normal writes, we send the requests out and clear a store buffer
     // entry on response. For writes which are strictly ordered, for e.g.
     // writes to device registers, we do that within release() which is called
     // when node is executed and taken off from readyList.
     if (done_node->isStore() && done_node->isStrictlyOrdered()) {
         releaseStoreBuffer();
     }
 }

 void
 TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer()
 {
     assert(numInFlightStores != 0);
     --numInFlightStores;
 }

 bool
 TraceCPU::ElasticDataGen::HardwareResource::isAvailable(
     const GraphNode* new_node) const
 {
     uint16_t num_in_flight_nodes;
     if (inFlightNodes.empty()) {
         num_in_flight_nodes = 0;
         DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
             " #in-flight nodes = 0", new_node->seqNum);
     } else if (new_node->robNum > oldestInFlightRobNum) {
         // This is the intuitive case where new dep-free node is younger
         // instruction than the oldest instruction in-flight. Thus we make sure
         // in_flight_nodes does not overflow.
         num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum;
         DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
             " #in-flight nodes = %d - %d =  %d", new_node->seqNum,
              new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes);
     } else {
         // This is the case where an instruction older than the oldest in-
         // flight instruction becomes dep-free. Thus we must have already
         // accounted for the entry in ROB for this new dep-free node.
         // Immediately after this check returns true, oldestInFlightRobNum will
         // be updated in occupy(). We simply let this node issue now.
         num_in_flight_nodes = 0;
         DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
             " new oldestInFlightRobNum = %d, #in-flight nodes ignored",
             new_node->seqNum, new_node->robNum);
     }
     DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ  = %d/%d.\n",
         numInFlightLoads, sizeLoadBuffer,
         numInFlightStores, sizeStoreBuffer);
     // Check if resources are available to issue the specific node
     if (num_in_flight_nodes >= sizeROB) {
         return false;
     }
     if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) {
         return false;
     }
     if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) {
         return false;
     }
     return true;
 }

 bool
 TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const {
     // Return true if there is at least one read or write request in flight
     return (numInFlightStores != 0 || numInFlightLoads != 0);
 }

 void
 TraceCPU::ElasticDataGen::HardwareResource::printOccupancy() {
     DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, "
             "LQ = %d/%d, SQ  = %d/%d.\n",
             oldestInFlightRobNum,
             numInFlightLoads, sizeLoadBuffer,
             numInFlightStores, sizeStoreBuffer);
 }

 void
 TraceCPU::FixedRetryGen::regStats()
 {
     using namespace Stats;

     numSendAttempted
     .name(name() + ".numSendAttempted")
     .desc("Number of first attempts to send a request")
     ;

     numSendSucceeded
     .name(name() + ".numSendSucceeded")
     .desc("Number of successful first attempts")
     ;

     numSendFailed
     .name(name() + ".numSendFailed")
     .desc("Number of failed first attempts")
     ;

     numRetrySucceeded
     .name(name() + ".numRetrySucceeded")
     .desc("Number of successful retries")
     ;

     instLastTick
     .name(name() + ".instLastTick")
     .desc("Last tick simulated from the fixed inst trace")
     ;
 }

 Tick
 TraceCPU::FixedRetryGen::init()
 {
     DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator"
             " IcacheGen: fixed issue with retry.\n");

     if (nextExecute()) {
         DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick);
         return currElement.tick;
     } else {
         panic("Read of first message in the trace failed.\n");
         return MaxTick;
     }
 }

 bool
 TraceCPU::FixedRetryGen::tryNext()
 {
     // If there is a retry packet, try to send it
     if (retryPkt) {

         DPRINTF(TraceCPUInst, "Trying to send retry packet.\n");

         if (!port.sendTimingReq(retryPkt)) {
             // Still blocked! This should never occur.
             DPRINTF(TraceCPUInst, "Retry packet sending failed.\n");
             return false;
         }
         ++numRetrySucceeded;
     } else {

         DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n");

         // try sending current element
         assert(currElement.isValid());

         ++numSendAttempted;

         if (!send(currElement.addr, currElement.blocksize,
                     currElement.cmd, currElement.flags, currElement.pc)) {
             DPRINTF(TraceCPUInst, "currElement sending failed.\n");
             ++numSendFailed;
             // return false to indicate not to schedule next event
             return false;
         } else {
             ++numSendSucceeded;
         }
     }
     // If packet was sent successfully, either retryPkt or currElement, return
     // true to indicate to schedule event at current Tick plus delta. If packet
     // was sent successfully and there is no next packet to send, return false.
     DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next "
         "element.\n");
     retryPkt = nullptr;
     // Read next element into currElement, currElement gets cleared so save the
     // tick to calculate delta
     Tick last_tick = currElement.tick;
     if (nextExecute()) {
         assert(currElement.tick >= last_tick);
         delta = currElement.tick - last_tick;
     }
     return !traceComplete;
 }

 void
 TraceCPU::FixedRetryGen::exit()
 {
     trace.reset();
 }

 bool
 TraceCPU::FixedRetryGen::nextExecute()
 {
     if (traceComplete)
         // We are at the end of the file, thus we have no more messages.
         // Return false.
         return false;


     //Reset the currElement to the default values
     currElement.clear();

     // Read the next line to get the next message. If that fails then end of
     // trace has been reached and traceComplete needs to be set in addition
     // to returning false. If successful then next message is in currElement.
     if (!trace.read(&currElement)) {
         traceComplete = true;
         instLastTick = curTick();
         return false;
     }

     DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n",
             currElement.cmd.isRead() ? 'r' : 'w',
             currElement.addr,
             currElement.pc,
             currElement.blocksize,
             currElement.tick);

     return true;
 }

 bool
 TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd,
               Request::FlagsType flags, Addr pc)
 {

     // Create new request
     auto req = std::make_shared<Request>(addr, size, flags, masterID);
     req->setPC(pc);

     // If this is not done it triggers assert in L1 cache for invalid contextId
     req->setContext(ContextID(0));

     // Embed it in a packet
     PacketPtr pkt = new Packet(req, cmd);

     uint8_t* pkt_data = new uint8_t[req->getSize()];
     pkt->dataDynamic(pkt_data);

     if (cmd.isWrite()) {
         memset(pkt_data, 0xA, req->getSize());
     }

     // Call MasterPort method to send a timing request for this packet
     bool success = port.sendTimingReq(pkt);
     if (!success) {
         // If it fails, save the packet to retry when a retry is signalled by
         // the cache
         retryPkt = pkt;
     }
     return success;
 }

 void
 TraceCPU::icacheRetryRecvd()
 {
     // Schedule an event to go through the control flow in the same tick as
     // retry is received
     DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen"
             " event @%lli.\n", curTick());
     schedule(icacheNextEvent, curTick());
 }

 void
 TraceCPU::dcacheRetryRecvd()
 {
     // Schedule an event to go through the execute flow in the same tick as
     // retry is received
     DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen"
             " event @%lli.\n", curTick());
     schedule(dcacheNextEvent, curTick());
 }

 void
 TraceCPU::schedDcacheNextEvent(Tick when)
 {
     if (!dcacheNextEvent.scheduled()) {
         DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n",
                 when);
         schedule(dcacheNextEvent, when);
         ++numSchedDcacheEvent;
     } else if (when < dcacheNextEvent.when()) {
         DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli"
                 " to %lli.\n", dcacheNextEvent.when(), when);
         reschedule(dcacheNextEvent, when);
     }

 }

 bool
 TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt)
 {
     // All responses on the instruction fetch side are ignored. Simply delete
     // the packet to free allocated memory
     delete pkt;

     return true;
 }

 void
 TraceCPU::IcachePort::recvReqRetry()
 {
     owner->icacheRetryRecvd();
 }

 void
 TraceCPU::dcacheRecvTimingResp(PacketPtr pkt)
 {
     DPRINTF(TraceCPUData, "Received timing response from Dcache.\n");
     dcacheGen.completeMemAccess(pkt);
 }

 bool
 TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt)
 {
     // Handle the responses for data memory requests which is done inside the
     // elastic data generator
     owner->dcacheRecvTimingResp(pkt);
     // After processing the response delete the packet to free
     // memory
     delete pkt;

     return true;
 }

 void
 TraceCPU::DcachePort::recvReqRetry()
 {
     owner->dcacheRetryRecvd();
 }

 TraceCPU::ElasticDataGen::InputStream::InputStream(
     const std::string& filename,
     const double time_multiplier)
     : trace(filename),
       timeMultiplier(time_multiplier),
       microOpCount(0)
 {
     // Create a protobuf message for the header and read it from the stream
     ProtoMessage::InstDepRecordHeader header_msg;
     if (!trace.read(header_msg)) {
         panic("Failed to read packet header from %s\n", filename);

         if (header_msg.tick_freq() != SimClock::Frequency) {
             panic("Trace %s was recorded with a different tick frequency %d\n",
                   header_msg.tick_freq());
         }
     } else {
         // Assign window size equal to the field in the trace that was recorded
         // when the data dependency trace was captured in the o3cpu model
         windowSize = header_msg.window_size();
     }
 }

 void
 TraceCPU::ElasticDataGen::InputStream::reset()
 {
     trace.reset();
 }

 bool
 TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element)
 {
     ProtoMessage::InstDepRecord pkt_msg;
     if (trace.read(pkt_msg)) {
         // Required fields
         element->seqNum = pkt_msg.seq_num();
         element->type = pkt_msg.type();
         // Scale the compute delay to effectively scale the Trace CPU frequency
         element->compDelay = pkt_msg.comp_delay() * timeMultiplier;

         // Repeated field robDepList
         element->clearRobDep();
         assert((pkt_msg.rob_dep()).size() <= element->maxRobDep);
         for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) {
             element->robDep[element->numRobDep] = pkt_msg.rob_dep(i);
             element->numRobDep += 1;
         }

         // Repeated field
         element->clearRegDep();
         assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs);
         for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) {
             // There is a possibility that an instruction has both, a register
             // and order dependency on an instruction. In such a case, the
             // register dependency is omitted
             bool duplicate = false;
             for (int j = 0; j < element->numRobDep; j++) {
                 duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]);
             }
             if (!duplicate) {
                 element->regDep[element->numRegDep] = pkt_msg.reg_dep(i);
                 element->numRegDep += 1;
             }
         }

         // Optional fields
         if (pkt_msg.has_p_addr())
             element->physAddr = pkt_msg.p_addr();
         else
             element->physAddr = 0;

         if (pkt_msg.has_v_addr())
             element->virtAddr = pkt_msg.v_addr();
         else
             element->virtAddr = 0;

         if (pkt_msg.has_asid())
             element->asid = pkt_msg.asid();
         else
             element->asid = 0;

         if (pkt_msg.has_size())
             element->size = pkt_msg.size();
         else
             element->size = 0;

         if (pkt_msg.has_flags())
             element->flags = pkt_msg.flags();
         else
             element->flags = 0;

         if (pkt_msg.has_pc())
             element->pc = pkt_msg.pc();
         else
             element->pc = 0;

         // ROB occupancy number
         ++microOpCount;
         if (pkt_msg.has_weight()) {
             microOpCount += pkt_msg.weight();
         }
         element->robNum = microOpCount;
         return true;
     }

     // We have reached the end of the file
     return false;
 }

 bool
 TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep)
 {
     for (auto& own_reg_dep : regDep) {
         if (own_reg_dep == reg_dep) {
             // If register dependency is found, make it zero and return true
             own_reg_dep = 0;
             assert(numRegDep > 0);
             --numRegDep;
             DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency %lli "
                     "done.\n", seqNum, reg_dep);
             return true;
         }
     }

     // Return false if the dependency is not found
     return false;
 }

 bool
 TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep)
 {
     for (auto& own_rob_dep : robDep) {
         if (own_rob_dep == rob_dep) {
             // If the rob dependency is found, make it zero and return true
             own_rob_dep = 0;
             assert(numRobDep > 0);
             --numRobDep;
             DPRINTFR(TraceCPUData, "\tFor %lli: Marking ROB dependency %lli "
                 "done.\n", seqNum, rob_dep);
             return true;
         }
     }
     return false;
 }

 void
 TraceCPU::ElasticDataGen::GraphNode::clearRegDep() {
     for (auto& own_reg_dep : regDep) {
         own_reg_dep = 0;
     }
     numRegDep = 0;
 }

 void
 TraceCPU::ElasticDataGen::GraphNode::clearRobDep() {
     for (auto& own_rob_dep : robDep) {
         own_rob_dep = 0;
     }
     numRobDep = 0;
 }

 bool
 TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num)
 {
     // If it is an rob dependency then remove it
     if (!removeRobDep(done_seq_num)) {
         // If it is not an rob dependency then it must be a register dependency
         // If the register dependency is not found, it violates an assumption
         // and must be caught by assert.
         bool regdep_found M5_VAR_USED = removeRegDep(done_seq_num);
         assert(regdep_found);
     }
     // Return true if the node is dependency free
     return (numRobDep == 0 && numRegDep == 0);
 }

 void
 TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const
 {
     DPRINTFR(TraceCPUData, "%lli", seqNum);
     DPRINTFR(TraceCPUData, ",%s", typeToStr());
     if (isLoad() || isStore()) {
         DPRINTFR(TraceCPUData, ",%i", physAddr);
         DPRINTFR(TraceCPUData, ",%i", size);
         DPRINTFR(TraceCPUData, ",%i", flags);
     }
     DPRINTFR(TraceCPUData, ",%lli", compDelay);
     int i = 0;
     DPRINTFR(TraceCPUData, "robDep:");
     while (robDep[i] != 0) {
         DPRINTFR(TraceCPUData, ",%lli", robDep[i]);
         i++;
     }
     i = 0;
     DPRINTFR(TraceCPUData, "regDep:");
     while (regDep[i] != 0) {
         DPRINTFR(TraceCPUData, ",%lli", regDep[i]);
         i++;
     }
     auto child_itr = dependents.begin();
     DPRINTFR(TraceCPUData, "dependents:");
     while (child_itr != dependents.end()) {
         DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum);
         child_itr++;
     }

     DPRINTFR(TraceCPUData, "\n");
 }

 std::string
 TraceCPU::ElasticDataGen::GraphNode::typeToStr() const
 {
     return Record::RecordType_Name(type);
 }

 TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string& filename)
     : trace(filename)
 {
     // Create a protobuf message for the header and read it from the stream
     ProtoMessage::PacketHeader header_msg;
     if (!trace.read(header_msg)) {
         panic("Failed to read packet header from %s\n", filename);

         if (header_msg.tick_freq() != SimClock::Frequency) {
             panic("Trace %s was recorded with a different tick frequency %d\n",
                   header_msg.tick_freq());
         }
     }
 }

 void
 TraceCPU::FixedRetryGen::InputStream::reset()
 {
     trace.reset();
 }

 bool
 TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element)
 {
     ProtoMessage::Packet pkt_msg;
     if (trace.read(pkt_msg)) {
         element->cmd = pkt_msg.cmd();
         element->addr = pkt_msg.addr();
         element->blocksize = pkt_msg.size();
         element->tick = pkt_msg.tick();
         element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0;
         element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0;
         return true;
     }

     // We have reached the end of the file
     return false;
 }