src/cpu/o3/probe/elastic_trace.cc - public/gem5 - Git at Google

 /*
  * Copyright (c) 2013 - 2015 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
  * not be construed as granting a license to any other intellectual
  * property including but not limited to intellectual property relating
  * to a hardware implementation of the functionality of the software
  * licensed hereunder.  You may use the software subject to the license
  * terms below provided that you ensure that this notice is replicated
  * unmodified and in its entirety in all distributions of the software,
  * modified or unmodified, in source code or in binary form.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met: redistributions of source code must retain the above copyright
  * notice, this list of conditions and the following disclaimer;
  * redistributions in binary form must reproduce the above copyright
  * notice, this list of conditions and the following disclaimer in the
  * documentation and/or other materials provided with the distribution;
  * neither the name of the copyright holders nor the names of its
  * contributors may be used to endorse or promote products derived from
  * this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Authors: Radhika Jagtap
  *          Andreas Hansson
  *          Thomas Grass
  */

 #include "cpu/o3/probe/elastic_trace.hh"

 #include "base/callback.hh"
 #include "base/output.hh"
 #include "base/trace.hh"
 #include "cpu/reg_class.hh"
 #include "debug/ElasticTrace.hh"
 #include "mem/packet.hh"

 ElasticTrace::ElasticTrace(const ElasticTraceParams* params)
     :  ProbeListenerObject(params),
        regEtraceListenersEvent([this]{ regEtraceListeners(); }, name()),
        firstWin(true),
        lastClearedSeqNum(0),
        depWindowSize(params->depWindowSize),
        dataTraceStream(nullptr),
        instTraceStream(nullptr),
        startTraceInst(params->startTraceInst),
        allProbesReg(false),
        traceVirtAddr(params->traceVirtAddr)
 {
     cpu = dynamic_cast<FullO3CPU<O3CPUImpl>*>(params->manager);
     fatal_if(!cpu, "Manager of %s is not of type O3CPU and thus does not "\
                 "support dependency tracing.\n", name());

     fatal_if(depWindowSize == 0, "depWindowSize parameter must be non-zero. "\
                 "Recommended size is 3x ROB size in the O3CPU.\n");

     fatal_if(cpu->numThreads > 1, "numThreads = %i, %s supports tracing for"\
                 "single-threaded workload only", cpu->numThreads, name());
     // Initialize the protobuf output stream
     fatal_if(params->instFetchTraceFile == "", "Assign instruction fetch "\
                 "trace file path to instFetchTraceFile");
     fatal_if(params->dataDepTraceFile == "", "Assign data dependency "\
                 "trace file path to dataDepTraceFile");
     std::string filename = simout.resolve(name() + "." +
                                             params->instFetchTraceFile);
     instTraceStream = new ProtoOutputStream(filename);
     filename = simout.resolve(name() + "." + params->dataDepTraceFile);
     dataTraceStream = new ProtoOutputStream(filename);
     // Create a protobuf message for the header and write it to the stream
     ProtoMessage::PacketHeader inst_pkt_header;
     inst_pkt_header.set_obj_id(name());
     inst_pkt_header.set_tick_freq(SimClock::Frequency);
     instTraceStream->write(inst_pkt_header);
     // Create a protobuf message for the header and write it to
     // the stream
     ProtoMessage::InstDepRecordHeader data_rec_header;
     data_rec_header.set_obj_id(name());
     data_rec_header.set_tick_freq(SimClock::Frequency);
     data_rec_header.set_window_size(depWindowSize);
     dataTraceStream->write(data_rec_header);
     // Register a callback to flush trace records and close the output streams.
     Callback* cb = new MakeCallback<ElasticTrace,
         &ElasticTrace::flushTraces>(this);
     registerExitCallback(cb);
 }

 void
 ElasticTrace::regProbeListeners()
 {
     inform("@%llu: regProbeListeners() called, startTraceInst = %llu",
         curTick(), startTraceInst);
     if (startTraceInst == 0) {
         // If we want to start tracing from the start of the simulation,
         // register all elastic trace probes now.
         regEtraceListeners();
     } else {
         // Schedule an event to register all elastic trace probes when
         // specified no. of instructions are committed.
         cpu->getContext(0)->scheduleInstCountEvent(
                 &regEtraceListenersEvent, startTraceInst);
     }
 }

 void
 ElasticTrace::regEtraceListeners()
 {
     assert(!allProbesReg);
     inform("@%llu: No. of instructions committed = %llu, registering elastic"
         " probe listeners", curTick(), cpu->numSimulatedInsts());
     // Create new listeners: provide method to be called upon a notify() for
     // each probe point.
     listeners.push_back(new ProbeListenerArg<ElasticTrace, RequestPtr>(this,
                         "FetchRequest", &ElasticTrace::fetchReqTrace));
     listeners.push_back(new ProbeListenerArg<ElasticTrace,
             DynInstConstPtr>(this, "Execute",
                 &ElasticTrace::recordExecTick));
     listeners.push_back(new ProbeListenerArg<ElasticTrace,
             DynInstConstPtr>(this, "ToCommit",
                 &ElasticTrace::recordToCommTick));
     listeners.push_back(new ProbeListenerArg<ElasticTrace,
             DynInstConstPtr>(this, "Rename",
                 &ElasticTrace::updateRegDep));
     listeners.push_back(new ProbeListenerArg<ElasticTrace, SeqNumRegPair>(this,
                         "SquashInRename", &ElasticTrace::removeRegDepMapEntry));
     listeners.push_back(new ProbeListenerArg<ElasticTrace,
             DynInstConstPtr>(this, "Squash",
                 &ElasticTrace::addSquashedInst));
     listeners.push_back(new ProbeListenerArg<ElasticTrace,
             DynInstConstPtr>(this, "Commit",
                 &ElasticTrace::addCommittedInst));
     allProbesReg = true;
 }

 void
 ElasticTrace::fetchReqTrace(const RequestPtr &req)
 {

     DPRINTFR(ElasticTrace, "Fetch Req %i,(%lli,%lli,%lli),%i,%i,%lli\n",
              (MemCmd::ReadReq),
              req->getPC(), req->getVaddr(), req->getPaddr(),
              req->getFlags(), req->getSize(), curTick());

     // Create a protobuf message including the request fields necessary to
     // recreate the request in the TraceCPU.
     ProtoMessage::Packet inst_fetch_pkt;
     inst_fetch_pkt.set_tick(curTick());
     inst_fetch_pkt.set_cmd(MemCmd::ReadReq);
     inst_fetch_pkt.set_pc(req->getPC());
     inst_fetch_pkt.set_flags(req->getFlags());
     inst_fetch_pkt.set_addr(req->getPaddr());
     inst_fetch_pkt.set_size(req->getSize());
     // Write the message to the stream.
     instTraceStream->write(inst_fetch_pkt);
 }

 void
 ElasticTrace::recordExecTick(const DynInstConstPtr& dyn_inst)
 {

     // In a corner case, a retired instruction is propagated backward to the
     // IEW instruction queue to handle some side-channel information. But we
     // must not process an instruction again. So we test the sequence number
     // against the lastClearedSeqNum and skip adding the instruction for such
     // corner cases.
     if (dyn_inst->seqNum <= lastClearedSeqNum) {
         DPRINTFR(ElasticTrace, "[sn:%lli] Ignoring in execute as instruction \
         has already retired (mostly squashed)", dyn_inst->seqNum);
         // Do nothing as program has proceeded and this inst has been
         // propagated backwards to handle something.
         return;
     }

     DPRINTFR(ElasticTrace, "[sn:%lli] Execute Tick = %i\n", dyn_inst->seqNum,
                 curTick());
     // Either the execution info object will already exist if this
     // instruction had a register dependency recorded in the rename probe
     // listener before entering execute stage or it will not exist and will
     // need to be created here.
     InstExecInfo* exec_info_ptr;
     auto itr_exec_info = tempStore.find(dyn_inst->seqNum);
     if (itr_exec_info != tempStore.end()) {
         exec_info_ptr = itr_exec_info->second;
     } else {
         exec_info_ptr = new InstExecInfo;
         tempStore[dyn_inst->seqNum] = exec_info_ptr;
     }

     exec_info_ptr->executeTick = curTick();
     maxTempStoreSize = std::max(tempStore.size(),
                                 (std::size_t)maxTempStoreSize.value());
 }

 void
 ElasticTrace::recordToCommTick(const DynInstConstPtr& dyn_inst)
 {
     // If tracing has just been enabled then the instruction at this stage of
     // execution is far enough that we cannot gather info about its past like
     // the tick it started execution. Simply return until we see an instruction
     // that is found in the tempStore.
     auto itr_exec_info = tempStore.find(dyn_inst->seqNum);
     if (itr_exec_info == tempStore.end()) {
         DPRINTFR(ElasticTrace, "recordToCommTick: [sn:%lli] Not in temp store,"
                     " skipping.\n", dyn_inst->seqNum);
         return;
     }

     DPRINTFR(ElasticTrace, "[sn:%lli] To Commit Tick = %i\n", dyn_inst->seqNum,
                 curTick());
     InstExecInfo* exec_info_ptr = itr_exec_info->second;
     exec_info_ptr->toCommitTick = curTick();

 }

 void
 ElasticTrace::updateRegDep(const DynInstConstPtr& dyn_inst)
 {
     // Get the sequence number of the instruction
     InstSeqNum seq_num = dyn_inst->seqNum;

     assert(dyn_inst->seqNum > lastClearedSeqNum);

     // Since this is the first probe activated in the pipeline, create
     // a new execution info object to track this instruction as it
     // progresses through the pipeline.
     InstExecInfo* exec_info_ptr = new InstExecInfo;
     tempStore[seq_num] = exec_info_ptr;

     // Loop through the source registers and look up the dependency map. If
     // the source register entry is found in the dependency map, add a
     // dependency on the last writer.
     int8_t max_regs = dyn_inst->numSrcRegs();
     for (int src_idx = 0; src_idx < max_regs; src_idx++) {

         const RegId& src_reg = dyn_inst->srcRegIdx(src_idx);
         if (!src_reg.isMiscReg() &&
             !src_reg.isZeroReg()) {
             // Get the physical register index of the i'th source register.
             PhysRegIdPtr phys_src_reg = dyn_inst->renamedSrcRegIdx(src_idx);
             DPRINTFR(ElasticTrace, "[sn:%lli] Check map for src reg"
                      " %i (%s)\n", seq_num,
                      phys_src_reg->flatIndex(), phys_src_reg->className());
             auto itr_writer = physRegDepMap.find(phys_src_reg->flatIndex());
             if (itr_writer != physRegDepMap.end()) {
                 InstSeqNum last_writer = itr_writer->second;
                 // Additionally the dependency distance is kept less than the
                 // window size parameter to limit the memory allocation to
                 // nodes in the graph. If the window were tending to infinite
                 // we would have to load a large number of node objects during
                 // replay.
                 if (seq_num - last_writer < depWindowSize) {
                     // Record a physical register dependency.
                     exec_info_ptr->physRegDepSet.insert(last_writer);
                 }
             }

         }

     }

     // Loop through the destination registers of this instruction and update
     // the physical register dependency map for last writers to registers.
     max_regs = dyn_inst->numDestRegs();
     for (int dest_idx = 0; dest_idx < max_regs; dest_idx++) {
         // For data dependency tracking the register must be an int, float or
         // CC register and not a Misc register.
         const RegId& dest_reg = dyn_inst->destRegIdx(dest_idx);
         if (!dest_reg.isMiscReg() &&
             !dest_reg.isZeroReg()) {
             // Get the physical register index of the i'th destination
             // register.
             PhysRegIdPtr phys_dest_reg = dyn_inst->renamedDestRegIdx(dest_idx);
             DPRINTFR(ElasticTrace, "[sn:%lli] Update map for dest reg"
                      " %i (%s)\n", seq_num, phys_dest_reg->flatIndex(),
                      dest_reg.className());
             physRegDepMap[phys_dest_reg->flatIndex()] = seq_num;
         }
     }
     maxPhysRegDepMapSize = std::max(physRegDepMap.size(),
                                     (std::size_t)maxPhysRegDepMapSize.value());
 }

 void
 ElasticTrace::removeRegDepMapEntry(const SeqNumRegPair &inst_reg_pair)
 {
     DPRINTFR(ElasticTrace, "Remove Map entry for Reg %i\n",
             inst_reg_pair.second);
     auto itr_regdep_map = physRegDepMap.find(inst_reg_pair.second);
     if (itr_regdep_map != physRegDepMap.end())
         physRegDepMap.erase(itr_regdep_map);
 }

 void
 ElasticTrace::addSquashedInst(const DynInstConstPtr& head_inst)
 {
     // If the squashed instruction was squashed before being processed by
     // execute stage then it will not be in the temporary store. In this case
     // do nothing and return.
     auto itr_exec_info = tempStore.find(head_inst->seqNum);
     if (itr_exec_info == tempStore.end())
         return;

     // If there is a squashed load for which a read request was
     // sent before it got squashed then add it to the trace.
     DPRINTFR(ElasticTrace, "Attempt to add squashed inst [sn:%lli]\n",
                 head_inst->seqNum);
     // Get pointer to the execution info object corresponding to the inst.
     InstExecInfo* exec_info_ptr = itr_exec_info->second;
     if (head_inst->isLoad() && exec_info_ptr->executeTick != MaxTick &&
         exec_info_ptr->toCommitTick != MaxTick &&
         head_inst->hasRequest() &&
         head_inst->getFault() == NoFault) {
         // Add record to depTrace with commit parameter as false.
         addDepTraceRecord(head_inst, exec_info_ptr, false);
     }
     // As the information contained is no longer needed, remove the execution
     // info object from the temporary store.
     clearTempStoreUntil(head_inst);
 }

 void
 ElasticTrace::addCommittedInst(const DynInstConstPtr& head_inst)
 {
     DPRINTFR(ElasticTrace, "Attempt to add committed inst [sn:%lli]\n",
                 head_inst->seqNum);

     // Add the instruction to the depTrace.
     if (!head_inst->isNop()) {

         // If tracing has just been enabled then the instruction at this stage
         // of execution is far enough that we cannot gather info about its past
         // like the tick it started execution. Simply return until we see an
         // instruction that is found in the tempStore.
         auto itr_temp_store = tempStore.find(head_inst->seqNum);
         if (itr_temp_store == tempStore.end()) {
             DPRINTFR(ElasticTrace, "addCommittedInst: [sn:%lli] Not in temp "
                 "store, skipping.\n", head_inst->seqNum);
             return;
         }

         // Get pointer to the execution info object corresponding to the inst.
         InstExecInfo* exec_info_ptr = itr_temp_store->second;
         assert(exec_info_ptr->executeTick != MaxTick);
         assert(exec_info_ptr->toCommitTick != MaxTick);

         // Check if the instruction had a fault, if it predicated false and
         // thus previous register values were restored or if it was a
         // load/store that did not have a request (e.g. when the size of the
         // request is zero). In all these cases the instruction is set as
         // executed and is picked up by the commit probe listener. But a
         // request is not issued and registers are not written. So practically,
         // skipping these should not hurt as execution would not stall on them.
         // Alternatively, these could be included merely as a compute node in
         // the graph. Removing these for now. If correlation accuracy needs to
         // be improved in future these can be turned into comp nodes at the
         // cost of bigger traces.
         if (head_inst->getFault() != NoFault) {
             DPRINTF(ElasticTrace, "%s [sn:%lli] has faulted so "
                     "skip adding it to the trace\n",
                     (head_inst->isMemRef() ? "Load/store" : "Comp inst."),
                     head_inst->seqNum);
         } else if (head_inst->isMemRef() && !head_inst->hasRequest()) {
             DPRINTF(ElasticTrace, "Load/store [sn:%lli]  has no request so "
                     "skip adding it to the trace\n", head_inst->seqNum);
         } else if (!head_inst->readPredicate()) {
             DPRINTF(ElasticTrace, "%s [sn:%lli] is predicated false so "
                     "skip adding it to the trace\n",
                     (head_inst->isMemRef() ? "Load/store" : "Comp inst."),
                     head_inst->seqNum);
         } else {
             // Add record to depTrace with commit parameter as true.
             addDepTraceRecord(head_inst, exec_info_ptr, true);
         }
     }
     // As the information contained is no longer needed, remove the execution
     // info object from the temporary store.
     clearTempStoreUntil(head_inst);
 }

 void
 ElasticTrace::addDepTraceRecord(const DynInstConstPtr& head_inst,
                                 InstExecInfo* exec_info_ptr, bool commit)
 {
     // Create a record to assign dynamic intruction related fields.
     TraceInfo* new_record = new TraceInfo;
     // Add to map for sequence number look up to retrieve the TraceInfo pointer
     traceInfoMap[head_inst->seqNum] = new_record;

     // Assign fields from the instruction
     new_record->instNum = head_inst->seqNum;
     new_record->commit = commit;
     new_record->type = head_inst->isLoad() ? Record::LOAD :
                         (head_inst->isStore() ? Record::STORE :
                         Record::COMP);

     // Assign fields for creating a request in case of a load/store
     new_record->reqFlags = head_inst->memReqFlags;
     new_record->virtAddr = head_inst->effAddr;
     new_record->asid = head_inst->asid;
     new_record->physAddr = head_inst->physEffAddr;
     // Currently the tracing does not support split requests.
     new_record->size = head_inst->effSize;
     new_record->pc = head_inst->instAddr();

     // Assign the timing information stored in the execution info object
     new_record->executeTick = exec_info_ptr->executeTick;
     new_record->toCommitTick = exec_info_ptr->toCommitTick;
     new_record->commitTick = curTick();

     // Assign initial values for number of dependents and computational delay
     new_record->numDepts = 0;
     new_record->compDelay = -1;

     // The physical register dependency set of the first instruction is
     // empty. Since there are no records in the depTrace at this point, the
     // case of adding an ROB dependency by using a reverse iterator is not
     // applicable. Thus, populate the fields of the record corresponding to the
     // first instruction and return.
     if (depTrace.empty()) {
         // Store the record in depTrace.
         depTrace.push_back(new_record);
         DPRINTF(ElasticTrace, "Added first inst record %lli to DepTrace.\n",
                 new_record->instNum);
         return;
     }

     // Clear register dependencies for squashed loads as they may be dependent
     // on squashed instructions and we do not add those to the trace.
     if (head_inst->isLoad() && !commit) {
          (exec_info_ptr->physRegDepSet).clear();
     }

     // Assign the register dependencies stored in the execution info object
     std::set<InstSeqNum>::const_iterator dep_set_it;
     for (dep_set_it = (exec_info_ptr->physRegDepSet).begin();
          dep_set_it != (exec_info_ptr->physRegDepSet).end();
          ++dep_set_it) {
         auto trace_info_itr = traceInfoMap.find(*dep_set_it);
         if (trace_info_itr != traceInfoMap.end()) {
             // The register dependency is valid. Assign it and calculate
             // computational delay
             new_record->physRegDepList.push_back(*dep_set_it);
             DPRINTF(ElasticTrace, "Inst %lli has register dependency on "
                     "%lli\n", new_record->instNum, *dep_set_it);
             TraceInfo* reg_dep = trace_info_itr->second;
             reg_dep->numDepts++;
             compDelayPhysRegDep(reg_dep, new_record);
             ++numRegDep;
         } else {
             // The instruction that this has a register dependency on was
             // not added to the trace because of one of the following
             // 1. it was an instruction that had a fault
             // 2. it was an instruction that was predicated false and
             // previous register values were restored
             // 3. it was load/store that did not have a request (e.g. when
             // the size of the request is zero but this may not be a fault)
             // In all these cases the instruction is set as executed and is
             // picked up by the commit probe listener. But a request is not
             // issued and registers are not written to in these cases.
             DPRINTF(ElasticTrace, "Inst %lli has register dependency on "
                     "%lli is skipped\n",new_record->instNum, *dep_set_it);
         }
     }

     // Check for and assign an ROB dependency in addition to register
     // dependency before adding the record to the trace.
     // As stores have to commit in order a store is dependent on the last
     // committed load/store. This is recorded in the ROB dependency.
     if (head_inst->isStore()) {
         // Look up store-after-store order dependency
         updateCommitOrderDep(new_record, false);
         // Look up store-after-load order dependency
         updateCommitOrderDep(new_record, true);
     }

     // In case a node is dependency-free or its dependency got discarded
     // because it was outside the window, it is marked ready in the ROB at the
     // time of issue. A request is sent as soon as possible. To model this, a
     // node is assigned an issue order dependency on a committed instruction
     // that completed earlier than it. This is done to avoid the problem of
     // determining the issue times of such dependency-free nodes during replay
     // which could lead to too much parallelism, thinking conservatively.
     if (new_record->robDepList.empty() && new_record->physRegDepList.empty()) {
         updateIssueOrderDep(new_record);
     }

     // Store the record in depTrace.
     depTrace.push_back(new_record);
     DPRINTF(ElasticTrace, "Added %s inst %lli to DepTrace.\n",
             (commit ? "committed" : "squashed"), new_record->instNum);

     // To process the number of records specified by depWindowSize in the
     // forward direction, the depTrace must have twice as many records
     // to check for dependencies.
     if (depTrace.size() == 2 * depWindowSize) {

         DPRINTF(ElasticTrace, "Writing out trace...\n");

         // Write out the records which have been processed to the trace
         // and remove them from the depTrace.
         writeDepTrace(depWindowSize);

         // After the first window, writeDepTrace() must check for valid
         // compDelay.
         firstWin = false;
     }
 }

 void
 ElasticTrace::updateCommitOrderDep(TraceInfo* new_record,
                                     bool find_load_not_store)
 {
     assert(new_record->isStore());
     // Iterate in reverse direction to search for the last committed
     // load/store that completed earlier than the new record
     depTraceRevItr from_itr(depTrace.end());
     depTraceRevItr until_itr(depTrace.begin());
     TraceInfo* past_record = *from_itr;
     uint32_t num_go_back = 0;

     // The execution time of this store is when it is sent, that is committed
     Tick execute_tick = curTick();
     // Search for store-after-load or store-after-store order dependency
     while (num_go_back < depWindowSize && from_itr != until_itr) {
         if (find_load_not_store) {
             // Check if previous inst is a load completed earlier by comparing
             // with execute tick
             if (hasLoadCompleted(past_record, execute_tick)) {
                 // Assign rob dependency and calculate the computational delay
                 assignRobDep(past_record, new_record);
                 ++numOrderDepStores;
                 return;
             }
         } else {
             // Check if previous inst is a store sent earlier by comparing with
             // execute tick
             if (hasStoreCommitted(past_record, execute_tick)) {
                 // Assign rob dependency and calculate the computational delay
                 assignRobDep(past_record, new_record);
                 ++numOrderDepStores;
                 return;
             }
         }
         ++from_itr;
         past_record = *from_itr;
         ++num_go_back;
     }
 }

 void
 ElasticTrace::updateIssueOrderDep(TraceInfo* new_record)
 {
     // Interate in reverse direction to search for the last committed
     // record that completed earlier than the new record
     depTraceRevItr from_itr(depTrace.end());
     depTraceRevItr until_itr(depTrace.begin());
     TraceInfo* past_record = *from_itr;

     uint32_t num_go_back = 0;
     Tick execute_tick = 0;

     if (new_record->isLoad()) {
         // The execution time of a load is when a request is sent
         execute_tick = new_record->executeTick;
         ++numIssueOrderDepLoads;
     } else if (new_record->isStore()) {
         // The execution time of a store is when it is sent, i.e. committed
         execute_tick = curTick();
         ++numIssueOrderDepStores;
     } else {
         // The execution time of a non load/store is when it completes
         execute_tick = new_record->toCommitTick;
         ++numIssueOrderDepOther;
     }

     // We search if this record has an issue order dependency on a past record.
     // Once we find it, we update both the new record and the record it depends
     // on and return.
     while (num_go_back < depWindowSize && from_itr != until_itr) {
         // Check if a previous inst is a load sent earlier, or a store sent
         // earlier, or a comp inst completed earlier by comparing with execute
         // tick
         if (hasLoadBeenSent(past_record, execute_tick) ||
             hasStoreCommitted(past_record, execute_tick) ||
             hasCompCompleted(past_record, execute_tick)) {
             // Assign rob dependency and calculate the computational delay
             assignRobDep(past_record, new_record);
             return;
         }
         ++from_itr;
         past_record = *from_itr;
         ++num_go_back;
     }
 }

 void
 ElasticTrace::assignRobDep(TraceInfo* past_record, TraceInfo* new_record) {
     DPRINTF(ElasticTrace, "%s %lli has ROB dependency on %lli\n",
             new_record->typeToStr(), new_record->instNum,
             past_record->instNum);
     // Add dependency on past record
     new_record->robDepList.push_back(past_record->instNum);
     // Update new_record's compute delay with respect to the past record
     compDelayRob(past_record, new_record);
     // Increment number of dependents of the past record
     ++(past_record->numDepts);
     // Update stat to log max number of dependents
     maxNumDependents = std::max(past_record->numDepts,
                                 (uint32_t)maxNumDependents.value());
 }

 bool
 ElasticTrace::hasStoreCommitted(TraceInfo* past_record,
                                     Tick execute_tick) const
 {
     return (past_record->isStore() && past_record->commitTick <= execute_tick);
 }

 bool
 ElasticTrace::hasLoadCompleted(TraceInfo* past_record,
                                     Tick execute_tick) const
 {
     return(past_record->isLoad() && past_record->commit &&
                 past_record->toCommitTick <= execute_tick);
 }

 bool
 ElasticTrace::hasLoadBeenSent(TraceInfo* past_record,
                                 Tick execute_tick) const
 {
     // Check if previous inst is a load sent earlier than this
     return (past_record->isLoad() && past_record->commit &&
         past_record->executeTick <= execute_tick);
 }

 bool
 ElasticTrace::hasCompCompleted(TraceInfo* past_record,
                                     Tick execute_tick) const
 {
     return(past_record->isComp() && past_record->toCommitTick <= execute_tick);
 }

 void
 ElasticTrace::clearTempStoreUntil(const DynInstConstPtr& head_inst)
 {
     // Clear from temp store starting with the execution info object
     // corresponding the head_inst and continue clearing by decrementing the
     // sequence number until the last cleared sequence number.
     InstSeqNum temp_sn = (head_inst->seqNum);
     while (temp_sn > lastClearedSeqNum) {
         auto itr_exec_info = tempStore.find(temp_sn);
         if (itr_exec_info != tempStore.end()) {
             InstExecInfo* exec_info_ptr = itr_exec_info->second;
             // Free allocated memory for the info object
             delete exec_info_ptr;
             // Remove entry from temporary store
             tempStore.erase(itr_exec_info);
         }
         temp_sn--;
     }
     // Update the last cleared sequence number to that of the head_inst
     lastClearedSeqNum = head_inst->seqNum;
 }

 void
 ElasticTrace::compDelayRob(TraceInfo* past_record, TraceInfo* new_record)
 {
     // The computation delay is the delay between the completion tick of the
     // inst. pointed to by past_record and the execution tick of its dependent
     // inst. pointed to by new_record.
     int64_t comp_delay = -1;
     Tick execution_tick = 0, completion_tick = 0;

     DPRINTF(ElasticTrace, "Seq num %lli has ROB dependency on seq num %lli.\n",
             new_record->instNum, past_record->instNum);

     // Get the tick when the node is executed as per the modelling of
     // computation delay
     execution_tick = new_record->getExecuteTick();

     if (past_record->isLoad()) {
         if (new_record->isStore()) {
             completion_tick = past_record->toCommitTick;
         } else {
             completion_tick = past_record->executeTick;
         }
     } else if (past_record->isStore()) {
         completion_tick = past_record->commitTick;
     } else if (past_record->isComp()){
         completion_tick = past_record->toCommitTick;
     }
     assert(execution_tick >= completion_tick);
     comp_delay = execution_tick - completion_tick;

     DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n",
             execution_tick, completion_tick, comp_delay);

     // Assign the computational delay with respect to the dependency which
     // completes the latest.
     if (new_record->compDelay == -1)
         new_record->compDelay = comp_delay;
     else
         new_record->compDelay = std::min(comp_delay, new_record->compDelay);
     DPRINTF(ElasticTrace, "Final computational delay = %lli.\n",
             new_record->compDelay);
 }

 void
 ElasticTrace::compDelayPhysRegDep(TraceInfo* past_record,
                                     TraceInfo* new_record)
 {
     // The computation delay is the delay between the completion tick of the
     // inst. pointed to by past_record and the execution tick of its dependent
     // inst. pointed to by new_record.
     int64_t comp_delay = -1;
     Tick execution_tick = 0, completion_tick = 0;

     DPRINTF(ElasticTrace, "Seq. num %lli has register dependency on seq. num"
             " %lli.\n", new_record->instNum, past_record->instNum);

     // Get the tick when the node is executed as per the modelling of
     // computation delay
     execution_tick = new_record->getExecuteTick();

     // When there is a physical register dependency on an instruction, the
     // completion tick of that instruction is when it wrote to the register,
     // that is toCommitTick. In case, of a store updating a destination
     // register, this is approximated to commitTick instead
     if (past_record->isStore()) {
         completion_tick = past_record->commitTick;
     } else {
         completion_tick = past_record->toCommitTick;
     }
     assert(execution_tick >= completion_tick);
     comp_delay = execution_tick - completion_tick;
     DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n",
             execution_tick, completion_tick, comp_delay);

     // Assign the computational delay with respect to the dependency which
     // completes the latest.
     if (new_record->compDelay == -1)
         new_record->compDelay = comp_delay;
     else
         new_record->compDelay = std::min(comp_delay, new_record->compDelay);
     DPRINTF(ElasticTrace, "Final computational delay = %lli.\n",
             new_record->compDelay);
 }

 Tick
 ElasticTrace::TraceInfo::getExecuteTick() const
 {
     if (isLoad()) {
         // Execution tick for a load instruction is when the request was sent,
         // that is executeTick.
         return executeTick;
     } else if (isStore()) {
         // Execution tick for a store instruction is when the request was sent,
         // that is commitTick.
         return commitTick;
     } else {
         // Execution tick for a non load/store instruction is when the register
         // value was written to, that is commitTick.
         return toCommitTick;
     }
 }

 void
 ElasticTrace::writeDepTrace(uint32_t num_to_write)
 {
     // Write the trace with fields as follows:
     // Instruction sequence number
     // If instruction was a load
     // If instruction was a store
     // If instruction has addr
     // If instruction has size
     // If instruction has flags
     // List of order dependencies - optional, repeated
     // Computational delay with respect to last completed dependency
     // List of physical register RAW dependencies - optional, repeated
     // Weight of a node equal to no. of filtered nodes before it - optional
     uint16_t num_filtered_nodes = 0;
     depTraceItr dep_trace_itr(depTrace.begin());
     depTraceItr dep_trace_itr_start = dep_trace_itr;
     while (num_to_write > 0) {
         TraceInfo* temp_ptr = *dep_trace_itr;
         assert(temp_ptr->type != Record::INVALID);
         // If no node dependends on a comp node then there is no reason to
         // track the comp node in the dependency graph. We filter out such
         // nodes but count them and add a weight field to the subsequent node
         // that we do include in the trace.
         if (!temp_ptr->isComp() || temp_ptr->numDepts != 0) {
             DPRINTFR(ElasticTrace, "Instruction with seq. num %lli "
                      "is as follows:\n", temp_ptr->instNum);
             if (temp_ptr->isLoad() || temp_ptr->isStore()) {
                 DPRINTFR(ElasticTrace, "\tis a %s\n", temp_ptr->typeToStr());
                 DPRINTFR(ElasticTrace, "\thas a request with phys addr %i, "
                          "size %i, flags %i\n", temp_ptr->physAddr,
                          temp_ptr->size, temp_ptr->reqFlags);
             } else {
                  DPRINTFR(ElasticTrace, "\tis a %s\n", temp_ptr->typeToStr());
             }
             if (firstWin && temp_ptr->compDelay == -1) {
                 if (temp_ptr->isLoad()) {
                     temp_ptr->compDelay = temp_ptr->executeTick;
                 } else if (temp_ptr->isStore()) {
                     temp_ptr->compDelay = temp_ptr->commitTick;
                 } else {
                     temp_ptr->compDelay = temp_ptr->toCommitTick;
                 }
             }
             assert(temp_ptr->compDelay != -1);
             DPRINTFR(ElasticTrace, "\thas computational delay %lli\n",
                      temp_ptr->compDelay);

             // Create a protobuf message for the dependency record
             ProtoMessage::InstDepRecord dep_pkt;
             dep_pkt.set_seq_num(temp_ptr->instNum);
             dep_pkt.set_type(temp_ptr->type);
             dep_pkt.set_pc(temp_ptr->pc);
             if (temp_ptr->isLoad() || temp_ptr->isStore()) {
                 dep_pkt.set_flags(temp_ptr->reqFlags);
                 dep_pkt.set_p_addr(temp_ptr->physAddr);
                 // If tracing of virtual addresses is enabled, set the optional
                 // field for it
                 if (traceVirtAddr) {
                     dep_pkt.set_v_addr(temp_ptr->virtAddr);
                     dep_pkt.set_asid(temp_ptr->asid);
                 }
                 dep_pkt.set_size(temp_ptr->size);
             }
             dep_pkt.set_comp_delay(temp_ptr->compDelay);
             if (temp_ptr->robDepList.empty()) {
                 DPRINTFR(ElasticTrace, "\thas no order (rob) dependencies\n");
             }
             while (!temp_ptr->robDepList.empty()) {
                 DPRINTFR(ElasticTrace, "\thas order (rob) dependency on %lli\n",
                          temp_ptr->robDepList.front());
                 dep_pkt.add_rob_dep(temp_ptr->robDepList.front());
                 temp_ptr->robDepList.pop_front();
             }
             if (temp_ptr->physRegDepList.empty()) {
                 DPRINTFR(ElasticTrace, "\thas no register dependencies\n");
             }
             while (!temp_ptr->physRegDepList.empty()) {
                 DPRINTFR(ElasticTrace, "\thas register dependency on %lli\n",
                          temp_ptr->physRegDepList.front());
                 dep_pkt.add_reg_dep(temp_ptr->physRegDepList.front());
                 temp_ptr->physRegDepList.pop_front();
             }
             if (num_filtered_nodes != 0) {
                 // Set the weight of this node as the no. of filtered nodes
                 // between this node and the last node that we wrote to output
                 // stream. The weight will be used during replay to model ROB
                 // occupancy of filtered nodes.
                 dep_pkt.set_weight(num_filtered_nodes);
                 num_filtered_nodes = 0;
             }
             // Write the message to the protobuf output stream
             dataTraceStream->write(dep_pkt);
         } else {
             // Don't write the node to the trace but note that we have filtered
             // out a node.
             ++numFilteredNodes;
             ++num_filtered_nodes;
         }
         dep_trace_itr++;
         traceInfoMap.erase(temp_ptr->instNum);
         delete temp_ptr;
         num_to_write--;
     }
     depTrace.erase(dep_trace_itr_start, dep_trace_itr);
 }

 void
 ElasticTrace::regStats() {
     ProbeListenerObject::regStats();

     using namespace Stats;
     numRegDep
         .name(name() + ".numRegDep")
         .desc("Number of register dependencies recorded during tracing")
         ;

     numOrderDepStores
         .name(name() + ".numOrderDepStores")
         .desc("Number of commit order (rob) dependencies for a store recorded"
               " on a past load/store during tracing")
         ;

     numIssueOrderDepLoads
         .name(name() + ".numIssueOrderDepLoads")
         .desc("Number of loads that got assigned issue order dependency"
               " because they were dependency-free")
         ;

     numIssueOrderDepStores
         .name(name() + ".numIssueOrderDepStores")
         .desc("Number of stores that got assigned issue order dependency"
               " because they were dependency-free")
         ;

     numIssueOrderDepOther
         .name(name() + ".numIssueOrderDepOther")
         .desc("Number of non load/store insts that got assigned issue order"
               " dependency because they were dependency-free")
         ;

     numFilteredNodes
         .name(name() + ".numFilteredNodes")
         .desc("No. of nodes filtered out before writing the output trace")
         ;

     maxNumDependents
         .name(name() + ".maxNumDependents")
         .desc("Maximum number or dependents on any instruction")
         ;

     maxTempStoreSize
         .name(name() + ".maxTempStoreSize")
         .desc("Maximum size of the temporary store during the run")
         ;

     maxPhysRegDepMapSize
         .name(name() + ".maxPhysRegDepMapSize")
         .desc("Maximum size of register dependency map")
         ;
 }

 const std::string&
 ElasticTrace::TraceInfo::typeToStr() const
 {
     return Record::RecordType_Name(type);
 }

 const std::string
 ElasticTrace::name() const
 {
     return ProbeListenerObject::name();
 }

 void
 ElasticTrace::flushTraces()
 {
     // Write to trace all records in the depTrace.
     writeDepTrace(depTrace.size());
     // Delete the stream objects
     delete dataTraceStream;
     delete instTraceStream;
 }

 ElasticTrace*
 ElasticTraceParams::create()
 {
     return new ElasticTrace(this);
 }