| /* |
| * Copyright (c) 1999-2013 Mark D. Hill and David A. Wood |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are |
| * met: redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer; |
| * redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution; |
| * neither the name of the copyright holders nor the names of its |
| * contributors may be used to endorse or promote products derived from |
| * this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| /* |
| This file has been modified by Kevin Moore and Dan Nussbaum of the |
| Scalable Systems Research Group at Sun Microsystems Laboratories |
| (http://research.sun.com/scalable/) to support the Adaptive |
| Transactional Memory Test Platform (ATMTP). |
| |
| Please send email to atmtp-interest@sun.com with feedback, questions, or |
| to request future announcements about ATMTP. |
| |
| ---------------------------------------------------------------------- |
| |
| File modification date: 2008-02-23 |
| |
| ---------------------------------------------------------------------- |
| */ |
| |
| #include "mem/ruby/profiler/Profiler.hh" |
| |
| #include <sys/types.h> |
| #include <unistd.h> |
| |
| #include <algorithm> |
| #include <fstream> |
| |
| #include "base/stl_helpers.hh" |
| #include "base/str.hh" |
| #include "mem/ruby/network/Network.hh" |
| #include "mem/ruby/profiler/AddressProfiler.hh" |
| #include "mem/ruby/protocol/MachineType.hh" |
| #include "mem/ruby/protocol/RubyRequest.hh" |
| |
| /** |
| * the profiler uses GPUCoalescer code even |
| * though the GPUCoalescer is not built for |
| * all ISAs, which can lead to run/link time |
| * errors. here we guard the coalescer code |
| * with ifdefs as there is no easy way to |
| * refactor this code without removing |
| * GPUCoalescer stats from the profiler. |
| * |
| * eventually we should use probe points |
| * here, but until then these ifdefs will |
| * serve. |
| */ |
| #ifdef BUILD_GPU |
| #include "mem/ruby/system/GPUCoalescer.hh" |
| |
| #endif |
| |
| #include "mem/ruby/system/Sequencer.hh" |
| |
| using namespace std; |
| using m5::stl_helpers::operator<<; |
| |
| Profiler::Profiler(const RubySystemParams *p, RubySystem *rs) |
| : m_ruby_system(rs), m_hot_lines(p->hot_lines), |
| m_all_instructions(p->all_instructions), |
| m_num_vnets(p->number_of_virtual_networks) |
| { |
| m_address_profiler_ptr = new AddressProfiler(p->num_of_sequencers, this); |
| m_address_profiler_ptr->setHotLines(m_hot_lines); |
| m_address_profiler_ptr->setAllInstructions(m_all_instructions); |
| |
| if (m_all_instructions) { |
| m_inst_profiler_ptr = new AddressProfiler(p->num_of_sequencers, this); |
| m_inst_profiler_ptr->setHotLines(m_hot_lines); |
| m_inst_profiler_ptr->setAllInstructions(m_all_instructions); |
| } |
| } |
| |
| Profiler::~Profiler() |
| { |
| } |
| |
| void |
| Profiler::regStats(const std::string &pName) |
| { |
| if (!m_all_instructions) { |
| m_address_profiler_ptr->regStats(pName); |
| } |
| |
| if (m_all_instructions) { |
| m_inst_profiler_ptr->regStats(pName); |
| } |
| |
| delayHistogram |
| .init(10) |
| .name(pName + ".delayHist") |
| .desc("delay histogram for all message") |
| .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
| |
| for (int i = 0; i < m_num_vnets; i++) { |
| delayVCHistogram.push_back(new Stats::Histogram()); |
| delayVCHistogram[i] |
| ->init(10) |
| .name(pName + csprintf(".delayVCHist.vnet_%i", i)) |
| .desc(csprintf("delay histogram for vnet_%i", i)) |
| .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
| } |
| |
| m_outstandReqHistSeqr |
| .init(10) |
| .name(pName + ".outstanding_req_hist_seqr") |
| .desc("") |
| .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
| |
| m_outstandReqHistCoalsr |
| .init(10) |
| .name(pName + ".outstanding_req_hist_coalsr") |
| .desc("") |
| .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
| |
| m_latencyHistSeqr |
| .init(10) |
| .name(pName + ".latency_hist_seqr") |
| .desc("") |
| .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
| |
| m_latencyHistCoalsr |
| .init(10) |
| .name(pName + ".latency_hist_coalsr") |
| .desc("") |
| .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
| |
| m_hitLatencyHistSeqr |
| .init(10) |
| .name(pName + ".hit_latency_hist_seqr") |
| .desc("") |
| .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
| |
| m_missLatencyHistSeqr |
| .init(10) |
| .name(pName + ".miss_latency_hist_seqr") |
| .desc("") |
| .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
| |
| m_missLatencyHistCoalsr |
| .init(10) |
| .name(pName + ".miss_latency_hist_coalsr") |
| .desc("") |
| .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
| |
| for (int i = 0; i < RubyRequestType_NUM; i++) { |
| m_typeLatencyHistSeqr.push_back(new Stats::Histogram()); |
| m_typeLatencyHistSeqr[i] |
| ->init(10) |
| .name(pName + csprintf(".%s.latency_hist_seqr", |
| RubyRequestType(i))) |
| .desc("") |
| .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
| |
| m_typeLatencyHistCoalsr.push_back(new Stats::Histogram()); |
| m_typeLatencyHistCoalsr[i] |
| ->init(10) |
| .name(pName + csprintf(".%s.latency_hist_coalsr", |
| RubyRequestType(i))) |
| .desc("") |
| .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
| |
| m_hitTypeLatencyHistSeqr.push_back(new Stats::Histogram()); |
| m_hitTypeLatencyHistSeqr[i] |
| ->init(10) |
| .name(pName + csprintf(".%s.hit_latency_hist_seqr", |
| RubyRequestType(i))) |
| .desc("") |
| .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
| |
| m_missTypeLatencyHistSeqr.push_back(new Stats::Histogram()); |
| m_missTypeLatencyHistSeqr[i] |
| ->init(10) |
| .name(pName + csprintf(".%s.miss_latency_hist_seqr", |
| RubyRequestType(i))) |
| .desc("") |
| .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
| |
| m_missTypeLatencyHistCoalsr.push_back(new Stats::Histogram()); |
| m_missTypeLatencyHistCoalsr[i] |
| ->init(10) |
| .name(pName + csprintf(".%s.miss_latency_hist_coalsr", |
| RubyRequestType(i))) |
| .desc("") |
| .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
| } |
| |
| for (int i = 0; i < MachineType_NUM; i++) { |
| m_hitMachLatencyHistSeqr.push_back(new Stats::Histogram()); |
| m_hitMachLatencyHistSeqr[i] |
| ->init(10) |
| .name(pName + csprintf(".%s.hit_mach_latency_hist_seqr", |
| MachineType(i))) |
| .desc("") |
| .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
| |
| m_missMachLatencyHistSeqr.push_back(new Stats::Histogram()); |
| m_missMachLatencyHistSeqr[i] |
| ->init(10) |
| .name(pName + csprintf(".%s.miss_mach_latency_hist_seqr", |
| MachineType(i))) |
| .desc("") |
| .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
| |
| m_missMachLatencyHistCoalsr.push_back(new Stats::Histogram()); |
| m_missMachLatencyHistCoalsr[i] |
| ->init(10) |
| .name(pName + csprintf(".%s.miss_mach_latency_hist_coalsr", |
| MachineType(i))) |
| .desc("") |
| .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
| |
| m_IssueToInitialDelayHistSeqr.push_back(new Stats::Histogram()); |
| m_IssueToInitialDelayHistSeqr[i] |
| ->init(10) |
| .name(pName + csprintf( |
| ".%s.miss_latency_hist_seqr.issue_to_initial_request", |
| MachineType(i))) |
| .desc("") |
| .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
| |
| m_IssueToInitialDelayHistCoalsr.push_back(new Stats::Histogram()); |
| m_IssueToInitialDelayHistCoalsr[i] |
| ->init(10) |
| .name(pName + csprintf( |
| ".%s.miss_latency_hist_coalsr.issue_to_initial_request", |
| MachineType(i))) |
| .desc("") |
| .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
| |
| m_InitialToForwardDelayHistSeqr.push_back(new Stats::Histogram()); |
| m_InitialToForwardDelayHistSeqr[i] |
| ->init(10) |
| .name(pName + csprintf(".%s.miss_latency_hist_seqr.initial_to_forward", |
| MachineType(i))) |
| .desc("") |
| .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
| |
| m_InitialToForwardDelayHistCoalsr.push_back(new Stats::Histogram()); |
| m_InitialToForwardDelayHistCoalsr[i] |
| ->init(10) |
| .name(pName + csprintf(".%s.miss_latency_hist_coalsr.initial_to_forward", |
| MachineType(i))) |
| .desc("") |
| .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
| |
| m_ForwardToFirstResponseDelayHistSeqr.push_back(new Stats::Histogram()); |
| m_ForwardToFirstResponseDelayHistSeqr[i] |
| ->init(10) |
| .name(pName + csprintf( |
| ".%s.miss_latency_hist_seqr.forward_to_first_response", |
| MachineType(i))) |
| .desc("") |
| .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
| |
| m_ForwardToFirstResponseDelayHistCoalsr.push_back(new Stats::Histogram()); |
| m_ForwardToFirstResponseDelayHistCoalsr[i] |
| ->init(10) |
| .name(pName + csprintf( |
| ".%s.miss_latency_hist_coalsr.forward_to_first_response", |
| MachineType(i))) |
| .desc("") |
| .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
| |
| m_FirstResponseToCompletionDelayHistSeqr.push_back(new Stats::Histogram()); |
| m_FirstResponseToCompletionDelayHistSeqr[i] |
| ->init(10) |
| .name(pName + csprintf( |
| ".%s.miss_latency_hist_seqr.first_response_to_completion", |
| MachineType(i))) |
| .desc("") |
| .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
| |
| m_FirstResponseToCompletionDelayHistCoalsr.push_back(new Stats::Histogram()); |
| m_FirstResponseToCompletionDelayHistCoalsr[i] |
| ->init(10) |
| .name(pName + csprintf( |
| ".%s.miss_latency_hist_coalsr.first_response_to_completion", |
| MachineType(i))) |
| .desc("") |
| .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
| |
| m_IncompleteTimesSeqr[i] |
| .name(pName + csprintf(".%s.incomplete_times_seqr", MachineType(i))) |
| .desc("") |
| .flags(Stats::nozero); |
| } |
| |
| for (int i = 0; i < RubyRequestType_NUM; i++) { |
| m_hitTypeMachLatencyHistSeqr.push_back(std::vector<Stats::Histogram *>()); |
| m_missTypeMachLatencyHistSeqr.push_back(std::vector<Stats::Histogram *>()); |
| m_missTypeMachLatencyHistCoalsr.push_back(std::vector<Stats::Histogram *>()); |
| |
| for (int j = 0; j < MachineType_NUM; j++) { |
| m_hitTypeMachLatencyHistSeqr[i].push_back(new Stats::Histogram()); |
| m_hitTypeMachLatencyHistSeqr[i][j] |
| ->init(10) |
| .name(pName + csprintf(".%s.%s.hit_type_mach_latency_hist_seqr", |
| RubyRequestType(i), MachineType(j))) |
| .desc("") |
| .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
| |
| m_missTypeMachLatencyHistSeqr[i].push_back(new Stats::Histogram()); |
| m_missTypeMachLatencyHistSeqr[i][j] |
| ->init(10) |
| .name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist_seqr", |
| RubyRequestType(i), MachineType(j))) |
| .desc("") |
| .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
| |
| m_missTypeMachLatencyHistCoalsr[i].push_back(new Stats::Histogram()); |
| m_missTypeMachLatencyHistCoalsr[i][j] |
| ->init(10) |
| .name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist_coalsr", |
| RubyRequestType(i), MachineType(j))) |
| .desc("") |
| .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
| } |
| } |
| } |
| |
| void |
| Profiler::collateStats() |
| { |
| if (!m_all_instructions) { |
| m_address_profiler_ptr->collateStats(); |
| } |
| |
| if (m_all_instructions) { |
| m_inst_profiler_ptr->collateStats(); |
| } |
| |
| for (uint32_t i = 0; i < MachineType_NUM; i++) { |
| for (map<uint32_t, AbstractController*>::iterator it = |
| m_ruby_system->m_abstract_controls[i].begin(); |
| it != m_ruby_system->m_abstract_controls[i].end(); ++it) { |
| |
| AbstractController *ctr = (*it).second; |
| delayHistogram.add(ctr->getDelayHist()); |
| |
| for (uint32_t i = 0; i < m_num_vnets; i++) { |
| delayVCHistogram[i]->add(ctr->getDelayVCHist(i)); |
| } |
| } |
| } |
| |
| for (uint32_t i = 0; i < MachineType_NUM; i++) { |
| for (map<uint32_t, AbstractController*>::iterator it = |
| m_ruby_system->m_abstract_controls[i].begin(); |
| it != m_ruby_system->m_abstract_controls[i].end(); ++it) { |
| |
| AbstractController *ctr = (*it).second; |
| Sequencer *seq = ctr->getCPUSequencer(); |
| if (seq != NULL) { |
| m_outstandReqHistSeqr.add(seq->getOutstandReqHist()); |
| } |
| #ifdef BUILD_GPU |
| GPUCoalescer *coal = ctr->getGPUCoalescer(); |
| if (coal != NULL) { |
| m_outstandReqHistCoalsr.add(coal->getOutstandReqHist()); |
| } |
| #endif |
| } |
| } |
| |
| for (uint32_t i = 0; i < MachineType_NUM; i++) { |
| for (map<uint32_t, AbstractController*>::iterator it = |
| m_ruby_system->m_abstract_controls[i].begin(); |
| it != m_ruby_system->m_abstract_controls[i].end(); ++it) { |
| |
| AbstractController *ctr = (*it).second; |
| Sequencer *seq = ctr->getCPUSequencer(); |
| if (seq != NULL) { |
| // add all the latencies |
| m_latencyHistSeqr.add(seq->getLatencyHist()); |
| m_hitLatencyHistSeqr.add(seq->getHitLatencyHist()); |
| m_missLatencyHistSeqr.add(seq->getMissLatencyHist()); |
| |
| // add the per request type latencies |
| for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) { |
| m_typeLatencyHistSeqr[j] |
| ->add(seq->getTypeLatencyHist(j)); |
| m_hitTypeLatencyHistSeqr[j] |
| ->add(seq->getHitTypeLatencyHist(j)); |
| m_missTypeLatencyHistSeqr[j] |
| ->add(seq->getMissTypeLatencyHist(j)); |
| } |
| |
| // add the per machine type miss latencies |
| for (uint32_t j = 0; j < MachineType_NUM; ++j) { |
| m_hitMachLatencyHistSeqr[j] |
| ->add(seq->getHitMachLatencyHist(j)); |
| m_missMachLatencyHistSeqr[j] |
| ->add(seq->getMissMachLatencyHist(j)); |
| |
| m_IssueToInitialDelayHistSeqr[j]->add( |
| seq->getIssueToInitialDelayHist(MachineType(j))); |
| |
| m_InitialToForwardDelayHistSeqr[j]->add( |
| seq->getInitialToForwardDelayHist(MachineType(j))); |
| m_ForwardToFirstResponseDelayHistSeqr[j]->add(seq-> |
| getForwardRequestToFirstResponseHist(MachineType(j))); |
| |
| m_FirstResponseToCompletionDelayHistSeqr[j]->add(seq-> |
| getFirstResponseToCompletionDelayHist( |
| MachineType(j))); |
| m_IncompleteTimesSeqr[j] += |
| seq->getIncompleteTimes(MachineType(j)); |
| } |
| |
| // add the per (request, machine) type miss latencies |
| for (uint32_t j = 0; j < RubyRequestType_NUM; j++) { |
| for (uint32_t k = 0; k < MachineType_NUM; k++) { |
| m_hitTypeMachLatencyHistSeqr[j][k]->add( |
| seq->getHitTypeMachLatencyHist(j,k)); |
| m_missTypeMachLatencyHistSeqr[j][k]->add( |
| seq->getMissTypeMachLatencyHist(j,k)); |
| } |
| } |
| } |
| #ifdef BUILD_GPU |
| GPUCoalescer *coal = ctr->getGPUCoalescer(); |
| if (coal != NULL) { |
| // add all the latencies |
| m_latencyHistCoalsr.add(coal->getLatencyHist()); |
| m_missLatencyHistCoalsr.add(coal->getMissLatencyHist()); |
| |
| // add the per request type latencies |
| for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) { |
| m_typeLatencyHistCoalsr[j] |
| ->add(coal->getTypeLatencyHist(j)); |
| m_missTypeLatencyHistCoalsr[j] |
| ->add(coal->getMissTypeLatencyHist(j)); |
| } |
| |
| // add the per machine type miss latencies |
| for (uint32_t j = 0; j < MachineType_NUM; ++j) { |
| m_missMachLatencyHistCoalsr[j] |
| ->add(coal->getMissMachLatencyHist(j)); |
| |
| m_IssueToInitialDelayHistCoalsr[j]->add( |
| coal->getIssueToInitialDelayHist(MachineType(j))); |
| |
| m_InitialToForwardDelayHistCoalsr[j]->add( |
| coal->getInitialToForwardDelayHist(MachineType(j))); |
| m_ForwardToFirstResponseDelayHistCoalsr[j]->add(coal-> |
| getForwardRequestToFirstResponseHist(MachineType(j))); |
| |
| m_FirstResponseToCompletionDelayHistCoalsr[j]->add(coal-> |
| getFirstResponseToCompletionDelayHist( |
| MachineType(j))); |
| } |
| |
| // add the per (request, machine) type miss latencies |
| for (uint32_t j = 0; j < RubyRequestType_NUM; j++) { |
| for (uint32_t k = 0; k < MachineType_NUM; k++) { |
| m_missTypeMachLatencyHistCoalsr[j][k]->add( |
| coal->getMissTypeMachLatencyHist(j,k)); |
| } |
| } |
| } |
| #endif |
| } |
| } |
| } |
| |
| void |
| Profiler::addAddressTraceSample(const RubyRequest& msg, NodeID id) |
| { |
| if (msg.getType() != RubyRequestType_IFETCH) { |
| // Note: The following line should be commented out if you |
| // want to use the special profiling that is part of the GS320 |
| // protocol |
| |
| // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be |
| // profiled by the AddressProfiler |
| m_address_profiler_ptr-> |
| addTraceSample(msg.getLineAddress(), msg.getProgramCounter(), |
| msg.getType(), msg.getAccessMode(), id, false); |
| } |
| } |