| /* |
| * Copyright (c) 2013-2015 Advanced Micro Devices, Inc. |
| * All rights reserved. |
| * |
| * For use for simulation and test purposes only |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright notice, |
| * this list of conditions and the following disclaimer. |
| * |
| * 2. Redistributions in binary form must reproduce the above copyright notice, |
| * this list of conditions and the following disclaimer in the documentation |
| * and/or other materials provided with the distribution. |
| * |
| * 3. Neither the name of the copyright holder nor the names of its |
| * contributors may be used to endorse or promote products derived from this |
| * software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
| * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| * POSSIBILITY OF SUCH DAMAGE. |
| * |
| * Authors: Sooraj Puthoor |
| */ |
| |
| #include "base/logging.hh" |
| #include "base/str.hh" |
| #include "config/the_isa.hh" |
| |
| #if THE_ISA == X86_ISA |
| #include "arch/x86/insts/microldstop.hh" |
| #endif // X86_ISA |
| #include "mem/ruby/system/VIPERCoalescer.hh" |
| |
| #include "cpu/testers/rubytest/RubyTester.hh" |
| #include "debug/GPUCoalescer.hh" |
| #include "debug/MemoryAccess.hh" |
| #include "mem/packet.hh" |
| #include "mem/ruby/common/SubBlock.hh" |
| #include "mem/ruby/network/MessageBuffer.hh" |
| #include "mem/ruby/profiler/Profiler.hh" |
| #include "mem/ruby/slicc_interface/AbstractController.hh" |
| #include "mem/ruby/slicc_interface/RubyRequest.hh" |
| #include "mem/ruby/structures/CacheMemory.hh" |
| #include "mem/ruby/system/GPUCoalescer.hh" |
| #include "mem/ruby/system/RubySystem.hh" |
| #include "params/VIPERCoalescer.hh" |
| |
| using namespace std; |
| |
| VIPERCoalescer * |
| VIPERCoalescerParams::create() |
| { |
| return new VIPERCoalescer(this); |
| } |
| |
| VIPERCoalescer::VIPERCoalescer(const Params *p) |
| : GPUCoalescer(p) |
| { |
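| // Maximum number of coalescer-issued writebacks and invalidations |
| // allowed per cycle, taken from the corresponding config parameters. |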
| m_max_wb_per_cycle = p->max_wb_per_cycle; |
| m_max_inv_per_cycle = p->max_inv_per_cycle; |
| m_outstanding_inv = 0; |
| m_outstanding_wb = 0; |
| } |
| |
| VIPERCoalescer::~VIPERCoalescer() |
| { |
| } |
| |
| // Analyzes the packet to see if this request can be coalesced. |
| // If the request can be coalesced, it is added to the reqCoalescer table |
| // and makeRequest returns RequestStatus_Issued. |
| // If this is the first request to a cacheline, the request is added to both |
| // the newRequests queue and the reqCoalescer table; makeRequest |
| // returns RequestStatus_Issued. |
| // If there is a pending request to this cacheline and this request |
| // cannot be coalesced, RequestStatus_Aliased is returned and |
| // the packet needs to be reissued. |
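| // In addition, VIPER handles acquire/release semantics here: an acquire |
| // triggers an L1 invalidation walk (invL1), a release triggers an L1 |
| // writeback walk (wbL1), and an acquire-release triggers both (invwbL1). |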
| RequestStatus |
| VIPERCoalescer::makeRequest(PacketPtr pkt) |
| { |
| if (m_outstanding_wb || m_outstanding_inv) { |
| DPRINTF(GPUCoalescer, |
| "There are %d Writebacks and %d Invalidations\n", |
| m_outstanding_wb, m_outstanding_inv); |
| } |
| // Are we in the middle of a release? |
| if (m_outstanding_wb > 0) { |
| if (pkt->req->isKernel()) { |
| // Everything is fine; barriers and kernel ends can coalesce. |
| // If this is a kernel begin, invalidate the L1 cache. |
| if (pkt->req->isAcquire() && (m_outstanding_inv == 0)) { |
| invL1(); |
| } |
| |
| if (pkt->req->isRelease()) { |
| insertKernel(pkt->req->contextId(), pkt); |
| } |
| |
| return RequestStatus_Issued; |
| } |
| // return RequestStatus_Aliased; |
| } else if (pkt->req->isKernel() && pkt->req->isRelease()) { |
| // Flush Dirty Data on Kernel End |
| // isKernel + isRelease |
| insertKernel(pkt->req->contextId(), pkt); |
| wbL1(); |
| if (m_outstanding_wb == 0) { |
| for (auto it = kernelEndList.begin(); it != kernelEndList.end(); it++) { |
| newKernelEnds.push_back(it->first); |
| } |
| completeIssue(); |
| } |
| return RequestStatus_Issued; |
| } |
| RequestStatus requestStatus = GPUCoalescer::makeRequest(pkt); |
| if (requestStatus != RequestStatus_Issued) { |
| // Request not issued; enqueue a retry |
| DPRINTF(GPUCoalescer, "Request not issued by GPUCoalescer\n"); |
| return requestStatus; |
| } else if (pkt->req->isKernel() && pkt->req->isAcquire()) { |
| // Invalidate clean Data on Kernel Begin |
| // isKernel + isAcquire |
| invL1(); |
| } else if (pkt->req->isAcquire() && pkt->req->isRelease()) { |
| // Deschedule the AtomicAcqRel and |
| // Flush and Invalidate the L1 cache |
| invwbL1(); |
| if (m_outstanding_wb > 0 && issueEvent.scheduled()) { |
| DPRINTF(GPUCoalescer, "issueEvent Descheduled\n"); |
| deschedule(issueEvent); |
| } |
| } else if (pkt->req->isRelease()) { |
| // Deschedule the StoreRel and |
| // Flush the L1 cache |
| wbL1(); |
| if (m_outstanding_wb > 0 && issueEvent.scheduled()) { |
| DPRINTF(GPUCoalescer, "issueEvent Descheduled\n"); |
| deschedule(issueEvent); |
| } |
| } else if (pkt->req->isAcquire()) { |
| // LoadAcq or AtomicAcq |
| // Invalidate the L1 cache |
| invL1(); |
| } |
| // Request was successfully issued; schedule the issueEvent now unless |
| // writebacks are outstanding, in which case completeIssue() is |
| // triggered from wbCallback() once they drain. |
| if (m_outstanding_wb == 0) { |
| if (!issueEvent.scheduled()) { |
| DPRINTF(GPUCoalescer, "issueEvent Rescheduled\n"); |
| schedule(issueEvent, curTick()); |
| } |
| } |
| return RequestStatus_Issued; |
| } |
| |
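| /** |
| * Callback for a completed writeback (Release); decrements the |
| * outstanding writeback count and issues pending kernel ends once |
| * all writebacks have drained. |
| */ |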
| void |
| VIPERCoalescer::wbCallback(Addr addr) |
| { |
| m_outstanding_wb--; |
| // If the L1 flush is complete, |
| // attempt to schedule the issueEvent |
| assert(((int) m_outstanding_wb) >= 0); |
| if (m_outstanding_wb == 0) { |
| for (auto it = kernelEndList.begin(); it != kernelEndList.end(); it++) { |
| newKernelEnds.push_back(it->first); |
| } |
| completeIssue(); |
| } |
| trySendRetries(); |
| } |
| |
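| /** |
| * Callback for a completed invalidation (Acquire); decrements the |
| * outstanding invalidation count. Invalidations are not waited on, |
| * so kernel ends are issued based on the writeback count only. |
| */ |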
| void |
| VIPERCoalescer::invCallback(Addr addr) |
| { |
| m_outstanding_inv--; |
| // If the L1 flush is complete, |
| // attempt to schedule the issueEvent. |
| // This probably won't happen, since |
| // we don't wait on cache invalidations |
| if (m_outstanding_wb == 0) { |
| for (auto it = kernelEndList.begin(); it != kernelEndList.end(); it++) { |
| newKernelEnds.push_back(it->first); |
| } |
| completeIssue(); |
| } |
| trySendRetries(); |
| } |
| |
| /** |
| * Invalidate L1 cache (Acquire) |
| */ |
| void |
| VIPERCoalescer::invL1() |
| { |
| int size = m_dataCache_ptr->getNumBlocks(); |
| DPRINTF(GPUCoalescer, |
| "There are %d Invalidations outstanding before Cache Walk\n", |
| m_outstanding_inv); |
| // Walk the cache |
| for (int i = 0; i < size; i++) { |
| Addr addr = m_dataCache_ptr->getAddressAtIdx(i); |
| // Evict Read-only data |
| RubyRequestType request_type = RubyRequestType_REPLACEMENT; |
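| // Build a coalescer-generated control request: no data payload and |
| // no associated packet. |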
| std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>( |
| clockEdge(), addr, nullptr, 0, 0, |
| request_type, RubyAccessMode_Supervisor, |
| nullptr); |
| assert(m_mandatory_q_ptr != nullptr); |
| Tick latency = cyclesToTicks( |
| m_controller->mandatoryQueueLatency(request_type)); |
| assert(latency > 0); |
| m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency); |
| m_outstanding_inv++; |
| } |
| DPRINTF(GPUCoalescer, |
| "There are %d Invalidatons outstanding after Cache Walk\n", |
| m_outstanding_inv); |
| } |
| |
| /** |
| * Writeback L1 cache (Release) |
| */ |
| void |
| VIPERCoalescer::wbL1() |
| { |
| int size = m_dataCache_ptr->getNumBlocks(); |
| DPRINTF(GPUCoalescer, |
| "There are %d Writebacks outstanding before Cache Walk\n", |
| m_outstanding_wb); |
| // Walk the cache |
| for (int i = 0; i < size; i++) { |
| Addr addr = m_dataCache_ptr->getAddressAtIdx(i); |
| // Write dirty data back |
| RubyRequestType request_type = RubyRequestType_FLUSH; |
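| // As in invL1(), this is a coalescer-generated control request with |
| // no data payload and no associated packet. |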
| std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>( |
| clockEdge(), addr, nullptr, 0, 0, |
| request_type, RubyAccessMode_Supervisor, |
| nullptr); |
| assert(m_mandatory_q_ptr != nullptr); |
| Tick latency = cyclesToTicks( |
| m_controller->mandatoryQueueLatency(request_type)); |
| assert(latency > 0); |
| m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency); |
| m_outstanding_wb++; |
| } |
| DPRINTF(GPUCoalescer, |
| "There are %d Writebacks outstanding after Cache Walk\n", |
| m_outstanding_wb); |
| } |
| |
| /** |
| * Invalidate and Writeback L1 cache (Acquire&Release) |
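| * Walks the cache twice, issuing a REPLACEMENT request and a FLUSH |
| * request for every block, so both outstanding counters are incremented. |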
| */ |
| void |
| VIPERCoalescer::invwbL1() |
| { |
| int size = m_dataCache_ptr->getNumBlocks(); |
| // Walk the cache |
| for (int i = 0; i < size; i++) { |
| Addr addr = m_dataCache_ptr->getAddressAtIdx(i); |
| // Evict Read-only data |
| RubyRequestType request_type = RubyRequestType_REPLACEMENT; |
| std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>( |
| clockEdge(), addr, nullptr, 0, 0, |
| request_type, RubyAccessMode_Supervisor, |
| nullptr); |
| assert(m_mandatory_q_ptr != nullptr); |
| Tick latency = cyclesToTicks( |
| m_controller->mandatoryQueueLatency(request_type)); |
| assert(latency > 0); |
| m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency); |
| m_outstanding_inv++; |
| } |
| // Walk the cache |
| for (int i = 0; i < size; i++) { |
| Addr addr = m_dataCache_ptr->getAddressAtIdx(i); |
| // Write dirty data back |
| RubyRequestType request_type = RubyRequestType_FLUSH; |
| std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>( |
| clockEdge(), addr, nullptr, 0, 0, |
| request_type, RubyAccessMode_Supervisor, |
| nullptr); |
| assert(m_mandatory_q_ptr != nullptr); |
| Tick latency = cyclesToTicks( |
| m_controller->mandatoryQueueLatency(request_type)); |
| assert(latency > 0); |
| m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency); |
| m_outstanding_wb++; |
| } |
| } |