/*
 * Copyright (c) 2014-2015 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Mitch Hayenga
 */

#include "mem/cache/prefetch/queued.hh"

#include <cassert>

#include "arch/generic/tlb.hh"
#include "base/logging.hh"
#include "base/trace.hh"
#include "debug/HWPrefetch.hh"
#include "mem/cache/base.hh"
#include "mem/request.hh"
#include "params/QueuedPrefetcher.hh"

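// Build the actual prefetch packet once the target physical address is
// known, and record the earliest tick at which it may be issued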
void
QueuedPrefetcher::DeferredPacket::createPkt(Addr paddr, unsigned blk_size,
                                            MasterID mid, bool tag_prefetch,
                                            Tick t)
{
    /* Create a prefetch memory request */
    RequestPtr req = std::make_shared<Request>(paddr, blk_size, 0, mid);

    if (pfInfo.isSecure()) {
        req->setFlags(Request::SECURE);
    }
    req->taskId(ContextSwitchTaskId::Prefetcher);
    pkt = new Packet(req, MemCmd::HardPFReq);
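    // Allocate the data storage that the response will fill in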
    pkt->allocate();
    if (tag_prefetch && pfInfo.hasPC()) {
        // Tag prefetch packet with accessing pc
        pkt->req->setPC(pfInfo.getPC());
    }
    tick = t;
}

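// Kick off a timing translation for this deferred prefetch; the TLB will
// call back into finish() below once the translation completes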
void
QueuedPrefetcher::DeferredPacket::startTranslation(BaseTLB *tlb)
{
    assert(translationRequest != nullptr);
    if (!ongoingTranslation) {
        ongoingTranslation = true;
        // Prefetchers only operate in Timing mode
        tlb->translateTiming(translationRequest, tc, this, BaseTLB::Read);
    }
}

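// BaseTLB::Translation callback: note the outcome and let the owning
// prefetcher decide whether to enqueue or drop the prefetch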
void
QueuedPrefetcher::DeferredPacket::finish(const Fault &fault,
    const RequestPtr &req, ThreadContext *tc, BaseTLB::Mode mode)
{
    assert(ongoingTranslation);
    ongoingTranslation = false;
    bool failed = (fault != NoFault);
    owner->translationComplete(this, failed);
}

QueuedPrefetcher::QueuedPrefetcher(const QueuedPrefetcherParams *p)
    : BasePrefetcher(p), queueSize(p->queue_size),
      missingTranslationQueueSize(
        p->max_prefetch_requests_with_pending_translation),
      latency(p->latency), queueSquash(p->queue_squash),
      queueFilter(p->queue_filter), cacheSnoop(p->cache_snoop),
      tagPrefetch(p->tag_prefetch),
      throttleControlPct(p->throttle_control_percentage)
{
}

QueuedPrefetcher::~QueuedPrefetcher()
{
    // Delete the queued prefetch packets
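    // (entries still waiting for translation only get a packet once their
    // translation completes, so there is nothing to free in
    // pfqMissingTranslation)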
    for (DeferredPacket &p : pfq) {
        delete p.pkt;
    }
}

size_t
QueuedPrefetcher::getMaxPermittedPrefetches(size_t total) const
{
    /**
     * Throttle generated prefetches based on the accuracy of the prefetcher.
     * Accuracy is computed as the ratio of useful prefetches to the total
     * number of issued prefetches.
     *
     * The throttleControlPct controls how many of the candidate addresses
     * generated by the prefetcher are ultimately turned into prefetch
     * requests:
     * - If set to 100, all candidates can be discarded (one request
     *   will always be allowed to be generated)
     * - Setting it to 0 will disable the throttle control, so requests are
     *   created for all candidates
     * - If set to 60, 40% of candidates will generate a request, and the
     *   remaining 60% will be generated depending on the current accuracy
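     *
     * For example, with throttleControlPct = 60 and total = 10 candidates,
     * throttle_pfs = 6 and min_pfs = 4: at least 4 requests are always
     * generated, and the remaining 6 scale with the measured accuracy
     * (usefulPrefetches / issuedPrefetches), e.g. 4 + 6 * 0.5 = 7 requests
     * at 50% accuracy.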
     */

    size_t max_pfs = total;
    if (total > 0 && issuedPrefetches > 0) {
        size_t throttle_pfs = (total * throttleControlPct) / 100;
        size_t min_pfs = (total - throttle_pfs) == 0 ?
            1 : (total - throttle_pfs);
        max_pfs = min_pfs + (total - min_pfs) *
            usefulPrefetches / issuedPrefetches;
    }
    return max_pfs;
}

void
QueuedPrefetcher::notify(const PacketPtr &pkt, const PrefetchInfo &pfi)
{
    Addr blk_addr = blockAddress(pfi.getAddr());
    bool is_secure = pfi.isSecure();

    // Squash queued prefetches if demand miss to same line
    if (queueSquash) {
        auto itr = pfq.begin();
        while (itr != pfq.end()) {
            if (itr->pfInfo.getAddr() == blk_addr &&
                itr->pfInfo.isSecure() == is_secure) {
                delete itr->pkt;
                itr = pfq.erase(itr);
            } else {
                ++itr;
            }
        }
    }

    // Calculate prefetches given this access
    std::vector<AddrPriority> addresses;
    calculatePrefetch(pfi, addresses);

    // Get the maximum number of prefetches that we are allowed to generate
    size_t max_pfs = getMaxPermittedPrefetches(addresses.size());

    // Queue up generated prefetches
    size_t num_pfs = 0;
    for (AddrPriority& addr_prio : addresses) {

        // Block align prefetch address
        addr_prio.first = blockAddress(addr_prio.first);

        if (!samePage(addr_prio.first, pfi.getAddr())) {
            pfSpanPage += 1;
        }

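        // A candidate on another page can only be prefetched if a TLB is
        // available to translate its address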
        bool can_cross_page = (tlb != nullptr);
        if (can_cross_page || samePage(addr_prio.first, pfi.getAddr())) {
            PrefetchInfo new_pfi(pfi, addr_prio.first);
            pfIdentified++;
            DPRINTF(HWPrefetch, "Found a pf candidate addr: %#x, "
                    "inserting into prefetch queue.\n", new_pfi.getAddr());
            // Create and insert the request
            insert(pkt, new_pfi, addr_prio.second);
            num_pfs += 1;
            if (num_pfs == max_pfs) {
                break;
            }
        } else {
            DPRINTF(HWPrefetch, "Ignoring page crossing prefetch.\n");
        }
    }
}

PacketPtr
QueuedPrefetcher::getPacket()
{
    DPRINTF(HWPrefetch, "Requesting a prefetch to issue.\n");

    if (pfq.empty()) {
        // If the queue is empty, attempt first to fill it with requests
        // from the queue of missing translations
        processMissingTranslations(queueSize);
    }

    if (pfq.empty()) {
        DPRINTF(HWPrefetch, "No hardware prefetches available.\n");
        return nullptr;
    }

    PacketPtr pkt = pfq.front().pkt;
    pfq.pop_front();

    pfIssued++;
    issuedPrefetches += 1;
    assert(pkt != nullptr);
    DPRINTF(HWPrefetch, "Generating prefetch for %#x.\n", pkt->getAddr());

    processMissingTranslations(queueSize - pfq.size());
    return pkt;
}

void
QueuedPrefetcher::regStats()
{
    BasePrefetcher::regStats();

    pfIdentified
        .name(name() + ".pfIdentified")
        .desc("number of prefetch candidates identified");

    pfBufferHit
        .name(name() + ".pfBufferHit")
        .desc("number of redundant prefetches already in prefetch queue");

    pfInCache
        .name(name() + ".pfInCache")
        .desc("number of redundant prefetches already in cache/mshr dropped");

    pfRemovedFull
        .name(name() + ".pfRemovedFull")
        .desc("number of prefetches dropped due to prefetch queue size");

    pfSpanPage
        .name(name() + ".pfSpanPage")
        .desc("number of prefetches that crossed the page");
}

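// Try to start up to "max" of the translation requests still pending in
// pfqMissingTranslation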
void
QueuedPrefetcher::processMissingTranslations(unsigned max)
{
    unsigned count = 0;
    iterator it = pfqMissingTranslation.begin();
    while (it != pfqMissingTranslation.end() && count < max) {
        DeferredPacket &dp = *it;
        // Advance the iterator first because dp.startTranslation can
        // complete immediately and call translationComplete, which erases
        // the entry that "it" refers to
        it++;
        dp.startTranslation(tlb);
        count += 1;
    }
}

void
QueuedPrefetcher::translationComplete(DeferredPacket *dp, bool failed)
{
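    // Locate the deferred packet by pointer identity; this is safe because
    // std::list never relocates its elements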
    auto it = pfqMissingTranslation.begin();
    while (it != pfqMissingTranslation.end()) {
        if (&(*it) == dp) {
            break;
        }
        it++;
    }
    assert(it != pfqMissingTranslation.end());
    if (!failed) {
        DPRINTF(HWPrefetch, "%s Translation of vaddr %#x succeeded: "
                "paddr %#x\n", tlb->name(),
                it->translationRequest->getVaddr(),
                it->translationRequest->getPaddr());
        Addr target_paddr = it->translationRequest->getPaddr();
        // check if this prefetch is already redundant
        if (cacheSnoop && (inCache(target_paddr, it->pfInfo.isSecure()) ||
                inMissQueue(target_paddr, it->pfInfo.isSecure()))) {
            pfInCache++;
            DPRINTF(HWPrefetch, "Dropping redundant in "
                    "cache/MSHR prefetch addr:%#x\n", target_paddr);
        } else {
            Tick pf_time = curTick() + clockPeriod() * latency;
            it->createPkt(it->translationRequest->getPaddr(), blkSize,
                          masterId, tagPrefetch, pf_time);
            addToQueue(pfq, *it);
        }
    } else {
        DPRINTF(HWPrefetch, "%s Translation of vaddr %#x failed, dropping "
                "prefetch request\n", tlb->name(),
                it->translationRequest->getVaddr());
    }
    pfqMissingTranslation.erase(it);
}

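// Check whether an equivalent prefetch (same block address and security
// state) is already queued; if the new candidate has a higher priority,
// promote the existing entry instead of inserting a duplicate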
bool
QueuedPrefetcher::alreadyInQueue(std::list<DeferredPacket> &queue,
                                 const PrefetchInfo &pfi, int32_t priority)
{
    iterator it = queue.begin();
    while (it != queue.end() && !it->pfInfo.sameAddr(pfi)) {
        it++;
    }
    bool found = (it != queue.end());

    /* If the address is already in the queue, update priority and leave */
    if (found) {
        pfBufferHit++;
        if (it->priority < priority) {
            /* Update priority value and position in the queue */
            it->priority = priority;
            iterator prev = it;
            while (prev != queue.begin()) {
                prev--;
                /* If the packet has higher priority, swap */
                if (*it > *prev) {
                    std::swap(*it, *prev);
                    it = prev;
                }
            }
            DPRINTF(HWPrefetch, "Prefetch addr already in "
                    "prefetch queue, priority updated\n");
        } else {
            DPRINTF(HWPrefetch, "Prefetch addr already in "
                    "prefetch queue\n");
        }
    }
    return found;
}

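// Build the request that is used purely to translate the prefetch target
// address; Request::PREFETCH tags it as prefetcher-generated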
RequestPtr
QueuedPrefetcher::createPrefetchRequest(Addr addr, PrefetchInfo const &pfi,
                                        PacketPtr pkt)
{
    RequestPtr translation_req = std::make_shared<Request>(pkt->req->getAsid(),
            addr, blkSize, pkt->req->getFlags(), masterId, pfi.getPC(),
            pkt->req->contextId());
    translation_req->setFlags(Request::PREFETCH);
    return translation_req;
}

void
QueuedPrefetcher::insert(const PacketPtr &pkt, PrefetchInfo &new_pfi,
                         int32_t priority)
{
    if (queueFilter) {
        if (alreadyInQueue(pfq, new_pfi, priority)) {
            return;
        }
        if (alreadyInQueue(pfqMissingTranslation, new_pfi, priority)) {
            return;
        }
    }

    /*
     * Physical address computation
     * if the prefetch is within the same page
     *   using VA: add the computed stride to the original PA
     *   using PA: no actions needed
     * if we are page crossing
     *   using VA: create a translation request and enqueue the corresponding
     *       deferred packet in the queue of pending translations
     *   using PA: use the provided VA to obtain the target VA, then attempt
     *       to translate the resulting address
     */

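    // Addresses are unsigned, so track the magnitude and direction of the
    // stride separately rather than using a signed difference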
    Addr orig_addr = useVirtualAddresses ?
        pkt->req->getVaddr() : pkt->req->getPaddr();
    bool positive_stride = new_pfi.getAddr() >= orig_addr;
    Addr stride = positive_stride ?
        (new_pfi.getAddr() - orig_addr) : (orig_addr - new_pfi.getAddr());

    Addr target_paddr;
    bool has_target_pa = false;
    RequestPtr translation_req = nullptr;
    if (samePage(orig_addr, new_pfi.getAddr())) {
        if (useVirtualAddresses) {
            // if we trained with virtual addresses,
            // compute the target PA using the original PA and adding the
            // prefetch stride (difference between target VA and original VA)
            target_paddr = positive_stride ? (pkt->req->getPaddr() + stride) :
                (pkt->req->getPaddr() - stride);
        } else {
            target_paddr = new_pfi.getAddr();
        }
        has_target_pa = true;
    } else {
        // Page crossing reference

        // ContextID is needed for translation
        if (!pkt->req->hasContextId()) {
            return;
        }
        if (useVirtualAddresses) {
            has_target_pa = false;
            translation_req = createPrefetchRequest(new_pfi.getAddr(), new_pfi,
                                                    pkt);
        } else if (pkt->req->hasVaddr()) {
            has_target_pa = false;
            // Compute the target VA using req->getVaddr + stride
            Addr target_vaddr = positive_stride ?
                (pkt->req->getVaddr() + stride) :
                (pkt->req->getVaddr() - stride);
            translation_req = createPrefetchRequest(target_vaddr, new_pfi,
                                                    pkt);
        } else {
            // Using PA for training but the request does not have a VA,
            // unable to process this page crossing prefetch.
            return;
        }
    }
    if (has_target_pa && cacheSnoop &&
            (inCache(target_paddr, new_pfi.isSecure()) ||
             inMissQueue(target_paddr, new_pfi.isSecure()))) {
        pfInCache++;
        DPRINTF(HWPrefetch, "Dropping redundant in "
                "cache/MSHR prefetch addr:%#x\n", target_paddr);
        return;
    }

    /* Create the packet and find the spot to insert it */
    DeferredPacket dpp(this, new_pfi, 0, priority);
    if (has_target_pa) {
        Tick pf_time = curTick() + clockPeriod() * latency;
        dpp.createPkt(target_paddr, blkSize, masterId, tagPrefetch, pf_time);
        DPRINTF(HWPrefetch, "Prefetch queued. "
                "addr:%#x priority: %3d tick:%lld.\n",
                new_pfi.getAddr(), priority, pf_time);
        addToQueue(pfq, dpp);
    } else {
        // Add the translation request and try to resolve it later
        dpp.setTranslationRequest(translation_req);
        dpp.tc = cache->system->getThreadContext(translation_req->contextId());
        DPRINTF(HWPrefetch, "Prefetch queued with no translation. "
                "addr:%#x priority: %3d\n", new_pfi.getAddr(), priority);
        addToQueue(pfqMissingTranslation, dpp);
    }
}

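// Insert dpp so that the queue stays sorted by descending priority, FIFO
// within each priority level; when full, evict the oldest entry of the
// lowest priority first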
void
QueuedPrefetcher::addToQueue(std::list<DeferredPacket> &queue,
                             DeferredPacket &dpp)
{
    /* Verify prefetch buffer space for request */
    if (queue.size() == queueSize) {
        pfRemovedFull++;
        /* Lowest priority packet */
        iterator it = queue.end();
        panic_if(it == queue.begin(),
                 "Prefetch queue is both full and empty!");
        --it;
        /* Look for the oldest packet at that priority level */
        panic_if(it == queue.begin(),
                 "Prefetch queue is full with 1 element!");
        iterator prev = it;
        bool cont = true;
        /* While not at the head of the queue */
        while (cont && prev != queue.begin()) {
            prev--;
            /* While at the same level of priority */
            cont = prev->priority == it->priority;
            if (cont)
                /* update pointer */
                it = prev;
        }
        DPRINTF(HWPrefetch, "Prefetch queue full, removing lowest priority "
                "oldest packet, addr: %#x\n", it->pfInfo.getAddr());
        delete it->pkt;
        queue.erase(it);
    }

    if (queue.size() == 0) {
        queue.emplace_back(dpp);
    } else {
        iterator it = queue.end();
        do {
            --it;
        } while (it != queue.begin() && dpp > *it);
        /* The scan stops at the first entry with equal or higher priority
         * (or at the head); if that entry wins the tie, insert behind it so
         * that FIFO order is kept within a priority level */
        if (dpp <= *it)
            it++;
        queue.insert(it, dpp);
    }
}