| /* |
| * Copyright (c) 2012-2013, 2018-2019 ARM Limited |
| * All rights reserved. |
| * |
| * The license below extends only to copyright in the software and shall |
| * not be construed as granting a license to any other intellectual |
| * property including but not limited to intellectual property relating |
| * to a hardware implementation of the functionality of the software |
| * licensed hereunder. You may use the software subject to the license |
| * terms below provided that you ensure that this notice is replicated |
| * unmodified and in its entirety in all distributions of the software, |
| * modified or unmodified, in source code or in binary form. |
| * |
| * Copyright (c) 2003-2005 The Regents of The University of Michigan |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are |
| * met: redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer; |
| * redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution; |
| * neither the name of the copyright holders nor the names of its |
| * contributors may be used to endorse or promote products derived from |
| * this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| /** |
| * @file |
| * Definition of BaseCache functions. |
| */ |
| |
#include "mem/cache/base.hh"

#include <cstring>

#include "base/compiler.hh"
#include "base/logging.hh"
#include "debug/Cache.hh"
#include "debug/CacheComp.hh"
#include "debug/CachePort.hh"
#include "debug/CacheRepl.hh"
#include "debug/CacheVerbose.hh"
#include "debug/HWPrefetch.hh"
#include "mem/cache/compressors/base.hh"
#include "mem/cache/mshr.hh"
#include "mem/cache/prefetch/base.hh"
#include "mem/cache/queue_entry.hh"
#include "mem/cache/tags/compressed_tags.hh"
#include "mem/cache/tags/super_blk.hh"
#include "params/BaseCache.hh"
#include "params/WriteAllocator.hh"
#include "sim/cur_tick.hh"
| |
| namespace gem5 |
| { |
| |
| BaseCache::CacheResponsePort::CacheResponsePort(const std::string &_name, |
| BaseCache *_cache, |
| const std::string &_label) |
| : QueuedResponsePort(_name, _cache, queue), |
| queue(*_cache, *this, true, _label), |
| blocked(false), mustSendRetry(false), |
| sendRetryEvent([this]{ processSendRetry(); }, _name) |
| { |
| } |
| |
| BaseCache::BaseCache(const BaseCacheParams &p, unsigned blk_size) |
| : ClockedObject(p), |
| cpuSidePort (p.name + ".cpu_side_port", this, "CpuSidePort"), |
| memSidePort(p.name + ".mem_side_port", this, "MemSidePort"), |
| mshrQueue("MSHRs", p.mshrs, 0, p.demand_mshr_reserve, p.name), |
| writeBuffer("write buffer", p.write_buffers, p.mshrs, p.name), |
| tags(p.tags), |
| compressor(p.compressor), |
| prefetcher(p.prefetcher), |
| writeAllocator(p.write_allocator), |
| writebackClean(p.writeback_clean), |
| tempBlockWriteback(nullptr), |
| writebackTempBlockAtomicEvent([this]{ writebackTempBlockAtomic(); }, |
| name(), false, |
| EventBase::Delayed_Writeback_Pri), |
| blkSize(blk_size), |
| lookupLatency(p.tag_latency), |
| dataLatency(p.data_latency), |
| forwardLatency(p.tag_latency), |
| fillLatency(p.data_latency), |
| responseLatency(p.response_latency), |
| sequentialAccess(p.sequential_access), |
| numTarget(p.tgts_per_mshr), |
| forwardSnoops(true), |
| clusivity(p.clusivity), |
| isReadOnly(p.is_read_only), |
| replaceExpansions(p.replace_expansions), |
| moveContractions(p.move_contractions), |
| blocked(0), |
| order(0), |
| noTargetMSHR(nullptr), |
| missCount(p.max_miss_count), |
| addrRanges(p.addr_ranges.begin(), p.addr_ranges.end()), |
| system(p.system), |
| stats(*this) |
| { |
| // the MSHR queue has no reserve entries as we check the MSHR |
| // queue on every single allocation, whereas the write queue has |
| // as many reserve entries as we have MSHRs, since every MSHR may |
| // eventually require a writeback, and we do not check the write |
| // buffer before committing to an MSHR |
| |
| // forward snoops is overridden in init() once we can query |
| // whether the connected requestor is actually snooping or not |
| |
| tempBlock = new TempCacheBlk(blkSize); |
| |
| tags->tagsInit(); |
| if (prefetcher) |
| prefetcher->setCache(this); |
| |
| fatal_if(compressor && !dynamic_cast<CompressedTags*>(tags), |
| "The tags of compressed cache %s must derive from CompressedTags", |
| name()); |
| warn_if(!compressor && dynamic_cast<CompressedTags*>(tags), |
| "Compressed cache %s does not have a compression algorithm", name()); |
| if (compressor) |
| compressor->setCache(this); |
| } |
| |
| BaseCache::~BaseCache() |
| { |
| delete tempBlock; |
| } |
| |
| void |
| BaseCache::CacheResponsePort::setBlocked() |
| { |
| assert(!blocked); |
| DPRINTF(CachePort, "Port is blocking new requests\n"); |
| blocked = true; |
| // if we already scheduled a retry in this cycle, but it has not yet |
| // happened, cancel it |
| if (sendRetryEvent.scheduled()) { |
| owner.deschedule(sendRetryEvent); |
| DPRINTF(CachePort, "Port descheduled retry\n"); |
| mustSendRetry = true; |
| } |
| } |
| |
| void |
| BaseCache::CacheResponsePort::clearBlocked() |
| { |
| assert(blocked); |
| DPRINTF(CachePort, "Port is accepting new requests\n"); |
| blocked = false; |
| if (mustSendRetry) { |
| // @TODO: need to find a better time (next cycle?) |
| owner.schedule(sendRetryEvent, curTick() + 1); |
| } |
| } |
| |
| void |
| BaseCache::CacheResponsePort::processSendRetry() |
| { |
| DPRINTF(CachePort, "Port is sending retry\n"); |
| |
| // reset the flag and call retry |
| mustSendRetry = false; |
| sendRetryReq(); |
| } |
| |
| Addr |
| BaseCache::regenerateBlkAddr(CacheBlk* blk) |
| { |
| if (blk != tempBlock) { |
| return tags->regenerateBlkAddr(blk); |
| } else { |
| return tempBlock->getAddr(); |
| } |
| } |
| |
| void |
| BaseCache::init() |
| { |
| if (!cpuSidePort.isConnected() || !memSidePort.isConnected()) |
| fatal("Cache ports on %s are not connected\n", name()); |
| cpuSidePort.sendRangeChange(); |
| forwardSnoops = cpuSidePort.isSnooping(); |
| } |
| |
| Port & |
| BaseCache::getPort(const std::string &if_name, PortID idx) |
| { |
| if (if_name == "mem_side") { |
| return memSidePort; |
| } else if (if_name == "cpu_side") { |
| return cpuSidePort; |
| } else { |
| return ClockedObject::getPort(if_name, idx); |
| } |
| } |
| |
| bool |
| BaseCache::inRange(Addr addr) const |
| { |
| for (const auto& r : addrRanges) { |
| if (r.contains(addr)) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| void |
| BaseCache::handleTimingReqHit(PacketPtr pkt, CacheBlk *blk, Tick request_time) |
| { |
| if (pkt->needsResponse()) { |
| // These delays should have been consumed by now |
| assert(pkt->headerDelay == 0); |
| assert(pkt->payloadDelay == 0); |
| |
| pkt->makeTimingResponse(); |
| |
| // In this case we are considering request_time that takes |
| // into account the delay of the xbar, if any, and just |
| // lat, neglecting responseLatency, modelling hit latency |
| // just as the value of lat overriden by access(), which calls |
| // the calculateAccessLatency() function. |
| cpuSidePort.schedTimingResp(pkt, request_time); |
| } else { |
| DPRINTF(Cache, "%s satisfied %s, no response needed\n", __func__, |
| pkt->print()); |
| |
| // queue the packet for deletion, as the sending cache is |
| // still relying on it; if the block is found in access(), |
| // CleanEvict and Writeback messages will be deleted |
| // here as well |
| pendingDelete.reset(pkt); |
| } |
| } |
| |
| void |
| BaseCache::handleTimingReqMiss(PacketPtr pkt, MSHR *mshr, CacheBlk *blk, |
| Tick forward_time, Tick request_time) |
| { |
| if (writeAllocator && |
| pkt && pkt->isWrite() && !pkt->req->isUncacheable()) { |
| writeAllocator->updateMode(pkt->getAddr(), pkt->getSize(), |
| pkt->getBlockAddr(blkSize)); |
| } |
| |
| if (mshr) { |
| /// MSHR hit |
| /// @note writebacks will be checked in getNextMSHR() |
| /// for any conflicting requests to the same block |
| |
| //@todo remove hw_pf here |
| |
| // Coalesce unless it was a software prefetch (see above). |
| if (pkt) { |
| assert(!pkt->isWriteback()); |
| // CleanEvicts corresponding to blocks which have |
| // outstanding requests in MSHRs are simply sunk here |
| if (pkt->cmd == MemCmd::CleanEvict) { |
| pendingDelete.reset(pkt); |
| } else if (pkt->cmd == MemCmd::WriteClean) { |
| // A WriteClean should never coalesce with any |
| // outstanding cache maintenance requests. |
| |
| // We use forward_time here because there is an |
| // uncached memory write, forwarded to WriteBuffer. |
| allocateWriteBuffer(pkt, forward_time); |
| } else { |
| DPRINTF(Cache, "%s coalescing MSHR for %s\n", __func__, |
| pkt->print()); |
| |
| assert(pkt->req->requestorId() < system->maxRequestors()); |
| stats.cmdStats(pkt).mshrHits[pkt->req->requestorId()]++; |
| |
| // We use forward_time here because it is the same |
| // considering new targets. We have multiple |
| // requests for the same address here. It |
| // specifies the latency to allocate an internal |
| // buffer and to schedule an event to the queued |
| // port and also takes into account the additional |
| // delay of the xbar. |
| mshr->allocateTarget(pkt, forward_time, order++, |
| allocOnFill(pkt->cmd)); |
| if (mshr->getNumTargets() >= numTarget) { |
| noTargetMSHR = mshr; |
| setBlocked(Blocked_NoTargets); |
| // need to be careful with this... if this mshr isn't |
| // ready yet (i.e. time > curTick()), we don't want to |
| // move it ahead of mshrs that are ready |
| // mshrQueue.moveToFront(mshr); |
| } |
| } |
| } |
| } else { |
| // no MSHR |
| assert(pkt->req->requestorId() < system->maxRequestors()); |
| stats.cmdStats(pkt).mshrMisses[pkt->req->requestorId()]++; |
| if (prefetcher && pkt->isDemand()) |
| prefetcher->incrDemandMhsrMisses(); |
| |
| if (pkt->isEviction() || pkt->cmd == MemCmd::WriteClean) { |
| // We use forward_time here because there is an |
| // writeback or writeclean, forwarded to WriteBuffer. |
| allocateWriteBuffer(pkt, forward_time); |
| } else { |
| if (blk && blk->isValid()) { |
| // If we have a write miss to a valid block, we |
| // need to mark the block non-readable. Otherwise |
| // if we allow reads while there's an outstanding |
| // write miss, the read could return stale data |
| // out of the cache block... a more aggressive |
| // system could detect the overlap (if any) and |
| // forward data out of the MSHRs, but we don't do |
| // that yet. Note that we do need to leave the |
| // block valid so that it stays in the cache, in |
| // case we get an upgrade response (and hence no |
| // new data) when the write miss completes. |
| // As long as CPUs do proper store/load forwarding |
| // internally, and have a sufficiently weak memory |
| // model, this is probably unnecessary, but at some |
| // point it must have seemed like we needed it... |
| assert((pkt->needsWritable() && |
| !blk->isSet(CacheBlk::WritableBit)) || |
| pkt->req->isCacheMaintenance()); |
| blk->clearCoherenceBits(CacheBlk::ReadableBit); |
| } |
| // Here we are using forward_time, modelling the latency of |
| // a miss (outbound) just as forwardLatency, neglecting the |
| // lookupLatency component. |
| allocateMissBuffer(pkt, forward_time); |
| } |
| } |
| } |
| |
| void |
| BaseCache::recvTimingReq(PacketPtr pkt) |
| { |
| // anything that is merely forwarded pays for the forward latency and |
| // the delay provided by the crossbar |
| Tick forward_time = clockEdge(forwardLatency) + pkt->headerDelay; |
| |
| Cycles lat; |
| CacheBlk *blk = nullptr; |
| bool satisfied = false; |
| { |
| PacketList writebacks; |
| // Note that lat is passed by reference here. The function |
| // access() will set the lat value. |
| satisfied = access(pkt, blk, lat, writebacks); |
| |
| // After the evicted blocks are selected, they must be forwarded |
| // to the write buffer to ensure they logically precede anything |
| // happening below |
| doWritebacks(writebacks, clockEdge(lat + forwardLatency)); |
| } |
| |
| // Here we charge the headerDelay that takes into account the latencies |
| // of the bus, if the packet comes from it. |
| // The latency charged is just the value set by the access() function. |
| // In case of a hit we are neglecting response latency. |
| // In case of a miss we are neglecting forward latency. |
| Tick request_time = clockEdge(lat); |
| // Here we reset the timing of the packet. |
| pkt->headerDelay = pkt->payloadDelay = 0; |
| |
| if (satisfied) { |
| // notify before anything else as later handleTimingReqHit might turn |
| // the packet in a response |
| ppHit->notify(pkt); |
| |
| if (prefetcher && blk && blk->wasPrefetched()) { |
| DPRINTF(Cache, "Hit on prefetch for addr %#x (%s)\n", |
| pkt->getAddr(), pkt->isSecure() ? "s" : "ns"); |
| blk->clearPrefetched(); |
| } |
| |
| handleTimingReqHit(pkt, blk, request_time); |
| } else { |
| handleTimingReqMiss(pkt, blk, forward_time, request_time); |
| |
| ppMiss->notify(pkt); |
| } |
| |
| if (prefetcher) { |
| // track time of availability of next prefetch, if any |
| Tick next_pf_time = prefetcher->nextPrefetchReadyTime(); |
| if (next_pf_time != MaxTick) { |
| schedMemSideSendEvent(next_pf_time); |
| } |
| } |
| } |
| |
| void |
| BaseCache::handleUncacheableWriteResp(PacketPtr pkt) |
| { |
| Tick completion_time = clockEdge(responseLatency) + |
| pkt->headerDelay + pkt->payloadDelay; |
| |
| // Reset the bus additional time as it is now accounted for |
| pkt->headerDelay = pkt->payloadDelay = 0; |
| |
| cpuSidePort.schedTimingResp(pkt, completion_time); |
| } |
| |
| void |
| BaseCache::recvTimingResp(PacketPtr pkt) |
| { |
| assert(pkt->isResponse()); |
| |
| // all header delay should be paid for by the crossbar, unless |
| // this is a prefetch response from above |
| panic_if(pkt->headerDelay != 0 && pkt->cmd != MemCmd::HardPFResp, |
| "%s saw a non-zero packet delay\n", name()); |
| |
| const bool is_error = pkt->isError(); |
| |
| if (is_error) { |
| DPRINTF(Cache, "%s: Cache received %s with error\n", __func__, |
| pkt->print()); |
| } |
| |
| DPRINTF(Cache, "%s: Handling response %s\n", __func__, |
| pkt->print()); |
| |
| // if this is a write, we should be looking at an uncacheable |
| // write |
| if (pkt->isWrite()) { |
| assert(pkt->req->isUncacheable()); |
| handleUncacheableWriteResp(pkt); |
| return; |
| } |
| |
| // we have dealt with any (uncacheable) writes above, from here on |
| // we know we are dealing with an MSHR due to a miss or a prefetch |
| MSHR *mshr = dynamic_cast<MSHR*>(pkt->popSenderState()); |
| assert(mshr); |
| |
| if (mshr == noTargetMSHR) { |
| // we always clear at least one target |
| clearBlocked(Blocked_NoTargets); |
| noTargetMSHR = nullptr; |
| } |
| |
| // Initial target is used just for stats |
| const QueueEntry::Target *initial_tgt = mshr->getTarget(); |
| const Tick miss_latency = curTick() - initial_tgt->recvTime; |
| if (pkt->req->isUncacheable()) { |
| assert(pkt->req->requestorId() < system->maxRequestors()); |
| stats.cmdStats(initial_tgt->pkt) |
| .mshrUncacheableLatency[pkt->req->requestorId()] += miss_latency; |
| } else { |
| assert(pkt->req->requestorId() < system->maxRequestors()); |
| stats.cmdStats(initial_tgt->pkt) |
| .mshrMissLatency[pkt->req->requestorId()] += miss_latency; |
| } |
| |
| PacketList writebacks; |
| |
| bool is_fill = !mshr->isForward && |
| (pkt->isRead() || pkt->cmd == MemCmd::UpgradeResp || |
| mshr->wasWholeLineWrite); |
| |
| // make sure that if the mshr was due to a whole line write then |
| // the response is an invalidation |
| assert(!mshr->wasWholeLineWrite || pkt->isInvalidate()); |
| |
| CacheBlk *blk = tags->findBlock(pkt->getAddr(), pkt->isSecure()); |
| |
| if (is_fill && !is_error) { |
| DPRINTF(Cache, "Block for addr %#llx being updated in Cache\n", |
| pkt->getAddr()); |
| |
| const bool allocate = (writeAllocator && mshr->wasWholeLineWrite) ? |
| writeAllocator->allocate() : mshr->allocOnFill(); |
| blk = handleFill(pkt, blk, writebacks, allocate); |
| assert(blk != nullptr); |
| ppFill->notify(pkt); |
| } |
| |
| if (blk && blk->isValid() && pkt->isClean() && !pkt->isInvalidate()) { |
| // The block was marked not readable while there was a pending |
| // cache maintenance operation, restore its flag. |
| blk->setCoherenceBits(CacheBlk::ReadableBit); |
| |
| // This was a cache clean operation (without invalidate) |
| // and we have a copy of the block already. Since there |
| // is no invalidation, we can promote targets that don't |
| // require a writable copy |
| mshr->promoteReadable(); |
| } |
| |
| if (blk && blk->isSet(CacheBlk::WritableBit) && |
| !pkt->req->isCacheInvalidate()) { |
| // If at this point the referenced block is writable and the |
| // response is not a cache invalidate, we promote targets that |
| // were deferred as we couldn't guarrantee a writable copy |
| mshr->promoteWritable(); |
| } |
| |
| serviceMSHRTargets(mshr, pkt, blk); |
| |
| if (mshr->promoteDeferredTargets()) { |
| // avoid later read getting stale data while write miss is |
| // outstanding.. see comment in timingAccess() |
| if (blk) { |
| blk->clearCoherenceBits(CacheBlk::ReadableBit); |
| } |
| mshrQueue.markPending(mshr); |
| schedMemSideSendEvent(clockEdge() + pkt->payloadDelay); |
| } else { |
| // while we deallocate an mshr from the queue we still have to |
| // check the isFull condition before and after as we might |
| // have been using the reserved entries already |
| const bool was_full = mshrQueue.isFull(); |
| mshrQueue.deallocate(mshr); |
| if (was_full && !mshrQueue.isFull()) { |
| clearBlocked(Blocked_NoMSHRs); |
| } |
| |
| // Request the bus for a prefetch if this deallocation freed enough |
| // MSHRs for a prefetch to take place |
| if (prefetcher && mshrQueue.canPrefetch() && !isBlocked()) { |
| Tick next_pf_time = std::max(prefetcher->nextPrefetchReadyTime(), |
| clockEdge()); |
| if (next_pf_time != MaxTick) |
| schedMemSideSendEvent(next_pf_time); |
| } |
| } |
| |
| // if we used temp block, check to see if its valid and then clear it out |
| if (blk == tempBlock && tempBlock->isValid()) { |
| evictBlock(blk, writebacks); |
| } |
| |
| const Tick forward_time = clockEdge(forwardLatency) + pkt->headerDelay; |
| // copy writebacks to write buffer |
| doWritebacks(writebacks, forward_time); |
| |
| DPRINTF(CacheVerbose, "%s: Leaving with %s\n", __func__, pkt->print()); |
| delete pkt; |
| } |
| |
| |
| Tick |
| BaseCache::recvAtomic(PacketPtr pkt) |
| { |
| // should assert here that there are no outstanding MSHRs or |
| // writebacks... that would mean that someone used an atomic |
| // access in timing mode |
| |
| // We use lookupLatency here because it is used to specify the latency |
| // to access. |
| Cycles lat = lookupLatency; |
| |
| CacheBlk *blk = nullptr; |
| PacketList writebacks; |
| bool satisfied = access(pkt, blk, lat, writebacks); |
| |
| if (pkt->isClean() && blk && blk->isSet(CacheBlk::DirtyBit)) { |
| // A cache clean opearation is looking for a dirty |
| // block. If a dirty block is encountered a WriteClean |
| // will update any copies to the path to the memory |
| // until the point of reference. |
| DPRINTF(CacheVerbose, "%s: packet %s found block: %s\n", |
| __func__, pkt->print(), blk->print()); |
| PacketPtr wb_pkt = writecleanBlk(blk, pkt->req->getDest(), pkt->id); |
| writebacks.push_back(wb_pkt); |
| pkt->setSatisfied(); |
| } |
| |
| // handle writebacks resulting from the access here to ensure they |
| // logically precede anything happening below |
| doWritebacksAtomic(writebacks); |
| assert(writebacks.empty()); |
| |
| if (!satisfied) { |
| lat += handleAtomicReqMiss(pkt, blk, writebacks); |
| } |
| |
| // Note that we don't invoke the prefetcher at all in atomic mode. |
| // It's not clear how to do it properly, particularly for |
| // prefetchers that aggressively generate prefetch candidates and |
| // rely on bandwidth contention to throttle them; these will tend |
| // to pollute the cache in atomic mode since there is no bandwidth |
| // contention. If we ever do want to enable prefetching in atomic |
| // mode, though, this is the place to do it... see timingAccess() |
| // for an example (though we'd want to issue the prefetch(es) |
| // immediately rather than calling requestMemSideBus() as we do |
| // there). |
| |
| // do any writebacks resulting from the response handling |
| doWritebacksAtomic(writebacks); |
| |
| // if we used temp block, check to see if its valid and if so |
| // clear it out, but only do so after the call to recvAtomic is |
| // finished so that any downstream observers (such as a snoop |
| // filter), first see the fill, and only then see the eviction |
| if (blk == tempBlock && tempBlock->isValid()) { |
| // the atomic CPU calls recvAtomic for fetch and load/store |
| // sequentuially, and we may already have a tempBlock |
| // writeback from the fetch that we have not yet sent |
| if (tempBlockWriteback) { |
| // if that is the case, write the prevoius one back, and |
| // do not schedule any new event |
| writebackTempBlockAtomic(); |
| } else { |
| // the writeback/clean eviction happens after the call to |
| // recvAtomic has finished (but before any successive |
| // calls), so that the response handling from the fill is |
| // allowed to happen first |
| schedule(writebackTempBlockAtomicEvent, curTick()); |
| } |
| |
| tempBlockWriteback = evictBlock(blk); |
| } |
| |
| if (pkt->needsResponse()) { |
| pkt->makeAtomicResponse(); |
| } |
| |
| return lat * clockPeriod(); |
| } |
| |
| void |
| BaseCache::functionalAccess(PacketPtr pkt, bool from_cpu_side) |
| { |
| Addr blk_addr = pkt->getBlockAddr(blkSize); |
| bool is_secure = pkt->isSecure(); |
| CacheBlk *blk = tags->findBlock(pkt->getAddr(), is_secure); |
| MSHR *mshr = mshrQueue.findMatch(blk_addr, is_secure); |
| |
| pkt->pushLabel(name()); |
| |
| CacheBlkPrintWrapper cbpw(blk); |
| |
| // Note that just because an L2/L3 has valid data doesn't mean an |
| // L1 doesn't have a more up-to-date modified copy that still |
| // needs to be found. As a result we always update the request if |
| // we have it, but only declare it satisfied if we are the owner. |
| |
| // see if we have data at all (owned or otherwise) |
| bool have_data = blk && blk->isValid() |
| && pkt->trySatisfyFunctional(&cbpw, blk_addr, is_secure, blkSize, |
| blk->data); |
| |
| // data we have is dirty if marked as such or if we have an |
| // in-service MSHR that is pending a modified line |
| bool have_dirty = |
| have_data && (blk->isSet(CacheBlk::DirtyBit) || |
| (mshr && mshr->inService && mshr->isPendingModified())); |
| |
| bool done = have_dirty || |
| cpuSidePort.trySatisfyFunctional(pkt) || |
| mshrQueue.trySatisfyFunctional(pkt) || |
| writeBuffer.trySatisfyFunctional(pkt) || |
| memSidePort.trySatisfyFunctional(pkt); |
| |
| DPRINTF(CacheVerbose, "%s: %s %s%s%s\n", __func__, pkt->print(), |
| (blk && blk->isValid()) ? "valid " : "", |
| have_data ? "data " : "", done ? "done " : ""); |
| |
| // We're leaving the cache, so pop cache->name() label |
| pkt->popLabel(); |
| |
| if (done) { |
| pkt->makeResponse(); |
| } else { |
| // if it came as a request from the CPU side then make sure it |
| // continues towards the memory side |
| if (from_cpu_side) { |
| memSidePort.sendFunctional(pkt); |
| } else if (cpuSidePort.isSnooping()) { |
| // if it came from the memory side, it must be a snoop request |
| // and we should only forward it if we are forwarding snoops |
| cpuSidePort.sendFunctionalSnoop(pkt); |
| } |
| } |
| } |
| |
| void |
| BaseCache::updateBlockData(CacheBlk *blk, const PacketPtr cpkt, |
| bool has_old_data) |
| { |
| DataUpdate data_update(regenerateBlkAddr(blk), blk->isSecure()); |
| if (ppDataUpdate->hasListeners()) { |
| if (has_old_data) { |
| data_update.oldData = std::vector<uint64_t>(blk->data, |
| blk->data + (blkSize / sizeof(uint64_t))); |
| } |
| } |
| |
| // Actually perform the data update |
| if (cpkt) { |
| cpkt->writeDataToBlock(blk->data, blkSize); |
| } |
| |
| if (ppDataUpdate->hasListeners()) { |
| if (cpkt) { |
| data_update.newData = std::vector<uint64_t>(blk->data, |
| blk->data + (blkSize / sizeof(uint64_t))); |
| } |
| ppDataUpdate->notify(data_update); |
| } |
| } |
| |
| void |
| BaseCache::cmpAndSwap(CacheBlk *blk, PacketPtr pkt) |
| { |
| assert(pkt->isRequest()); |
| |
| uint64_t overwrite_val; |
| bool overwrite_mem; |
| uint64_t condition_val64; |
| uint32_t condition_val32; |
| |
| int offset = pkt->getOffset(blkSize); |
| uint8_t *blk_data = blk->data + offset; |
| |
| assert(sizeof(uint64_t) >= pkt->getSize()); |
| |
| // Get a copy of the old block's contents for the probe before the update |
| DataUpdate data_update(regenerateBlkAddr(blk), blk->isSecure()); |
| if (ppDataUpdate->hasListeners()) { |
| data_update.oldData = std::vector<uint64_t>(blk->data, |
| blk->data + (blkSize / sizeof(uint64_t))); |
| } |
| |
| overwrite_mem = true; |
| // keep a copy of our possible write value, and copy what is at the |
| // memory address into the packet |
| pkt->writeData((uint8_t *)&overwrite_val); |
| pkt->setData(blk_data); |
| |
| if (pkt->req->isCondSwap()) { |
| if (pkt->getSize() == sizeof(uint64_t)) { |
| condition_val64 = pkt->req->getExtraData(); |
| overwrite_mem = !std::memcmp(&condition_val64, blk_data, |
| sizeof(uint64_t)); |
| } else if (pkt->getSize() == sizeof(uint32_t)) { |
| condition_val32 = (uint32_t)pkt->req->getExtraData(); |
| overwrite_mem = !std::memcmp(&condition_val32, blk_data, |
| sizeof(uint32_t)); |
| } else |
| panic("Invalid size for conditional read/write\n"); |
| } |
| |
| if (overwrite_mem) { |
| std::memcpy(blk_data, &overwrite_val, pkt->getSize()); |
| blk->setCoherenceBits(CacheBlk::DirtyBit); |
| |
| if (ppDataUpdate->hasListeners()) { |
| data_update.newData = std::vector<uint64_t>(blk->data, |
| blk->data + (blkSize / sizeof(uint64_t))); |
| ppDataUpdate->notify(data_update); |
| } |
| } |
| } |
| |
| QueueEntry* |
| BaseCache::getNextQueueEntry() |
| { |
| // Check both MSHR queue and write buffer for potential requests, |
| // note that null does not mean there is no request, it could |
| // simply be that it is not ready |
| MSHR *miss_mshr = mshrQueue.getNext(); |
| WriteQueueEntry *wq_entry = writeBuffer.getNext(); |
| |
| // If we got a write buffer request ready, first priority is a |
| // full write buffer, otherwise we favour the miss requests |
| if (wq_entry && (writeBuffer.isFull() || !miss_mshr)) { |
| // need to search MSHR queue for conflicting earlier miss. |
| MSHR *conflict_mshr = mshrQueue.findPending(wq_entry); |
| |
| if (conflict_mshr && conflict_mshr->order < wq_entry->order) { |
| // Service misses in order until conflict is cleared. |
| return conflict_mshr; |
| |
| // @todo Note that we ignore the ready time of the conflict here |
| } |
| |
| // No conflicts; issue write |
| return wq_entry; |
| } else if (miss_mshr) { |
| // need to check for conflicting earlier writeback |
| WriteQueueEntry *conflict_mshr = writeBuffer.findPending(miss_mshr); |
| if (conflict_mshr) { |
| // not sure why we don't check order here... it was in the |
| // original code but commented out. |
| |
| // The only way this happens is if we are |
| // doing a write and we didn't have permissions |
| // then subsequently saw a writeback (owned got evicted) |
| // We need to make sure to perform the writeback first |
| // To preserve the dirty data, then we can issue the write |
| |
| // should we return wq_entry here instead? I.e. do we |
| // have to flush writes in order? I don't think so... not |
| // for Alpha anyway. Maybe for x86? |
| return conflict_mshr; |
| |
| // @todo Note that we ignore the ready time of the conflict here |
| } |
| |
| // No conflicts; issue read |
| return miss_mshr; |
| } |
| |
| // fall through... no pending requests. Try a prefetch. |
| assert(!miss_mshr && !wq_entry); |
| if (prefetcher && mshrQueue.canPrefetch() && !isBlocked()) { |
| // If we have a miss queue slot, we can try a prefetch |
| PacketPtr pkt = prefetcher->getPacket(); |
| if (pkt) { |
| Addr pf_addr = pkt->getBlockAddr(blkSize); |
| if (tags->findBlock(pf_addr, pkt->isSecure())) { |
| DPRINTF(HWPrefetch, "Prefetch %#x has hit in cache, " |
| "dropped.\n", pf_addr); |
| prefetcher->pfHitInCache(); |
| // free the request and packet |
| delete pkt; |
| } else if (mshrQueue.findMatch(pf_addr, pkt->isSecure())) { |
| DPRINTF(HWPrefetch, "Prefetch %#x has hit in a MSHR, " |
| "dropped.\n", pf_addr); |
| prefetcher->pfHitInMSHR(); |
| // free the request and packet |
| delete pkt; |
| } else if (writeBuffer.findMatch(pf_addr, pkt->isSecure())) { |
| DPRINTF(HWPrefetch, "Prefetch %#x has hit in the " |
| "Write Buffer, dropped.\n", pf_addr); |
| prefetcher->pfHitInWB(); |
| // free the request and packet |
| delete pkt; |
| } else { |
| // Update statistic on number of prefetches issued |
| // (hwpf_mshr_misses) |
| assert(pkt->req->requestorId() < system->maxRequestors()); |
| stats.cmdStats(pkt).mshrMisses[pkt->req->requestorId()]++; |
| |
| // allocate an MSHR and return it, note |
| // that we send the packet straight away, so do not |
| // schedule the send |
| return allocateMissBuffer(pkt, curTick(), false); |
| } |
| } |
| } |
| |
| return nullptr; |
| } |
| |
| bool |
| BaseCache::handleEvictions(std::vector<CacheBlk*> &evict_blks, |
| PacketList &writebacks) |
| { |
| bool replacement = false; |
| for (const auto& blk : evict_blks) { |
| if (blk->isValid()) { |
| replacement = true; |
| |
| const MSHR* mshr = |
| mshrQueue.findMatch(regenerateBlkAddr(blk), blk->isSecure()); |
| if (mshr) { |
| // Must be an outstanding upgrade or clean request on a block |
| // we're about to replace |
| assert((!blk->isSet(CacheBlk::WritableBit) && |
| mshr->needsWritable()) || mshr->isCleaning()); |
| return false; |
| } |
| } |
| } |
| |
| // The victim will be replaced by a new entry, so increase the replacement |
| // counter if a valid block is being replaced |
| if (replacement) { |
| stats.replacements++; |
| |
| // Evict valid blocks associated to this victim block |
| for (auto& blk : evict_blks) { |
| if (blk->isValid()) { |
| evictBlock(blk, writebacks); |
| } |
| } |
| } |
| |
| return true; |
| } |
| |
| bool |
| BaseCache::updateCompressionData(CacheBlk *&blk, const uint64_t* data, |
| PacketList &writebacks) |
| { |
| // tempBlock does not exist in the tags, so don't do anything for it. |
| if (blk == tempBlock) { |
| return true; |
| } |
| |
| // The compressor is called to compress the updated data, so that its |
| // metadata can be updated. |
| Cycles compression_lat = Cycles(0); |
| Cycles decompression_lat = Cycles(0); |
| const auto comp_data = |
| compressor->compress(data, compression_lat, decompression_lat); |
| std::size_t compression_size = comp_data->getSizeBits(); |
| |
| // Get previous compressed size |
| CompressionBlk* compression_blk = static_cast<CompressionBlk*>(blk); |
| [[maybe_unused]] const std::size_t prev_size = |
| compression_blk->getSizeBits(); |
| |
| // If compressed size didn't change enough to modify its co-allocatability |
| // there is nothing to do. Otherwise we may be facing a data expansion |
| // (block passing from more compressed to less compressed state), or a |
| // data contraction (less to more). |
| bool is_data_expansion = false; |
| bool is_data_contraction = false; |
| const CompressionBlk::OverwriteType overwrite_type = |
| compression_blk->checkExpansionContraction(compression_size); |
| std::string op_name = ""; |
| if (overwrite_type == CompressionBlk::DATA_EXPANSION) { |
| op_name = "expansion"; |
| is_data_expansion = true; |
| } else if ((overwrite_type == CompressionBlk::DATA_CONTRACTION) && |
| moveContractions) { |
| op_name = "contraction"; |
| is_data_contraction = true; |
| } |
| |
| // If block changed compression state, it was possibly co-allocated with |
| // other blocks and cannot be co-allocated anymore, so one or more blocks |
| // must be evicted to make room for the expanded/contracted block |
| std::vector<CacheBlk*> evict_blks; |
| if (is_data_expansion || is_data_contraction) { |
| std::vector<CacheBlk*> evict_blks; |
| bool victim_itself = false; |
| CacheBlk *victim = nullptr; |
| if (replaceExpansions || is_data_contraction) { |
| victim = tags->findVictim(regenerateBlkAddr(blk), |
| blk->isSecure(), compression_size, evict_blks); |
| |
| // It is valid to return nullptr if there is no victim |
| if (!victim) { |
| return false; |
| } |
| |
| // If the victim block is itself the block won't need to be moved, |
| // and the victim should not be evicted |
| if (blk == victim) { |
| victim_itself = true; |
| auto it = std::find_if(evict_blks.begin(), evict_blks.end(), |
| [&blk](CacheBlk* evict_blk){ return evict_blk == blk; }); |
| evict_blks.erase(it); |
| } |
| |
| // Print victim block's information |
| DPRINTF(CacheRepl, "Data %s replacement victim: %s\n", |
| op_name, victim->print()); |
| } else { |
| // If we do not move the expanded block, we must make room for |
| // the expansion to happen, so evict every co-allocated block |
| const SuperBlk* superblock = static_cast<const SuperBlk*>( |
| compression_blk->getSectorBlock()); |
| for (auto& sub_blk : superblock->blks) { |
| if (sub_blk->isValid() && (blk != sub_blk)) { |
| evict_blks.push_back(sub_blk); |
| } |
| } |
| } |
| |
| // Try to evict blocks; if it fails, give up on update |
| if (!handleEvictions(evict_blks, writebacks)) { |
| return false; |
| } |
| |
| DPRINTF(CacheComp, "Data %s: [%s] from %d to %d bits\n", |
| op_name, blk->print(), prev_size, compression_size); |
| |
| if (!victim_itself && (replaceExpansions || is_data_contraction)) { |
| // Move the block's contents to the invalid block so that it now |
| // co-allocates with the other existing superblock entry |
| tags->moveBlock(blk, victim); |
| blk = victim; |
| compression_blk = static_cast<CompressionBlk*>(blk); |
| } |
| } |
| |
| // Update the number of data expansions/contractions |
| if (is_data_expansion) { |
| stats.dataExpansions++; |
| } else if (is_data_contraction) { |
| stats.dataContractions++; |
| } |
| |
| compression_blk->setSizeBits(compression_size); |
| compression_blk->setDecompressionLatency(decompression_lat); |
| |
| return true; |
| } |
| |
| void |
| BaseCache::satisfyRequest(PacketPtr pkt, CacheBlk *blk, bool, bool) |
| { |
| assert(pkt->isRequest()); |
| |
| assert(blk && blk->isValid()); |
| // Occasionally this is not true... if we are a lower-level cache |
| // satisfying a string of Read and ReadEx requests from |
| // upper-level caches, a Read will mark the block as shared but we |
| // can satisfy a following ReadEx anyway since we can rely on the |
| // Read requestor(s) to have buffered the ReadEx snoop and to |
| // invalidate their blocks after receiving them. |
| // assert(!pkt->needsWritable() || blk->isSet(CacheBlk::WritableBit)); |
| assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize); |
| |
| // Check RMW operations first since both isRead() and |
| // isWrite() will be true for them |
| if (pkt->cmd == MemCmd::SwapReq) { |
| if (pkt->isAtomicOp()) { |
| // Get a copy of the old block's contents for the probe before |
| // the update |
| DataUpdate data_update(regenerateBlkAddr(blk), blk->isSecure()); |
| if (ppDataUpdate->hasListeners()) { |
| data_update.oldData = std::vector<uint64_t>(blk->data, |
| blk->data + (blkSize / sizeof(uint64_t))); |
| } |
| |
| // extract data from cache and save it into the data field in |
| // the packet as a return value from this atomic op |
| int offset = tags->extractBlkOffset(pkt->getAddr()); |
| uint8_t *blk_data = blk->data + offset; |
| pkt->setData(blk_data); |
| |
| // execute AMO operation |
| (*(pkt->getAtomicOp()))(blk_data); |
| |
| // Inform of this block's data contents update |
| if (ppDataUpdate->hasListeners()) { |
| data_update.newData = std::vector<uint64_t>(blk->data, |
| blk->data + (blkSize / sizeof(uint64_t))); |
| ppDataUpdate->notify(data_update); |
| } |
| |
| // set block status to dirty |
| blk->setCoherenceBits(CacheBlk::DirtyBit); |
| } else { |
| cmpAndSwap(blk, pkt); |
| } |
| } else if (pkt->isWrite()) { |
| // we have the block in a writable state and can go ahead, |
| // note that the line may be also be considered writable in |
| // downstream caches along the path to memory, but always |
| // Exclusive, and never Modified |
| assert(blk->isSet(CacheBlk::WritableBit)); |
| // Write or WriteLine at the first cache with block in writable state |
| if (blk->checkWrite(pkt)) { |
| updateBlockData(blk, pkt, true); |
| } |
| // Always mark the line as dirty (and thus transition to the |
| // Modified state) even if we are a failed StoreCond so we |
| // supply data to any snoops that have appended themselves to |
| // this cache before knowing the store will fail. |
| blk->setCoherenceBits(CacheBlk::DirtyBit); |
| DPRINTF(CacheVerbose, "%s for %s (write)\n", __func__, pkt->print()); |
| } else if (pkt->isRead()) { |
| if (pkt->isLLSC()) { |
| blk->trackLoadLocked(pkt); |
| } |
| |
| // all read responses have a data payload |
| assert(pkt->hasRespData()); |
| pkt->setDataFromBlock(blk->data, blkSize); |
| } else if (pkt->isUpgrade()) { |
| // sanity check |
| assert(!pkt->hasSharers()); |
| |
| if (blk->isSet(CacheBlk::DirtyBit)) { |
| // we were in the Owned state, and a cache above us that |
| // has the line in Shared state needs to be made aware |
| // that the data it already has is in fact dirty |
| pkt->setCacheResponding(); |
| blk->clearCoherenceBits(CacheBlk::DirtyBit); |
| } |
| } else if (pkt->isClean()) { |
| blk->clearCoherenceBits(CacheBlk::DirtyBit); |
| } else { |
| assert(pkt->isInvalidate()); |
| invalidateBlock(blk); |
| DPRINTF(CacheVerbose, "%s for %s (invalidation)\n", __func__, |
| pkt->print()); |
| } |
| } |
| |
| ///////////////////////////////////////////////////// |
| // |
| // Access path: requests coming in from the CPU side |
| // |
| ///////////////////////////////////////////////////// |
| Cycles |
| BaseCache::calculateTagOnlyLatency(const uint32_t delay, |
| const Cycles lookup_lat) const |
| { |
| // A tag-only access has to wait for the packet to arrive in order to |
| // perform the tag lookup. |
| return ticksToCycles(delay) + lookup_lat; |
| } |
| |
| Cycles |
| BaseCache::calculateAccessLatency(const CacheBlk* blk, const uint32_t delay, |
| const Cycles lookup_lat) const |
| { |
| Cycles lat(0); |
| |
| if (blk != nullptr) { |
| // As soon as the access arrives, for sequential accesses first access |
| // tags, then the data entry. In the case of parallel accesses the |
| // latency is dictated by the slowest of tag and data latencies. |
| if (sequentialAccess) { |
| lat = ticksToCycles(delay) + lookup_lat + dataLatency; |
| } else { |
| lat = ticksToCycles(delay) + std::max(lookup_lat, dataLatency); |
| } |
| |
| // Check if the block to be accessed is available. If not, apply the |
| // access latency on top of when the block is ready to be accessed. |
| const Tick tick = curTick() + delay; |
| const Tick when_ready = blk->getWhenReady(); |
| if (when_ready > tick && |
| ticksToCycles(when_ready - tick) > lat) { |
| lat += ticksToCycles(when_ready - tick); |
| } |
| } else { |
| // In case of a miss, we neglect the data access in a parallel |
| // configuration (i.e., the data access will be stopped as soon as |
| // we find out it is a miss), and use the tag-only latency. |
| lat = calculateTagOnlyLatency(delay, lookup_lat); |
| } |
| |
| return lat; |
| } |
| |
| bool |
| BaseCache::access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat, |
| PacketList &writebacks) |
| { |
| // sanity check |
| assert(pkt->isRequest()); |
| |
| gem5_assert(!(isReadOnly && pkt->isWrite()), |
| "Should never see a write in a read-only cache %s\n", |
| name()); |
| |
| // Access block in the tags |
| Cycles tag_latency(0); |
| blk = tags->accessBlock(pkt, tag_latency); |
| |
| DPRINTF(Cache, "%s for %s %s\n", __func__, pkt->print(), |
| blk ? "hit " + blk->print() : "miss"); |
| |
| if (pkt->req->isCacheMaintenance()) { |
| // A cache maintenance operation is always forwarded to the |
| // memory below even if the block is found in dirty state. |
| |
| // We defer any changes to the state of the block until we |
| // create and mark as in service the mshr for the downstream |
| // packet. |
| |
| // Calculate access latency on top of when the packet arrives. This |
| // takes into account the bus delay. |
| lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency); |
| |
| return false; |
| } |
| |
| if (pkt->isEviction()) { |
| // We check for presence of block in above caches before issuing |
| // Writeback or CleanEvict to write buffer. Therefore the only |
| // possible cases can be of a CleanEvict packet coming from above |
| // encountering a Writeback generated in this cache peer cache and |
| // waiting in the write buffer. Cases of upper level peer caches |
| // generating CleanEvict and Writeback or simply CleanEvict and |
| // CleanEvict almost simultaneously will be caught by snoops sent out |
| // by crossbar. |
| WriteQueueEntry *wb_entry = writeBuffer.findMatch(pkt->getAddr(), |
| pkt->isSecure()); |
| if (wb_entry) { |
| assert(wb_entry->getNumTargets() == 1); |
| PacketPtr wbPkt = wb_entry->getTarget()->pkt; |
| assert(wbPkt->isWriteback()); |
| |
| if (pkt->isCleanEviction()) { |
| // The CleanEvict and WritebackClean snoops into other |
| // peer caches of the same level while traversing the |
| // crossbar. If a copy of the block is found, the |
| // packet is deleted in the crossbar. Hence, none of |
| // the other upper level caches connected to this |
| // cache have the block, so we can clear the |
| // BLOCK_CACHED flag in the Writeback if set and |
| // discard the CleanEvict by returning true. |
| wbPkt->clearBlockCached(); |
| |
| // A clean evict does not need to access the data array |
| lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency); |
| |
| return true; |
| } else { |
| assert(pkt->cmd == MemCmd::WritebackDirty); |
| // Dirty writeback from above trumps our clean |
| // writeback... discard here |
| // Note: markInService will remove entry from writeback buffer. |
| markInService(wb_entry); |
| delete wbPkt; |
| } |
| } |
| } |
| |
| // The critical latency part of a write depends only on the tag access |
| if (pkt->isWrite()) { |
| lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency); |
| } |
| |
| // Writeback handling is special case. We can write the block into |
| // the cache without having a writeable copy (or any copy at all). |
| if (pkt->isWriteback()) { |
| assert(blkSize == pkt->getSize()); |
| |
| // we could get a clean writeback while we are having |
| // outstanding accesses to a block, do the simple thing for |
| // now and drop the clean writeback so that we do not upset |
| // any ordering/decisions about ownership already taken |
| if (pkt->cmd == MemCmd::WritebackClean && |
| mshrQueue.findMatch(pkt->getAddr(), pkt->isSecure())) { |
| DPRINTF(Cache, "Clean writeback %#llx to block with MSHR, " |
| "dropping\n", pkt->getAddr()); |
| |
| // A writeback searches for the block, then writes the data. |
| // As the writeback is being dropped, the data is not touched, |
| // and we just had to wait for the time to find a match in the |
| // MSHR. As of now assume a mshr queue search takes as long as |
| // a tag lookup for simplicity. |
| return true; |
| } |
| |
| const bool has_old_data = blk && blk->isValid(); |
| if (!blk) { |
| // need to do a replacement |
| blk = allocateBlock(pkt, writebacks); |
| if (!blk) { |
| // no replaceable block available: give up, fwd to next level. |
| incMissCount(pkt); |
| return false; |
| } |
| |
| blk->setCoherenceBits(CacheBlk::ReadableBit); |
| } else if (compressor) { |
| // This is an overwrite to an existing block, therefore we need |
| // to check for data expansion (i.e., block was compressed with |
| // a smaller size, and now it doesn't fit the entry anymore). |
| // If that is the case we might need to evict blocks. |
| if (!updateCompressionData(blk, pkt->getConstPtr<uint64_t>(), |
| writebacks)) { |
| invalidateBlock(blk); |
| return false; |
| } |
| } |
| |
| // only mark the block dirty if we got a writeback command, |
| // and leave it as is for a clean writeback |
| if (pkt->cmd == MemCmd::WritebackDirty) { |
| // TODO: the coherent cache can assert that the dirty bit is set |
| blk->setCoherenceBits(CacheBlk::DirtyBit); |
| } |
| // if the packet does not have sharers, it is passing |
| // writable, and we got the writeback in Modified or Exclusive |
| // state, if not we are in the Owned or Shared state |
| if (!pkt->hasSharers()) { |
| blk->setCoherenceBits(CacheBlk::WritableBit); |
| } |
| // nothing else to do; writeback doesn't expect response |
| assert(!pkt->needsResponse()); |
| |
| updateBlockData(blk, pkt, has_old_data); |
| DPRINTF(Cache, "%s new state is %s\n", __func__, blk->print()); |
| incHitCount(pkt); |
| |
| // When the packet metadata arrives, the tag lookup will be done while |
| // the payload is arriving. Then the block will be ready to access as |
| // soon as the fill is done |
| blk->setWhenReady(clockEdge(fillLatency) + pkt->headerDelay + |
| std::max(cyclesToTicks(tag_latency), (uint64_t)pkt->payloadDelay)); |
| |
| return true; |
| } else if (pkt->cmd == MemCmd::CleanEvict) { |
| // A CleanEvict does not need to access the data array |
| lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency); |
| |
| if (blk) { |
| // Found the block in the tags, need to stop CleanEvict from |
| // propagating further down the hierarchy. Returning true will |
| // treat the CleanEvict like a satisfied write request and delete |
| // it. |
| return true; |
| } |
| // We didn't find the block here, propagate the CleanEvict further |
| // down the memory hierarchy. Returning false will treat the CleanEvict |
| // like a Writeback which could not find a replaceable block so has to |
| // go to next level. |
| return false; |
| } else if (pkt->cmd == MemCmd::WriteClean) { |
| // WriteClean handling is a special case. We can allocate a |
| // block directly if it doesn't exist and we can update the |
| // block immediately. The WriteClean transfers the ownership |
| // of the block as well. |
| assert(blkSize == pkt->getSize()); |
| |
| const bool has_old_data = blk && blk->isValid(); |
| if (!blk) { |
| if (pkt->writeThrough()) { |
| // if this is a write through packet, we don't try to |
| // allocate if the block is not present |
| return false; |
| } else { |
| // a writeback that misses needs to allocate a new block |
| blk = allocateBlock(pkt, writebacks); |
| if (!blk) { |
| // no replaceable block available: give up, fwd to |
| // next level. |
| incMissCount(pkt); |
| return false; |
| } |
| |
| blk->setCoherenceBits(CacheBlk::ReadableBit); |
| } |
| } else if (compressor) { |
| // This is an overwrite to an existing block, therefore we need |
| // to check for data expansion (i.e., block was compressed with |
| // a smaller size, and now it doesn't fit the entry anymore). |
| // If that is the case we might need to evict blocks. |
| if (!updateCompressionData(blk, pkt->getConstPtr<uint64_t>(), |
| writebacks)) { |
| invalidateBlock(blk); |
| return false; |
| } |
| } |
| |
| // at this point either this is a writeback or a write-through |
| // write clean operation and the block is already in this |
| // cache, we need to update the data and the block flags |
| assert(blk); |
| // TODO: the coherent cache can assert that the dirty bit is set |
| if (!pkt->writeThrough()) { |
| blk->setCoherenceBits(CacheBlk::DirtyBit); |
| } |
| // nothing else to do; writeback doesn't expect response |
| assert(!pkt->needsResponse()); |
| |
| updateBlockData(blk, pkt, has_old_data); |
| DPRINTF(Cache, "%s new state is %s\n", __func__, blk->print()); |
| |
| incHitCount(pkt); |
| |
| // When the packet metadata arrives, the tag lookup will be done while |
| // the payload is arriving. Then the block will be ready to access as |
| // soon as the fill is done |
| blk->setWhenReady(clockEdge(fillLatency) + pkt->headerDelay + |
| std::max(cyclesToTicks(tag_latency), (uint64_t)pkt->payloadDelay)); |
| |
| // If this a write-through packet it will be sent to cache below |
| return !pkt->writeThrough(); |
| } else if (blk && (pkt->needsWritable() ? |
| blk->isSet(CacheBlk::WritableBit) : |
| blk->isSet(CacheBlk::ReadableBit))) { |
| // OK to satisfy access |
| incHitCount(pkt); |
| |
| // Calculate access latency based on the need to access the data array |
| if (pkt->isRead()) { |
| lat = calculateAccessLatency(blk, pkt->headerDelay, tag_latency); |
| |
| // When a block is compressed, it must first be decompressed |
| // before being read. This adds to the access latency. |
| if (compressor) { |
| lat += compressor->getDecompressionLatency(blk); |
| } |
| } else { |
| lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency); |
| } |
| |
| satisfyRequest(pkt, blk); |
| maintainClusivity(pkt->fromCache(), blk); |
| |
| return true; |
| } |
| |
| // Can't satisfy access normally... either no block (blk == nullptr) |
| // or have block but need writable |
| |
| incMissCount(pkt); |
| |
| lat = calculateAccessLatency(blk, pkt->headerDelay, tag_latency); |
| |
| if (!blk && pkt->isLLSC() && pkt->isWrite()) { |
| // complete miss on store conditional... just give up now |
| pkt->req->setExtraData(0); |
| return true; |
| } |
| |
| return false; |
| } |
| |
| void |
| BaseCache::maintainClusivity(bool from_cache, CacheBlk *blk) |
| { |
| if (from_cache && blk && blk->isValid() && |
| !blk->isSet(CacheBlk::DirtyBit) && clusivity == enums::mostly_excl) { |
| // if we have responded to a cache, and our block is still |
| // valid, but not dirty, and this cache is mostly exclusive |
| // with respect to the cache above, drop the block |
| invalidateBlock(blk); |
| } |
| } |
| |
| CacheBlk* |
| BaseCache::handleFill(PacketPtr pkt, CacheBlk *blk, PacketList &writebacks, |
| bool allocate) |
| { |
| assert(pkt->isResponse()); |
| Addr addr = pkt->getAddr(); |
| bool is_secure = pkt->isSecure(); |
| const bool has_old_data = blk && blk->isValid(); |
| const std::string old_state = (debug::Cache && blk) ? blk->print() : ""; |
| |
| // When handling a fill, we should have no writes to this line. |
| assert(addr == pkt->getBlockAddr(blkSize)); |
| assert(!writeBuffer.findMatch(addr, is_secure)); |
| |
| if (!blk) { |
| // better have read new data... |
| assert(pkt->hasData() || pkt->cmd == MemCmd::InvalidateResp); |
| |
| // need to do a replacement if allocating, otherwise we stick |
| // with the temporary storage |
| blk = allocate ? allocateBlock(pkt, writebacks) : nullptr; |
| |
| if (!blk) { |
| // No replaceable block or a mostly exclusive |
| // cache... just use temporary storage to complete the |
| // current request and then get rid of it |
| blk = tempBlock; |
| tempBlock->insert(addr, is_secure); |
| DPRINTF(Cache, "using temp block for %#llx (%s)\n", addr, |
| is_secure ? "s" : "ns"); |
| } |
| } else { |
| // existing block... probably an upgrade |
| // don't clear block status... if block is already dirty we |
| // don't want to lose that |
| } |
| |
| // Block is guaranteed to be valid at this point |
| assert(blk->isValid()); |
| assert(blk->isSecure() == is_secure); |
| assert(regenerateBlkAddr(blk) == addr); |
| |
| blk->setCoherenceBits(CacheBlk::ReadableBit); |
| |
| // sanity check for whole-line writes, which should always be |
| // marked as writable as part of the fill, and then later marked |
| // dirty as part of satisfyRequest |
| if (pkt->cmd == MemCmd::InvalidateResp) { |
| assert(!pkt->hasSharers()); |
| } |
| |
| // here we deal with setting the appropriate state of the line, |
| // and we start by looking at the hasSharers flag, and ignore the |
| // cacheResponding flag (normally signalling dirty data) if the |
| // packet has sharers, thus the line is never allocated as Owned |
| // (dirty but not writable), and always ends up being either |
| // Shared, Exclusive or Modified, see Packet::setCacheResponding |
| // for more details |
| if (!pkt->hasSharers()) { |
| // we could get a writable line from memory (rather than a |
| // cache) even in a read-only cache, note that we set this bit |
| // even for a read-only cache, possibly revisit this decision |
| blk->setCoherenceBits(CacheBlk::WritableBit); |
| |
| // check if we got this via cache-to-cache transfer (i.e., from a |
| // cache that had the block in Modified or Owned state) |
| if (pkt->cacheResponding()) { |
| // we got the block in Modified state, and invalidated the |
| // owners copy |
| blk->setCoherenceBits(CacheBlk::DirtyBit); |
| |
| gem5_assert(!isReadOnly, "Should never see dirty snoop response " |
| "in read-only cache %s\n", name()); |
| |
| } |
| } |
| |
| DPRINTF(Cache, "Block addr %#llx (%s) moving from %s to %s\n", |
| addr, is_secure ? "s" : "ns", old_state, blk->print()); |
| |
| // if we got new data, copy it in (checking for a read response |
| // and a response that has data is the same in the end) |
| if (pkt->isRead()) { |
| // sanity checks |
| assert(pkt->hasData()); |
| assert(pkt->getSize() == blkSize); |
| |
| updateBlockData(blk, pkt, has_old_data); |
| } |
| // The block will be ready when the payload arrives and the fill is done |
| blk->setWhenReady(clockEdge(fillLatency) + pkt->headerDelay + |
| pkt->payloadDelay); |
| |
| return blk; |
| } |
| |
| CacheBlk* |
| BaseCache::allocateBlock(const PacketPtr pkt, PacketList &writebacks) |
| { |
| // Get address |
| const Addr addr = pkt->getAddr(); |
| |
| // Get secure bit |
| const bool is_secure = pkt->isSecure(); |
| |
| // Block size and compression related access latency. Only relevant if |
| // using a compressor, otherwise there is no extra delay, and the block |
| // is fully sized |
| std::size_t blk_size_bits = blkSize*8; |
| Cycles compression_lat = Cycles(0); |
| Cycles decompression_lat = Cycles(0); |
| |
| // If a compressor is being used, it is called to compress data before |
| // insertion. Although in Gem5 the data is stored uncompressed, even if a |
| // compressor is used, the compression/decompression methods are called to |
| // calculate the amount of extra cycles needed to read or write compressed |
| // blocks. |
| if (compressor && pkt->hasData()) { |
| const auto comp_data = compressor->compress( |
| pkt->getConstPtr<uint64_t>(), compression_lat, decompression_lat); |
| blk_size_bits = comp_data->getSizeBits(); |
| } |
| |
| // Find replacement victim |
| std::vector<CacheBlk*> evict_blks; |
| CacheBlk *victim = tags->findVictim(addr, is_secure, blk_size_bits, |
| evict_blks); |
| |
| // It is valid to return nullptr if there is no victim |
| if (!victim) |
| return nullptr; |
| |
| // Print victim block's information |
| DPRINTF(CacheRepl, "Replacement victim: %s\n", victim->print()); |
| |
| // Try to evict blocks; if it fails, give up on allocation |
| if (!handleEvictions(evict_blks, writebacks)) { |
| return nullptr; |
| } |
| |
| // Insert new block at victimized entry |
| tags->insertBlock(pkt, victim); |
| |
| // If using a compressor, set compression data. This must be done after |
| // insertion, as the compression bit may be set. |
| if (compressor) { |
| compressor->setSizeBits(victim, blk_size_bits); |
| compressor->setDecompressionLatency(victim, decompression_lat); |
| } |
| |
| return victim; |
| } |
| |
| void |
| BaseCache::invalidateBlock(CacheBlk *blk) |
| { |
| // If block is still marked as prefetched, then it hasn't been used |
| if (blk->wasPrefetched()) { |
| prefetcher->prefetchUnused(); |
| } |
| |
| // Notify that the data contents for this address are no longer present |
| updateBlockData(blk, nullptr, blk->isValid()); |
| |
| // If handling a block present in the Tags, let it do its invalidation |
| // process, which will update stats and invalidate the block itself |
| if (blk != tempBlock) { |
| tags->invalidate(blk); |
| } else { |
| tempBlock->invalidate(); |
| } |
| } |
| |
| void |
| BaseCache::evictBlock(CacheBlk *blk, PacketList &writebacks) |
| { |
| PacketPtr pkt = evictBlock(blk); |
| if (pkt) { |
| writebacks.push_back(pkt); |
| } |
| } |
| |
| PacketPtr |
| BaseCache::writebackBlk(CacheBlk *blk) |
| { |
| gem5_assert(!isReadOnly || writebackClean, |
| "Writeback from read-only cache"); |
| assert(blk && blk->isValid() && |
| (blk->isSet(CacheBlk::DirtyBit) || writebackClean)); |
| |
| stats.writebacks[Request::wbRequestorId]++; |
| |
| RequestPtr req = std::make_shared<Request>( |
| regenerateBlkAddr(blk), blkSize, 0, Request::wbRequestorId); |
| |
| if (blk->isSecure()) |
| req->setFlags(Request::SECURE); |
| |
| req->taskId(blk->getTaskId()); |
| |
| PacketPtr pkt = |
| new Packet(req, blk->isSet(CacheBlk::DirtyBit) ? |
| MemCmd::WritebackDirty : MemCmd::WritebackClean); |
| |
| DPRINTF(Cache, "Create Writeback %s writable: %d, dirty: %d\n", |
| pkt->print(), blk->isSet(CacheBlk::WritableBit), |
| blk->isSet(CacheBlk::DirtyBit)); |
| |
| if (blk->isSet(CacheBlk::WritableBit)) { |
| // not asserting shared means we pass the block in modified |
| // state, mark our own block non-writeable |
| blk->clearCoherenceBits(CacheBlk::WritableBit); |
| } else { |
| // we are in the Owned state, tell the receiver |
| pkt->setHasSharers(); |
| } |
| |
| // make sure the block is not marked dirty |
| blk->clearCoherenceBits(CacheBlk::DirtyBit); |
| |
| pkt->allocate(); |
| pkt->setDataFromBlock(blk->data, blkSize); |
| |
| // When a block is compressed, it must first be decompressed before being |
| // sent for writeback. |
| if (compressor) { |
| pkt->payloadDelay = compressor->getDecompressionLatency(blk); |
| } |
| |
| return pkt; |
| } |
| |
| PacketPtr |
| BaseCache::writecleanBlk(CacheBlk *blk, Request::Flags dest, PacketId id) |
| { |
| RequestPtr req = std::make_shared<Request>( |
| regenerateBlkAddr(blk), blkSize, 0, Request::wbRequestorId); |
| |
| if (blk->isSecure()) { |
| req->setFlags(Request::SECURE); |
| } |
| req->taskId(blk->getTaskId()); |
| |
| PacketPtr pkt = new Packet(req, MemCmd::WriteClean, blkSize, id); |
| |
| if (dest) { |
| req->setFlags(dest); |
| pkt->setWriteThrough(); |
| } |
| |
| DPRINTF(Cache, "Create %s writable: %d, dirty: %d\n", pkt->print(), |
| blk->isSet(CacheBlk::WritableBit), blk->isSet(CacheBlk::DirtyBit)); |
| |
| if (blk->isSet(CacheBlk::WritableBit)) { |
| // not asserting shared means we pass the block in modified |
| // state, mark our own block non-writeable |
| blk->clearCoherenceBits(CacheBlk::WritableBit); |
| } else { |
| // we are in the Owned state, tell the receiver |
| pkt->setHasSharers(); |
| } |
| |
| // make sure the block is not marked dirty |
| blk->clearCoherenceBits(CacheBlk::DirtyBit); |
| |
| pkt->allocate(); |
| pkt->setDataFromBlock(blk->data, blkSize); |
| |
| // When a block is compressed, it must first be decompressed before being |
| // sent for writeback. |
| if (compressor) { |
| pkt->payloadDelay = compressor->getDecompressionLatency(blk); |
| } |
| |
| return pkt; |
| } |
| |
| |
| void |
| BaseCache::memWriteback() |
| { |
| tags->forEachBlk([this](CacheBlk &blk) { writebackVisitor(blk); }); |
| } |
| |
| void |
| BaseCache::memInvalidate() |
| { |
| tags->forEachBlk([this](CacheBlk &blk) { invalidateVisitor(blk); }); |
| } |
| |
| bool |
| BaseCache::isDirty() const |
| { |
| return tags->anyBlk([](CacheBlk &blk) { |
| return blk.isSet(CacheBlk::DirtyBit); }); |
| } |
| |
| bool |
| BaseCache::coalesce() const |
| { |
| return writeAllocator && writeAllocator->coalesce(); |
| } |
| |
| void |
| BaseCache::writebackVisitor(CacheBlk &blk) |
| { |
| if (blk.isSet(CacheBlk::DirtyBit)) { |
| assert(blk.isValid()); |
| |
| RequestPtr request = std::make_shared<Request>( |
| regenerateBlkAddr(&blk), blkSize, 0, Request::funcRequestorId); |
| |
| request->taskId(blk.getTaskId()); |
| if (blk.isSecure()) { |
| request->setFlags(Request::SECURE); |
| } |
| |
| Packet packet(request, MemCmd::WriteReq); |
| packet.dataStatic(blk.data); |
| |
| memSidePort.sendFunctional(&packet); |
| |
| blk.clearCoherenceBits(CacheBlk::DirtyBit); |
| } |
| } |
| |
| void |
| BaseCache::invalidateVisitor(CacheBlk &blk) |
| { |
| if (blk.isSet(CacheBlk::DirtyBit)) |
| warn_once("Invalidating dirty cache lines. " \ |
| "Expect things to break.\n"); |
| |
| if (blk.isValid()) { |
| assert(!blk.isSet(CacheBlk::DirtyBit)); |
| invalidateBlock(&blk); |
| } |
| } |
| |
| Tick |
| BaseCache::nextQueueReadyTime() const |
| { |
| Tick nextReady = std::min(mshrQueue.nextReadyTime(), |
| writeBuffer.nextReadyTime()); |
| |
| // Don't signal prefetch ready time if no MSHRs available |
| // Will signal once enoguh MSHRs are deallocated |
| if (prefetcher && mshrQueue.canPrefetch() && !isBlocked()) { |
| nextReady = std::min(nextReady, |
| prefetcher->nextPrefetchReadyTime()); |
| } |
| |
| return nextReady; |
| } |
| |
| |
| bool |
| BaseCache::sendMSHRQueuePacket(MSHR* mshr) |
| { |
| assert(mshr); |
| |
| // use request from 1st target |
| PacketPtr tgt_pkt = mshr->getTarget()->pkt; |
| |
| DPRINTF(Cache, "%s: MSHR %s\n", __func__, tgt_pkt->print()); |
| |
| // if the cache is in write coalescing mode or (additionally) in |
| // no allocation mode, and we have a write packet with an MSHR |
| // that is not a whole-line write (due to incompatible flags etc), |
| // then reset the write mode |
| if (writeAllocator && writeAllocator->coalesce() && tgt_pkt->isWrite()) { |
| if (!mshr->isWholeLineWrite()) { |
| // if we are currently write coalescing, hold on the |
| // MSHR as many cycles extra as we need to completely |
| // write a cache line |
| if (writeAllocator->delay(mshr->blkAddr)) { |
| Tick delay = blkSize / tgt_pkt->getSize() * clockPeriod(); |
| DPRINTF(CacheVerbose, "Delaying pkt %s %llu ticks to allow " |
| "for write coalescing\n", tgt_pkt->print(), delay); |
| mshrQueue.delay(mshr, delay); |
| return false; |
| } else { |
| writeAllocator->reset(); |
| } |
| } else { |
| writeAllocator->resetDelay(mshr->blkAddr); |
| } |
| } |
| |
| CacheBlk *blk = tags->findBlock(mshr->blkAddr, mshr->isSecure); |
| |
| // either a prefetch that is not present upstream, or a normal |
| // MSHR request, proceed to get the packet to send downstream |
| PacketPtr pkt = createMissPacket(tgt_pkt, blk, mshr->needsWritable(), |
| mshr->isWholeLineWrite()); |
| |
| mshr->isForward = (pkt == nullptr); |
| |
| if (mshr->isForward) { |
| // not a cache block request, but a response is expected |
| // make copy of current packet to forward, keep current |
| // copy for response handling |
| pkt = new Packet(tgt_pkt, false, true); |
| assert(!pkt->isWrite()); |
| } |
| |
| // play it safe and append (rather than set) the sender state, |
| // as forwarded packets may already have existing state |
| pkt->pushSenderState(mshr); |
| |
| if (pkt->isClean() && blk && blk->isSet(CacheBlk::DirtyBit)) { |
| // A cache clean opearation is looking for a dirty block. Mark |
| // the packet so that the destination xbar can determine that |
| // there will be a follow-up write packet as well. |
| pkt->setSatisfied(); |
| } |
| |
| if (!memSidePort.sendTimingReq(pkt)) { |
| // we are awaiting a retry, but we |
| // delete the packet and will be creating a new packet |
| // when we get the opportunity |
| delete pkt; |
| |
| // note that we have now masked any requestBus and |
| // schedSendEvent (we will wait for a retry before |
| // doing anything), and this is so even if we do not |
| // care about this packet and might override it before |
| // it gets retried |
| return true; |
| } else { |
| // As part of the call to sendTimingReq the packet is |
| // forwarded to all neighbouring caches (and any caches |
| // above them) as a snoop. Thus at this point we know if |
| // any of the neighbouring caches are responding, and if |
| // so, we know it is dirty, and we can determine if it is |
| // being passed as Modified, making our MSHR the ordering |
| // point |
| bool pending_modified_resp = !pkt->hasSharers() && |
| pkt->cacheResponding(); |
| markInService(mshr, pending_modified_resp); |
| |
| if (pkt->isClean() && blk && blk->isSet(CacheBlk::DirtyBit)) { |
| // A cache clean opearation is looking for a dirty |
| // block. If a dirty block is encountered a WriteClean |
| // will update any copies to the path to the memory |
| // until the point of reference. |
| DPRINTF(CacheVerbose, "%s: packet %s found block: %s\n", |
| __func__, pkt->print(), blk->print()); |
| PacketPtr wb_pkt = writecleanBlk(blk, pkt->req->getDest(), |
| pkt->id); |
| PacketList writebacks; |
| writebacks.push_back(wb_pkt); |
| doWritebacks(writebacks, 0); |
| } |
| |
| return false; |
| } |
| } |
| |
| bool |
| BaseCache::sendWriteQueuePacket(WriteQueueEntry* wq_entry) |
| { |
| assert(wq_entry); |
| |
| // always a single target for write queue entries |
| PacketPtr tgt_pkt = wq_entry->getTarget()->pkt; |
| |
| DPRINTF(Cache, "%s: write %s\n", __func__, tgt_pkt->print()); |
| |
| // forward as is, both for evictions and uncacheable writes |
| if (!memSidePort.sendTimingReq(tgt_pkt)) { |
| // note that we have now masked any requestBus and |
| // schedSendEvent (we will wait for a retry before |
| // doing anything), and this is so even if we do not |
| // care about this packet and might override it before |
| // it gets retried |
| return true; |
| } else { |
| markInService(wq_entry); |
| return false; |
| } |
| } |
| |
| void |
| BaseCache::serialize(CheckpointOut &cp) const |
| { |
| bool dirty(isDirty()); |
| |
| if (dirty) { |
| warn("*** The cache still contains dirty data. ***\n"); |
| warn(" Make sure to drain the system using the correct flags.\n"); |
| warn(" This checkpoint will not restore correctly " \ |
| "and dirty data in the cache will be lost!\n"); |
| } |
| |
| // Since we don't checkpoint the data in the cache, any dirty data |
| // will be lost when restoring from a checkpoint of a system that |
| // wasn't drained properly. Flag the checkpoint as invalid if the |
| // cache contains dirty data. |
| bool bad_checkpoint(dirty); |
| SERIALIZE_SCALAR(bad_checkpoint); |
| } |
| |
| void |
| BaseCache::unserialize(CheckpointIn &cp) |
| { |
| bool bad_checkpoint; |
| UNSERIALIZE_SCALAR(bad_checkpoint); |
| if (bad_checkpoint) { |
| fatal("Restoring from checkpoints with dirty caches is not " |
| "supported in the classic memory system. Please remove any " |
| "caches or drain them properly before taking checkpoints.\n"); |
| } |
| } |
| |
| |
| BaseCache::CacheCmdStats::CacheCmdStats(BaseCache &c, |
| const std::string &name) |
| : statistics::Group(&c, name.c_str()), cache(c), |
| ADD_STAT(hits, statistics::units::Count::get(), |
| ("number of " + name + " hits").c_str()), |
| ADD_STAT(misses, statistics::units::Count::get(), |
| ("number of " + name + " misses").c_str()), |
| ADD_STAT(hitLatency, statistics::units::Tick::get(), |
| ("number of " + name + " hit ticks").c_str()), |
| ADD_STAT(missLatency, statistics::units::Tick::get(), |
| ("number of " + name + " miss ticks").c_str()), |
| ADD_STAT(accesses, statistics::units::Count::get(), |
| ("number of " + name + " accesses(hits+misses)").c_str()), |
| ADD_STAT(missRate, statistics::units::Ratio::get(), |
| ("miss rate for " + name + " accesses").c_str()), |
| ADD_STAT(avgMissLatency, statistics::units::Rate< |
| statistics::units::Tick, statistics::units::Count>::get(), |
| ("average " + name + " miss latency").c_str()), |
| ADD_STAT(mshrHits, statistics::units::Count::get(), |
| ("number of " + name + " MSHR hits").c_str()), |
| ADD_STAT(mshrMisses, statistics::units::Count::get(), |
| ("number of " + name + " MSHR misses").c_str()), |
| ADD_STAT(mshrUncacheable, statistics::units::Count::get(), |
| ("number of " + name + " MSHR uncacheable").c_str()), |
| ADD_STAT(mshrMissLatency, statistics::units::Tick::get(), |
| ("number of " + name + " MSHR miss ticks").c_str()), |
| ADD_STAT(mshrUncacheableLatency, statistics::units::Tick::get(), |
| ("number of " + name + " MSHR uncacheable ticks").c_str()), |
| ADD_STAT(mshrMissRate, statistics::units::Ratio::get(), |
| ("mshr miss rate for " + name + " accesses").c_str()), |
| ADD_STAT(avgMshrMissLatency, statistics::units::Rate< |
| statistics::units::Tick, statistics::units::Count>::get(), |
| ("average " + name + " mshr miss latency").c_str()), |
| ADD_STAT(avgMshrUncacheableLatency, statistics::units::Rate< |
| statistics::units::Tick, statistics::units::Count>::get(), |
| ("average " + name + " mshr uncacheable latency").c_str()) |
| { |
| } |
| |
| void |
| BaseCache::CacheCmdStats::regStatsFromParent() |
| { |
| using namespace statistics; |
| |
| statistics::Group::regStats(); |
| System *system = cache.system; |
| const auto max_requestors = system->maxRequestors(); |
| |
| hits |
| .init(max_requestors) |
| .flags(total | nozero | nonan) |
| ; |
| for (int i = 0; i < max_requestors; i++) { |
| hits.subname(i, system->getRequestorName(i)); |
| } |
| |
| // Miss statistics |
| misses |
| .init(max_requestors) |
| .flags(total | nozero | nonan) |
| ; |
| for (int i = 0; i < max_requestors; i++) { |
| misses.subname(i, system->getRequestorName(i)); |
| } |
| |
| // Hit latency statistics |
| hitLatency |
| .init(max_requestors) |
| .flags(total | nozero | nonan) |
| ; |
| for (int i = 0; i < max_requestors; i++) { |
| hitLatency.subname(i, system->getRequestorName(i)); |
| } |
| |
| // Miss latency statistics |
| missLatency |
| .init(max_requestors) |
| .flags(total | nozero | nonan) |
| ; |
| for (int i = 0; i < max_requestors; i++) { |
| missLatency.subname(i, system->getRequestorName(i)); |
| } |
| |
| // access formulas |
| accesses.flags(total | nozero | nonan); |
| accesses = hits + misses; |
| for (int i = 0; i < max_requestors; i++) { |
| accesses.subname(i, system->getRequestorName(i)); |
| } |
| |
| // miss rate formulas |
| missRate.flags(total | nozero | nonan); |
| missRate = misses / accesses; |
| for (int i = 0; i < max_requestors; i++) { |
| missRate.subname(i, system->getRequestorName(i)); |
| } |
| |
| // miss latency formulas |
| avgMissLatency.flags(total | nozero | nonan); |
| avgMissLatency = missLatency / misses; |
| for (int i = 0; i < max_requestors; i++) { |
| avgMissLatency.subname(i, system->getRequestorName(i)); |
| } |
| |
| // MSHR statistics |
| // MSHR hit statistics |
| mshrHits |
| .init(max_requestors) |
| .flags(total | nozero | nonan) |
| ; |
| for (int i = 0; i < max_requestors; i++) { |
| mshrHits.subname(i, system->getRequestorName(i)); |
| } |
| |
| // MSHR miss statistics |
| mshrMisses |
| .init(max_requestors) |
| .flags(total | nozero | nonan) |
| ; |
| for (int i = 0; i < max_requestors; i++) { |
| mshrMisses.subname(i, system->getRequestorName(i)); |
| } |
| |
| // MSHR miss latency statistics |
| mshrMissLatency |
| .init(max_requestors) |
| .flags(total | nozero | nonan) |
| ; |
| for (int i = 0; i < max_requestors; i++) { |
| mshrMissLatency.subname(i, system->getRequestorName(i)); |
| } |
| |
| // MSHR uncacheable statistics |
| mshrUncacheable |
| .init(max_requestors) |
| .flags(total | nozero | nonan) |
| ; |
| for (int i = 0; i < max_requestors; i++) { |
| mshrUncacheable.subname(i, system->getRequestorName(i)); |
| } |
| |
| // MSHR miss latency statistics |
| mshrUncacheableLatency |
| .init(max_requestors) |
| .flags(total | nozero | nonan) |
| ; |
| for (int i = 0; i < max_requestors; i++) { |
| mshrUncacheableLatency.subname(i, system->getRequestorName(i)); |
| } |
| |
| // MSHR miss rate formulas |
| mshrMissRate.flags(total | nozero | nonan); |
| mshrMissRate = mshrMisses / accesses; |
| |
| for (int i = 0; i < max_requestors; i++) { |
| mshrMissRate.subname(i, system->getRequestorName(i)); |
| } |
| |
| // mshrMiss latency formulas |
| avgMshrMissLatency.flags(total | nozero | nonan); |
| avgMshrMissLatency = mshrMissLatency / mshrMisses; |
| for (int i = 0; i < max_requestors; i++) { |
| avgMshrMissLatency.subname(i, system->getRequestorName(i)); |
| } |
| |
| // mshrUncacheable latency formulas |
| avgMshrUncacheableLatency.flags(total | nozero | nonan); |
| avgMshrUncacheableLatency = mshrUncacheableLatency / mshrUncacheable; |
| for (int i = 0; i < max_requestors; i++) { |
| avgMshrUncacheableLatency.subname(i, system->getRequestorName(i)); |
| } |
| } |
| |
| BaseCache::CacheStats::CacheStats(BaseCache &c) |
| : statistics::Group(&c), cache(c), |
| |
| ADD_STAT(demandHits, statistics::units::Count::get(), |
| "number of demand (read+write) hits"), |
| ADD_STAT(overallHits, statistics::units::Count::get(), |
| "number of overall hits"), |
| ADD_STAT(demandHitLatency, statistics::units::Tick::get(), |
| "number of demand (read+write) hit ticks"), |
| ADD_STAT(overallHitLatency, statistics::units::Tick::get(), |
| "number of overall hit ticks"), |
| ADD_STAT(demandMisses, statistics::units::Count::get(), |
| "number of demand (read+write) misses"), |
| ADD_STAT(overallMisses, statistics::units::Count::get(), |
| "number of overall misses"), |
| ADD_STAT(demandMissLatency, statistics::units::Tick::get(), |
| "number of demand (read+write) miss ticks"), |
| ADD_STAT(overallMissLatency, statistics::units::Tick::get(), |
| "number of overall miss ticks"), |
| ADD_STAT(demandAccesses, statistics::units::Count::get(), |
| "number of demand (read+write) accesses"), |
| ADD_STAT(overallAccesses, statistics::units::Count::get(), |
| "number of overall (read+write) accesses"), |
| ADD_STAT(demandMissRate, statistics::units::Ratio::get(), |
| "miss rate for demand accesses"), |
| ADD_STAT(overallMissRate, statistics::units::Ratio::get(), |
| "miss rate for overall accesses"), |
| ADD_STAT(demandAvgMissLatency, statistics::units::Rate< |
| statistics::units::Cycle, statistics::units::Count>::get(), |
| "average overall miss latency"), |
| ADD_STAT(overallAvgMissLatency, statistics::units::Rate< |
| statistics::units::Cycle, statistics::units::Count>::get(), |
| "average overall miss latency"), |
| ADD_STAT(blockedCycles, statistics::units::Cycle::get(), |
| "number of cycles access was blocked"), |
| ADD_STAT(blockedCauses, statistics::units::Count::get(), |
| "number of times access was blocked"), |
| ADD_STAT(avgBlocked, statistics::units::Rate< |
| statistics::units::Cycle, statistics::units::Count>::get(), |
| "average number of cycles each access was blocked"), |
| ADD_STAT(writebacks, statistics::units::Count::get(), |
| "number of writebacks"), |
| ADD_STAT(demandMshrHits, statistics::units::Count::get(), |
| "number of demand (read+write) MSHR hits"), |
| ADD_STAT(overallMshrHits, statistics::units::Count::get(), |
| "number of overall MSHR hits"), |
| ADD_STAT(demandMshrMisses, statistics::units::Count::get(), |
| "number of demand (read+write) MSHR misses"), |
| ADD_STAT(overallMshrMisses, statistics::units::Count::get(), |
| "number of overall MSHR misses"), |
| ADD_STAT(overallMshrUncacheable, statistics::units::Count::get(), |
| "number of overall MSHR uncacheable misses"), |
| ADD_STAT(demandMshrMissLatency, statistics::units::Tick::get(), |
| "number of demand (read+write) MSHR miss ticks"), |
| ADD_STAT(overallMshrMissLatency, statistics::units::Tick::get(), |
| "number of overall MSHR miss ticks"), |
| ADD_STAT(overallMshrUncacheableLatency, statistics::units::Tick::get(), |
| "number of overall MSHR uncacheable ticks"), |
| ADD_STAT(demandMshrMissRate, statistics::units::Ratio::get(), |
| "mshr miss ratio for demand accesses"), |
| ADD_STAT(overallMshrMissRate, statistics::units::Ratio::get(), |
| "mshr miss ratio for overall accesses"), |
| ADD_STAT(demandAvgMshrMissLatency, statistics::units::Rate< |
| statistics::units::Cycle, statistics::units::Count>::get(), |
| "average overall mshr miss latency"), |
| ADD_STAT(overallAvgMshrMissLatency, statistics::units::Rate< |
| statistics::units::Cycle, statistics::units::Count>::get(), |
| "average overall mshr miss latency"), |
| ADD_STAT(overallAvgMshrUncacheableLatency, statistics::units::Rate< |
| statistics::units::Cycle, statistics::units::Count>::get(), |
| "average overall mshr uncacheable latency"), |
| ADD_STAT(replacements, statistics::units::Count::get(), |
| "number of replacements"), |
| ADD_STAT(dataExpansions, statistics::units::Count::get(), |
| "number of data expansions"), |
| ADD_STAT(dataContractions, statistics::units::Count::get(), |
| "number of data contractions"), |
| cmd(MemCmd::NUM_MEM_CMDS) |
| { |
| for (int idx = 0; idx < MemCmd::NUM_MEM_CMDS; ++idx) |
| cmd[idx].reset(new CacheCmdStats(c, MemCmd(idx).toString())); |
| } |
| |
| void |
| BaseCache::CacheStats::regStats() |
| { |
| using namespace statistics; |
| |
| statistics::Group::regStats(); |
| |
| System *system = cache.system; |
| const auto max_requestors = system->maxRequestors(); |
| |
| for (auto &cs : cmd) |
| cs->regStatsFromParent(); |
| |
| // These macros make it easier to sum the right subset of commands and |
| // to change the subset of commands that are considered "demand" vs |
| // "non-demand" |
| #define SUM_DEMAND(s) \ |
| (cmd[MemCmd::ReadReq]->s + cmd[MemCmd::WriteReq]->s + \ |
| cmd[MemCmd::WriteLineReq]->s + cmd[MemCmd::ReadExReq]->s + \ |
| cmd[MemCmd::ReadCleanReq]->s + cmd[MemCmd::ReadSharedReq]->s) |
| |
| // should writebacks be included here? prior code was inconsistent... |
| #define SUM_NON_DEMAND(s) \ |
| (cmd[MemCmd::SoftPFReq]->s + cmd[MemCmd::HardPFReq]->s + \ |
| cmd[MemCmd::SoftPFExReq]->s) |
| |
| demandHits.flags(total | nozero | nonan); |
| demandHits = SUM_DEMAND(hits); |
| for (int i = 0; i < max_requestors; i++) { |
| demandHits.subname(i, system->getRequestorName(i)); |
| } |
| |
| overallHits.flags(total | nozero | nonan); |
| overallHits = demandHits + SUM_NON_DEMAND(hits); |
| for (int i = 0; i < max_requestors; i++) { |
| overallHits.subname(i, system->getRequestorName(i)); |
| } |
| |
| demandMisses.flags(total | nozero | nonan); |
| demandMisses = SUM_DEMAND(misses); |
| for (int i = 0; i < max_requestors; i++) { |
| demandMisses.subname(i, system->getRequestorName(i)); |
| } |
| |
| overallMisses.flags(total | nozero | nonan); |
| overallMisses = demandMisses + SUM_NON_DEMAND(misses); |
| for (int i = 0; i < max_requestors; i++) { |
| overallMisses.subname(i, system->getRequestorName(i)); |
| } |
| |
| demandMissLatency.flags(total | nozero | nonan); |
| demandMissLatency = SUM_DEMAND(missLatency); |
| for (int i = 0; i < max_requestors; i++) { |
| demandMissLatency.subname(i, system->getRequestorName(i)); |
| } |
| |
| overallMissLatency.flags(total | nozero | nonan); |
| overallMissLatency = demandMissLatency + SUM_NON_DEMAND(missLatency); |
| for (int i = 0; i < max_requestors; i++) { |
| overallMissLatency.subname(i, system->getRequestorName(i)); |
| } |
| |
| demandHitLatency.flags(total | nozero | nonan); |
| demandHitLatency = SUM_DEMAND(hitLatency); |
| for (int i = 0; i < max_requestors; i++) { |
| demandHitLatency.subname(i, system->getRequestorName(i)); |
| } |
| overallHitLatency.flags(total | nozero | nonan); |
| overallHitLatency = demandHitLatency + SUM_NON_DEMAND(hitLatency); |
| for (int i = 0; i < max_requestors; i++) { |
| overallHitLatency.subname(i, system->getRequestorName(i)); |
| } |
| |
| demandAccesses.flags(total | nozero | nonan); |
| demandAccesses = demandHits + demandMisses; |
| for (int i = 0; i < max_requestors; i++) { |
| demandAccesses.subname(i, system->getRequestorName(i)); |
| } |
| |
| overallAccesses.flags(total | nozero | nonan); |
| overallAccesses = overallHits + overallMisses; |
| for (int i = 0; i < max_requestors; i++) { |
| overallAccesses.subname(i, system->getRequestorName(i)); |
| } |
| |
| demandMissRate.flags(total | nozero | nonan); |
| demandMissRate = demandMisses / demandAccesses; |
| for (int i = 0; i < max_requestors; i++) { |
| demandMissRate.subname(i, system->getRequestorName(i)); |
| } |
| |
| overallMissRate.flags(total | nozero | nonan); |
| overallMissRate = overallMisses / overallAccesses; |
| for (int i = 0; i < max_requestors; i++) { |
| overallMissRate.subname(i, system->getRequestorName(i)); |
| } |
| |
| demandAvgMissLatency.flags(total | nozero | nonan); |
| demandAvgMissLatency = demandMissLatency / demandMisses; |
| for (int i = 0; i < max_requestors; i++) { |
| demandAvgMissLatency.subname(i, system->getRequestorName(i)); |
| } |
| |
| overallAvgMissLatency.flags(total | nozero | nonan); |
| overallAvgMissLatency = overallMissLatency / overallMisses; |
| for (int i = 0; i < max_requestors; i++) { |
| overallAvgMissLatency.subname(i, system->getRequestorName(i)); |
| } |
| |
| blockedCycles.init(NUM_BLOCKED_CAUSES); |
| blockedCycles |
| .subname(Blocked_NoMSHRs, "no_mshrs") |
| .subname(Blocked_NoTargets, "no_targets") |
| ; |
| |
| |
| blockedCauses.init(NUM_BLOCKED_CAUSES); |
| blockedCauses |
| .subname(Blocked_NoMSHRs, "no_mshrs") |
| .subname(Blocked_NoTargets, "no_targets") |
| ; |
| |
| avgBlocked |
| .subname(Blocked_NoMSHRs, "no_mshrs") |
| .subname(Blocked_NoTargets, "no_targets") |
| ; |
| avgBlocked = blockedCycles / blockedCauses; |
| |
| writebacks |
| .init(max_requestors) |
| .flags(total | nozero | nonan) |
| ; |
| for (int i = 0; i < max_requestors; i++) { |
| writebacks.subname(i, system->getRequestorName(i)); |
| } |
| |
| demandMshrHits.flags(total | nozero | nonan); |
| demandMshrHits = SUM_DEMAND(mshrHits); |
| for (int i = 0; i < max_requestors; i++) { |
| demandMshrHits.subname(i, system->getRequestorName(i)); |
| } |
| |
| overallMshrHits.flags(total | nozero | nonan); |
| overallMshrHits = demandMshrHits + SUM_NON_DEMAND(mshrHits); |
| for (int i = 0; i < max_requestors; i++) { |
| overallMshrHits.subname(i, system->getRequestorName(i)); |
| } |
| |
| demandMshrMisses.flags(total | nozero | nonan); |
| demandMshrMisses = SUM_DEMAND(mshrMisses); |
| for (int i = 0; i < max_requestors; i++) { |
| demandMshrMisses.subname(i, system->getRequestorName(i)); |
| } |
| |
| overallMshrMisses.flags(total | nozero | nonan); |
| overallMshrMisses = demandMshrMisses + SUM_NON_DEMAND(mshrMisses); |
| for (int i = 0; i < max_requestors; i++) { |
| overallMshrMisses.subname(i, system->getRequestorName(i)); |
| } |
| |
| demandMshrMissLatency.flags(total | nozero | nonan); |
| demandMshrMissLatency = SUM_DEMAND(mshrMissLatency); |
| for (int i = 0; i < max_requestors; i++) { |
| demandMshrMissLatency.subname(i, system->getRequestorName(i)); |
| } |
| |
| overallMshrMissLatency.flags(total | nozero | nonan); |
| overallMshrMissLatency = |
| demandMshrMissLatency + SUM_NON_DEMAND(mshrMissLatency); |
| for (int i = 0; i < max_requestors; i++) { |
| overallMshrMissLatency.subname(i, system->getRequestorName(i)); |
| } |
| |
| overallMshrUncacheable.flags(total | nozero | nonan); |
| overallMshrUncacheable = |
| SUM_DEMAND(mshrUncacheable) + SUM_NON_DEMAND(mshrUncacheable); |
| for (int i = 0; i < max_requestors; i++) { |
| overallMshrUncacheable.subname(i, system->getRequestorName(i)); |
| } |
| |
| |
| overallMshrUncacheableLatency.flags(total | nozero | nonan); |
| overallMshrUncacheableLatency = |
| SUM_DEMAND(mshrUncacheableLatency) + |
| SUM_NON_DEMAND(mshrUncacheableLatency); |
| for (int i = 0; i < max_requestors; i++) { |
| overallMshrUncacheableLatency.subname(i, system->getRequestorName(i)); |
| } |
| |
| demandMshrMissRate.flags(total | nozero | nonan); |
| demandMshrMissRate = demandMshrMisses / demandAccesses; |
| for (int i = 0; i < max_requestors; i++) { |
| demandMshrMissRate.subname(i, system->getRequestorName(i)); |
| } |
| |
| overallMshrMissRate.flags(total | nozero | nonan); |
| overallMshrMissRate = overallMshrMisses / overallAccesses; |
| for (int i = 0; i < max_requestors; i++) { |
| overallMshrMissRate.subname(i, system->getRequestorName(i)); |
| } |
| |
| demandAvgMshrMissLatency.flags(total | nozero | nonan); |
| demandAvgMshrMissLatency = demandMshrMissLatency / demandMshrMisses; |
| for (int i = 0; i < max_requestors; i++) { |
| demandAvgMshrMissLatency.subname(i, system->getRequestorName(i)); |
| } |
| |
| overallAvgMshrMissLatency.flags(total | nozero | nonan); |
| overallAvgMshrMissLatency = overallMshrMissLatency / overallMshrMisses; |
| for (int i = 0; i < max_requestors; i++) { |
| overallAvgMshrMissLatency.subname(i, system->getRequestorName(i)); |
| } |
| |
| overallAvgMshrUncacheableLatency.flags(total | nozero | nonan); |
| overallAvgMshrUncacheableLatency = |
| overallMshrUncacheableLatency / overallMshrUncacheable; |
| for (int i = 0; i < max_requestors; i++) { |
| overallAvgMshrUncacheableLatency.subname(i, |
| system->getRequestorName(i)); |
| } |
| |
| dataExpansions.flags(nozero | nonan); |
| dataContractions.flags(nozero | nonan); |
| } |
| |
| void |
| BaseCache::regProbePoints() |
| { |
| ppHit = new ProbePointArg<PacketPtr>(this->getProbeManager(), "Hit"); |
| ppMiss = new ProbePointArg<PacketPtr>(this->getProbeManager(), "Miss"); |
| ppFill = new ProbePointArg<PacketPtr>(this->getProbeManager(), "Fill"); |
| ppDataUpdate = |
| new ProbePointArg<DataUpdate>(this->getProbeManager(), "Data Update"); |
| } |
| |
| /////////////// |
| // |
| // CpuSidePort |
| // |
| /////////////// |
| bool |
| BaseCache::CpuSidePort::recvTimingSnoopResp(PacketPtr pkt) |
| { |
| // Snoops shouldn't happen when bypassing caches |
| assert(!cache->system->bypassCaches()); |
| |
| assert(pkt->isResponse()); |
| |
| // Express snoop responses from requestor to responder, e.g., from L1 to L2 |
| cache->recvTimingSnoopResp(pkt); |
| return true; |
| } |
| |
| |
| bool |
| BaseCache::CpuSidePort::tryTiming(PacketPtr pkt) |
| { |
| if (cache->system->bypassCaches() || pkt->isExpressSnoop()) { |
| // always let express snoop packets through even if blocked |
| return true; |
| } else if (blocked || mustSendRetry) { |
| // either already committed to send a retry, or blocked |
| mustSendRetry = true; |
| return false; |
| } |
| mustSendRetry = false; |
| return true; |
| } |
| |
| bool |
| BaseCache::CpuSidePort::recvTimingReq(PacketPtr pkt) |
| { |
| assert(pkt->isRequest()); |
| |
| if (cache->system->bypassCaches()) { |
| // Just forward the packet if caches are disabled. |
| // @todo This should really enqueue the packet rather |
| [[maybe_unused]] bool success = cache->memSidePort.sendTimingReq(pkt); |
| assert(success); |
| return true; |
| } else if (tryTiming(pkt)) { |
| cache->recvTimingReq(pkt); |
| return true; |
| } |
| return false; |
| } |
| |
| Tick |
| BaseCache::CpuSidePort::recvAtomic(PacketPtr pkt) |
| { |
| if (cache->system->bypassCaches()) { |
| // Forward the request if the system is in cache bypass mode. |
| return cache->memSidePort.sendAtomic(pkt); |
| } else { |
| return cache->recvAtomic(pkt); |
| } |
| } |
| |
| void |
| BaseCache::CpuSidePort::recvFunctional(PacketPtr pkt) |
| { |
| if (cache->system->bypassCaches()) { |
| // The cache should be flushed if we are in cache bypass mode, |
| // so we don't need to check if we need to update anything. |
| cache->memSidePort.sendFunctional(pkt); |
| return; |
| } |
| |
| // functional request |
| cache->functionalAccess(pkt, true); |
| } |
| |
| AddrRangeList |
| BaseCache::CpuSidePort::getAddrRanges() const |
| { |
| return cache->getAddrRanges(); |
| } |
| |
| |
| BaseCache:: |
| CpuSidePort::CpuSidePort(const std::string &_name, BaseCache *_cache, |
| const std::string &_label) |
| : CacheResponsePort(_name, _cache, _label), cache(_cache) |
| { |
| } |
| |
| /////////////// |
| // |
| // MemSidePort |
| // |
| /////////////// |
| bool |
| BaseCache::MemSidePort::recvTimingResp(PacketPtr pkt) |
| { |
| cache->recvTimingResp(pkt); |
| return true; |
| } |
| |
| // Express snooping requests to memside port |
| void |
| BaseCache::MemSidePort::recvTimingSnoopReq(PacketPtr pkt) |
| { |
| // Snoops shouldn't happen when bypassing caches |
| assert(!cache->system->bypassCaches()); |
| |
| // handle snooping requests |
| cache->recvTimingSnoopReq(pkt); |
| } |
| |
| Tick |
| BaseCache::MemSidePort::recvAtomicSnoop(PacketPtr pkt) |
| { |
| // Snoops shouldn't happen when bypassing caches |
| assert(!cache->system->bypassCaches()); |
| |
| return cache->recvAtomicSnoop(pkt); |
| } |
| |
| void |
| BaseCache::MemSidePort::recvFunctionalSnoop(PacketPtr pkt) |
| { |
| // Snoops shouldn't happen when bypassing caches |
| assert(!cache->system->bypassCaches()); |
| |
| // functional snoop (note that in contrast to atomic we don't have |
| // a specific functionalSnoop method, as they have the same |
| // behaviour regardless) |
| cache->functionalAccess(pkt, false); |
| } |
| |
| void |
| BaseCache::CacheReqPacketQueue::sendDeferredPacket() |
| { |
| // sanity check |
| assert(!waitingOnRetry); |
| |
| // there should never be any deferred request packets in the |
| // queue, instead we resly on the cache to provide the packets |
| // from the MSHR queue or write queue |
| assert(deferredPacketReadyTime() == MaxTick); |
| |
| // check for request packets (requests & writebacks) |
| QueueEntry* entry = cache.getNextQueueEntry(); |
| |
| if (!entry) { |
| // can happen if e.g. we attempt a writeback and fail, but |
| // before the retry, the writeback is eliminated because |
| // we snoop another cache's ReadEx. |
| } else { |
| // let our snoop responses go first if there are responses to |
| // the same addresses |
| if (checkConflictingSnoop(entry->getTarget()->pkt)) { |
| return; |
| } |
| waitingOnRetry = entry->sendPacket(cache); |
| } |
| |
| // if we succeeded and are not waiting for a retry, schedule the |
| // next send considering when the next queue is ready, note that |
| // snoop responses have their own packet queue and thus schedule |
| // their own events |
| if (!waitingOnRetry) { |
| schedSendEvent(cache.nextQueueReadyTime()); |
| } |
| } |
| |
| BaseCache::MemSidePort::MemSidePort(const std::string &_name, |
| BaseCache *_cache, |
| const std::string &_label) |
| : CacheRequestPort(_name, _cache, _reqQueue, _snoopRespQueue), |
| _reqQueue(*_cache, *this, _snoopRespQueue, _label), |
| _snoopRespQueue(*_cache, *this, true, _label), cache(_cache) |
| { |
| } |
| |
| void |
| WriteAllocator::updateMode(Addr write_addr, unsigned write_size, |
| Addr blk_addr) |
| { |
| // check if we are continuing where the last write ended |
| if (nextAddr == write_addr) { |
| delayCtr[blk_addr] = delayThreshold; |
| // stop if we have already saturated |
| if (mode != WriteMode::NO_ALLOCATE) { |
| byteCount += write_size; |
| // switch to streaming mode if we have passed the lower |
| // threshold |
| if (mode == WriteMode::ALLOCATE && |
| byteCount > coalesceLimit) { |
| mode = WriteMode::COALESCE; |
| DPRINTF(Cache, "Switched to write coalescing\n"); |
| } else if (mode == WriteMode::COALESCE && |
| byteCount > noAllocateLimit) { |
| // and continue and switch to non-allocating mode if we |
| // pass the upper threshold |
| mode = WriteMode::NO_ALLOCATE; |
| DPRINTF(Cache, "Switched to write-no-allocate\n"); |
| } |
| } |
| } else { |
| // we did not see a write matching the previous one, start |
| // over again |
| byteCount = write_size; |
| mode = WriteMode::ALLOCATE; |
| resetDelay(blk_addr); |
| } |
| nextAddr = write_addr + write_size; |
| } |
| |
| } // namespace gem5 |