| /* |
| * Copyright (c) 2013-2014,2017-2018 ARM Limited |
| * All rights reserved |
| * |
| * The license below extends only to copyright in the software and shall |
| * not be construed as granting a license to any other intellectual |
| * property including but not limited to intellectual property relating |
| * to a hardware implementation of the functionality of the software |
| * licensed hereunder. You may use the software subject to the license |
| * terms below provided that you ensure that this notice is replicated |
| * unmodified and in its entirety in all distributions of the software, |
| * modified or unmodified, in source code or in binary form. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are |
| * met: redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer; |
| * redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution; |
| * neither the name of the copyright holders nor the names of its |
| * contributors may be used to endorse or promote products derived from |
| * this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| * |
| * Authors: Andrew Bardsley |
| */ |
| |
| #include "cpu/minor/lsq.hh" |
| |
| #include <iomanip> |
| #include <sstream> |
| |
| #include "arch/locked_mem.hh" |
| #include "arch/mmapped_ipr.hh" |
| #include "base/logging.hh" |
| #include "cpu/minor/cpu.hh" |
| #include "cpu/minor/exec_context.hh" |
| #include "cpu/minor/execute.hh" |
| #include "cpu/minor/pipeline.hh" |
| #include "cpu/utils.hh" |
| #include "debug/Activity.hh" |
| #include "debug/MinorMem.hh" |
| |
| namespace Minor |
| { |
| |
| LSQ::LSQRequest::LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_, |
| PacketDataPtr data_, uint64_t *res_) : |
| SenderState(), |
| port(port_), |
| inst(inst_), |
| isLoad(isLoad_), |
| data(data_), |
| packet(NULL), |
| request(), |
| res(res_), |
| skipped(false), |
| issuedToMemory(false), |
| isTranslationDelayed(false), |
| state(NotIssued) |
| { |
| request = std::make_shared<Request>(); |
| } |
| |
| void |
| LSQ::LSQRequest::tryToSuppressFault() |
| { |
| SimpleThread &thread = *port.cpu.threads[inst->id.threadId]; |
| TheISA::PCState old_pc = thread.pcState(); |
| ExecContext context(port.cpu, thread, port.execute, inst); |
| Fault M5_VAR_USED fault = inst->translationFault; |
| |
| // Give the instruction a chance to suppress a translation fault |
| inst->translationFault = inst->staticInst->initiateAcc(&context, nullptr); |
| if (inst->translationFault == NoFault) { |
| DPRINTFS(MinorMem, (&port), |
| "Translation fault suppressed for inst:%s\n", *inst); |
| } else { |
| assert(inst->translationFault == fault); |
| } |
| thread.pcState(old_pc); |
| } |
| |
| void |
| LSQ::LSQRequest::completeDisabledMemAccess() |
| { |
| DPRINTFS(MinorMem, (&port), "Complete disabled mem access for inst:%s\n", |
| *inst); |
| |
| SimpleThread &thread = *port.cpu.threads[inst->id.threadId]; |
| TheISA::PCState old_pc = thread.pcState(); |
| |
| ExecContext context(port.cpu, thread, port.execute, inst); |
| |
| context.setMemAccPredicate(false); |
| inst->staticInst->completeAcc(nullptr, &context, inst->traceData); |
| |
| thread.pcState(old_pc); |
| } |
| |
| void |
| LSQ::LSQRequest::disableMemAccess() |
| { |
| port.cpu.threads[inst->id.threadId]->setMemAccPredicate(false); |
| DPRINTFS(MinorMem, (&port), "Disable mem access for inst:%s\n", *inst); |
| } |
| |
| LSQ::AddrRangeCoverage |
| LSQ::LSQRequest::containsAddrRangeOf( |
| Addr req1_addr, unsigned int req1_size, |
| Addr req2_addr, unsigned int req2_size) |
| { |
| /* 'end' here means the address of the byte just past the request |
| * blocks */ |
| Addr req2_end_addr = req2_addr + req2_size; |
| Addr req1_end_addr = req1_addr + req1_size; |
| |
| AddrRangeCoverage ret; |
| |
| if (req1_addr >= req2_end_addr || req1_end_addr <= req2_addr) |
| ret = NoAddrRangeCoverage; |
| else if (req1_addr <= req2_addr && req1_end_addr >= req2_end_addr) |
| ret = FullAddrRangeCoverage; |
| else |
| ret = PartialAddrRangeCoverage; |
| |
| return ret; |
| } |
| |
| LSQ::AddrRangeCoverage |
| LSQ::LSQRequest::containsAddrRangeOf(LSQRequestPtr other_request) |
| { |
| return containsAddrRangeOf(request->getPaddr(), request->getSize(), |
| other_request->request->getPaddr(), other_request->request->getSize()); |
| } |
| |
| bool |
| LSQ::LSQRequest::isBarrier() |
| { |
| return inst->isInst() && inst->staticInst->isMemBarrier(); |
| } |
| |
| bool |
| LSQ::LSQRequest::needsToBeSentToStoreBuffer() |
| { |
| return state == StoreToStoreBuffer; |
| } |
| |
| void |
| LSQ::LSQRequest::setState(LSQRequestState new_state) |
| { |
| DPRINTFS(MinorMem, (&port), "Setting state from %d to %d for request:" |
| " %s\n", state, new_state, *inst); |
| state = new_state; |
| } |
| |
| bool |
| LSQ::LSQRequest::isComplete() const |
| { |
| /* @todo, There is currently only one 'completed' state. This |
| * may not be a good choice */ |
| return state == Complete; |
| } |
| |
| void |
| LSQ::LSQRequest::reportData(std::ostream &os) const |
| { |
| os << (isLoad ? 'R' : 'W') << ';'; |
| inst->reportData(os); |
| os << ';' << state; |
| } |
| |
| std::ostream & |
| operator <<(std::ostream &os, LSQ::AddrRangeCoverage coverage) |
| { |
| switch (coverage) { |
| case LSQ::PartialAddrRangeCoverage: |
| os << "PartialAddrRangeCoverage"; |
| break; |
| case LSQ::FullAddrRangeCoverage: |
| os << "FullAddrRangeCoverage"; |
| break; |
| case LSQ::NoAddrRangeCoverage: |
| os << "NoAddrRangeCoverage"; |
| break; |
| default: |
| os << "AddrRangeCoverage-" << static_cast<int>(coverage); |
| break; |
| } |
| return os; |
| } |
| |
| std::ostream & |
| operator <<(std::ostream &os, LSQ::LSQRequest::LSQRequestState state) |
| { |
| switch (state) { |
| case LSQ::LSQRequest::NotIssued: |
| os << "NotIssued"; |
| break; |
| case LSQ::LSQRequest::InTranslation: |
| os << "InTranslation"; |
| break; |
| case LSQ::LSQRequest::Translated: |
| os << "Translated"; |
| break; |
| case LSQ::LSQRequest::Failed: |
| os << "Failed"; |
| break; |
| case LSQ::LSQRequest::RequestIssuing: |
| os << "RequestIssuing"; |
| break; |
| case LSQ::LSQRequest::StoreToStoreBuffer: |
| os << "StoreToStoreBuffer"; |
| break; |
| case LSQ::LSQRequest::StoreInStoreBuffer: |
| os << "StoreInStoreBuffer"; |
| break; |
| case LSQ::LSQRequest::StoreBufferIssuing: |
| os << "StoreBufferIssuing"; |
| break; |
| case LSQ::LSQRequest::RequestNeedsRetry: |
| os << "RequestNeedsRetry"; |
| break; |
| case LSQ::LSQRequest::StoreBufferNeedsRetry: |
| os << "StoreBufferNeedsRetry"; |
| break; |
| case LSQ::LSQRequest::Complete: |
| os << "Complete"; |
| break; |
| default: |
| os << "LSQRequestState-" << static_cast<int>(state); |
| break; |
| } |
| return os; |
| } |
| |
| void |
| LSQ::clearMemBarrier(MinorDynInstPtr inst) |
| { |
| bool is_last_barrier = |
| inst->id.execSeqNum >= lastMemBarrier[inst->id.threadId]; |
| |
| DPRINTF(MinorMem, "Moving %s barrier out of store buffer inst: %s\n", |
| (is_last_barrier ? "last" : "a"), *inst); |
| |
| if (is_last_barrier) |
| lastMemBarrier[inst->id.threadId] = 0; |
| } |
| |
| void |
| LSQ::SingleDataRequest::finish(const Fault &fault_, const RequestPtr &request_, |
| ThreadContext *tc, BaseTLB::Mode mode) |
| { |
| port.numAccessesInDTLB--; |
| |
| DPRINTFS(MinorMem, (&port), "Received translation response for" |
| " request: %s delayed:%d %s\n", *inst, isTranslationDelayed, |
| fault_ != NoFault ? fault_->name() : ""); |
| |
| if (fault_ != NoFault) { |
| inst->translationFault = fault_; |
| if (isTranslationDelayed) { |
| tryToSuppressFault(); |
| if (inst->translationFault == NoFault) { |
| completeDisabledMemAccess(); |
| setState(Complete); |
| } |
| } |
| setState(Translated); |
| } else { |
| setState(Translated); |
| makePacket(); |
| } |
| port.tryToSendToTransfers(this); |
| |
| /* Let's try and wake up the processor for the next cycle */ |
| port.cpu.wakeupOnEvent(Pipeline::ExecuteStageId); |
| } |
| |
| void |
| LSQ::SingleDataRequest::startAddrTranslation() |
| { |
| ThreadContext *thread = port.cpu.getContext( |
| inst->id.threadId); |
| |
| const auto &byteEnable = request->getByteEnable(); |
| if (byteEnable.size() == 0 || |
| isAnyActiveElement(byteEnable.cbegin(), byteEnable.cend())) { |
| port.numAccessesInDTLB++; |
| |
| setState(LSQ::LSQRequest::InTranslation); |
| |
| DPRINTFS(MinorMem, (&port), "Submitting DTLB request\n"); |
| /* Submit the translation request. The response will come through |
| * finish/markDelayed on the LSQRequest as it bears the Translation |
| * interface */ |
| thread->getDTBPtr()->translateTiming( |
| request, thread, this, (isLoad ? BaseTLB::Read : BaseTLB::Write)); |
| } else { |
| disableMemAccess(); |
| setState(LSQ::LSQRequest::Complete); |
| } |
| } |
| |
| void |
| LSQ::SingleDataRequest::retireResponse(PacketPtr packet_) |
| { |
| DPRINTFS(MinorMem, (&port), "Retiring packet\n"); |
| packet = packet_; |
| packetInFlight = false; |
| setState(Complete); |
| } |
| |
| void |
| LSQ::SplitDataRequest::finish(const Fault &fault_, const RequestPtr &request_, |
| ThreadContext *tc, BaseTLB::Mode mode) |
| { |
| port.numAccessesInDTLB--; |
| |
| unsigned int M5_VAR_USED expected_fragment_index = |
| numTranslatedFragments; |
| |
| numInTranslationFragments--; |
| numTranslatedFragments++; |
| |
| DPRINTFS(MinorMem, (&port), "Received translation response for fragment" |
| " %d of request: %s delayed:%d %s\n", expected_fragment_index, |
| *inst, isTranslationDelayed, |
| fault_ != NoFault ? fault_->name() : ""); |
| |
| assert(request_ == fragmentRequests[expected_fragment_index]); |
| |
| /* Wake up next cycle to get things going again in case the |
| * tryToSendToTransfers does take */ |
| port.cpu.wakeupOnEvent(Pipeline::ExecuteStageId); |
| |
| if (fault_ != NoFault) { |
| /* tryToSendToTransfers will handle the fault */ |
| inst->translationFault = fault_; |
| |
| DPRINTFS(MinorMem, (&port), "Faulting translation for fragment:" |
| " %d of request: %s\n", |
| expected_fragment_index, *inst); |
| |
| if (expected_fragment_index > 0 || isTranslationDelayed) |
| tryToSuppressFault(); |
| if (expected_fragment_index == 0) { |
| if (isTranslationDelayed && inst->translationFault == NoFault) { |
| completeDisabledMemAccess(); |
| setState(Complete); |
| } else { |
| setState(Translated); |
| } |
| } else if (inst->translationFault == NoFault) { |
| setState(Translated); |
| numTranslatedFragments--; |
| makeFragmentPackets(); |
| } else { |
| setState(Translated); |
| } |
| port.tryToSendToTransfers(this); |
| } else if (numTranslatedFragments == numFragments) { |
| makeFragmentPackets(); |
| setState(Translated); |
| port.tryToSendToTransfers(this); |
| } else { |
| /* Avoid calling translateTiming from within ::finish */ |
| assert(!translationEvent.scheduled()); |
| port.cpu.schedule(translationEvent, curTick()); |
| } |
| } |
| |
| LSQ::SplitDataRequest::SplitDataRequest(LSQ &port_, MinorDynInstPtr inst_, |
| bool isLoad_, PacketDataPtr data_, uint64_t *res_) : |
| LSQRequest(port_, inst_, isLoad_, data_, res_), |
| translationEvent([this]{ sendNextFragmentToTranslation(); }, |
| "translationEvent"), |
| numFragments(0), |
| numInTranslationFragments(0), |
| numTranslatedFragments(0), |
| numIssuedFragments(0), |
| numRetiredFragments(0), |
| fragmentRequests(), |
| fragmentPackets() |
| { |
| /* Don't know how many elements are needed until the request is |
| * populated by the caller. */ |
| } |
| |
| LSQ::SplitDataRequest::~SplitDataRequest() |
| { |
| for (auto i = fragmentPackets.begin(); |
| i != fragmentPackets.end(); i++) |
| { |
| delete *i; |
| } |
| } |
| |
| void |
| LSQ::SplitDataRequest::makeFragmentRequests() |
| { |
| Addr base_addr = request->getVaddr(); |
| unsigned int whole_size = request->getSize(); |
| unsigned int line_width = port.lineWidth; |
| |
| unsigned int fragment_size; |
| Addr fragment_addr; |
| |
| std::vector<bool> fragment_write_byte_en; |
| |
| /* Assume that this transfer is across potentially many block snap |
| * boundaries: |
| * |
| * | _|________|________|________|___ | |
| * | |0| 1 | 2 | 3 | 4 | | |
| * | |_|________|________|________|___| | |
| * | | | | | | |
| * |
| * The first transfer (0) can be up to lineWidth in size. |
| * All the middle transfers (1-3) are lineWidth in size |
| * The last transfer (4) can be from zero to lineWidth - 1 in size |
| */ |
| unsigned int first_fragment_offset = |
| addrBlockOffset(base_addr, line_width); |
| unsigned int last_fragment_size = |
| addrBlockOffset(base_addr + whole_size, line_width); |
| unsigned int first_fragment_size = |
| line_width - first_fragment_offset; |
| |
| unsigned int middle_fragments_total_size = |
| whole_size - (first_fragment_size + last_fragment_size); |
| |
| assert(addrBlockOffset(middle_fragments_total_size, line_width) == 0); |
| |
| unsigned int middle_fragment_count = |
| middle_fragments_total_size / line_width; |
| |
| numFragments = 1 /* first */ + middle_fragment_count + |
| (last_fragment_size == 0 ? 0 : 1); |
| |
| DPRINTFS(MinorMem, (&port), "Dividing transfer into %d fragmentRequests." |
| " First fragment size: %d Last fragment size: %d\n", |
| numFragments, first_fragment_size, |
| (last_fragment_size == 0 ? line_width : last_fragment_size)); |
| |
| assert(((middle_fragment_count * line_width) + |
| first_fragment_size + last_fragment_size) == whole_size); |
| |
| fragment_addr = base_addr; |
| fragment_size = first_fragment_size; |
| |
| /* Just past the last address in the request */ |
| Addr end_addr = base_addr + whole_size; |
| |
| auto& byte_enable = request->getByteEnable(); |
| unsigned int num_disabled_fragments = 0; |
| |
| for (unsigned int fragment_index = 0; fragment_index < numFragments; |
| fragment_index++) |
| { |
| bool M5_VAR_USED is_last_fragment = false; |
| |
| if (fragment_addr == base_addr) { |
| /* First fragment */ |
| fragment_size = first_fragment_size; |
| } else { |
| if ((fragment_addr + line_width) > end_addr) { |
| /* Adjust size of last fragment */ |
| fragment_size = end_addr - fragment_addr; |
| is_last_fragment = true; |
| } else { |
| /* Middle fragments */ |
| fragment_size = line_width; |
| } |
| } |
| |
| RequestPtr fragment = std::make_shared<Request>(); |
| bool disabled_fragment = false; |
| |
| fragment->setContext(request->contextId()); |
| if (byte_enable.empty()) { |
| fragment->setVirt(0 /* asid */, |
| fragment_addr, fragment_size, request->getFlags(), |
| request->masterId(), |
| request->getPC()); |
| } else { |
| // Set up byte-enable mask for the current fragment |
| auto it_start = byte_enable.begin() + |
| (fragment_addr - base_addr); |
| auto it_end = byte_enable.begin() + |
| (fragment_addr - base_addr) + fragment_size; |
| if (isAnyActiveElement(it_start, it_end)) { |
| fragment->setVirt(0 /* asid */, |
| fragment_addr, fragment_size, request->getFlags(), |
| request->masterId(), |
| request->getPC()); |
| fragment->setByteEnable(std::vector<bool>(it_start, it_end)); |
| } else { |
| disabled_fragment = true; |
| } |
| } |
| |
| if (!disabled_fragment) { |
| DPRINTFS(MinorMem, (&port), "Generating fragment addr: 0x%x" |
| " size: %d (whole request addr: 0x%x size: %d) %s\n", |
| fragment_addr, fragment_size, base_addr, whole_size, |
| (is_last_fragment ? "last fragment" : "")); |
| |
| fragmentRequests.push_back(fragment); |
| } else { |
| num_disabled_fragments++; |
| } |
| |
| fragment_addr += fragment_size; |
| } |
| assert(numFragments >= num_disabled_fragments); |
| numFragments -= num_disabled_fragments; |
| } |
| |
| void |
| LSQ::SplitDataRequest::makeFragmentPackets() |
| { |
| assert(numTranslatedFragments > 0); |
| Addr base_addr = request->getVaddr(); |
| |
| DPRINTFS(MinorMem, (&port), "Making packets for request: %s\n", *inst); |
| |
| for (unsigned int fragment_index = 0; |
| fragment_index < numTranslatedFragments; |
| fragment_index++) |
| { |
| RequestPtr fragment = fragmentRequests[fragment_index]; |
| |
| DPRINTFS(MinorMem, (&port), "Making packet %d for request: %s" |
| " (%d, 0x%x)\n", |
| fragment_index, *inst, |
| (fragment->hasPaddr() ? "has paddr" : "no paddr"), |
| (fragment->hasPaddr() ? fragment->getPaddr() : 0)); |
| |
| Addr fragment_addr = fragment->getVaddr(); |
| unsigned int fragment_size = fragment->getSize(); |
| |
| uint8_t *request_data = NULL; |
| |
| if (!isLoad) { |
| /* Split data for Packets. Will become the property of the |
| * outgoing Packets */ |
| request_data = new uint8_t[fragment_size]; |
| std::memcpy(request_data, data + (fragment_addr - base_addr), |
| fragment_size); |
| } |
| |
| assert(fragment->hasPaddr()); |
| |
| PacketPtr fragment_packet = |
| makePacketForRequest(fragment, isLoad, this, request_data); |
| |
| fragmentPackets.push_back(fragment_packet); |
| /* Accumulate flags in parent request */ |
| request->setFlags(fragment->getFlags()); |
| } |
| |
| /* Might as well make the overall/response packet here */ |
| /* Get the physical address for the whole request/packet from the first |
| * fragment */ |
| request->setPaddr(fragmentRequests[0]->getPaddr()); |
| makePacket(); |
| } |
| |
| void |
| LSQ::SplitDataRequest::startAddrTranslation() |
| { |
| makeFragmentRequests(); |
| |
| if (numFragments > 0) { |
| setState(LSQ::LSQRequest::InTranslation); |
| numInTranslationFragments = 0; |
| numTranslatedFragments = 0; |
| |
| /* @todo, just do these in sequence for now with |
| * a loop of: |
| * do { |
| * sendNextFragmentToTranslation ; translateTiming ; finish |
| * } while (numTranslatedFragments != numFragments); |
| */ |
| |
| /* Do first translation */ |
| sendNextFragmentToTranslation(); |
| } else { |
| disableMemAccess(); |
| setState(LSQ::LSQRequest::Complete); |
| } |
| } |
| |
| PacketPtr |
| LSQ::SplitDataRequest::getHeadPacket() |
| { |
| assert(numIssuedFragments < numTranslatedFragments); |
| |
| return fragmentPackets[numIssuedFragments]; |
| } |
| |
| void |
| LSQ::SplitDataRequest::stepToNextPacket() |
| { |
| assert(numIssuedFragments < numTranslatedFragments); |
| |
| numIssuedFragments++; |
| } |
| |
| void |
| LSQ::SplitDataRequest::retireResponse(PacketPtr response) |
| { |
| assert(inst->translationFault == NoFault); |
| assert(numRetiredFragments < numTranslatedFragments); |
| |
| DPRINTFS(MinorMem, (&port), "Retiring fragment addr: 0x%x size: %d" |
| " offset: 0x%x (retired fragment num: %d)\n", |
| response->req->getVaddr(), response->req->getSize(), |
| request->getVaddr() - response->req->getVaddr(), |
| numRetiredFragments); |
| |
| numRetiredFragments++; |
| |
| if (skipped) { |
| /* Skip because we already knew the request had faulted or been |
| * skipped */ |
| DPRINTFS(MinorMem, (&port), "Skipping this fragment\n"); |
| } else if (response->isError()) { |
| /* Mark up the error and leave to execute to handle it */ |
| DPRINTFS(MinorMem, (&port), "Fragment has an error, skipping\n"); |
| setSkipped(); |
| packet->copyError(response); |
| } else { |
| if (isLoad) { |
| if (!data) { |
| /* For a split transfer, a Packet must be constructed |
| * to contain all returning data. This is that packet's |
| * data */ |
| data = new uint8_t[request->getSize()]; |
| } |
| |
| /* Populate the portion of the overall response data represented |
| * by the response fragment */ |
| std::memcpy( |
| data + (response->req->getVaddr() - request->getVaddr()), |
| response->getConstPtr<uint8_t>(), |
| response->req->getSize()); |
| } |
| } |
| |
| /* Complete early if we're skipping are no more in-flight accesses */ |
| if (skipped && !hasPacketsInMemSystem()) { |
| DPRINTFS(MinorMem, (&port), "Completed skipped burst\n"); |
| setState(Complete); |
| if (packet->needsResponse()) |
| packet->makeResponse(); |
| } |
| |
| if (numRetiredFragments == numTranslatedFragments) |
| setState(Complete); |
| |
| if (!skipped && isComplete()) { |
| DPRINTFS(MinorMem, (&port), "Completed burst %d\n", packet != NULL); |
| |
| DPRINTFS(MinorMem, (&port), "Retired packet isRead: %d isWrite: %d" |
| " needsResponse: %d packetSize: %s requestSize: %s responseSize:" |
| " %s\n", packet->isRead(), packet->isWrite(), |
| packet->needsResponse(), packet->getSize(), request->getSize(), |
| response->getSize()); |
| |
| /* A request can become complete by several paths, this is a sanity |
| * check to make sure the packet's data is created */ |
| if (!data) { |
| data = new uint8_t[request->getSize()]; |
| } |
| |
| if (isLoad) { |
| DPRINTFS(MinorMem, (&port), "Copying read data\n"); |
| std::memcpy(packet->getPtr<uint8_t>(), data, request->getSize()); |
| } |
| packet->makeResponse(); |
| } |
| |
| /* Packets are all deallocated together in ~SplitLSQRequest */ |
| } |
| |
| void |
| LSQ::SplitDataRequest::sendNextFragmentToTranslation() |
| { |
| unsigned int fragment_index = numTranslatedFragments; |
| |
| ThreadContext *thread = port.cpu.getContext( |
| inst->id.threadId); |
| |
| DPRINTFS(MinorMem, (&port), "Submitting DTLB request for fragment: %d\n", |
| fragment_index); |
| |
| port.numAccessesInDTLB++; |
| numInTranslationFragments++; |
| |
| thread->getDTBPtr()->translateTiming( |
| fragmentRequests[fragment_index], thread, this, (isLoad ? |
| BaseTLB::Read : BaseTLB::Write)); |
| } |
| |
| bool |
| LSQ::StoreBuffer::canInsert() const |
| { |
| /* @todo, support store amalgamation */ |
| return slots.size() < numSlots; |
| } |
| |
| void |
| LSQ::StoreBuffer::deleteRequest(LSQRequestPtr request) |
| { |
| auto found = std::find(slots.begin(), slots.end(), request); |
| |
| if (found != slots.end()) { |
| DPRINTF(MinorMem, "Deleting request: %s %s %s from StoreBuffer\n", |
| request, *found, *(request->inst)); |
| slots.erase(found); |
| |
| delete request; |
| } |
| } |
| |
| void |
| LSQ::StoreBuffer::insert(LSQRequestPtr request) |
| { |
| if (!canInsert()) { |
| warn("%s: store buffer insertion without space to insert from" |
| " inst: %s\n", name(), *(request->inst)); |
| } |
| |
| DPRINTF(MinorMem, "Pushing store: %s into store buffer\n", request); |
| |
| numUnissuedAccesses++; |
| |
| if (request->state != LSQRequest::Complete) |
| request->setState(LSQRequest::StoreInStoreBuffer); |
| |
| slots.push_back(request); |
| |
| /* Let's try and wake up the processor for the next cycle to step |
| * the store buffer */ |
| lsq.cpu.wakeupOnEvent(Pipeline::ExecuteStageId); |
| } |
| |
| LSQ::AddrRangeCoverage |
| LSQ::StoreBuffer::canForwardDataToLoad(LSQRequestPtr request, |
| unsigned int &found_slot) |
| { |
| unsigned int slot_index = slots.size() - 1; |
| auto i = slots.rbegin(); |
| AddrRangeCoverage ret = NoAddrRangeCoverage; |
| |
| /* Traverse the store buffer in reverse order (most to least recent) |
| * and try to find a slot whose address range overlaps this request */ |
| while (ret == NoAddrRangeCoverage && i != slots.rend()) { |
| LSQRequestPtr slot = *i; |
| |
| /* Cache maintenance instructions go down via the store path but |
| * they carry no data and they shouldn't be considered |
| * for forwarding */ |
| if (slot->packet && |
| slot->inst->id.threadId == request->inst->id.threadId && |
| !slot->packet->req->isCacheMaintenance()) { |
| AddrRangeCoverage coverage = slot->containsAddrRangeOf(request); |
| |
| if (coverage != NoAddrRangeCoverage) { |
| DPRINTF(MinorMem, "Forwarding: slot: %d result: %s thisAddr:" |
| " 0x%x thisSize: %d slotAddr: 0x%x slotSize: %d\n", |
| slot_index, coverage, |
| request->request->getPaddr(), request->request->getSize(), |
| slot->request->getPaddr(), slot->request->getSize()); |
| |
| found_slot = slot_index; |
| ret = coverage; |
| } |
| } |
| |
| i++; |
| slot_index--; |
| } |
| |
| return ret; |
| } |
| |
| /** Fill the given packet with appropriate date from slot slot_number */ |
| void |
| LSQ::StoreBuffer::forwardStoreData(LSQRequestPtr load, |
| unsigned int slot_number) |
| { |
| assert(slot_number < slots.size()); |
| assert(load->packet); |
| assert(load->isLoad); |
| |
| LSQRequestPtr store = slots[slot_number]; |
| |
| assert(store->packet); |
| assert(store->containsAddrRangeOf(load) == FullAddrRangeCoverage); |
| |
| Addr load_addr = load->request->getPaddr(); |
| Addr store_addr = store->request->getPaddr(); |
| Addr addr_offset = load_addr - store_addr; |
| |
| unsigned int load_size = load->request->getSize(); |
| |
| DPRINTF(MinorMem, "Forwarding %d bytes for addr: 0x%x from store buffer" |
| " slot: %d addr: 0x%x addressOffset: 0x%x\n", |
| load_size, load_addr, slot_number, |
| store_addr, addr_offset); |
| |
| void *load_packet_data = load->packet->getPtr<void>(); |
| void *store_packet_data = store->packet->getPtr<uint8_t>() + addr_offset; |
| |
| std::memcpy(load_packet_data, store_packet_data, load_size); |
| } |
| |
| void |
| LSQ::StoreBuffer::countIssuedStore(LSQRequestPtr request) |
| { |
| /* Barriers are accounted for as they are cleared from |
| * the queue, not after their transfers are complete */ |
| if (!request->isBarrier()) |
| numUnissuedAccesses--; |
| } |
| |
| void |
| LSQ::StoreBuffer::step() |
| { |
| DPRINTF(MinorMem, "StoreBuffer step numUnissuedAccesses: %d\n", |
| numUnissuedAccesses); |
| |
| if (numUnissuedAccesses != 0 && lsq.state == LSQ::MemoryRunning) { |
| /* Clear all the leading barriers */ |
| while (!slots.empty() && |
| slots.front()->isComplete() && slots.front()->isBarrier()) |
| { |
| LSQRequestPtr barrier = slots.front(); |
| |
| DPRINTF(MinorMem, "Clearing barrier for inst: %s\n", |
| *(barrier->inst)); |
| |
| numUnissuedAccesses--; |
| lsq.clearMemBarrier(barrier->inst); |
| slots.pop_front(); |
| |
| delete barrier; |
| } |
| |
| auto i = slots.begin(); |
| bool issued = true; |
| unsigned int issue_count = 0; |
| |
| /* Skip trying if the memory system is busy */ |
| if (lsq.state == LSQ::MemoryNeedsRetry) |
| issued = false; |
| |
| /* Try to issue all stores in order starting from the head |
| * of the queue. Responses are allowed to be retired |
| * out of order */ |
| while (issued && |
| issue_count < storeLimitPerCycle && |
| lsq.canSendToMemorySystem() && |
| i != slots.end()) |
| { |
| LSQRequestPtr request = *i; |
| |
| DPRINTF(MinorMem, "Considering request: %s, sentAllPackets: %d" |
| " state: %s\n", |
| *(request->inst), request->sentAllPackets(), |
| request->state); |
| |
| if (request->isBarrier() && request->isComplete()) { |
| /* Give up at barriers */ |
| issued = false; |
| } else if (!(request->state == LSQRequest::StoreBufferIssuing && |
| request->sentAllPackets())) |
| { |
| DPRINTF(MinorMem, "Trying to send request: %s to memory" |
| " system\n", *(request->inst)); |
| |
| if (lsq.tryToSend(request)) { |
| countIssuedStore(request); |
| issue_count++; |
| } else { |
| /* Don't step on to the next store buffer entry if this |
| * one hasn't issued all its packets as the store |
| * buffer must still enforce ordering */ |
| issued = false; |
| } |
| } |
| i++; |
| } |
| } |
| } |
| |
| void |
| LSQ::completeMemBarrierInst(MinorDynInstPtr inst, |
| bool committed) |
| { |
| if (committed) { |
| /* Not already sent to the store buffer as a store request? */ |
| if (!inst->inStoreBuffer) { |
| /* Insert an entry into the store buffer to tick off barriers |
| * until there are none in flight */ |
| storeBuffer.insert(new BarrierDataRequest(*this, inst)); |
| } |
| } else { |
| /* Clear the barrier anyway if it wasn't actually committed */ |
| clearMemBarrier(inst); |
| } |
| } |
| |
| void |
| LSQ::StoreBuffer::minorTrace() const |
| { |
| unsigned int size = slots.size(); |
| unsigned int i = 0; |
| std::ostringstream os; |
| |
| while (i < size) { |
| LSQRequestPtr request = slots[i]; |
| |
| request->reportData(os); |
| |
| i++; |
| if (i < numSlots) |
| os << ','; |
| } |
| |
| while (i < numSlots) { |
| os << '-'; |
| |
| i++; |
| if (i < numSlots) |
| os << ','; |
| } |
| |
| MINORTRACE("addr=%s num_unissued_stores=%d\n", os.str(), |
| numUnissuedAccesses); |
| } |
| |
| void |
| LSQ::tryToSendToTransfers(LSQRequestPtr request) |
| { |
| if (state == MemoryNeedsRetry) { |
| DPRINTF(MinorMem, "Request needs retry, not issuing to" |
| " memory until retry arrives\n"); |
| return; |
| } |
| |
| if (request->state == LSQRequest::InTranslation) { |
| DPRINTF(MinorMem, "Request still in translation, not issuing to" |
| " memory\n"); |
| return; |
| } |
| |
| assert(request->state == LSQRequest::Translated || |
| request->state == LSQRequest::RequestIssuing || |
| request->state == LSQRequest::Failed || |
| request->state == LSQRequest::Complete); |
| |
| if (requests.empty() || requests.front() != request) { |
| DPRINTF(MinorMem, "Request not at front of requests queue, can't" |
| " issue to memory\n"); |
| return; |
| } |
| |
| if (transfers.unreservedRemainingSpace() == 0) { |
| DPRINTF(MinorMem, "No space to insert request into transfers" |
| " queue\n"); |
| return; |
| } |
| |
| if (request->isComplete() || request->state == LSQRequest::Failed) { |
| DPRINTF(MinorMem, "Passing a %s transfer on to transfers" |
| " queue\n", (request->isComplete() ? "completed" : "failed")); |
| request->setState(LSQRequest::Complete); |
| request->setSkipped(); |
| moveFromRequestsToTransfers(request); |
| return; |
| } |
| |
| if (!execute.instIsRightStream(request->inst)) { |
| /* Wrong stream, try to abort the transfer but only do so if |
| * there are no packets in flight */ |
| if (request->hasPacketsInMemSystem()) { |
| DPRINTF(MinorMem, "Request's inst. is from the wrong stream," |
| " waiting for responses before aborting request\n"); |
| } else { |
| DPRINTF(MinorMem, "Request's inst. is from the wrong stream," |
| " aborting request\n"); |
| request->setState(LSQRequest::Complete); |
| request->setSkipped(); |
| moveFromRequestsToTransfers(request); |
| } |
| return; |
| } |
| |
| if (request->inst->translationFault != NoFault) { |
| if (request->inst->staticInst->isPrefetch()) { |
| DPRINTF(MinorMem, "Not signalling fault for faulting prefetch\n"); |
| } |
| DPRINTF(MinorMem, "Moving faulting request into the transfers" |
| " queue\n"); |
| request->setState(LSQRequest::Complete); |
| request->setSkipped(); |
| moveFromRequestsToTransfers(request); |
| return; |
| } |
| |
| bool is_load = request->isLoad; |
| bool is_llsc = request->request->isLLSC(); |
| bool is_swap = request->request->isSwap(); |
| bool is_atomic = request->request->isAtomic(); |
| bool bufferable = !(request->request->isStrictlyOrdered() || |
| is_llsc || is_swap || is_atomic); |
| |
| if (is_load) { |
| if (numStoresInTransfers != 0) { |
| DPRINTF(MinorMem, "Load request with stores still in transfers" |
| " queue, stalling\n"); |
| return; |
| } |
| } else { |
| /* Store. Can it be sent to the store buffer? */ |
| if (bufferable && !request->request->isMmappedIpr()) { |
| request->setState(LSQRequest::StoreToStoreBuffer); |
| moveFromRequestsToTransfers(request); |
| DPRINTF(MinorMem, "Moving store into transfers queue\n"); |
| return; |
| } |
| } |
| |
| /* Check if this is the head instruction (and so must be executable as |
| * its stream sequence number was checked above) for loads which must |
| * not be speculatively issued and stores which must be issued here */ |
| if (!bufferable) { |
| if (!execute.instIsHeadInst(request->inst)) { |
| DPRINTF(MinorMem, "Memory access not the head inst., can't be" |
| " sure it can be performed, not issuing\n"); |
| return; |
| } |
| |
| unsigned int forwarding_slot = 0; |
| |
| if (storeBuffer.canForwardDataToLoad(request, forwarding_slot) != |
| NoAddrRangeCoverage) |
| { |
| // There's at least another request that targets the same |
| // address and is staying in the storeBuffer. Since our |
| // request is non-bufferable (e.g., strictly ordered or atomic), |
| // we must wait for the other request in the storeBuffer to |
| // complete before we can issue this non-bufferable request. |
| // This is to make sure that the order they access the cache is |
| // correct. |
| DPRINTF(MinorMem, "Memory access can receive forwarded data" |
| " from the store buffer, but need to wait for store buffer" |
| " to drain\n"); |
| return; |
| } |
| } |
| |
| /* True: submit this packet to the transfers queue to be sent to the |
| * memory system. |
| * False: skip the memory and push a packet for this request onto |
| * requests */ |
| bool do_access = true; |
| |
| if (!is_llsc) { |
| /* Check for match in the store buffer */ |
| if (is_load) { |
| unsigned int forwarding_slot = 0; |
| AddrRangeCoverage forwarding_result = |
| storeBuffer.canForwardDataToLoad(request, |
| forwarding_slot); |
| |
| switch (forwarding_result) { |
| case FullAddrRangeCoverage: |
| /* Forward data from the store buffer into this request and |
| * repurpose this request's packet into a response packet */ |
| storeBuffer.forwardStoreData(request, forwarding_slot); |
| request->packet->makeResponse(); |
| |
| /* Just move between queues, no access */ |
| do_access = false; |
| break; |
| case PartialAddrRangeCoverage: |
| DPRINTF(MinorMem, "Load partly satisfied by store buffer" |
| " data. Must wait for the store to complete\n"); |
| return; |
| break; |
| case NoAddrRangeCoverage: |
| DPRINTF(MinorMem, "No forwardable data from store buffer\n"); |
| /* Fall through to try access */ |
| break; |
| } |
| } |
| } else { |
| if (!canSendToMemorySystem()) { |
| DPRINTF(MinorMem, "Can't send request to memory system yet\n"); |
| return; |
| } |
| |
| SimpleThread &thread = *cpu.threads[request->inst->id.threadId]; |
| |
| TheISA::PCState old_pc = thread.pcState(); |
| ExecContext context(cpu, thread, execute, request->inst); |
| |
| /* Handle LLSC requests and tests */ |
| if (is_load) { |
| TheISA::handleLockedRead(&context, request->request); |
| } else { |
| do_access = TheISA::handleLockedWrite(&context, |
| request->request, cacheBlockMask); |
| |
| if (!do_access) { |
| DPRINTF(MinorMem, "Not perfoming a memory " |
| "access for store conditional\n"); |
| } |
| } |
| thread.pcState(old_pc); |
| } |
| |
| /* See the do_access comment above */ |
| if (do_access) { |
| if (!canSendToMemorySystem()) { |
| DPRINTF(MinorMem, "Can't send request to memory system yet\n"); |
| return; |
| } |
| |
| /* Remember if this is an access which can't be idly |
| * discarded by an interrupt */ |
| if (!bufferable && !request->issuedToMemory) { |
| numAccessesIssuedToMemory++; |
| request->issuedToMemory = true; |
| } |
| |
| if (tryToSend(request)) { |
| moveFromRequestsToTransfers(request); |
| } |
| } else { |
| request->setState(LSQRequest::Complete); |
| moveFromRequestsToTransfers(request); |
| } |
| } |
| |
| bool |
| LSQ::tryToSend(LSQRequestPtr request) |
| { |
| bool ret = false; |
| |
| if (!canSendToMemorySystem()) { |
| DPRINTF(MinorMem, "Can't send request: %s yet, no space in memory\n", |
| *(request->inst)); |
| } else { |
| PacketPtr packet = request->getHeadPacket(); |
| |
| DPRINTF(MinorMem, "Trying to send request: %s addr: 0x%x\n", |
| *(request->inst), packet->req->getVaddr()); |
| |
| /* The sender state of the packet *must* be an LSQRequest |
| * so the response can be correctly handled */ |
| assert(packet->findNextSenderState<LSQRequest>()); |
| |
| if (request->request->isMmappedIpr()) { |
| ThreadContext *thread = |
| cpu.getContext(cpu.contextToThread( |
| request->request->contextId())); |
| |
| if (request->isLoad) { |
| DPRINTF(MinorMem, "IPR read inst: %s\n", *(request->inst)); |
| TheISA::handleIprRead(thread, packet); |
| } else { |
| DPRINTF(MinorMem, "IPR write inst: %s\n", *(request->inst)); |
| TheISA::handleIprWrite(thread, packet); |
| } |
| |
| request->stepToNextPacket(); |
| ret = request->sentAllPackets(); |
| |
| if (!ret) { |
| DPRINTF(MinorMem, "IPR access has another packet: %s\n", |
| *(request->inst)); |
| } |
| |
| if (ret) |
| request->setState(LSQRequest::Complete); |
| else |
| request->setState(LSQRequest::RequestIssuing); |
| } else if (dcachePort.sendTimingReq(packet)) { |
| DPRINTF(MinorMem, "Sent data memory request\n"); |
| |
| numAccessesInMemorySystem++; |
| |
| request->stepToNextPacket(); |
| |
| ret = request->sentAllPackets(); |
| |
| switch (request->state) { |
| case LSQRequest::Translated: |
| case LSQRequest::RequestIssuing: |
| /* Fully or partially issued a request in the transfers |
| * queue */ |
| request->setState(LSQRequest::RequestIssuing); |
| break; |
| case LSQRequest::StoreInStoreBuffer: |
| case LSQRequest::StoreBufferIssuing: |
| /* Fully or partially issued a request in the store |
| * buffer */ |
| request->setState(LSQRequest::StoreBufferIssuing); |
| break; |
| default: |
| panic("Unrecognized LSQ request state %d.", request->state); |
| } |
| |
| state = MemoryRunning; |
| } else { |
| DPRINTF(MinorMem, |
| "Sending data memory request - needs retry\n"); |
| |
| /* Needs to be resent, wait for that */ |
| state = MemoryNeedsRetry; |
| retryRequest = request; |
| |
| switch (request->state) { |
| case LSQRequest::Translated: |
| case LSQRequest::RequestIssuing: |
| request->setState(LSQRequest::RequestNeedsRetry); |
| break; |
| case LSQRequest::StoreInStoreBuffer: |
| case LSQRequest::StoreBufferIssuing: |
| request->setState(LSQRequest::StoreBufferNeedsRetry); |
| break; |
| default: |
| panic("Unrecognized LSQ request state %d.", request->state); |
| } |
| } |
| } |
| |
| if (ret) |
| threadSnoop(request); |
| |
| return ret; |
| } |
| |
| void |
| LSQ::moveFromRequestsToTransfers(LSQRequestPtr request) |
| { |
| assert(!requests.empty() && requests.front() == request); |
| assert(transfers.unreservedRemainingSpace() != 0); |
| |
| /* Need to count the number of stores in the transfers |
| * queue so that loads know when their store buffer forwarding |
| * results will be correct (only when all those stores |
| * have reached the store buffer) */ |
| if (!request->isLoad) |
| numStoresInTransfers++; |
| |
| requests.pop(); |
| transfers.push(request); |
| } |
| |
| bool |
| LSQ::canSendToMemorySystem() |
| { |
| return state == MemoryRunning && |
| numAccessesInMemorySystem < inMemorySystemLimit; |
| } |
| |
| bool |
| LSQ::recvTimingResp(PacketPtr response) |
| { |
| LSQRequestPtr request = |
| safe_cast<LSQRequestPtr>(response->popSenderState()); |
| |
| DPRINTF(MinorMem, "Received response packet inst: %s" |
| " addr: 0x%x cmd: %s\n", |
| *(request->inst), response->getAddr(), |
| response->cmd.toString()); |
| |
| numAccessesInMemorySystem--; |
| |
| if (response->isError()) { |
| DPRINTF(MinorMem, "Received error response packet: %s\n", |
| *request->inst); |
| } |
| |
| switch (request->state) { |
| case LSQRequest::RequestIssuing: |
| case LSQRequest::RequestNeedsRetry: |
| /* Response to a request from the transfers queue */ |
| request->retireResponse(response); |
| |
| DPRINTF(MinorMem, "Has outstanding packets?: %d %d\n", |
| request->hasPacketsInMemSystem(), request->isComplete()); |
| |
| break; |
| case LSQRequest::StoreBufferIssuing: |
| case LSQRequest::StoreBufferNeedsRetry: |
| /* Response to a request from the store buffer */ |
| request->retireResponse(response); |
| |
| /* Remove completed requests unless they are barriers (which will |
| * need to be removed in order */ |
| if (request->isComplete()) { |
| if (!request->isBarrier()) { |
| storeBuffer.deleteRequest(request); |
| } else { |
| DPRINTF(MinorMem, "Completed transfer for barrier: %s" |
| " leaving the request as it is also a barrier\n", |
| *(request->inst)); |
| } |
| } |
| break; |
| default: |
| panic("Shouldn't be allowed to receive a response from another state"); |
| } |
| |
| /* We go to idle even if there are more things in the requests queue |
| * as it's the job of step to actually step us on to the next |
| * transaction */ |
| |
| /* Let's try and wake up the processor for the next cycle */ |
| cpu.wakeupOnEvent(Pipeline::ExecuteStageId); |
| |
| /* Never busy */ |
| return true; |
| } |
| |
| void |
| LSQ::recvReqRetry() |
| { |
| DPRINTF(MinorMem, "Received retry request\n"); |
| |
| assert(state == MemoryNeedsRetry); |
| |
| switch (retryRequest->state) { |
| case LSQRequest::RequestNeedsRetry: |
| /* Retry in the requests queue */ |
| retryRequest->setState(LSQRequest::Translated); |
| break; |
| case LSQRequest::StoreBufferNeedsRetry: |
| /* Retry in the store buffer */ |
| retryRequest->setState(LSQRequest::StoreInStoreBuffer); |
| break; |
| default: |
| panic("Unrecognized retry request state %d.", retryRequest->state); |
| } |
| |
| /* Set state back to MemoryRunning so that the following |
| * tryToSend can actually send. Note that this won't |
| * allow another transfer in as tryToSend should |
| * issue a memory request and either succeed for this |
| * request or return the LSQ back to MemoryNeedsRetry */ |
| state = MemoryRunning; |
| |
| /* Try to resend the request */ |
| if (tryToSend(retryRequest)) { |
| /* Successfully sent, need to move the request */ |
| switch (retryRequest->state) { |
| case LSQRequest::RequestIssuing: |
| /* In the requests queue */ |
| moveFromRequestsToTransfers(retryRequest); |
| break; |
| case LSQRequest::StoreBufferIssuing: |
| /* In the store buffer */ |
| storeBuffer.countIssuedStore(retryRequest); |
| break; |
| default: |
| panic("Unrecognized retry request state %d.", retryRequest->state); |
| } |
| |
| retryRequest = NULL; |
| } |
| } |
| |
| LSQ::LSQ(std::string name_, std::string dcache_port_name_, |
| MinorCPU &cpu_, Execute &execute_, |
| unsigned int in_memory_system_limit, unsigned int line_width, |
| unsigned int requests_queue_size, unsigned int transfers_queue_size, |
| unsigned int store_buffer_size, |
| unsigned int store_buffer_cycle_store_limit) : |
| Named(name_), |
| cpu(cpu_), |
| execute(execute_), |
| dcachePort(dcache_port_name_, *this, cpu_), |
| lastMemBarrier(cpu.numThreads, 0), |
| state(MemoryRunning), |
| inMemorySystemLimit(in_memory_system_limit), |
| lineWidth((line_width == 0 ? cpu.cacheLineSize() : line_width)), |
| requests(name_ + ".requests", "addr", requests_queue_size), |
| transfers(name_ + ".transfers", "addr", transfers_queue_size), |
| storeBuffer(name_ + ".storeBuffer", |
| *this, store_buffer_size, store_buffer_cycle_store_limit), |
| numAccessesInMemorySystem(0), |
| numAccessesInDTLB(0), |
| numStoresInTransfers(0), |
| numAccessesIssuedToMemory(0), |
| retryRequest(NULL), |
| cacheBlockMask(~(cpu_.cacheLineSize() - 1)) |
| { |
| if (in_memory_system_limit < 1) { |
| fatal("%s: executeMaxAccessesInMemory must be >= 1 (%d)\n", name_, |
| in_memory_system_limit); |
| } |
| |
| if (store_buffer_cycle_store_limit < 1) { |
| fatal("%s: executeLSQMaxStoreBufferStoresPerCycle must be" |
| " >= 1 (%d)\n", name_, store_buffer_cycle_store_limit); |
| } |
| |
| if (requests_queue_size < 1) { |
| fatal("%s: executeLSQRequestsQueueSize must be" |
| " >= 1 (%d)\n", name_, requests_queue_size); |
| } |
| |
| if (transfers_queue_size < 1) { |
| fatal("%s: executeLSQTransfersQueueSize must be" |
| " >= 1 (%d)\n", name_, transfers_queue_size); |
| } |
| |
| if (store_buffer_size < 1) { |
| fatal("%s: executeLSQStoreBufferSize must be" |
| " >= 1 (%d)\n", name_, store_buffer_size); |
| } |
| |
| if ((lineWidth & (lineWidth - 1)) != 0) { |
| fatal("%s: lineWidth: %d must be a power of 2\n", name(), lineWidth); |
| } |
| } |
| |
| LSQ::~LSQ() |
| { } |
| |
| LSQ::LSQRequest::~LSQRequest() |
| { |
| if (packet) |
| delete packet; |
| if (data) |
| delete [] data; |
| } |
| |
| /** |
| * Step the memory access mechanism on to its next state. In reality, most |
| * of the stepping is done by the callbacks on the LSQ but this |
| * function is responsible for issuing memory requests lodged in the |
| * requests queue. |
| */ |
| void |
| LSQ::step() |
| { |
| /* Try to move address-translated requests between queues and issue |
| * them */ |
| if (!requests.empty()) |
| tryToSendToTransfers(requests.front()); |
| |
| storeBuffer.step(); |
| } |
| |
| LSQ::LSQRequestPtr |
| LSQ::findResponse(MinorDynInstPtr inst) |
| { |
| LSQ::LSQRequestPtr ret = NULL; |
| |
| if (!transfers.empty()) { |
| LSQRequestPtr request = transfers.front(); |
| |
| /* Same instruction and complete access or a store that's |
| * capable of being moved to the store buffer */ |
| if (request->inst->id == inst->id) { |
| bool complete = request->isComplete(); |
| bool can_store = storeBuffer.canInsert(); |
| bool to_store_buffer = request->state == |
| LSQRequest::StoreToStoreBuffer; |
| |
| if ((complete && !(request->isBarrier() && !can_store)) || |
| (to_store_buffer && can_store)) |
| { |
| ret = request; |
| } |
| } |
| } |
| |
| if (ret) { |
| DPRINTF(MinorMem, "Found matching memory response for inst: %s\n", |
| *inst); |
| } else { |
| DPRINTF(MinorMem, "No matching memory response for inst: %s\n", |
| *inst); |
| } |
| |
| return ret; |
| } |
| |
| void |
| LSQ::popResponse(LSQ::LSQRequestPtr response) |
| { |
| assert(!transfers.empty() && transfers.front() == response); |
| |
| transfers.pop(); |
| |
| if (!response->isLoad) |
| numStoresInTransfers--; |
| |
| if (response->issuedToMemory) |
| numAccessesIssuedToMemory--; |
| |
| if (response->state != LSQRequest::StoreInStoreBuffer) { |
| DPRINTF(MinorMem, "Deleting %s request: %s\n", |
| (response->isLoad ? "load" : "store"), |
| *(response->inst)); |
| |
| delete response; |
| } |
| } |
| |
| void |
| LSQ::sendStoreToStoreBuffer(LSQRequestPtr request) |
| { |
| assert(request->state == LSQRequest::StoreToStoreBuffer); |
| |
| DPRINTF(MinorMem, "Sending store: %s to store buffer\n", |
| *(request->inst)); |
| |
| request->inst->inStoreBuffer = true; |
| |
| storeBuffer.insert(request); |
| } |
| |
| bool |
| LSQ::isDrained() |
| { |
| return requests.empty() && transfers.empty() && |
| storeBuffer.isDrained(); |
| } |
| |
| bool |
| LSQ::needsToTick() |
| { |
| bool ret = false; |
| |
| if (canSendToMemorySystem()) { |
| bool have_translated_requests = !requests.empty() && |
| requests.front()->state != LSQRequest::InTranslation && |
| transfers.unreservedRemainingSpace() != 0; |
| |
| ret = have_translated_requests || |
| storeBuffer.numUnissuedStores() != 0; |
| } |
| |
| if (ret) |
| DPRINTF(Activity, "Need to tick\n"); |
| |
| return ret; |
| } |
| |
| Fault |
| LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data, |
| unsigned int size, Addr addr, Request::Flags flags, |
| uint64_t *res, AtomicOpFunctorPtr amo_op, |
| const std::vector<bool>& byteEnable) |
| { |
| assert(inst->translationFault == NoFault || inst->inLSQ); |
| |
| if (inst->inLSQ) { |
| return inst->translationFault; |
| } |
| |
| bool needs_burst = transferNeedsBurst(addr, size, lineWidth); |
| |
| if (needs_burst && inst->staticInst->isAtomic()) { |
| // AMO requests that access across a cache line boundary are not |
| // allowed since the cache does not guarantee AMO ops to be executed |
| // atomically in two cache lines |
| // For ISAs such as x86 that requires AMO operations to work on |
| // accesses that cross cache-line boundaries, the cache needs to be |
| // modified to support locking both cache lines to guarantee the |
| // atomicity. |
| panic("Do not expect cross-cache-line atomic memory request\n"); |
| } |
| |
| LSQRequestPtr request; |
| |
| /* Copy given data into the request. The request will pass this to the |
| * packet and then it will own the data */ |
| uint8_t *request_data = NULL; |
| |
| DPRINTF(MinorMem, "Pushing request (%s) addr: 0x%x size: %d flags:" |
| " 0x%x%s lineWidth : 0x%x\n", |
| (isLoad ? "load" : "store/atomic"), addr, size, flags, |
| (needs_burst ? " (needs burst)" : ""), lineWidth); |
| |
| if (!isLoad) { |
| /* Request_data becomes the property of a ...DataRequest (see below) |
| * and destroyed by its destructor */ |
| request_data = new uint8_t[size]; |
| if (inst->staticInst->isAtomic() || |
| (flags & Request::STORE_NO_DATA)) { |
| /* For atomic or store-no-data, just use zeroed data */ |
| std::memset(request_data, 0, size); |
| } else { |
| std::memcpy(request_data, data, size); |
| } |
| } |
| |
| if (needs_burst) { |
| request = new SplitDataRequest( |
| *this, inst, isLoad, request_data, res); |
| } else { |
| request = new SingleDataRequest( |
| *this, inst, isLoad, request_data, res); |
| } |
| |
| if (inst->traceData) |
| inst->traceData->setMem(addr, size, flags); |
| |
| int cid = cpu.threads[inst->id.threadId]->getTC()->contextId(); |
| request->request->setContext(cid); |
| request->request->setVirt(0 /* asid */, |
| addr, size, flags, cpu.dataMasterId(), |
| /* I've no idea why we need the PC, but give it */ |
| inst->pc.instAddr(), std::move(amo_op)); |
| request->request->setByteEnable(byteEnable); |
| |
| requests.push(request); |
| inst->inLSQ = true; |
| request->startAddrTranslation(); |
| |
| return inst->translationFault; |
| } |
| |
| void |
| LSQ::pushFailedRequest(MinorDynInstPtr inst) |
| { |
| LSQRequestPtr request = new FailedDataRequest(*this, inst); |
| requests.push(request); |
| } |
| |
| void |
| LSQ::minorTrace() const |
| { |
| MINORTRACE("state=%s in_tlb_mem=%d/%d stores_in_transfers=%d" |
| " lastMemBarrier=%d\n", |
| state, numAccessesInDTLB, numAccessesInMemorySystem, |
| numStoresInTransfers, lastMemBarrier[0]); |
| requests.minorTrace(); |
| transfers.minorTrace(); |
| storeBuffer.minorTrace(); |
| } |
| |
| LSQ::StoreBuffer::StoreBuffer(std::string name_, LSQ &lsq_, |
| unsigned int store_buffer_size, |
| unsigned int store_limit_per_cycle) : |
| Named(name_), lsq(lsq_), |
| numSlots(store_buffer_size), |
| storeLimitPerCycle(store_limit_per_cycle), |
| slots(), |
| numUnissuedAccesses(0) |
| { |
| } |
| |
| PacketPtr |
| makePacketForRequest(const RequestPtr &request, bool isLoad, |
| Packet::SenderState *sender_state, PacketDataPtr data) |
| { |
| PacketPtr ret = isLoad ? Packet::createRead(request) |
| : Packet::createWrite(request); |
| |
| if (sender_state) |
| ret->pushSenderState(sender_state); |
| |
| if (isLoad) { |
| ret->allocate(); |
| } else if (!request->isCacheMaintenance()) { |
| // CMOs are treated as stores but they don't have data. All |
| // stores otherwise need to allocate for data. |
| ret->dataDynamic(data); |
| } |
| |
| return ret; |
| } |
| |
| void |
| LSQ::issuedMemBarrierInst(MinorDynInstPtr inst) |
| { |
| assert(inst->isInst() && inst->staticInst->isMemBarrier()); |
| assert(inst->id.execSeqNum > lastMemBarrier[inst->id.threadId]); |
| |
| /* Remember the barrier. We only have a notion of one |
| * barrier so this may result in some mem refs being |
| * delayed if they are between barriers */ |
| lastMemBarrier[inst->id.threadId] = inst->id.execSeqNum; |
| } |
| |
| void |
| LSQ::LSQRequest::makePacket() |
| { |
| assert(inst->translationFault == NoFault); |
| |
| /* Make the function idempotent */ |
| if (packet) |
| return; |
| |
| packet = makePacketForRequest(request, isLoad, this, data); |
| /* Null the ret data so we know not to deallocate it when the |
| * ret is destroyed. The data now belongs to the ret and |
| * the ret is responsible for its destruction */ |
| data = NULL; |
| } |
| |
| std::ostream & |
| operator <<(std::ostream &os, LSQ::MemoryState state) |
| { |
| switch (state) { |
| case LSQ::MemoryRunning: |
| os << "MemoryRunning"; |
| break; |
| case LSQ::MemoryNeedsRetry: |
| os << "MemoryNeedsRetry"; |
| break; |
| default: |
| os << "MemoryState-" << static_cast<int>(state); |
| break; |
| } |
| return os; |
| } |
| |
| void |
| LSQ::recvTimingSnoopReq(PacketPtr pkt) |
| { |
| /* LLSC operations in Minor can't be speculative and are executed from |
| * the head of the requests queue. We shouldn't need to do more than |
| * this action on snoops. */ |
| for (ThreadID tid = 0; tid < cpu.numThreads; tid++) { |
| if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) { |
| cpu.wakeup(tid); |
| } |
| } |
| |
| if (pkt->isInvalidate() || pkt->isWrite()) { |
| for (ThreadID tid = 0; tid < cpu.numThreads; tid++) { |
| TheISA::handleLockedSnoop(cpu.getContext(tid), pkt, |
| cacheBlockMask); |
| } |
| } |
| } |
| |
| void |
| LSQ::threadSnoop(LSQRequestPtr request) |
| { |
| /* LLSC operations in Minor can't be speculative and are executed from |
| * the head of the requests queue. We shouldn't need to do more than |
| * this action on snoops. */ |
| ThreadID req_tid = request->inst->id.threadId; |
| PacketPtr pkt = request->packet; |
| |
| for (ThreadID tid = 0; tid < cpu.numThreads; tid++) { |
| if (tid != req_tid) { |
| if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) { |
| cpu.wakeup(tid); |
| } |
| |
| if (pkt->isInvalidate() || pkt->isWrite()) { |
| TheISA::handleLockedSnoop(cpu.getContext(tid), pkt, |
| cacheBlockMask); |
| } |
| } |
| } |
| } |
| |
| } |