blob: d8bd11089784a26682e028c07666f1c68aab033f [file] [log] [blame]
/**
* Copyright (c) 2018 Metempsy Technology Consulting
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "mem/cache/prefetch/bop.hh"
#include "debug/HWPrefetch.hh"
#include "params/BOPPrefetcher.hh"
namespace Prefetcher {
BOP::BOP(const BOPPrefetcherParams &p)
: Queued(p),
scoreMax(p.score_max), roundMax(p.round_max),
badScore(p.bad_score), rrEntries(p.rr_size),
tagMask((1 << p.tag_bits) - 1),
delayQueueEnabled(p.delay_queue_enable),
delayQueueSize(p.delay_queue_size),
delayTicks(cyclesToTicks(p.delay_queue_cycles)),
delayQueueEvent([this]{ delayQueueEventWrapper(); }, name()),
issuePrefetchRequests(false), bestOffset(1), phaseBestOffset(0),
bestScore(0), round(0)
{
if (!isPowerOf2(rrEntries)) {
fatal("%s: number of RR entries is not power of 2\n", name());
}
if (!isPowerOf2(blkSize)) {
fatal("%s: cache line size is not power of 2\n", name());
}
if (!(p.negative_offsets_enable && (p.offset_list_size % 2 == 0))) {
fatal("%s: negative offsets enabled with odd offset list size\n",
name());
}
rrLeft.resize(rrEntries);
rrRight.resize(rrEntries);
// Following the paper implementation, a list with the specified number
// of offsets which are of the form 2^i * 3^j * 5^k with i,j,k >= 0
const int factors[] = { 2, 3, 5 };
unsigned int i = 0;
int64_t offset_i = 1;
while (i < p.offset_list_size)
{
int64_t offset = offset_i;
for (int n : factors) {
while ((offset % n) == 0) {
offset /= n;
}
}
if (offset == 1) {
offsetsList.push_back(OffsetListEntry(offset_i, 0));
i++;
// If we want to use negative offsets, add also the negative value
// of the offset just calculated
if (p.negative_offsets_enable) {
offsetsList.push_back(OffsetListEntry(-offset_i, 0));
i++;
}
}
offset_i++;
}
offsetsListIterator = offsetsList.begin();
}
void
BOP::delayQueueEventWrapper()
{
while (!delayQueue.empty() &&
delayQueue.front().processTick <= curTick())
{
Addr addr_x = delayQueue.front().baseAddr;
insertIntoRR(addr_x, RRWay::Left);
delayQueue.pop_front();
}
// Schedule an event for the next element if there is one
if (!delayQueue.empty()) {
schedule(delayQueueEvent, delayQueue.front().processTick);
}
}
unsigned int
BOP::hash(Addr addr, unsigned int way) const
{
Addr hash1 = addr >> way;
Addr hash2 = hash1 >> floorLog2(rrEntries);
return (hash1 ^ hash2) & (Addr)(rrEntries - 1);
}
void
BOP::insertIntoRR(Addr addr, unsigned int way)
{
switch (way) {
case RRWay::Left:
rrLeft[hash(addr, RRWay::Left)] = addr;
break;
case RRWay::Right:
rrRight[hash(addr, RRWay::Right)] = addr;
break;
}
}
void
BOP::insertIntoDelayQueue(Addr x)
{
if (delayQueue.size() == delayQueueSize) {
return;
}
// Add the address to the delay queue and schedule an event to process
// it after the specified delay cycles
Tick process_tick = curTick() + delayTicks;
delayQueue.push_back(DelayQueueEntry(x, process_tick));
if (!delayQueueEvent.scheduled()) {
schedule(delayQueueEvent, process_tick);
}
}
void
BOP::resetScores()
{
for (auto& it : offsetsList) {
it.second = 0;
}
}
inline Addr
BOP::tag(Addr addr) const
{
return (addr >> blkSize) & tagMask;
}
bool
BOP::testRR(Addr addr) const
{
for (auto& it : rrLeft) {
if (it == addr) {
return true;
}
}
for (auto& it : rrRight) {
if (it == addr) {
return true;
}
}
return false;
}
void
BOP::bestOffsetLearning(Addr x)
{
Addr offset_addr = (*offsetsListIterator).first;
Addr lookup_addr = x - offset_addr;
// There was a hit in the RR table, increment the score for this offset
if (testRR(lookup_addr)) {
DPRINTF(HWPrefetch, "Address %#lx found in the RR table\n", x);
(*offsetsListIterator).second++;
if ((*offsetsListIterator).second > bestScore) {
bestScore = (*offsetsListIterator).second;
phaseBestOffset = (*offsetsListIterator).first;
DPRINTF(HWPrefetch, "New best score is %lu\n", bestScore);
}
}
offsetsListIterator++;
// All the offsets in the list were visited meaning that a learning
// phase finished. Check if
if (offsetsListIterator == offsetsList.end()) {
offsetsListIterator = offsetsList.begin();
round++;
// Check if the best offset must be updated if:
// (1) One of the scores equals SCORE_MAX
// (2) The number of rounds equals ROUND_MAX
if ((bestScore >= scoreMax) || (round == roundMax)) {
bestOffset = phaseBestOffset;
round = 0;
bestScore = 0;
phaseBestOffset = 0;
resetScores();
issuePrefetchRequests = true;
} else if (phaseBestOffset <= badScore) {
issuePrefetchRequests = false;
}
}
}
void
BOP::calculatePrefetch(const PrefetchInfo &pfi,
std::vector<AddrPriority> &addresses)
{
Addr addr = pfi.getAddr();
Addr tag_x = tag(addr);
if (delayQueueEnabled) {
insertIntoDelayQueue(tag_x);
} else {
insertIntoRR(tag_x, RRWay::Left);
}
// Go through the nth offset and update the score, the best score and the
// current best offset if a better one is found
bestOffsetLearning(tag_x);
// This prefetcher is a degree 1 prefetch, so it will only generate one
// prefetch at most per access
if (issuePrefetchRequests) {
Addr prefetch_addr = addr + (bestOffset << lBlkSize);
addresses.push_back(AddrPriority(prefetch_addr, 0));
DPRINTF(HWPrefetch, "Generated prefetch %#lx\n", prefetch_addr);
}
}
void
BOP::notifyFill(const PacketPtr& pkt)
{
// Only insert into the RR right way if it's the pkt is a HWP
if (!pkt->cmd.isHWPrefetch()) return;
Addr tag_y = tag(pkt->getAddr());
if (issuePrefetchRequests) {
insertIntoRR(tag_y - bestOffset, RRWay::Right);
}
}
} // namespace Prefetcher