<!DOCTYPE html>
<html>
<head>
	<!-- Document metadata: encoding, IE compatibility mode and responsive viewport. -->
	<meta charset="utf-8">
	<meta http-equiv="x-ua-compatible" content="ie=edge">
	<meta name="viewport" content="width=device-width, initial-scale=1">

	<title>gem5</title>

	<!-- SITE FAVICON -->
	<link rel="shortcut icon" type="image/gif" href="/assets/img/gem5ColorVert.gif">

	<!-- NOTE(review): the canonical URL points at the local Jekyll dev server; confirm the production build overrides it. -->
	<link rel="canonical" href="http://localhost:4000/simplecache/">

	<!-- WEB FONTS -->
	<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Open+Sans:400,300,700,800,600">
	<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Muli:400,300">

	<!-- FONT AWESOME ICONS: loaded over explicit https so the page also works from file:// and cannot be downgraded to http. -->
	<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/font-awesome/4.3.0/css/font-awesome.min.css">

	<!-- BOOTSTRAP -->
	<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.1.3/css/bootstrap.min.css" integrity="sha384-MCw98/SFnGE8fJT3GXwEOngsV7Zt27NXFoaoApmYm81iuXoPkFOJwJ8ERdknLPMO" crossorigin="anonymous">

	<!-- CUSTOM CSS: must stay last so site rules override the framework defaults. -->
	<link rel="stylesheet" href="/css/main.css">
</head>


<body>
	<!-- Primary site navigation (Bootstrap 4 navbar; collapses behind the toggler below the md breakpoint). -->
	<nav class="navbar navbar-expand-md navbar-light bg-light">
		<a class="navbar-brand" href="/">
			<img src="/assets/img/gem5ColorLong.gif" alt="gem5" height="45">
		</a>
		<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarNavDropdown" aria-controls="navbarNavDropdown" aria-expanded="false" aria-label="Toggle navigation">
			<span class="navbar-toggler-icon"></span>
		</button>
		<div class="collapse navbar-collapse" id="navbarNavDropdown">
			<ul class="navbar-nav ml-auto">
				<li class="nav-item">
					<a class="nav-link" href="/">Home</a>
				</li>

				<!-- Each dropdown toggle needs its own id so aria-labelledby resolves to exactly one element. -->
				<li class="nav-item dropdown">
					<a class="nav-link dropdown-toggle" href="/about" id="navbarDropdownAbout" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">
						About
					</a>
					<div class="dropdown-menu" aria-labelledby="navbarDropdownAbout">
						<a class="dropdown-item" href="/about">About</a>
						<a class="dropdown-item" href="/publications">Publications</a>
						<a class="dropdown-item" href="/governance">Governance</a>
					</div>
				</li>

				<li class="nav-item dropdown active">
					<a class="nav-link dropdown-toggle" href="#" id="navbarDropdownDocumentation" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">
						Documentation
					</a>
					<div class="dropdown-menu" aria-labelledby="navbarDropdownDocumentation">
						<!-- Pull navigation from _data/documentation.yml -->
						<a class="dropdown-item" href="/introduction">Introduction</a>
						<a class="dropdown-item" href="/building">Getting Started</a>
						<a class="dropdown-item" href="/environment">Modifying/Extending</a>
						<a class="dropdown-item" href="/MSIintro">Modeling Cache Coherence with Ruby</a>
					</div>
				</li>

				<li class="nav-item">
					<a class="nav-link" href="/contributing">Contributing</a>
				</li>

				<li class="nav-item">
					<a class="nav-link" href="/search">Search</a>
				</li>
			</ul>
		</div>
	</nav>

	<main>
		<!-- Sidebar header: site logo plus the search form that submits a GET to /search. -->
		<div class="sidenav-top">
			<img src="/assets/img/gem5ColorLong.gif" alt="gem5" height="80">
			<div class="search">
				<form action="/search" method="get">
					<!-- No visible label in this layout, so the controls carry their accessible names via aria-label. -->
					<input type="text" name="query" aria-label="Search">
					<button type="submit" name="submit" aria-label="Submit search"><i class="fa fa-search" aria-hidden="true"></i></button>
				</form>
			</div>
		</div>
<!-- Documentation side navigation: one collapsible group per part; aria-controls names the group each trigger toggles. -->
<div class="sidenav">
  <!-- Pull navigation from _data/documentation.yml -->

  <a class="item" href="/introduction" role="button" aria-expanded="false" aria-controls="introduction">
    Introduction
  </a>
  <div class="collapse" id="introduction">
  </div>

  <a class="item" data-toggle="collapse" href="#pt1" role="button" aria-expanded="false" aria-controls="pt1">
    Getting Started
  </a>
  <div class="collapse" id="pt1">
    <a class="subitem" href="/building">Building gem5</a>
    <a class="subitem" href="/simple_config">Creating a simple configuration script</a>
    <a class="subitem" href="/cache_config">Adding cache to configuration script</a>
    <a class="subitem" href="/gem5_stats">Understanding gem5 statistics and output</a>
    <a class="subitem" href="/example_configs">Using the default configuration scripts</a>
  </div>

  <!-- This group is rendered open ("show"), so its trigger must start with aria-expanded="true". -->
  <a class="item" data-toggle="collapse" href="#pt2" role="button" aria-expanded="true" aria-controls="pt2">
    Modifying/Extending
  </a>
  <div class="collapse show" id="pt2">
    <a class="subitem" href="/environment">Setting up your development environment</a>
    <a class="subitem" href="/helloobject">Creating a very simple SimObject</a>
    <a class="subitem" href="/debugging">Debugging gem5</a>
    <a class="subitem" href="/events">Event-driven programming</a>
    <a class="subitem" href="/parameters">Adding parameters to SimObjects and more events</a>
    <a class="subitem" href="/memoryobject">Creating SimObjects in the memory system</a>
    <a class="subitem active" href="/simplecache">Creating a simple cache object</a>
  </div>

  <a class="item" data-toggle="collapse" href="#pt3" role="button" aria-expanded="false" aria-controls="pt3">
    Modeling Cache Coherence with Ruby
  </a>
  <div class="collapse" id="pt3">
    <a class="subitem" href="/MSIintro">Introduction to Ruby</a>
    <a class="subitem" href="/cache-intro">MSI example cache protocol</a>
    <a class="subitem" href="/cache-declarations">Declaring a state machine</a>
    <a class="subitem" href="/cache-in-ports">In port code blocks</a>
    <a class="subitem" href="/cache-actions">Action code blocks</a>
    <a class="subitem" href="/cache-transitions">Transition code blocks</a>
    <a class="subitem" href="/directory">MSI Directory implementation</a>
    <a class="subitem" href="/MSIbuilding">Compiling a SLICC protocol</a>
    <a class="subitem" href="/configuration">Configuring a simple Ruby system</a>
    <a class="subitem" href="/running">Running the simple Ruby system</a>
    <a class="subitem" href="/MSIdebugging">Debugging SLICC Protocols</a>
    <a class="subitem" href="/simple-MI_example">Configuring for a standard protocol</a>
  </div>

</div>

<div class="container" id="doc-container">
  <div class="edit"><a href="https://github.com/new-website/tree/master/_pages/documentation/part2/simplecache.md">Edit this page</a></div>
  <dl>
  <dt>authors</dt>
  <dd>Jason Lowe-Power</dd>
</dl>

<h1 id="creating-a-simple-cache-object">Creating a simple cache object</h1>

<p>In this chapter, we will take the framework for a memory object we
created in the <a href="/memoryobject">last chapter</a> and add caching
logic to it.</p>

<h2 id="simplecache-simobject">SimpleCache SimObject</h2>

<p>After creating the SConscript file, which you can download
<a href="../_static/scripts/part2/simplecache/SConscript">here</a>, we can create
the SimObject Python file. We will call this simple memory object
<code class="highlighter-rouge">SimpleCache</code> and create the SimObject Python file in
<code class="highlighter-rouge">src/learning_gem5/simple_cache</code>.</p>

<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>from m5.params import *
from m5.proxy import *
from MemObject import MemObject

class SimpleCache(MemObject):
    type = 'SimpleCache'
    cxx_header = "learning_gem5/simple_cache/simple_cache.hh"

    cpu_side = VectorSlavePort("CPU side port, receives requests")
    mem_side = MasterPort("Memory side port, sends requests")

    latency = Param.Cycles(1, "Cycles taken on a hit or to resolve a miss")

    size = Param.MemorySize('16kB', "The size of the cache")

    system = Param.System(Parent.any, "The system this cache is part of")
</code></pre></div></div>

<p>There are a couple of differences between this SimObject file and the
one from the <a href="/memoryobject">previous chapter</a>. First, we have a
couple of extra parameters. Namely, a latency for cache accesses and the
size of the cache. The <a href="/parameters">parameters chapter</a> goes into more detail about these
kinds of SimObject parameters.</p>

<p>Next, we include a <code class="highlighter-rouge">System</code> parameter, which is a pointer to the main
system this cache is connected to. This is needed so we can get the
cache block size from the system object when we are initializing the
cache. To reference the system object this cache is connected to, we use
a special <em>proxy parameter</em>. In this case, we use <code class="highlighter-rouge">Parent.any</code>.</p>

<p>In the Python config file, when a <code class="highlighter-rouge">SimpleCache</code> is instantiated, this
proxy parameter searches through all of the parents of the <code class="highlighter-rouge">SimpleCache</code>
instance to find a SimObject that matches the <code class="highlighter-rouge">System</code> type. Since we
often use a <code class="highlighter-rouge">System</code> as the root SimObject, you will often see a
<code class="highlighter-rouge">system</code> parameter resolved with this proxy parameter.</p>

<p>The third and final difference between the <code class="highlighter-rouge">SimpleCache</code> and the
<code class="highlighter-rouge">SimpleMemobj</code> is that instead of having two named CPU ports
(<code class="highlighter-rouge">inst_port</code> and <code class="highlighter-rouge">data_port</code>), the <code class="highlighter-rouge">SimpleCache</code> uses another special
parameter: the <code class="highlighter-rouge">VectorPort</code>. <code class="highlighter-rouge">VectorPorts</code> behave similarly to regular
ports (e.g., they are resolved via <code class="highlighter-rouge">getMasterPort</code> and <code class="highlighter-rouge">getSlavePort</code>),
but they allow this object to connect to multiple peers. Then, in the
resolution functions the parameter we ignored before (<code class="highlighter-rouge">PortID idx</code>) is
used to differentiate between the different ports. By using a vector
port, this cache can be connected into the system more flexibly than the
<code class="highlighter-rouge">SimpleMemobj</code>.</p>

<h2 id="implementing-the-simplecache">Implementing the SimpleCache</h2>

<p>Most of the code for the <code class="highlighter-rouge">SimpleCache</code> is the same as the
<code class="highlighter-rouge">SimpleMemobj</code>. There are a couple of changes in the constructor and the
key memory object functions.</p>

<p>First, we need to create the CPU side ports dynamically in the
constructor and initialize the extra member variables based on the
SimObject parameters.</p>

<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>SimpleCache::SimpleCache(SimpleCacheParams *params) :
    MemObject(params),
    latency(params-&gt;latency),
    blockSize(params-&gt;system-&gt;cacheLineSize()),
    capacity(params-&gt;size / blockSize),
    memPort(params-&gt;name + ".mem_side", this),
    blocked(false), outstandingPacket(nullptr), waitingPortId(-1)
{
    for (int i = 0; i &lt; params-&gt;port_cpu_side_connection_count; ++i) {
        cpuPorts.emplace_back(name() + csprintf(".cpu_side[%d]", i), i, this);
    }
}
</code></pre></div></div>

<p>In this function, we use the <code class="highlighter-rouge">cacheLineSize</code> from the system parameters
to set the <code class="highlighter-rouge">blockSize</code> for this cache. We also initialize the capacity
based on the block size and the parameter and initialize other member
variables we will need below. Finally, we must create a number of
<code class="highlighter-rouge">CPUSidePorts</code> based on the number of connections to this object. Since
the <code class="highlighter-rouge">cpu_side</code> port was declared as a <code class="highlighter-rouge">VectorSlavePort</code> in the SimObject
Python file, the parameter automatically has a variable
<code class="highlighter-rouge">port_cpu_side_connection_count</code>. This is based on the Python name of
the parameter. For each of these connections we add a new <code class="highlighter-rouge">CPUSidePort</code>
to a <code class="highlighter-rouge">cpuPorts</code> vector declared in the <code class="highlighter-rouge">SimpleCache</code> class.</p>

<p>We also add one extra member variable to the <code class="highlighter-rouge">CPUSidePort</code> to save its
id, and we add this as a parameter to its constructor.</p>

<p>Next, we need to implement <code class="highlighter-rouge">getMasterPort</code> and <code class="highlighter-rouge">getSlavePort</code>. The
<code class="highlighter-rouge">getMasterPort</code> is exactly the same as the <code class="highlighter-rouge">SimpleMemobj</code>. For
<code class="highlighter-rouge">getSlavePort</code>, we now need to return the port based on the id
requested.</p>

<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>BaseSlavePort&amp;
SimpleCache::getSlavePort(const std::string&amp; if_name, PortID idx)
{
    if (if_name == "cpu_side" &amp;&amp; idx &lt; cpuPorts.size()) {
        return cpuPorts[idx];
    } else {
        return MemObject::getSlavePort(if_name, idx);
    }
}
</code></pre></div></div>

<p>The implementation of the <code class="highlighter-rouge">CPUSidePort</code> and the <code class="highlighter-rouge">MemSidePort</code> is almost
the same as in the <code class="highlighter-rouge">SimpleMemobj</code>. The only difference is we need to add
an extra parameter to <code class="highlighter-rouge">handleRequest</code> that is the id of the port from which
the request originated. Without this id, we would not be able to forward
the response to the correct port. The <code class="highlighter-rouge">SimpleMemobj</code> knew which port to
send replies to based on whether the original request was an instruction or
data access. However, this information is not useful to the
<code class="highlighter-rouge">SimpleCache</code> since it uses a vector of ports and not named ports.</p>

<p>The new <code class="highlighter-rouge">handleRequest</code> function does two different things than the
<code class="highlighter-rouge">handleRequest</code> function in the <code class="highlighter-rouge">SimpleMemobj</code>. First, it stores the
port id of the request as discussed above. Since the <code class="highlighter-rouge">SimpleCache</code> is
blocking and only allows a single request outstanding at a time, we only
need to save a single port id.</p>

<p>Second, it takes time to access a cache. Therefore, we need to take into
account the latency to access the cache tags and the cache data for a
request. We added an extra parameter to the cache object for this, and
in <code class="highlighter-rouge">handleRequest</code> we now use an event to stall the request for the
needed amount of time. We schedule a new event for <code class="highlighter-rouge">latency</code> cycles in
the future. The <code class="highlighter-rouge">clockEdge</code> function returns the <em>tick</em> that the <em>nth</em>
cycle in the future occurs on.</p>

<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>bool
SimpleCache::handleRequest(PacketPtr pkt, int port_id)
{
    if (blocked) {
        return false;
    }
    DPRINTF(SimpleCache, "Got request for addr %#x\n", pkt-&gt;getAddr());

    blocked = true;
    waitingPortId = port_id;

    schedule(new AccessEvent(this, pkt), clockEdge(latency));

    return true;
}
</code></pre></div></div>

<p>The <code class="highlighter-rouge">AccessEvent</code> is a little more complicated than the <code class="highlighter-rouge">EventWrapper</code>
we used in the <a href="/events">events chapter</a>. Instead of using an <code class="highlighter-rouge">EventWrapper</code>, in the
<code class="highlighter-rouge">SimpleCache</code> we will use a new class. The reason we cannot use an
<code class="highlighter-rouge">EventWrapper</code> is that we need to pass the packet (<code class="highlighter-rouge">pkt</code>) from
<code class="highlighter-rouge">handleRequest</code> to the event handler function. The following code is the
<code class="highlighter-rouge">AccessEvent</code> class. We only need to implement the <code class="highlighter-rouge">process</code> function,
which calls the function we want to use as our event handler, in this
case <code class="highlighter-rouge">accessTiming</code>. We also pass the flag <code class="highlighter-rouge">AutoDelete</code> to the event
constructor so we do not need to worry about freeing the memory for the
dynamically created object. The event code will automatically delete the
object after the <code class="highlighter-rouge">process</code> function has executed.</p>

<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>class AccessEvent : public Event
{
  private:
    SimpleCache *cache;
    PacketPtr pkt;
  public:
    AccessEvent(SimpleCache *cache, PacketPtr pkt) :
        Event(Default_Pri, AutoDelete), cache(cache), pkt(pkt)
    { }
    void process() override {
        cache-&gt;accessTiming(pkt);
    }
};
</code></pre></div></div>

<p>Now, we need to implement the event handler, <code class="highlighter-rouge">accessTiming</code>.</p>

<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>void
SimpleCache::accessTiming(PacketPtr pkt)
{
    bool hit = accessFunctional(pkt);
    if (hit) {
        pkt-&gt;makeResponse();
        sendResponse(pkt);
    } else {
        &lt;miss handling&gt;
    }
}
</code></pre></div></div>

<p>This function first <em>functionally</em> accesses the cache. This function
<code class="highlighter-rouge">accessFunctional</code> (described below) performs the functional access of
the cache and either reads or writes the cache on a hit or returns that
the access was a miss.</p>

<p>If the access is a hit, we simply need to respond to the packet. To
respond, you first must call the function <code class="highlighter-rouge">makeResponse</code> on the packet.
This converts the packet from a request packet to a response packet. For
instance, if the memory command in the packet was a <code class="highlighter-rouge">ReadReq</code> this gets
converted into a <code class="highlighter-rouge">ReadResp</code>. Writes behave similarly. Then, we can send
the response back to the CPU.</p>

<p>The <code class="highlighter-rouge">sendResponse</code> function does the same things as the <code class="highlighter-rouge">handleResponse</code>
function in the <code class="highlighter-rouge">SimpleMemobj</code> except that it uses the <code class="highlighter-rouge">waitingPortId</code>
to send the packet to the right port. In this function, we need to mark
the <code class="highlighter-rouge">SimpleCache</code> unblocked before calling <code class="highlighter-rouge">sendPacket</code> in case the peer
on the CPU side immediately calls <code class="highlighter-rouge">sendTimingReq</code>. Then, we try to send
retries to the CPU side ports if the <code class="highlighter-rouge">SimpleCache</code> can now receive
requests and the ports need to be sent retries.</p>

<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>void SimpleCache::sendResponse(PacketPtr pkt)
{
    int port = waitingPortId;

    blocked = false;
    waitingPortId = -1;

    cpuPorts[port].sendPacket(pkt);
    for (auto&amp; port : cpuPorts) {
        port.trySendRetry();
    }
}
</code></pre></div></div>

<hr />

<p>Back to the <code class="highlighter-rouge">accessTiming</code> function, we now need to handle the cache
miss case. On a miss, we first have to check to see if the missing
packet is to an entire cache block. If the packet is aligned and the
size of the request is the size of a cache block, then we can simply
forward the request to memory, just like in the <code class="highlighter-rouge">SimpleMemobj</code>.</p>

<p>However, if the packet is smaller than a cache block, then we need to
create a new packet to read the entire cache block from memory. Here,
whether the packet is a read or a write request, we send a read request
to memory to load the data for the cache block into the cache. In the
case of a write, it will occur in the cache after we have loaded the
data from memory.</p>

<p>Then, we create a new packet, that is <code class="highlighter-rouge">blockSize</code> in size and we call
the <code class="highlighter-rouge">allocate</code> function to allocate memory in the <code class="highlighter-rouge">Packet</code> object for
the data that we will read from memory. Note: this memory is freed when
we free the packet. We use the original request object in the packet so
the memory-side objects know the original requestor and the original
request type for statistics.</p>

<p>Finally, we save the original packet pointer (<code class="highlighter-rouge">pkt</code>) in a member
variable <code class="highlighter-rouge">outstandingPacket</code> so we can recover it when the <code class="highlighter-rouge">SimpleCache</code>
receives a response. Then, we send the new packet across the memory side
port.</p>

<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>void
SimpleCache::accessTiming(PacketPtr pkt)
{
    bool hit = accessFunctional(pkt);
    if (hit) {
        pkt-&gt;makeResponse();
        sendResponse(pkt);
    } else {
        Addr addr = pkt-&gt;getAddr();
        Addr block_addr = pkt-&gt;getBlockAddr(blockSize);
        unsigned size = pkt-&gt;getSize();
        if (addr == block_addr &amp;&amp; size == blockSize) {
            DPRINTF(SimpleCache, "forwarding packet\n");
            memPort.sendPacket(pkt);
        } else {
            DPRINTF(SimpleCache, "Upgrading packet to block size\n");
            panic_if(addr - block_addr + size &gt; blockSize,
                     "Cannot handle accesses that span multiple cache lines");

            assert(pkt-&gt;needsResponse());
            MemCmd cmd;
            if (pkt-&gt;isWrite() || pkt-&gt;isRead()) {
                cmd = MemCmd::ReadReq;
            } else {
                panic("Unknown packet type in upgrade size");
            }

            PacketPtr new_pkt = new Packet(pkt-&gt;req, cmd, blockSize);
            new_pkt-&gt;allocate();

            outstandingPacket = pkt;

            memPort.sendPacket(new_pkt);
        }
    }
}
</code></pre></div></div>

<p>On a response from memory, we know that this was caused by a cache miss.
The first step is to insert the responding packet into the cache.</p>

<p>Then, either there is an <code class="highlighter-rouge">outstandingPacket</code>, in which case we need to
forward that packet to the original requestor, or there is no
<code class="highlighter-rouge">outstandingPacket</code> which means we should forward the <code class="highlighter-rouge">pkt</code> in the
response to the original requestor.</p>

<p>If the packet we are receiving as a response was an upgrade packet
because the original request was smaller than a cache line, then we need
to copy the new data to the outstandingPacket packet or write to the
cache on a write. Then, we need to delete the new packet that we made in
the miss handling logic.</p>

<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>bool
SimpleCache::handleResponse(PacketPtr pkt)
{
    assert(blocked);
    DPRINTF(SimpleCache, "Got response for addr %#x\n", pkt-&gt;getAddr());
    insert(pkt);

    if (outstandingPacket != nullptr) {
        accessFunctional(outstandingPacket);
        outstandingPacket-&gt;makeResponse();
        delete pkt;
        pkt = outstandingPacket;
        outstandingPacket = nullptr;
    } // else, pkt contains the data it needs

    sendResponse(pkt);

    return true;
}
</code></pre></div></div>

<h3 id="functional-cache-logic">Functional cache logic</h3>

<p>Now, we need to implement two more functions: <code class="highlighter-rouge">accessFunctional</code> and
<code class="highlighter-rouge">insert</code>. These two functions make up the key components of the cache
logic.</p>

<p>First, to functionally update the cache, we first need storage for the
cache contents. The simplest possible cache storage is a map (hashtable)
that maps from addresses to data. Thus, we will add the following member
to the <code class="highlighter-rouge">SimpleCache</code>.</p>

<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>std::unordered_map&lt;Addr, uint8_t*&gt; cacheStore;
</code></pre></div></div>

<p>To access the cache, we first check to see if there is an entry in the
map which matches the address in the packet. We use the <code class="highlighter-rouge">getBlockAddr</code>
function of the <code class="highlighter-rouge">Packet</code> type to get the block-aligned address. Then, we
simply search for that address in the map. If we do not find the
address, then this function returns <code class="highlighter-rouge">false</code>, the data is not in the
cache, and it is a miss.</p>

<p>Otherwise, if the packet is a write request, we need to update the data
in the cache. To do this, we write the data from the packet to the
cache. We use the <code class="highlighter-rouge">writeDataToBlock</code> function which writes the data in
the packet to the write offset into a potentially larger block of data.
This function takes the cache block offset and the block size (as a
parameter) and writes the correct offset into the pointer passed as the
first parameter.</p>

<p>If the packet is a read request, we need to update the packet's data
with the data from the cache. The <code class="highlighter-rouge">setDataFromBlock</code> function performs
the same offset calculation as the <code class="highlighter-rouge">writeDataToBlock</code> function, but
writes the packet with the data from the pointer in the first parameter.</p>

<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>bool
SimpleCache::accessFunctional(PacketPtr pkt)
{
    Addr block_addr = pkt-&gt;getBlockAddr(blockSize);
    auto it = cacheStore.find(block_addr);
    if (it != cacheStore.end()) {
        if (pkt-&gt;isWrite()) {
            pkt-&gt;writeDataToBlock(it-&gt;second, blockSize);
        } else if (pkt-&gt;isRead()) {
            pkt-&gt;setDataFromBlock(it-&gt;second, blockSize);
        } else {
            panic("Unknown packet type!");
        }
        return true;
    }
    return false;
}
</code></pre></div></div>

<p>Finally, we also need to implement the <code class="highlighter-rouge">insert</code> function. This function
is called every time the memory side port responds to a request.</p>

<p>The first step is to check if the cache is currently full. If the cache
has more entries (blocks) than the capacity of the cache as set by the
SimObject parameter, then we need to evict something. The following code
evicts a random entry by leveraging the hashtable implementation of the
C++ <code class="highlighter-rouge">unordered_map</code>.</p>

<p>On an eviction, we need to write the data back to the backing memory in
case it has been updated. For this, we create a new <code class="highlighter-rouge">Request</code>-<code class="highlighter-rouge">Packet</code>
pair. The packet uses a new memory command: <code class="highlighter-rouge">MemCmd::WritebackDirty</code>.
Then, we send the packet across the memory side port (<code class="highlighter-rouge">memPort</code>) and
erase the entry in the cache storage map.</p>

<p>Then, after a block has potentially been evicted, we add the new address
to the cache. For this we simply allocate space for the block and add an
entry to the map. Finally, we write the data from the response packet in
to the newly allocated block. This data is guaranteed to be the size of
the cache block since we made sure to make a new packet in the cache
miss logic if the packet was smaller than a cache block.</p>

<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>void
SimpleCache::insert(PacketPtr pkt)
{
    if (cacheStore.size() &gt;= capacity) {
        // Select random thing to evict. This is a little convoluted since we
        // are using a std::unordered_map. See http://bit.ly/2hrnLP2
        int bucket, bucket_size;
        do {
            bucket = random_mt.random(0, (int)cacheStore.bucket_count() - 1);
        } while ( (bucket_size = cacheStore.bucket_size(bucket)) == 0 );
        auto block = std::next(cacheStore.begin(bucket),
                               random_mt.random(0, bucket_size - 1));

        RequestPtr req = new Request(block-&gt;first, blockSize, 0, 0);
        PacketPtr new_pkt = new Packet(req, MemCmd::WritebackDirty, blockSize);
        new_pkt-&gt;dataDynamic(block-&gt;second); // This will be deleted later

        DPRINTF(SimpleCache, "Writing packet back %s\n", pkt-&gt;print());
        memPort.sendTimingReq(new_pkt);

        cacheStore.erase(block-&gt;first);
    }
    uint8_t *data = new uint8_t[blockSize];
    cacheStore[pkt-&gt;getAddr()] = data;

    pkt-&gt;writeDataToBlock(data, blockSize);
}
</code></pre></div></div>

<h2 id="creating-a-config-file-for-the-cache">Creating a config file for the cache</h2>

<p>The last step in our implementation is to create a new Python config
script that uses our cache. We can use the outline from the
<a href="/memoryobject">last chapter</a> as a starting point. The only
difference is we may want to set the parameters of this cache (e.g., set
the size of the cache to <code class="highlighter-rouge">1kB</code>) and instead of using the named ports
(<code class="highlighter-rouge">data_port</code> and <code class="highlighter-rouge">inst_port</code>), we just use the <code class="highlighter-rouge">cpu_side</code> port twice.
Since <code class="highlighter-rouge">cpu_side</code> is a <code class="highlighter-rouge">VectorPort</code>, it will automatically create
multiple port connections.</p>

<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>import m5
from m5.objects import *

...

system.cache = SimpleCache(size='1kB')

system.cpu.icache_port = system.cache.cpu_side
system.cpu.dcache_port = system.cache.cpu_side

system.membus = SystemXBar()

system.cache.mem_side = system.membus.slave

...
</code></pre></div></div>

<p>The Python config file can be downloaded
<a href="../_static/scripts/part2/simplecache/simple_cache.py">here</a>.</p>

<p>Running this script should produce the expected output from the hello
binary.</p>

<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>gem5 Simulator System.  http://gem5.org
gem5 is copyrighted software; use the --copyright option for details.

gem5 compiled Jan 10 2017 17:38:15
gem5 started Jan 10 2017 17:40:03
gem5 executing on chinook, pid 29031
command line: build/X86/gem5.opt configs/learning_gem5/part2/simple_cache.py

Global frequency set at 1000000000000 ticks per second
warn: DRAM device capacity (8192 Mbytes) does not match the address range assigned (512 Mbytes)
0: system.remote_gdb.listener: listening for remote gdb #0 on port 7000
warn: CoherentXBar system.membus has no snooping ports attached!
warn: ClockedObject: More than one power state change request encountered within the same simulation tick
Beginning simulation!
info: Entering event queue @ 0.  Starting simulation...
Hello world!
Exiting @ tick 56082000 because target called exit()
</code></pre></div></div>

<p>Modifying the size of the cache, for instance to 128 KB, should improve
the performance of the system.</p>

<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>gem5 Simulator System.  http://gem5.org
gem5 is copyrighted software; use the --copyright option for details.

gem5 compiled Jan 10 2017 17:38:15
gem5 started Jan 10 2017 17:41:10
gem5 executing on chinook, pid 29037
command line: build/X86/gem5.opt configs/learning_gem5/part2/simple_cache.py

Global frequency set at 1000000000000 ticks per second
warn: DRAM device capacity (8192 Mbytes) does not match the address range assigned (512 Mbytes)
0: system.remote_gdb.listener: listening for remote gdb #0 on port 7000
warn: CoherentXBar system.membus has no snooping ports attached!
warn: ClockedObject: More than one power state change request encountered within the same simulation tick
Beginning simulation!
info: Entering event queue @ 0.  Starting simulation...
Hello world!
Exiting @ tick 32685000 because target called exit()
</code></pre></div></div>

<h2 id="adding-statistics-to-the-cache">Adding statistics to the cache</h2>

<p>Knowing the overall execution time of the system is one important
metric. However, you may want to include other statistics as well, such
as the hit and miss rates of the cache. To do this, we need to add some
statistics to the <code class="highlighter-rouge">SimpleCache</code> object.</p>

<p>First, we need to declare the statistics in the <code class="highlighter-rouge">SimpleCache</code> object.
They are part of the <code class="highlighter-rouge">Stats</code> namespace. In this case, we'll make four
statistics. The number of <code class="highlighter-rouge">hits</code> and the number of <code class="highlighter-rouge">misses</code> are just
simple <code class="highlighter-rouge">Scalar</code> counts. We will also add a <code class="highlighter-rouge">missLatency</code> which is a
histogram of the time it takes to satisfy a miss. Finally, we'll add a
special statistic called a <code class="highlighter-rouge">Formula</code> for the <code class="highlighter-rouge">hitRatio</code> that is a
combination of other statistics (the number of hits and misses).</p>

<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>class SimpleCache : public MemObject
{
  private:
    ...

    Tick missTime; // To track the miss latency

    Stats::Scalar hits;
    Stats::Scalar misses;
    Stats::Histogram missLatency;
    Stats::Formula hitRatio;

  public:
    ...

    void regStats() override;
};
</code></pre></div></div>

<p>Next, we have to override the <code class="highlighter-rouge">regStats</code> function
so the statistics are registered with gem5’s statistics infrastructure.
Here, for each statistic, we give it a name based on the “parent”
SimObject name and a description. For the histogram statistic, we also
need to initialize it with how many buckets we want in the histogram.
Finally, for the formula, we simply need to write the formula down in
code.</p>

<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>void
SimpleCache::regStats()
{
    // If you don't do this you get errors about uninitialized stats.
    MemObject::regStats();

    hits.name(name() + ".hits")
        .desc("Number of hits")
        ;

    misses.name(name() + ".misses")
        .desc("Number of misses")
        ;

    missLatency.name(name() + ".missLatency")
        .desc("Ticks for misses to the cache")
        .init(16) // number of buckets
        ;

    hitRatio.name(name() + ".hitRatio")
        .desc("The ratio of hits to the total accesses to the cache")
        ;

    hitRatio = hits / (hits + misses);
}
</code></pre></div></div>
<p>Finally, we need to update the statistics in our code. In the
<code class="highlighter-rouge">accessTiming</code> function, we can increment the <code class="highlighter-rouge">hits</code> and <code class="highlighter-rouge">misses</code> on a hit
and miss respectively. Additionally, on a miss, we save the current time
so we can measure the latency.</p>

<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>void
SimpleCache::accessTiming(PacketPtr pkt)
{
    bool hit = accessFunctional(pkt);
    if (hit) {
        hits++; // update stats
        pkt-&gt;makeResponse();
        sendResponse(pkt);
    } else {
        misses++; // update stats
        missTime = curTick();
        ...
</code></pre></div></div>

<p>Then, when we get a response, we need to add the measured latency to our
histogram. For this, we use the <code class="highlighter-rouge">sample</code> function. This adds a single
point to the histogram. This histogram automatically resizes the buckets
to fit the data it receives.</p>

<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>bool
SimpleCache::handleResponse(PacketPtr pkt)
{
    insert(pkt);

    missLatency.sample(curTick() - missTime);
    ...
</code></pre></div></div>

<p>The complete code for the <code class="highlighter-rouge">SimpleCache</code> header file can be downloaded
here &lt;../_static/scripts/part2/simplecache/simple_cache.hh&gt;, and the
complete code for the implementation of the <code class="highlighter-rouge">SimpleCache</code> can be
downloaded
here &lt;../_static/scripts/part2/simplecache/simple_cache.cc&gt;.</p>

<p>Now, if we run the above config file, we can check on the statistics in
the <code class="highlighter-rouge">stats.txt</code> file. For the 1 KB case, we get the following
statistics. 91% of the accesses are hits and the average miss latency is
53334 ticks (or 53 ns).</p>

<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>system.cache.hits                                8431                       # Number of hits
system.cache.misses                               877                       # Number of misses
system.cache.missLatency::samples                 877                       # Ticks for misses to the cache
system.cache.missLatency::mean           53334.093501                       # Ticks for misses to the cache
system.cache.missLatency::gmean          44506.409356                       # Ticks for misses to the cache
system.cache.missLatency::stdev          36749.446469                       # Ticks for misses to the cache
system.cache.missLatency::0-32767                 305     34.78%     34.78% # Ticks for misses to the cache
system.cache.missLatency::32768-65535             365     41.62%     76.40% # Ticks for misses to the cache
system.cache.missLatency::65536-98303             164     18.70%     95.10% # Ticks for misses to the cache
system.cache.missLatency::98304-131071             12      1.37%     96.47% # Ticks for misses to the cache
system.cache.missLatency::131072-163839            17      1.94%     98.40% # Ticks for misses to the cache
system.cache.missLatency::163840-196607             7      0.80%     99.20% # Ticks for misses to the cache
system.cache.missLatency::196608-229375             0      0.00%     99.20% # Ticks for misses to the cache
system.cache.missLatency::229376-262143             0      0.00%     99.20% # Ticks for misses to the cache
system.cache.missLatency::262144-294911             2      0.23%     99.43% # Ticks for misses to the cache
system.cache.missLatency::294912-327679             4      0.46%     99.89% # Ticks for misses to the cache
system.cache.missLatency::327680-360447             1      0.11%    100.00% # Ticks for misses to the cache
system.cache.missLatency::360448-393215             0      0.00%    100.00% # Ticks for misses to the cache
system.cache.missLatency::393216-425983             0      0.00%    100.00% # Ticks for misses to the cache
system.cache.missLatency::425984-458751             0      0.00%    100.00% # Ticks for misses to the cache
system.cache.missLatency::458752-491519             0      0.00%    100.00% # Ticks for misses to the cache
system.cache.missLatency::491520-524287             0      0.00%    100.00% # Ticks for misses to the cache
system.cache.missLatency::total                   877                       # Ticks for misses to the cache
system.cache.hitRatio                        0.905780                       # The ratio of hits to the total access
</code></pre></div></div>

<p>And when using a 128 KB cache, we get a slightly higher hit ratio. It
seems like our cache is working as expected!</p>

<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>system.cache.hits                                8944                       # Number of hits
system.cache.misses                               364                       # Number of misses
system.cache.missLatency::samples                 364                       # Ticks for misses to the cache
system.cache.missLatency::mean           64222.527473                       # Ticks for misses to the cache
system.cache.missLatency::gmean          61837.584812                       # Ticks for misses to the cache
system.cache.missLatency::stdev          27232.443748                       # Ticks for misses to the cache
system.cache.missLatency::0-32767                   0      0.00%      0.00% # Ticks for misses to the cache
system.cache.missLatency::32768-65535             254     69.78%     69.78% # Ticks for misses to the cache
system.cache.missLatency::65536-98303             106     29.12%     98.90% # Ticks for misses to the cache
system.cache.missLatency::98304-131071              0      0.00%     98.90% # Ticks for misses to the cache
system.cache.missLatency::131072-163839             0      0.00%     98.90% # Ticks for misses to the cache
system.cache.missLatency::163840-196607             0      0.00%     98.90% # Ticks for misses to the cache
system.cache.missLatency::196608-229375             0      0.00%     98.90% # Ticks for misses to the cache
system.cache.missLatency::229376-262143             0      0.00%     98.90% # Ticks for misses to the cache
system.cache.missLatency::262144-294911             2      0.55%     99.45% # Ticks for misses to the cache
system.cache.missLatency::294912-327679             1      0.27%     99.73% # Ticks for misses to the cache
system.cache.missLatency::327680-360447             1      0.27%    100.00% # Ticks for misses to the cache
system.cache.missLatency::360448-393215             0      0.00%    100.00% # Ticks for misses to the cache
system.cache.missLatency::393216-425983             0      0.00%    100.00% # Ticks for misses to the cache
system.cache.missLatency::425984-458751             0      0.00%    100.00% # Ticks for misses to the cache
system.cache.missLatency::458752-491519             0      0.00%    100.00% # Ticks for misses to the cache
system.cache.missLatency::491520-524287             0      0.00%    100.00% # Ticks for misses to the cache
system.cache.missLatency::total                   364                       # Ticks for misses to the cache
system.cache.hitRatio                        0.960894                       # The ratio of hits to the total access
</code></pre></div></div>

  <br>

  <!-- RETRIEVE PREVIOUS PAGE LINK -->
  
    
  
    
  
    
      
      
        
          
            
          
        
          
            
          
        
          
            
          
        
          
            
          
        
          
            
          
        
          
            
          
        
          
            
      
    
  
    
  

  <!-- RETRIEVE NEXT PAGE LINK -->
  
    
  
    
  
    
      
      
        
          
            
      
    
  
    
  


  <!-- Pager for the tutorial. The link itself is styled as a button: a <button> may not be nested inside an <a>. -->
  <div class="navbuttons">
    <a class="btn btn-outline-primary" href="/memoryobject">PREVIOUS</a>
  </div>
</div>

	</main>

	<script src="https://code.jquery.com/jquery-3.3.1.slim.min.js" integrity="sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo" crossorigin="anonymous"></script>
	<script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.3/umd/popper.min.js" integrity="sha384-ZMP7rVo3mIykV+2+9J3UJ46jBk0WLaUAdn689aCwoqbBJiSnjAK/l8WvCWPIPm49" crossorigin="anonymous"></script>
	<script src="https://stackpath.bootstrapcdn.com/bootstrap/4.1.3/js/bootstrap.min.js" integrity="sha384-ChfqqxuZUCnJSK3+MXmPNIyE6ZbWh2IMqE241rYiqJxyMiZ6OW/JmZQ5stwEULTy" crossorigin="anonymous"></script>

	<script>
	  // Re-check the back-to-top button's visibility on every scroll event.
	  // The wrapper resolves scrollFunction when it is called, not when it is assigned.
	  window.onscroll = function onWindowScroll() {
	      scrollFunction();
	  };

	  // Show the "myBtn" element once the page is scrolled away from the top and
	  // hide it again otherwise. Does nothing when the page has no such element,
	  // instead of throwing a TypeError on every scroll event.
	  function scrollFunction() {
	      var button = document.getElementById("myBtn");
	      if (!button) {
	          return;
	      }

	      // Both scroll roots are consulted; which one reports the offset
	      // presumably depends on the browser's rendering mode.
	      // NOTE(review): the two thresholds differ (100 vs 20) - confirm this is intended.
	      var scrolledDown = document.body.scrollTop > 100 ||
	          document.documentElement.scrollTop > 20;
	      button.style.display = scrolledDown ? "block" : "none";
	  }

	  // Click handler for the back-to-top button: resets the vertical scroll
	  // offset on both <body> and the root element so the page jumps to the top.
	  function topFunction() {
	      var scrollRoots = [document.body, document.documentElement];
	      for (var i = 0; i < scrollRoots.length; i++) {
	          scrollRoots[i].scrollTop = 0;
	      }
	  }
	</script>

<!-- Site-wide footer. Kept inside <body>: the html element may only contain head and body. -->
<footer class="page-footer">
	<div class="container">
		<div class="row">

			<!-- Column 1: project information -->
			<div class="col-12 col-sm-4">
				<p><a href="/about">About</a></p>
				<p><a href="/publications">Publications</a></p>
				<p><a href="/contributing">Contributing</a></p>
				<p><a href="/governance">Governance</a></p>
			</div><br>

			<!-- Column 2: documentation and source code -->
			<div class="col-12 col-sm-4">
				<p><a href="/introduction">Documentation</a></p>
				<p><a href="http://gem5.org/Documentation">Old Documentation</a></p>
				<p><a href="https://gem5.googlesource.com/public/gem5">Source</a></p>
			</div><br>

			<!-- Column 3: search and community links -->
			<!-- NOTE(review): the "#" links below look like placeholders; fill in real URLs when known. -->
			<div class="col-12 col-sm-4">
				<p><a href="/search">Search</a></p>
				<p><a href="#">Mailing Lists</a></p>
				<p><a href="#">Source For This Site</a></p>
			</div>

		</div>
	</div>
</footer>

</body>


</html>
