_site/simplecache/index.html - public/gem5-website - Git at Google

 <!DOCTYPE html>
 <html lang="en">
 <head>
 	<meta charset="utf-8">
 	<meta http-equiv="x-ua-compatible" content="ie=edge">
 	<meta name="viewport" content="width=device-width, initial-scale=1">

 	<title>gem5</title>

 	<!-- SITE FAVICON -->
 	<link rel="shortcut icon" type="image/gif" href="/assets/img/gem5ColorVert.gif"/>

 	<link rel="canonical" href="http://localhost:4000/simplecache/">
 	<link href='https://fonts.googleapis.com/css?family=Open+Sans:400,300,700,800,600' rel='stylesheet' type='text/css'>
 	<link href='https://fonts.googleapis.com/css?family=Muli:400,300' rel='stylesheet' type='text/css'>

 	<!-- FONT AWESOME ICONS -->
 	<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/font-awesome/4.3.0/css/font-awesome.min.css">

 	<!-- BOOTSTRAP -->
 	<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.1.3/css/bootstrap.min.css" integrity="sha384-MCw98/SFnGE8fJT3GXwEOngsV7Zt27NXFoaoApmYm81iuXoPkFOJwJ8ERdknLPMO" crossorigin="anonymous">

 	<!-- CUSTOM CSS -->
 	<link rel="stylesheet" href="/css/main.css">
 </head>


 <body>
 	<nav class="navbar navbar-expand-md navbar-light bg-light">
   <a class="navbar-brand" href="/">
 		<img src="/assets/img/gem5ColorLong.gif" alt="gem5" height="45">
 	</a>
   <button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarNavDropdown" aria-controls="navbarNavDropdown" aria-expanded="false" aria-label="Toggle navigation">
     <span class="navbar-toggler-icon"></span>
   </button>
   <div class="collapse navbar-collapse" id="navbarNavDropdown">
     <ul class="navbar-nav ml-auto">
       <li class="nav-item ">
         <a class="nav-link" href="/">Home</a>
       </li>

 			<li class="nav-item dropdown ">
 				<a class="nav-link dropdown-toggle" href="/about" id="navbarDropdownMenuLink" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">
 					About
 				</a>
 				<div class="dropdown-menu" aria-labelledby="navbarDropdownMenuLink">
           <a class="dropdown-item" href="/about">About</a>
           <a class="dropdown-item" href="/publications">Publications</a>
           <a class="dropdown-item" href="/governance">Governance</a>
 				</div>
 			</li>

 			<li class="nav-item dropdown active">
 				<a class="nav-link dropdown-toggle" href="#" id="navbarDropdownMenuLinkDocs" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">
 					Documentation
 				</a>
 				<div class="dropdown-menu" aria-labelledby="navbarDropdownMenuLinkDocs">
 					<!-- Pull navigation from _data/documentation.yml -->

             <a class="dropdown-item" href="/introduction">Introduction</a>

             <a class="dropdown-item" href="/building">Getting Started</a>

             <a class="dropdown-item" href="/environment">Modifying/Extending</a>

             <a class="dropdown-item" href="/MSIintro">Modeling Cache Coherence with Ruby</a>

 				</div>
 			</li>

       <li class="nav-item ">
         <a class="nav-link" href="/contributing">Contributing</a>
       </li>

 			<li class="nav-item ">
         <a class="nav-link" href="/search">Search</a>
       </li>
     </ul>
   </div>
 </nav>

 	<main>
 		<div class="sidenav-top">
   <img src="/assets/img/gem5ColorLong.gif" alt="gem5" height="80">
   <div class="search">
     <form action="/search" method="get">
       <!-- <label for="search-box"><i class="fa fa-search"></i></label> -->
       <input type="text" name="query" aria-label="Search the documentation">
       <button type="submit" name="submit" aria-label="Search"><i class="fa fa-search" aria-hidden="true"></i></button>
     </form>
   </div>
 </div>
 <div class="sidenav">
   <!-- Pull navigation from _data/documentation.yml -->

     <a class="item" href="/introduction" role="button" aria-expanded="false" aria-controls="introduction">
       Introduction
     </a>
     <div class="collapse " id="introduction">

     </div>

     <a class="item" data-toggle="collapse" href="#pt1" role="button" aria-expanded="false" aria-controls="pt1">
       Getting Started
     </a>
     <div class="collapse " id="pt1">

         <a class="subitem " href="/building">Building gem5</a>

         <a class="subitem " href="/simple_config">Creating a simple configuration script</a>

         <a class="subitem " href="/cache_config">Adding cache to configuration script</a>

         <a class="subitem " href="/gem5_stats">Understanding gem5 statistics and output</a>

         <a class="subitem " href="/example_configs">Using the default configuration scripts</a>

     </div>

     <a class="item" data-toggle="collapse" href="#pt2" role="button" aria-expanded="true" aria-controls="pt2">
       Modifying/Extending
     </a>
     <div class="collapse show" id="pt2">

         <a class="subitem " href="/environment">Setting up your development environment</a>

         <a class="subitem " href="/helloobject">Creating a very simple SimObject</a>

         <a class="subitem " href="/debugging">Debugging gem5</a>

         <a class="subitem " href="/events">Event-driven programming</a>

         <a class="subitem " href="/parameters">Adding parameters to SimObjects and more events</a>

         <a class="subitem " href="/memoryobject">Creating SimObjects in the memory system</a>

         <a class="subitem active" href="/simplecache">Creating a simple cache object</a>

     </div>

     <a class="item" data-toggle="collapse" href="#pt3" role="button" aria-expanded="false" aria-controls="pt3">
       Modeling Cache Coherence with Ruby
     </a>
     <div class="collapse " id="pt3">

         <a class="subitem " href="/MSIintro">Introduction to Ruby</a>

         <a class="subitem " href="/cache-intro">MSI example cache protocol</a>

         <a class="subitem " href="/cache-declarations">Declaring a state machine</a>

         <a class="subitem " href="/cache-in-ports">In port code blocks</a>

         <a class="subitem " href="/cache-actions">Action code blocks</a>

         <a class="subitem " href="/cache-transitions">Transition code blocks</a>

         <a class="subitem " href="/directory">MSI Directory implementation</a>

         <a class="subitem " href="/MSIbuilding">Compiling a SLICC protocol</a>

         <a class="subitem " href="/configuration">Configuring a simple Ruby system</a>

         <a class="subitem " href="/running">Running the simple Ruby system</a>

         <a class="subitem " href="/MSIdebugging">Debugging SLICC Protocols</a>

         <a class="subitem " href="/simple-MI_example">Configuring for a standard protocol</a>

     </div>

 </div>

 <div class="container" id="doc-container">

   <!-- <h1>Creating a simple cache object</h1> -->
   <dl>
   <dt>authors</dt>
   <dd>Jason Lowe-Power</dd>
 </dl>

 <h1 id="creating-a-simple-cache-object">Creating a simple cache object</h1>

 <p>In this chapter, we will take the framework for a memory object we
 created in the <a href="/memoryobject">last chapter</a> and add caching
 logic to it.</p>

 <h2 id="simplecache-simobject">SimpleCache SimObject</h2>

 <p>After creating the SConscript file, that you can download
 here &lt;../_static/scripts/part2/simplecache/SConscript&gt;, we can create
 the SimObject Python file. We will call this simple memory object
 <code class="highlighter-rouge">SimpleCache</code> and create the SimObject Python file in
 <code class="highlighter-rouge">src/learning_gem5/simple_cache</code>.</p>

 <div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code>from m5.params import *
 from m5.proxy import *
 from MemObject import MemObject

 class SimpleCache(MemObject):
     type = 'SimpleCache'
     cxx_header = "learning_gem5/simple_cache/simple_cache.hh"

     cpu_side = VectorSlavePort("CPU side port, receives requests")
     mem_side = MasterPort("Memory side port, sends requests")

     latency = Param.Cycles(1, "Cycles taken on a hit or to resolve a miss")

     size = Param.MemorySize('16kB', "The size of the cache")

     system = Param.System(Parent.any, "The system this cache is part of")
 </code></pre></div></div>

 <p>There are a couple of differences between this SimObject file and the
 one from the <a href="/memoryobject">previous chapter</a>. First, we have a
 couple of extra parameters. Namely, a latency for cache accesses and the
 size of the cache. The <a href="/parameters">parameters chapter</a> goes into more detail about these
 kinds of SimObject parameters.</p>

 <p>Next, we include a <code class="highlighter-rouge">System</code> parameter, which is a pointer to the main
 system this cache is connected to. This is needed so we can get the
 cache block size from the system object when we are initializing the
 cache. To reference the system object this cache is connected to, we use
 a special <em>proxy parameter</em>. In this case, we use <code class="highlighter-rouge">Parent.any</code>.</p>

 <p>In the Python config file, when a <code class="highlighter-rouge">SimpleCache</code> is instantiated, this
 proxy parameter searches through all of the parents of the <code class="highlighter-rouge">SimpleCache</code>
 instance to find a SimObject that matches the <code class="highlighter-rouge">System</code> type. Since we
 often use a <code class="highlighter-rouge">System</code> as the root SimObject, you will often see a
 <code class="highlighter-rouge">system</code> parameter resolved with this proxy parameter.</p>

 <p>The third and final difference between the <code class="highlighter-rouge">SimpleCache</code> and the
 <code class="highlighter-rouge">SimpleMemobj</code> is that instead of having two named CPU ports
 (<code class="highlighter-rouge">inst_port</code> and <code class="highlighter-rouge">data_port</code>), the <code class="highlighter-rouge">SimpleCache</code> uses another special
 parameter: the <code class="highlighter-rouge">VectorPort</code>. <code class="highlighter-rouge">VectorPorts</code> behave similarly to regular
 ports (e.g., they are resolved via <code class="highlighter-rouge">getMasterPort</code> and <code class="highlighter-rouge">getSlavePort</code>),
 but they allow this object to connect to multiple peers. Then, in the
 resolution functions the parameter we ignored before (<code class="highlighter-rouge">PortID idx</code>) is
 used to differentiate between the different ports. By using a vector
 port, this cache can be connected into the system more flexibly than the
 <code class="highlighter-rouge">SimpleMemobj</code>.</p>

 <h2 id="implementing-the-simplecache">Implementing the SimpleCache</h2>

 <p>Most of the code for the <code class="highlighter-rouge">SimpleCache</code> is the same as the
 <code class="highlighter-rouge">SimpleMemobj</code>. There are a couple of changes in the constructor and the
 key memory object functions.</p>

 <p>First, we need to create the CPU side ports dynamically in the
 constructor and initialize the extra member variables based on the
 SimObject parameters.</p>

 <div class="language-cpp highlighter-rouge"><div class="highlight"><pre class="highlight"><code>SimpleCache::SimpleCache(SimpleCacheParams *params) :
     MemObject(params),
     latency(params-&gt;latency),
     blockSize(params-&gt;system-&gt;cacheLineSize()),
     capacity(params-&gt;size / blockSize),
     memPort(params-&gt;name + ".mem_side", this),
     blocked(false), outstandingPacket(nullptr), waitingPortId(-1)
 {
     for (int i = 0; i &lt; params-&gt;port_cpu_side_connection_count; ++i) {
         cpuPorts.emplace_back(name() + csprintf(".cpu_side[%d]", i), i, this);
     }
 }
 </code></pre></div></div>

 <p>In this function, we use the <code class="highlighter-rouge">cacheLineSize</code> from the system parameters
 to set the <code class="highlighter-rouge">blockSize</code> for this cache. We also initialize the capacity
 based on the block size and the parameter and initialize other member
 variables we will need below. Finally, we must create a number of
 <code class="highlighter-rouge">CPUSidePorts</code> based on the number of connections to this object. Since
 the <code class="highlighter-rouge">cpu_side</code> port was declared as a <code class="highlighter-rouge">VectorSlavePort</code> in the SimObject
 Python file, the parameter automatically has a variable
 <code class="highlighter-rouge">port_cpu_side_connection_count</code>. This is based on the Python name of
 the parameter. For each of these connections we add a new <code class="highlighter-rouge">CPUSidePort</code>
 to a <code class="highlighter-rouge">cpuPorts</code> vector declared in the <code class="highlighter-rouge">SimpleCache</code> class.</p>

 <p>We also add one extra member variable to the <code class="highlighter-rouge">CPUSidePort</code> to save its
 id, and we add this as a parameter to its constructor.</p>

 <p>Next, we need to implement <code class="highlighter-rouge">getMasterPort</code> and <code class="highlighter-rouge">getSlavePort</code>. The
 <code class="highlighter-rouge">getMasterPort</code> is exactly the same as the <code class="highlighter-rouge">SimpleMemobj</code>. For
 <code class="highlighter-rouge">getSlavePort</code>, we now need to return the port based on the id
 requested.</p>

 <div class="language-cpp highlighter-rouge"><div class="highlight"><pre class="highlight"><code>BaseSlavePort&amp;
 SimpleCache::getSlavePort(const std::string&amp; if_name, PortID idx)
 {
     if (if_name == "cpu_side" &amp;&amp; idx &lt; cpuPorts.size()) {
         return cpuPorts[idx];
     } else {
         return MemObject::getSlavePort(if_name, idx);
     }
 }
 </code></pre></div></div>

 <p>The implementation of the <code class="highlighter-rouge">CPUSidePort</code> and the <code class="highlighter-rouge">MemSidePort</code> is almost
 the same as in the <code class="highlighter-rouge">SimpleMemobj</code>. The only difference is we need to add
 an extra parameter to <code class="highlighter-rouge">handleRequest</code> that is the id of the port from which
 the request originated. Without this id, we would not be able to forward
 the response to the correct port. The <code class="highlighter-rouge">SimpleMemobj</code> knew which port to
 send replies to based on whether the original request was an instruction or
 data access. However, this information is not useful to the
 <code class="highlighter-rouge">SimpleCache</code> since it uses a vector of ports and not named ports.</p>

 <p>The new <code class="highlighter-rouge">handleRequest</code> function does two things differently from the
 <code class="highlighter-rouge">handleRequest</code> function in the <code class="highlighter-rouge">SimpleMemobj</code>. First, it stores the
 port id of the request as discussed above. Since the <code class="highlighter-rouge">SimpleCache</code> is
 blocking and only allows a single request outstanding at a time, we only
 need to save a single port id.</p>

 <p>Second, it takes time to access a cache. Therefore, we need to take into
 account the latency to access the cache tags and the cache data for a
 request. We added an extra parameter to the cache object for this, and
 in <code class="highlighter-rouge">handleRequest</code> we now use an event to stall the request for the
 needed amount of time. We schedule a new event for <code class="highlighter-rouge">latency</code> cycles in
 the future. The <code class="highlighter-rouge">clockEdge</code> function returns the <em>tick</em> that the <em>nth</em>
 cycle in the future occurs on.</p>

 <div class="language-cpp highlighter-rouge"><div class="highlight"><pre class="highlight"><code>bool
 SimpleCache::handleRequest(PacketPtr pkt, int port_id)
 {
     if (blocked) {
         return false;
     }
     DPRINTF(SimpleCache, "Got request for addr %#x\n", pkt-&gt;getAddr());

     blocked = true;
     waitingPortId = port_id;

     schedule(new AccessEvent(this, pkt), clockEdge(latency));

     return true;
 }
 </code></pre></div></div>

 <p>The <code class="highlighter-rouge">AccessEvent</code> is a little more complicated than the <code class="highlighter-rouge">EventWrapper</code>
 we used in the <a href="/events">events chapter</a>. Instead of using an <code class="highlighter-rouge">EventWrapper</code>, in the
 <code class="highlighter-rouge">SimpleCache</code> we will use a new class. The reason we cannot use an
 <code class="highlighter-rouge">EventWrapper</code> is that we need to pass the packet (<code class="highlighter-rouge">pkt</code>) from
 <code class="highlighter-rouge">handleRequest</code> to the event handler function. The following code is the
 <code class="highlighter-rouge">AccessEvent</code> class. We only need to implement the <code class="highlighter-rouge">process</code> function,
 which calls the function we want to use as our event handler, in this
 case <code class="highlighter-rouge">accessTiming</code>. We also pass the flag <code class="highlighter-rouge">AutoDelete</code> to the event
 constructor so we do not need to worry about freeing the memory for the
 dynamically created object. The event code will automatically delete the
 object after the <code class="highlighter-rouge">process</code> function has executed.</p>

 <div class="language-cpp highlighter-rouge"><div class="highlight"><pre class="highlight"><code>class AccessEvent : public Event
 {
   private:
     SimpleCache *cache;
     PacketPtr pkt;
   public:
     AccessEvent(SimpleCache *cache, PacketPtr pkt) :
         Event(Default_Pri, AutoDelete), cache(cache), pkt(pkt)
     { }
     void process() override {
         cache-&gt;accessTiming(pkt);
     }
 };
 </code></pre></div></div>

 <p>Now, we need to implement the event handler, <code class="highlighter-rouge">accessTiming</code>.</p>

 <div class="language-cpp highlighter-rouge"><div class="highlight"><pre class="highlight"><code>void
 SimpleCache::accessTiming(PacketPtr pkt)
 {
     bool hit = accessFunctional(pkt);
     if (hit) {
         pkt-&gt;makeResponse();
         sendResponse(pkt);
     } else {
         &lt;miss handling&gt;
     }
 }
 </code></pre></div></div>

 <p>This function first <em>functionally</em> accesses the cache. This function
 <code class="highlighter-rouge">accessFunctional</code> (described below) performs the functional access of
 the cache and either reads or writes the cache on a hit or returns that
 the access was a miss.</p>

 <p>If the access is a hit, we simply need to respond to the packet. To
 respond, you first must call the function <code class="highlighter-rouge">makeResponse</code> on the packet.
 This converts the packet from a request packet to a response packet. For
 instance, if the memory command in the packet was a <code class="highlighter-rouge">ReadReq</code> this gets
 converted into a <code class="highlighter-rouge">ReadResp</code>. Writes behave similarly. Then, we can send
 the response back to the CPU.</p>

 <p>The <code class="highlighter-rouge">sendResponse</code> function does the same things as the <code class="highlighter-rouge">handleResponse</code>
 function in the <code class="highlighter-rouge">SimpleMemobj</code> except that it uses the <code class="highlighter-rouge">waitingPortId</code>
 to send the packet to the right port. In this function, we need to mark
 the <code class="highlighter-rouge">SimpleCache</code> unblocked before calling <code class="highlighter-rouge">sendPacket</code> in case the peer
 on the CPU side immediately calls <code class="highlighter-rouge">sendTimingReq</code>. Then, we try to send
 retries to the CPU side ports if the <code class="highlighter-rouge">SimpleCache</code> can now receive
 requests and the ports need to be sent retries.</p>

 <div class="language-cpp highlighter-rouge"><div class="highlight"><pre class="highlight"><code>void SimpleCache::sendResponse(PacketPtr pkt)
 {
     int port = waitingPortId;

     blocked = false;
     waitingPortId = -1;

     cpuPorts[port].sendPacket(pkt);
     for (auto&amp; port : cpuPorts) {
         port.trySendRetry();
     }
 }
 </code></pre></div></div>

 <hr />

 <p>Back to the <code class="highlighter-rouge">accessTiming</code> function, we now need to handle the cache
 miss case. On a miss, we first have to check to see if the missing
 packet is to an entire cache block. If the packet is aligned and the
 size of the request is the size of a cache block, then we can simply
 forward the request to memory, just like in the <code class="highlighter-rouge">SimpleMemobj</code>.</p>

 <p>However, if the packet is smaller than a cache block, then we need to
 create a new packet to read the entire cache block from memory. Here,
 whether the packet is a read or a write request, we send a read request
 to memory to load the data for the cache block into the cache. In the
 case of a write, it will occur in the cache after we have loaded the
 data from memory.</p>

 <p>Then, we create a new packet, that is <code class="highlighter-rouge">blockSize</code> in size and we call
 the <code class="highlighter-rouge">allocate</code> function to allocate memory in the <code class="highlighter-rouge">Packet</code> object for
 the data that we will read from memory. Note: this memory is freed when
 we free the packet. We use the original request object in the packet so
 the memory-side objects know the original requestor and the original
 request type for statistics.</p>

 <p>Finally, we save the original packet pointer (<code class="highlighter-rouge">pkt</code>) in a member
 variable <code class="highlighter-rouge">outstandingPacket</code> so we can recover it when the <code class="highlighter-rouge">SimpleCache</code>
 receives a response. Then, we send the new packet across the memory side
 port.</p>

 <div class="language-cpp highlighter-rouge"><div class="highlight"><pre class="highlight"><code>void
 SimpleCache::accessTiming(PacketPtr pkt)
 {
     bool hit = accessFunctional(pkt);
     if (hit) {
         pkt-&gt;makeResponse();
         sendResponse(pkt);
     } else {
         Addr addr = pkt-&gt;getAddr();
         Addr block_addr = pkt-&gt;getBlockAddr(blockSize);
         unsigned size = pkt-&gt;getSize();
         if (addr == block_addr &amp;&amp; size == blockSize) {
             DPRINTF(SimpleCache, "forwarding packet\n");
             memPort.sendPacket(pkt);
         } else {
             DPRINTF(SimpleCache, "Upgrading packet to block size\n");
             panic_if(addr - block_addr + size &gt; blockSize,
                      "Cannot handle accesses that span multiple cache lines");

             assert(pkt-&gt;needsResponse());
             MemCmd cmd;
             if (pkt-&gt;isWrite() || pkt-&gt;isRead()) {
                 cmd = MemCmd::ReadReq;
             } else {
                 panic("Unknown packet type in upgrade size");
             }

             PacketPtr new_pkt = new Packet(pkt-&gt;req, cmd, blockSize);
             new_pkt-&gt;allocate();

             outstandingPacket = pkt;

             memPort.sendPacket(new_pkt);
         }
     }
 }
 </code></pre></div></div>

 <p>On a response from memory, we know that this was caused by a cache miss.
 The first step is to insert the responding packet into the cache.</p>

 <p>Then, either there is an <code class="highlighter-rouge">outstandingPacket</code>, in which case we need to
 forward that packet to the original requestor, or there is no
 <code class="highlighter-rouge">outstandingPacket</code> which means we should forward the <code class="highlighter-rouge">pkt</code> in the
 response to the original requestor.</p>

 <p>If the packet we are receiving as a response was an upgrade packet
 because the original request was smaller than a cache line, then we need
 to copy the new data to the outstandingPacket packet or write to the
 cache on a write. Then, we need to delete the new packet that we made in
 the miss handling logic.</p>

 <div class="language-cpp highlighter-rouge"><div class="highlight"><pre class="highlight"><code>bool
 SimpleCache::handleResponse(PacketPtr pkt)
 {
     assert(blocked);
     DPRINTF(SimpleCache, "Got response for addr %#x\n", pkt-&gt;getAddr());
     insert(pkt);

     if (outstandingPacket != nullptr) {
         accessFunctional(outstandingPacket);
         outstandingPacket-&gt;makeResponse();
         delete pkt;
         pkt = outstandingPacket;
         outstandingPacket = nullptr;
     } // else, pkt contains the data it needs

     sendResponse(pkt);

     return true;
 }
 </code></pre></div></div>

 <h3 id="functional-cache-logic">Functional cache logic</h3>

 <p>Now, we need to implement two more functions: <code class="highlighter-rouge">accessFunctional</code> and
 <code class="highlighter-rouge">insert</code>. These two functions make up the key components of the cache
 logic.</p>

 <p>To functionally update the cache, we first need storage for the
 cache contents. The simplest possible cache storage is a map (hashtable)
 that maps from addresses to data. Thus, we will add the following member
 to the <code class="highlighter-rouge">SimpleCache</code>.</p>

 <div class="language-cpp highlighter-rouge"><div class="highlight"><pre class="highlight"><code>std::unordered_map&lt;Addr, uint8_t*&gt; cacheStore;
 </code></pre></div></div>

 <p>To access the cache, we first check to see if there is an entry in the
 map which matches the address in the packet. We use the <code class="highlighter-rouge">getBlockAddr</code>
 function of the <code class="highlighter-rouge">Packet</code> type to get the block-aligned address. Then, we
 simply search for that address in the map. If we do not find the
 address, then this function returns <code class="highlighter-rouge">false</code>, the data is not in the
 cache, and it is a miss.</p>

 <p>Otherwise, if the packet is a write request, we need to update the data
 in the cache. To do this, we write the data from the packet to the
 cache. We use the <code class="highlighter-rouge">writeDataToBlock</code> function which writes the data in
 the packet to the write offset into a potentially larger block of data.
 This function takes the cache block offset and the block size (as a
 parameter) and writes the correct offset into the pointer passed as the
 first parameter.</p>

 <p>If the packet is a read request, we need to update the packet's data
 with the data from the cache. The <code class="highlighter-rouge">setDataFromBlock</code> function performs
 the same offset calculation as the <code class="highlighter-rouge">writeDataToBlock</code> function, but
 writes the packet with the data from the pointer in the first parameter.</p>

 <div class="language-cpp highlighter-rouge"><div class="highlight"><pre class="highlight"><code>bool
 SimpleCache::accessFunctional(PacketPtr pkt)
 {
     Addr block_addr = pkt-&gt;getBlockAddr(blockSize);
     auto it = cacheStore.find(block_addr);
     if (it != cacheStore.end()) {
         if (pkt-&gt;isWrite()) {
             pkt-&gt;writeDataToBlock(it-&gt;second, blockSize);
         } else if (pkt-&gt;isRead()) {
             pkt-&gt;setDataFromBlock(it-&gt;second, blockSize);
         } else {
             panic("Unknown packet type!");
         }
         return true;
     }
     return false;
 }
 </code></pre></div></div>

 <p>Finally, we also need to implement the <code class="highlighter-rouge">insert</code> function. This function
 is called every time the memory side port responds to a request.</p>

 <p>The first step is to check if the cache is currently full. If the cache
 has more entries (blocks) than the capacity of the cache as set by the
 SimObject parameter, then we need to evict something. The following code
 evicts a random entry by leveraging the hashtable implementation of the
 C++ <code class="highlighter-rouge">unordered_map</code>.</p>

 <p>On an eviction, we need to write the data back to the backing memory in
 case it has been updated. For this, we create a new <code class="highlighter-rouge">Request</code>-<code class="highlighter-rouge">Packet</code>
 pair. The packet uses a new memory command: <code class="highlighter-rouge">MemCmd::WritebackDirty</code>.
 Then, we send the packet across the memory side port (<code class="highlighter-rouge">memPort</code>) and
 erase the entry in the cache storage map.</p>

 <p>Then, after a block has potentially been evicted, we add the new address
 to the cache. For this we simply allocate space for the block and add an
 entry to the map. Finally, we write the data from the response packet in
 to the newly allocated block. This data is guaranteed to be the size of
 the cache block since we made sure to make a new packet in the cache
 miss logic if the packet was smaller than a cache block.</p>

 <div class="language-cpp highlighter-rouge"><div class="highlight"><pre class="highlight"><code>void
 SimpleCache::insert(PacketPtr pkt)
 {
     if (cacheStore.size() &gt;= capacity) {
         // Select random thing to evict. This is a little convoluted since we
         // are using a std::unordered_map. See http://bit.ly/2hrnLP2
         int bucket, bucket_size;
         do {
             bucket = random_mt.random(0, (int)cacheStore.bucket_count() - 1);
         } while ( (bucket_size = cacheStore.bucket_size(bucket)) == 0 );
         auto block = std::next(cacheStore.begin(bucket),
                                random_mt.random(0, bucket_size - 1));

         RequestPtr req = new Request(block-&gt;first, blockSize, 0, 0);
         PacketPtr new_pkt = new Packet(req, MemCmd::WritebackDirty, blockSize);
         new_pkt-&gt;dataDynamic(block-&gt;second); // This will be deleted later

         DPRINTF(SimpleCache, "Writing packet back %s\n", pkt-&gt;print());
         memPort.sendTimingReq(new_pkt);

         cacheStore.erase(block-&gt;first);
     }
     uint8_t *data = new uint8_t[blockSize];
     cacheStore[pkt-&gt;getAddr()] = data;

     pkt-&gt;writeDataToBlock(data, blockSize);
 }
 </code></pre></div></div>

 <h2 id="creating-a-config-file-for-the-cache">Creating a config file for the cache</h2>

 <p>The last step in our implementation is to create a new Python config
 script that uses our cache. We can use the outline from the
 <a href="/memoryobject">last chapter</a> as a starting point. The only
 difference is we may want to set the parameters of this cache (e.g., set
 the size of the cache to <code class="highlighter-rouge">1kB</code>) and instead of using the named ports
 (<code class="highlighter-rouge">data_port</code> and <code class="highlighter-rouge">inst_port</code>), we just use the <code class="highlighter-rouge">cpu_side</code> port twice.
 Since <code class="highlighter-rouge">cpu_side</code> is a <code class="highlighter-rouge">VectorPort</code>, it will automatically create
 multiple port connections.</p>

 <div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code>import m5
 from m5.objects import *

 ...

 system.cache = SimpleCache(size='1kB')

 system.cpu.icache_port = system.cache.cpu_side
 system.cpu.dcache_port = system.cache.cpu_side

 system.membus = SystemXBar()

 system.cache.mem_side = system.membus.slave

 ...
 </code></pre></div></div>

 <p>The Python config file can be downloaded
 here &lt;../_static/scripts/part2/simplecache/simple_cache.py&gt;</p>

 <p>Running this script should produce the expected output from the hello
 binary.</p>

 <div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>gem5 Simulator System.  http://gem5.org
 gem5 is copyrighted software; use the --copyright option for details.

 gem5 compiled Jan 10 2017 17:38:15
 gem5 started Jan 10 2017 17:40:03
 gem5 executing on chinook, pid 29031
 command line: build/X86/gem5.opt configs/learning_gem5/part2/simple_cache.py

 Global frequency set at 1000000000000 ticks per second
 warn: DRAM device capacity (8192 Mbytes) does not match the address range assigned (512 Mbytes)
 0: system.remote_gdb.listener: listening for remote gdb #0 on port 7000
 warn: CoherentXBar system.membus has no snooping ports attached!
 warn: ClockedObject: More than one power state change request encountered within the same simulation tick
 Beginning simulation!
 info: Entering event queue @ 0.  Starting simulation...
 Hello world!
 Exiting @ tick 56082000 because target called exit()
 </code></pre></div></div>

 <p>Modifying the size of the cache, for instance to 128 KB, should improve
 the performance of the system.</p>

 <div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>gem5 Simulator System.  http://gem5.org
 gem5 is copyrighted software; use the --copyright option for details.

 gem5 compiled Jan 10 2017 17:38:15
 gem5 started Jan 10 2017 17:41:10
 gem5 executing on chinook, pid 29037
 command line: build/X86/gem5.opt configs/learning_gem5/part2/simple_cache.py

 Global frequency set at 1000000000000 ticks per second
 warn: DRAM device capacity (8192 Mbytes) does not match the address range assigned (512 Mbytes)
 0: system.remote_gdb.listener: listening for remote gdb #0 on port 7000
 warn: CoherentXBar system.membus has no snooping ports attached!
 warn: ClockedObject: More than one power state change request encountered within the same simulation tick
 Beginning simulation!
 info: Entering event queue @ 0.  Starting simulation...
 Hello world!
 Exiting @ tick 32685000 because target called exit()
 </code></pre></div></div>

 <h2 id="adding-statistics-to-the-cache">Adding statistics to the cache</h2>

 <p>Knowing the overall execution time of the system is one important
 metric. However, you may want to include other statistics as well, such
 as the hit and miss rates of the cache. To do this, we need to add some
 statistics to the <code class="highlighter-rouge">SimpleCache</code> object.</p>

 <p>First, we need to declare the statistics in the <code class="highlighter-rouge">SimpleCache</code> object.
 They are part of the <code class="highlighter-rouge">Stats</code> namespace. In this case, we'll make four
 statistics. The number of <code class="highlighter-rouge">hits</code> and the number of <code class="highlighter-rouge">misses</code> are just
 simple <code class="highlighter-rouge">Scalar</code> counts. We will also add a <code class="highlighter-rouge">missLatency</code> which is a
 histogram of the time it takes to satisfy a miss. Finally, we'll add a
 special statistic called a <code class="highlighter-rouge">Formula</code> for the <code class="highlighter-rouge">hitRatio</code> that is a
 combination of other statistics (the number of hits and misses).</p>

 <div class="language-cpp highlighter-rouge"><div class="highlight"><pre class="highlight"><code>class SimpleCache : public MemObject
 {
   private:
     ...

     Tick missTime; // To track the miss latency

     Stats::Scalar hits;
     Stats::Scalar misses;
     Stats::Histogram missLatency;
     Stats::Formula hitRatio;

   public:
     ...

     void regStats() override;
 };
 </code></pre></div></div>

 <p>Next, we have to override the <code class="highlighter-rouge">regStats</code> function
 so the statistics are registered with gem5’s statistics infrastructure.
 Here, for each statistic, we give it a name based on the “parent”
 SimObject name and a description. For the histogram statistic, we also
 need to initialize it with how many buckets we want in the histogram.
 Finally, for the formula, we simply need to write the formula down in
 code.</p>

 <div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>void
 SimpleCache::regStats()
 {
     // If you don't do this you get errors about uninitialized stats.
     MemObject::regStats();

     hits.name(name() + ".hits")
         .desc("Number of hits")
         ;

     misses.name(name() + ".misses")
         .desc("Number of misses")
         ;

     missLatency.name(name() + ".missLatency")
         .desc("Ticks for misses to the cache")
         .init(16) // number of buckets
         ;

     hitRatio.name(name() + ".hitRatio")
         .desc("The ratio of hits to the total accesses to the cache")
         ;

     hitRatio = hits / (hits + misses);
 }
 </code></pre></div></div>
 <p>Finally, we need to update the statistics in our code. In the
 <code class="highlighter-rouge">accessTiming</code> function, we can increment the <code class="highlighter-rouge">hits</code> and <code class="highlighter-rouge">misses</code> on a hit
 and miss respectively. Additionally, on a miss, we save the current time
 so we can measure the latency.</p>

 <div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>void
 SimpleCache::accessTiming(PacketPtr pkt)
 {
     bool hit = accessFunctional(pkt);
     if (hit) {
         hits++; // update stats
         pkt-&gt;makeResponse();
         sendResponse(pkt);
     } else {
         misses++; // update stats
         missTime = curTick();
         ...
 </code></pre></div></div>

 <p>Then, when we get a response, we need to add the measured latency to our
 histogram. For this, we use the <code class="highlighter-rouge">sample</code> function. This adds a single
 point to the histogram. This histogram automatically resizes the buckets
 to fit the data it receives.</p>

 <div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>bool
 SimpleCache::handleResponse(PacketPtr pkt)
 {
     insert(pkt);

     missLatency.sample(curTick() - missTime);
     ...
 </code></pre></div></div>

 <p>The complete code for the <code class="highlighter-rouge">SimpleCache</code> header file can be downloaded
 as <a href="../_static/scripts/part2/simplecache/simple_cache.hh">simple_cache.hh</a>, and the
 complete code for the implementation of the <code class="highlighter-rouge">SimpleCache</code> can be
 downloaded
 as <a href="../_static/scripts/part2/simplecache/simple_cache.cc">simple_cache.cc</a>.</p>

 <p>Now, if we run the above config file, we can check on the statistics in
 the <code class="highlighter-rouge">stats.txt</code> file. For the 1 KB case, we get the following
 statistics. 91% of the accesses are hits and the average miss latency is
 53334 ticks (or 53 ns).</p>

 <div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>system.cache.hits                                8431                       # Number of hits
 system.cache.misses                               877                       # Number of misses
 system.cache.missLatency::samples                 877                       # Ticks for misses to the cache
 system.cache.missLatency::mean           53334.093501                       # Ticks for misses to the cache
 system.cache.missLatency::gmean          44506.409356                       # Ticks for misses to the cache
 system.cache.missLatency::stdev          36749.446469                       # Ticks for misses to the cache
 system.cache.missLatency::0-32767                 305     34.78%     34.78% # Ticks for misses to the cache
 system.cache.missLatency::32768-65535             365     41.62%     76.40% # Ticks for misses to the cache
 system.cache.missLatency::65536-98303             164     18.70%     95.10% # Ticks for misses to the cache
 system.cache.missLatency::98304-131071             12      1.37%     96.47% # Ticks for misses to the cache
 system.cache.missLatency::131072-163839            17      1.94%     98.40% # Ticks for misses to the cache
 system.cache.missLatency::163840-196607             7      0.80%     99.20% # Ticks for misses to the cache
 system.cache.missLatency::196608-229375             0      0.00%     99.20% # Ticks for misses to the cache
 system.cache.missLatency::229376-262143             0      0.00%     99.20% # Ticks for misses to the cache
 system.cache.missLatency::262144-294911             2      0.23%     99.43% # Ticks for misses to the cache
 system.cache.missLatency::294912-327679             4      0.46%     99.89% # Ticks for misses to the cache
 system.cache.missLatency::327680-360447             1      0.11%    100.00% # Ticks for misses to the cache
 system.cache.missLatency::360448-393215             0      0.00%    100.00% # Ticks for misses to the cache
 system.cache.missLatency::393216-425983             0      0.00%    100.00% # Ticks for misses to the cache
 system.cache.missLatency::425984-458751             0      0.00%    100.00% # Ticks for misses to the cache
 system.cache.missLatency::458752-491519             0      0.00%    100.00% # Ticks for misses to the cache
 system.cache.missLatency::491520-524287             0      0.00%    100.00% # Ticks for misses to the cache
 system.cache.missLatency::total                   877                       # Ticks for misses to the cache
 system.cache.hitRatio                        0.905780                       # The ratio of hits to the total access
 </code></pre></div></div>

 <p>And when using a 128 KB cache, we get a slightly higher hit ratio. It
 seems like our cache is working as expected!</p>

 <div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>system.cache.hits                                8944                       # Number of hits
 system.cache.misses                               364                       # Number of misses
 system.cache.missLatency::samples                 364                       # Ticks for misses to the cache
 system.cache.missLatency::mean           64222.527473                       # Ticks for misses to the cache
 system.cache.missLatency::gmean          61837.584812                       # Ticks for misses to the cache
 system.cache.missLatency::stdev          27232.443748                       # Ticks for misses to the cache
 system.cache.missLatency::0-32767                   0      0.00%      0.00% # Ticks for misses to the cache
 system.cache.missLatency::32768-65535             254     69.78%     69.78% # Ticks for misses to the cache
 system.cache.missLatency::65536-98303             106     29.12%     98.90% # Ticks for misses to the cache
 system.cache.missLatency::98304-131071              0      0.00%     98.90% # Ticks for misses to the cache
 system.cache.missLatency::131072-163839             0      0.00%     98.90% # Ticks for misses to the cache
 system.cache.missLatency::163840-196607             0      0.00%     98.90% # Ticks for misses to the cache
 system.cache.missLatency::196608-229375             0      0.00%     98.90% # Ticks for misses to the cache
 system.cache.missLatency::229376-262143             0      0.00%     98.90% # Ticks for misses to the cache
 system.cache.missLatency::262144-294911             2      0.55%     99.45% # Ticks for misses to the cache
 system.cache.missLatency::294912-327679             1      0.27%     99.73% # Ticks for misses to the cache
 system.cache.missLatency::327680-360447             1      0.27%    100.00% # Ticks for misses to the cache
 system.cache.missLatency::360448-393215             0      0.00%    100.00% # Ticks for misses to the cache
 system.cache.missLatency::393216-425983             0      0.00%    100.00% # Ticks for misses to the cache
 system.cache.missLatency::425984-458751             0      0.00%    100.00% # Ticks for misses to the cache
 system.cache.missLatency::458752-491519             0      0.00%    100.00% # Ticks for misses to the cache
 system.cache.missLatency::491520-524287             0      0.00%    100.00% # Ticks for misses to the cache
 system.cache.missLatency::total                   364                       # Ticks for misses to the cache
 system.cache.hitRatio                        0.960894                       # The ratio of hits to the total access
 </code></pre></div></div>


 </div>

 	</main>

 	<script src="https://code.jquery.com/jquery-3.3.1.slim.min.js" integrity="sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo" crossorigin="anonymous"></script>
 	<script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.3/umd/popper.min.js" integrity="sha384-ZMP7rVo3mIykV+2+9J3UJ46jBk0WLaUAdn689aCwoqbBJiSnjAK/l8WvCWPIPm49" crossorigin="anonymous"></script>
 	<script src="https://stackpath.bootstrapcdn.com/bootstrap/4.1.3/js/bootstrap.min.js" integrity="sha384-ChfqqxuZUCnJSK3+MXmPNIyE6ZbWh2IMqE241rYiqJxyMiZ6OW/JmZQ5stwEULTy" crossorigin="anonymous"></script>

 	<script>
 	  // Re-evaluate the visibility of the "myBtn" button on every scroll event.
 	  // scrollFunction (declared below, hoisted) decides whether the page has
 	  // been scrolled far enough from the top for the button to be shown.
 	  window.onscroll = function() {scrollFunction()};

 	  /**
 	   * Show the "myBtn" button once the page is scrolled more than 20px from
 	   * the top of the document, and hide it otherwise.
 	   *
 	   * Both document.body and document.documentElement are consulted because
 	   * which of the two reports the page scroll offset depends on the browser
 	   * and rendering mode; the same 20px threshold applies to either.
 	   * Does nothing when the page has no element with id "myBtn".
 	   */
 	  function scrollFunction() {
 	      var SHOW_AFTER_PX = 20;
 	      var btn = document.getElementById("myBtn");
 	      if (!btn) {
 	          // No button on this page: avoid throwing on every scroll event.
 	          return;
 	      }
 	      var scrolled = Math.max(document.body.scrollTop, document.documentElement.scrollTop);
 	      btn.style.display = scrolled > SHOW_AFTER_PX ? "block" : "none";
 	  }

 	  /**
 	   * Jump back to the top of the document (invoked when the user clicks
 	   * the button). The scroll offset is reset on both candidate scrolling
 	   * elements: document.body first, then document.documentElement.
 	   */
 	  function topFunction() {
 	      [document.body, document.documentElement].forEach(function(scroller) {
 	          scroller.scrollTop = 0;
 	      });
 	  }
 	</script>

 <!-- Site footer: three columns of site links. It is flow content, so it is
      placed inside the body element, before the closing body tag. -->
 <footer class="page-footer">
 	<div class="container">
 		<div class="row">

 			<div class="col-12 col-sm-4">
 				<p><a href="/about">About</a></p>
 				<p><a href="/publications">Publications</a></p>
 				<p><a href="/contributing">Contributing</a></p>
 			</div><br>

 			<div class="col-12 col-sm-4">
 				<p><a href="/gettingstarted">Documentation</a></p>
 				<p><a href="#">Source</a></p>
 			</div><br>

 			<div class="col-12 col-sm-4">
 				<p><a href="/help">Help</a></p>
 				<p><a href="/search">Search</a></p>
 				<p><a href="#">Mailing Lists</a></p>
 			</div>

 		</div>
 	</div>
 </footer>

 </body>


 </html>