| <!DOCTYPE html> |
| <html> |
| <head> |
| <meta charset="utf-8"> |
| <meta http-equiv="x-ua-compatible" content="ie=edge"> |
| <meta name="viewport" content="width=device-width, initial-scale=1"> |
| |
| <title>gem5</title> |
| |
| <!-- SITE FAVICON --> |
| <link rel="shortcut icon" type="image/gif" href="/assets/img/gem5ColorVert.gif"/> |
| |
| <link rel="canonical" href="http://localhost:4000/simplecache/"> |
| <link href='https://fonts.googleapis.com/css?family=Open+Sans:400,300,700,800,600' rel='stylesheet' type='text/css'> |
| <link href='https://fonts.googleapis.com/css?family=Muli:400,300' rel='stylesheet' type='text/css'> |
| |
<!-- FONT AWESOME ICONS -->
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/font-awesome/4.3.0/css/font-awesome.min.css">
| |
| <!-- BOOTSTRAP --> |
| <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.1.3/css/bootstrap.min.css" integrity="sha384-MCw98/SFnGE8fJT3GXwEOngsV7Zt27NXFoaoApmYm81iuXoPkFOJwJ8ERdknLPMO" crossorigin="anonymous"> |
| |
| <!-- CUSTOM CSS --> |
| <link rel="stylesheet" href="/css/main.css"> |
| </head> |
| |
| |
| <body> |
| <nav class="navbar navbar-expand-md navbar-light bg-light"> |
| <a class="navbar-brand" href="/"> |
<img src="/assets/img/gem5ColorLong.gif" alt="gem5" height="45">
| </a> |
| <button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarNavDropdown" aria-controls="navbarNavDropdown" aria-expanded="false" aria-label="Toggle navigation"> |
| <span class="navbar-toggler-icon"></span> |
| </button> |
| <div class="collapse navbar-collapse" id="navbarNavDropdown"> |
| <ul class="navbar-nav ml-auto"> |
| <li class="nav-item "> |
| <a class="nav-link" href="/">Home</a> |
| </li> |
| |
| <li class="nav-item dropdown "> |
| <a class="nav-link dropdown-toggle" href="/about" id="navbarDropdownMenuLink" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false"> |
| About |
| </a> |
| <div class="dropdown-menu" aria-labelledby="navbarDropdownMenuLink"> |
| <a class="dropdown-item" href="/about">About</a> |
| <a class="dropdown-item" href="/publications">Publications</a> |
| <a class="dropdown-item" href="/governance">Governance</a> |
| </div> |
| </li> |
| |
| <li class="nav-item dropdown active"> |
<a class="nav-link dropdown-toggle" href="#" id="navbarDropdownDocsLink" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">
Documentation
</a>
<div class="dropdown-menu" aria-labelledby="navbarDropdownDocsLink">
| <!-- Pull navigation from _data/documentation.yml --> |
| |
| <a class="dropdown-item" href="/introduction">Introduction</a> |
| |
| <a class="dropdown-item" href="/building">Getting Started</a> |
| |
| <a class="dropdown-item" href="/environment">Modifying/Extending</a> |
| |
| <a class="dropdown-item" href="/MSIintro">Modeling Cache Coherence with Ruby</a> |
| |
| </div> |
| </li> |
| |
| <li class="nav-item "> |
| <a class="nav-link" href="/contributing">Contributing</a> |
| </li> |
| |
| <li class="nav-item "> |
| <a class="nav-link" href="/blog">Blog</a> |
| </li> |
| |
| <li class="nav-item "> |
| <a class="nav-link" href="/search">Search</a> |
| </li> |
| </ul> |
| </div> |
| </nav> |
| |
| <main> |
| <div class="sidenav-top"> |
<a href="/"><img src="/assets/img/gem5ColorLong.gif" alt="gem5" height="80"></a>
| <div class="search"> |
| <form action="/search" method="get"> |
| <!-- <label for="search-box"><i class="fa fa-search"></i></label> --> |
| <input type="text" name="query"> |
| <button type="submit" name="submit"><i class="fa fa-search"></i></button> |
| </form> |
| </div> |
| </div> |
| <div class="sidenav"> |
| <!-- Pull navigation from _data/documentation.yml --> |
| |
<a class="item" href="/introduction">
| Introduction |
| </a> |
| <div class="collapse " id="introduction"> |
| |
| </div> |
| |
<a class="item" data-toggle="collapse" href="#pt1" role="button" aria-expanded="false" aria-controls="pt1">
| Getting Started |
| </a> |
| <div class="collapse " id="pt1"> |
| |
| <a class="subitem " href="/building">Building gem5</a> |
| |
| <a class="subitem " href="/simple_config">Creating a simple configuration script</a> |
| |
| <a class="subitem " href="/cache_config">Adding cache to configuration script</a> |
| |
| <a class="subitem " href="/gem5_stats">Understanding gem5 statistics and output</a> |
| |
| <a class="subitem " href="/example_configs">Using the default configuration scripts</a> |
| |
| </div> |
| |
<a class="item" data-toggle="collapse" href="#pt2" role="button" aria-expanded="true" aria-controls="pt2">
| Modifying/Extending |
| </a> |
| <div class="collapse show" id="pt2"> |
| |
| <a class="subitem " href="/environment">Setting up your development environment</a> |
| |
| <a class="subitem " href="/helloobject">Creating a very simple SimObject</a> |
| |
| <a class="subitem " href="/debugging">Debugging gem5</a> |
| |
| <a class="subitem " href="/events">Event-driven programming</a> |
| |
| <a class="subitem " href="/parameters">Adding parameters to SimObjects and more events</a> |
| |
| <a class="subitem " href="/memoryobject">Creating SimObjects in the memory system</a> |
| |
| <a class="subitem active" href="/simplecache">Creating a simple cache object</a> |
| |
| </div> |
| |
<a class="item" data-toggle="collapse" href="#pt3" role="button" aria-expanded="false" aria-controls="pt3">
| Modeling Cache Coherence with Ruby |
| </a> |
| <div class="collapse " id="pt3"> |
| |
| <a class="subitem " href="/MSIintro">Introduction to Ruby</a> |
| |
| <a class="subitem " href="/cache-intro">MSI example cache protocol</a> |
| |
| <a class="subitem " href="/cache-declarations">Declaring a state machine</a> |
| |
| <a class="subitem " href="/cache-in-ports">In port code blocks</a> |
| |
| <a class="subitem " href="/cache-actions">Action code blocks</a> |
| |
| <a class="subitem " href="/cache-transitions">Transition code blocks</a> |
| |
| <a class="subitem " href="/directory">MSI Directory implementation</a> |
| |
| <a class="subitem " href="/MSIbuilding">Compiling a SLICC protocol</a> |
| |
| <a class="subitem " href="/configuration">Configuring a simple Ruby system</a> |
| |
| <a class="subitem " href="/running">Running the simple Ruby system</a> |
| |
| <a class="subitem " href="/MSIdebugging">Debugging SLICC Protocols</a> |
| |
| <a class="subitem " href="/simple-MI_example">Configuring for a standard protocol</a> |
| |
| </div> |
| |
| </div> |
| |
| <div class="container" id="doc-container"> |
| <div class="edit"><a href="https://github.com/gem5/new-website/tree/master/_pages/documentation/part2/simplecache.md">Edit this page</a></div> |
| <dl> |
| <dt>authors</dt> |
| <dd>Jason Lowe-Power</dd> |
| </dl> |
| |
| <h1 id="creating-a-simple-cache-object">Creating a simple cache object</h1> |
| |
| <p>In this chapter, we will take the framework for a memory object we |
created in the last chapter &lt;memoryobject-chapter&gt; and add caching
| logic to it.</p> |
| |
| <h2 id="simplecache-simobject">SimpleCache SimObject</h2> |
| |
| <p>After creating the SConscript file, that you can download |
here &lt;../_static/scripts/part2/simplecache/SConscript&gt;, we can create
| the SimObject Python file. We will call this simple memory object |
| <code class="highlighter-rouge">SimpleCache</code> and create the SimObject Python file in |
| <code class="highlighter-rouge">src/learning_gem5/simple_cache</code>.</p> |
| |
| <p>``` {.sourceCode .python} |
| from m5.params import * |
| from m5.proxy import * |
| from MemObject import MemObject</p> |
| |
| <p>class SimpleCache(MemObject): |
type = 'SimpleCache'
cxx_header = "learning_gem5/simple_cache/simple_cache.hh"</p>
| |
| <div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>cpu_side = VectorSlavePort("CPU side port, receives requests") |
| mem_side = MasterPort("Memory side port, sends requests") |
| |
| latency = Param.Cycles(1, "Cycles taken on a hit or to resolve a miss") |
| |
| size = Param.MemorySize('16kB', "The size of the cache") |
| |
| system = Param.System(Parent.any, "The system this cache is part of") ``` |
| </code></pre></div></div> |
| |
| <p>There are a couple of differences between this SimObject file and the |
one from the previous chapter &lt;memoryobject-chapter&gt;. First, we have a
| couple of extra parameters. Namely, a latency for cache accesses and the |
| size of the cache. parameters-chapter goes into more detail about these |
| kinds of SimObject parameters.</p> |
| |
| <p>Next, we include a <code class="highlighter-rouge">System</code> parameter, which is a pointer to the main |
| system this cache is connected to. This is needed so we can get the |
| cache block size from the system object when we are initializing the |
| cache. To reference the system object this cache is connected to, we use |
| a special <em>proxy parameter</em>. In this case, we use <code class="highlighter-rouge">Parent.any</code>.</p> |
| |
| <p>In the Python config file, when a <code class="highlighter-rouge">SimpleCache</code> is instantiated, this |
| proxy parameter searches through all of the parents of the <code class="highlighter-rouge">SimpleCache</code> |
| instance to find a SimObject that matches the <code class="highlighter-rouge">System</code> type. Since we |
| often use a <code class="highlighter-rouge">System</code> as the root SimObject, you will often see a |
| <code class="highlighter-rouge">system</code> parameter resolved with this proxy parameter.</p> |
| |
| <p>The third and final difference between the <code class="highlighter-rouge">SimpleCache</code> and the |
| <code class="highlighter-rouge">SimpleMemobj</code> is that instead of having two named CPU ports |
| (<code class="highlighter-rouge">inst_port</code> and <code class="highlighter-rouge">data_port</code>), the <code class="highlighter-rouge">SimpleCache</code> use another special |
| parameter: the <code class="highlighter-rouge">VectorPort</code>. <code class="highlighter-rouge">VectorPorts</code> behave similarly to regular |
| ports (e.g., they are resolved via <code class="highlighter-rouge">getMasterPort</code> and <code class="highlighter-rouge">getSlavePort</code>), |
| but they allow this object to connect to multiple peers. Then, in the |
| resolution functions the parameter we ignored before (<code class="highlighter-rouge">PortID idx</code>) is |
| used to differentiate between the different ports. By using a vector |
| port, this cache can be connected into the system more flexibly than the |
| <code class="highlighter-rouge">SimpleMemobj</code>.</p> |
| |
| <h2 id="implementing-the-simplecache">Implementing the SimpleCache</h2> |
| |
<p>Most of the code for the <code class="highlighter-rouge">SimpleCache</code> is the same as the
| <code class="highlighter-rouge">SimpleMemobj</code>. There are a couple of changes in the constructor and the |
| key memory object functions.</p> |
| |
| <p>First, we need to create the CPU side ports dynamically in the |
| constructor and initialize the extra member functions based on the |
| SimObject parameters.</p> |
| |
| <p>``` {.sourceCode .c++} |
| SimpleCache::SimpleCache(SimpleCacheParams *params) : |
| MemObject(params), |
| latency(params->latency), |
| blockSize(params->system->cacheLineSize()), |
| capacity(params->size / blockSize), |
memPort(params->name + ".mem_side", this),
| blocked(false), outstandingPacket(nullptr), waitingPortId(-1) |
| { |
| for (int i = 0; i < params->port_cpu_side_connection_count; ++i) { |
cpuPorts.emplace_back(name() + csprintf(".cpu_side[%d]", i), i, this);
| } |
| }</p> |
| <div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code> |
| In this function, we use the `cacheLineSize` from the system parameters |
| to set the `blockSize` for this cache. We also initialize the capacity |
| based on the block size and the parameter and initialize other member |
| variables we will need below. Finally, we must create a number of |
| `CPUSidePorts` based on the number of connections to this object. Since |
| the `cpu_side` port was declared as a `VectorSlavePort` in the SimObject |
| Python file, the parameter automatically has a variable |
| `port_cpu_side_connection_count`. This is based on the Python name of |
| the parameter. For each of these connections we add a new `CPUSidePort` |
| to a `cpuPorts` vector declared in the `SimpleCache` class. |
| |
| We also add one extra member variable to the `CPUSidePort` to save its |
| id, and we add this as a parameter to its constructor. |
| |
| Next, we need to implement `getMasterPort` and `getSlavePort`. The |
| `getMasterPort` is exactly the same as the `SimpleMemobj`. For |
| `getSlavePort`, we now need to return the port based on the id |
| requested. |
| |
| ``` {.sourceCode .c++} |
| BaseSlavePort& |
| SimpleCache::getSlavePort(const std::string& if_name, PortID idx) |
| { |
| if (if_name == "cpu_side" && idx < cpuPorts.size()) { |
| return cpuPorts[idx]; |
| } else { |
| return MemObject::getSlavePort(if_name, idx); |
| } |
| } |
| </code></pre></div></div> |
| |
| <p>The implementation of the <code class="highlighter-rouge">CPUSidePort</code> and the <code class="highlighter-rouge">MemSidePort</code> is almost |
| the same as in the <code class="highlighter-rouge">SimpleMemobj</code>. The only difference is we need to add |
| an extra parameter to <code class="highlighter-rouge">handleRequest</code> that is the id of the port which |
| the request originated. Without this id, we would not be able to forward |
| the response to the correct port. The <code class="highlighter-rouge">SimpleMemobj</code> knew which port to |
| send replies based on whether the original request was an instruction or |
data access. However, this information is not useful to the
| <code class="highlighter-rouge">SimpleCache</code> since it uses a vector of ports and not named ports.</p> |
| |
| <p>The new <code class="highlighter-rouge">handleRequest</code> function does two different things than the |
| <code class="highlighter-rouge">handleRequest</code> function in the <code class="highlighter-rouge">SimpleMemobj</code>. First, it stores the |
| port id of the request as discussed above. Since the <code class="highlighter-rouge">SimpleCache</code> is |
| blocking and only allows a single request outstanding at a time, we only |
| need to save a single port id.</p> |
| |
| <p>Second, it takes time to access a cache. Therefore, we need to take into |
| account the latency to access the cache tags and the cache data for a |
| request. We added an extra parameter to the cache object for this, and |
| in <code class="highlighter-rouge">handleRequest</code> we now use an event to stall the request for the |
| needed amount of time. We schedule a new event for <code class="highlighter-rouge">latency</code> cycles in |
| the future. The <code class="highlighter-rouge">clockEdge</code> function returns the <em>tick</em> that the <em>nth</em> |
| cycle in the future occurs on.</p> |
| |
| <p>``` {.sourceCode .c++} |
| bool |
| SimpleCache::handleRequest(PacketPtr pkt, int port_id) |
| { |
| if (blocked) { |
| return false; |
| } |
DPRINTF(SimpleCache, "Got request for addr %#x\n", pkt->getAddr());</p>
| |
| <div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>blocked = true; |
| waitingPortId = port_id; |
| |
| schedule(new AccessEvent(this, pkt), clockEdge(latency)); |
| |
| return true; } ``` |
| </code></pre></div></div> |
| |
| <p>The <code class="highlighter-rouge">AccessEvent</code> is a little more complicated than the <code class="highlighter-rouge">EventWrapper</code> |
| we used in events-chapter. Instead of using an <code class="highlighter-rouge">EventWrapper</code>, in the |
| <code class="highlighter-rouge">SimpleCache</code> we will use a new class. The reason we cannot use an |
| <code class="highlighter-rouge">EventWrapper</code>, is that we need to pass the packet (<code class="highlighter-rouge">pkt</code>) from |
| <code class="highlighter-rouge">handleRequest</code> to the event handler function. The following code is the |
| <code class="highlighter-rouge">AccessEvent</code> class. We only need to implement the <code class="highlighter-rouge">process</code> function, |
| that calls the function we want to use as our event handler, in this |
case <code class="highlighter-rouge">accessTiming</code>. We also pass the flag <code class="highlighter-rouge">AutoDelete</code> to the event
| constructor so we do not need to worry about freeing the memory for the |
| dynamically created object. The event code will automatically delete the |
| object after the <code class="highlighter-rouge">process</code> function has executed.</p> |
| |
| <p>``` {.sourceCode .c++} |
| class AccessEvent : public Event |
| { |
| private: |
| SimpleCache *cache; |
| PacketPtr pkt; |
| public: |
| AccessEvent(SimpleCache *cache, PacketPtr pkt) : |
| Event(Default_Pri, AutoDelete), cache(cache), pkt(pkt) |
| { } |
| void process() override { |
| cache->accessTiming(pkt); |
| } |
| };</p> |
| <div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code> |
| Now, we need to implement the event handler, `accessTiming`. |
| |
| ``` {.sourceCode .c++} |
| void |
| SimpleCache::accessTiming(PacketPtr pkt) |
| { |
| bool hit = accessFunctional(pkt); |
| if (hit) { |
| pkt->makeResponse(); |
| sendResponse(pkt); |
| } else { |
        &lt;miss handling&gt;
| } |
| } |
| </code></pre></div></div> |
| |
| <p>This function first <em>functionally</em> accesses the cache. This function |
| <code class="highlighter-rouge">accessFunctional</code> (described below) performs the functional access of |
| the cache and either reads or writes the cache on a hit or returns that |
| the access was a miss.</p> |
| |
| <p>If the access is a hit, we simply need to respond to the packet. To |
| respond, you first must call the function <code class="highlighter-rouge">makeResponse</code> on the packet. |
| This converts the packet from a request packet to a response packet. For |
| instance, if the memory command in the packet was a <code class="highlighter-rouge">ReadReq</code> this gets |
| converted into a <code class="highlighter-rouge">ReadResp</code>. Writes behave similarly. Then, we can send |
| the response back to the CPU.</p> |
| |
| <p>The <code class="highlighter-rouge">sendResponse</code> function does the same things as the <code class="highlighter-rouge">handleResponse</code> |
| function in the <code class="highlighter-rouge">SimpleMemobj</code> except that it uses the <code class="highlighter-rouge">waitingPortId</code> |
| to send the packet to the right port. In this function, we need to mark |
| the <code class="highlighter-rouge">SimpleCache</code> unblocked before calling <code class="highlighter-rouge">sendPacket</code> in case the peer |
| on the CPU side immediately calls <code class="highlighter-rouge">sendTimingReq</code>. Then, we try to send |
| retries to the CPU side ports if the <code class="highlighter-rouge">SimpleCache</code> can now receive |
| requests and the ports need to be sent retries.</p> |
| |
| <p>``` {.sourceCode .c++} |
| void SimpleCache::sendResponse(PacketPtr pkt) |
| { |
| int port = waitingPortId;</p> |
| |
| <div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>blocked = false; |
| waitingPortId = -1; |
| |
| cpuPorts[port].sendPacket(pkt); |
| for (auto& port : cpuPorts) { |
| port.trySendRetry(); |
| } } ``` |
| </code></pre></div></div> |
| |
| <hr /> |
| |
| <p>Back to the <code class="highlighter-rouge">accessTiming</code> function, we now need to handle the cache |
| miss case. On a miss, we first have to check to see if the missing |
| packet is to an entire cache block. If the packet is aligned and the |
| size of the request is the size of a cache block, then we can simply |
| forward the request to memory, just like in the <code class="highlighter-rouge">SimpleMemobj</code>.</p> |
| |
| <p>However, if the packet is smaller than a cache block, then we need to |
| create a new packet to read the entire cache block from memory. Here, |
| whether the packet is a read or a write request, we send a read request |
| to memory to load the data for the cache block into the cache. In the |
| case of a write, it will occur in the cache after we have loaded the |
| data from memory.</p> |
| |
| <p>Then, we create a new packet, that is <code class="highlighter-rouge">blockSize</code> in size and we call |
| the <code class="highlighter-rouge">allocate</code> function to allocate memory in the <code class="highlighter-rouge">Packet</code> object for |
| the data that we will read from memory. Note: this memory is freed when |
| we free the packet. We use the original request object in the packet so |
| the memory-side objects know the original requestor and the original |
| request type for statistics.</p> |
| |
| <p>Finally, we save the original packet pointer (<code class="highlighter-rouge">pkt</code>) in a member |
| variable <code class="highlighter-rouge">outstandingPacket</code> so we can recover it when the <code class="highlighter-rouge">SimpleCache</code> |
| receives a response. Then, we send the new packet across the memory side |
| port.</p> |
| |
| <p>``` {.sourceCode .c++} |
| void |
| SimpleCache::accessTiming(PacketPtr pkt) |
| { |
| bool hit = accessFunctional(pkt); |
| if (hit) { |
| pkt->makeResponse(); |
| sendResponse(pkt); |
| } else { |
| Addr addr = pkt->getAddr(); |
| Addr block_addr = pkt->getBlockAddr(blockSize); |
| unsigned size = pkt->getSize(); |
| if (addr == block_addr && size == blockSize) { |
DPRINTF(SimpleCache, "forwarding packet\n");
| memPort.sendPacket(pkt); |
| } else { |
DPRINTF(SimpleCache, "Upgrading packet to block size\n");
| panic_if(addr - block_addr + size > blockSize, |
"Cannot handle accesses that span multiple cache lines");</p>
| |
| <div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code> assert(pkt->needsResponse()); |
| MemCmd cmd; |
| if (pkt->isWrite() || pkt->isRead()) { |
| cmd = MemCmd::ReadReq; |
| } else { |
| panic("Unknown packet type in upgrade size"); |
| } |
| |
| PacketPtr new_pkt = new Packet(pkt->req, cmd, blockSize); |
| new_pkt->allocate(); |
| |
| outstandingPacket = pkt; |
| |
| memPort.sendPacket(new_pkt); |
| } |
| } } ``` |
| </code></pre></div></div> |
| |
| <p>On a response from memory, we know that this was caused by a cache miss. |
| The first step is to insert the responding packet into the cache.</p> |
| |
| <p>Then, either there is an <code class="highlighter-rouge">outstandingPacket</code>, in which case we need to |
| forward that packet to the original requestor, or there is no |
| <code class="highlighter-rouge">outstandingPacket</code> which means we should forward the <code class="highlighter-rouge">pkt</code> in the |
| response to the original requestor.</p> |
| |
| <p>If the packet we are receiving as a response was an upgrade packet |
| because the original request was smaller than a cache line, then we need |
| to copy the new data to the outstandingPacket packet or write to the |
| cache on a write. Then, we need to delete the new packet that we made in |
| the miss handling logic.</p> |
| |
| <p>``` {.sourceCode .c++} |
| bool |
| SimpleCache::handleResponse(PacketPtr pkt) |
| { |
| assert(blocked); |
DPRINTF(SimpleCache, "Got response for addr %#x\n", pkt->getAddr());
| insert(pkt);</p> |
| |
| <div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>if (outstandingPacket != nullptr) { |
| accessFunctional(outstandingPacket); |
| outstandingPacket->makeResponse(); |
| delete pkt; |
| pkt = outstandingPacket; |
| outstandingPacket = nullptr; |
| } // else, pkt contains the data it needs |
| |
| sendResponse(pkt); |
| |
| return true; } ``` |
| </code></pre></div></div> |
| |
| <h3 id="functional-cache-logic">Functional cache logic</h3> |
| |
| <p>Now, we need to implement two more functions: <code class="highlighter-rouge">accessFunctional</code> and |
| <code class="highlighter-rouge">insert</code>. These two functions make up the key components of the cache |
| logic.</p> |
| |
| <p>First, to functionally update the cache, we first need storage for the |
| cache contents. The simplest possible cache storage is a map (hashtable) |
| that maps from addresses to data. Thus, we will add the following member |
| to the <code class="highlighter-rouge">SimpleCache</code>.</p> |
| |
| <p>``` {.sourceCode .c++} |
| std::unordered_map<Addr, uint8_t*> cacheStore;</p> |
| <div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code> |
| To access the cache, we first check to see if there is an entry in the |
| map which matches the address in the packet. We use the `getBlockAddr` |
| function of the `Packet` type to get the block-aligned address. Then, we |
| simply search for that address in the map. If we do not find the |
| address, then this function returns `false`, the data is not in the |
| cache, and it is a miss. |
| |
| Otherwise, if the packet is a write request, we need to update the data |
| in the cache. To do this, we write the data from the packet to the |
| cache. We use the `writeDataToBlock` function which writes the data in |
| the packet to the write offset into a potentially larger block of data. |
| This function takes the cache block offset and the block size (as a |
| parameter) and writes the correct offset into the pointer passed as the |
| first parameter. |
| |
| If the packet is a read request, we need to update the packet's data |
| with the data from the cache. The `setDataFromBlock` function performs |
| the same offset calculation as the `writeDataToBlock` function, but |
| writes the packet with the data from the pointer in the first parameter. |
| |
| ``` {.sourceCode .c++} |
| bool |
| SimpleCache::accessFunctional(PacketPtr pkt) |
| { |
| Addr block_addr = pkt->getBlockAddr(blockSize); |
| auto it = cacheStore.find(block_addr); |
| if (it != cacheStore.end()) { |
| if (pkt->isWrite()) { |
| pkt->writeDataToBlock(it->second, blockSize); |
| } else if (pkt->isRead()) { |
| pkt->setDataFromBlock(it->second, blockSize); |
| } else { |
| panic("Unknown packet type!"); |
| } |
| return true; |
| } |
| return false; |
| } |
| </code></pre></div></div> |
| |
| <p>Finally, we also need to implement the <code class="highlighter-rouge">insert</code> function. This function |
| is called every time the memory side port responds to a request.</p> |
| |
| <p>The first step is to check if the cache is currently full. If the cache |
| has more entries (blocks) than the capacity of the cache as set by the |
| SimObject parameter, then we need to evict something. The following code |
| evicts a random entry by leveraging the hashtable implementation of the |
| C++ <code class="highlighter-rouge">unordered_map</code>.</p> |
| |
| <p>On an eviction, we need to write the data back to the backing memory in |
| case it has been updated. For this, we create a new <code class="highlighter-rouge">Request</code>-<code class="highlighter-rouge">Packet</code> |
| pair. The packet uses a new memory command: <code class="highlighter-rouge">MemCmd::WritebackDirty</code>. |
| Then, we send the packet across the memory side port (<code class="highlighter-rouge">memPort</code>) and |
| erase the entry in the cache storage map.</p> |
| |
| <p>Then, after a block has potentially been evicted, we add the new address |
| to the cache. For this we simply allocate space for the block and add an |
| entry to the map. Finally, we write the data from the response packet in |
| to the newly allocated block. This data is guaranteed to be the size of |
| the cache block since we made sure to make a new packet in the cache |
| miss logic if the packet was smaller than a cache block.</p> |
| |
| <p>``` {.sourceCode .c++} |
| void |
| SimpleCache::insert(PacketPtr pkt) |
| { |
| if (cacheStore.size() >= capacity) { |
| // Select random thing to evict. This is a little convoluted since we |
| // are using a std::unordered_map. See http://bit.ly/2hrnLP2 |
| int bucket, bucket_size; |
| do { |
| bucket = random_mt.random(0, (int)cacheStore.bucket_count() - 1); |
| } while ( (bucket_size = cacheStore.bucket_size(bucket)) == 0 ); |
| auto block = std::next(cacheStore.begin(bucket), |
| random_mt.random(0, bucket_size - 1));</p> |
| |
| <div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code> RequestPtr req = new Request(block->first, blockSize, 0, 0); |
| PacketPtr new_pkt = new Packet(req, MemCmd::WritebackDirty, blockSize); |
| new_pkt->dataDynamic(block->second); // This will be deleted later |
| |
| DPRINTF(SimpleCache, "Writing packet back %s\n", pkt->print()); |
| memPort.sendTimingReq(new_pkt); |
| |
| cacheStore.erase(block->first); |
| } |
| uint8_t *data = new uint8_t[blockSize]; |
| cacheStore[pkt->getAddr()] = data; |
| |
| pkt->writeDataToBlock(data, blockSize); } ``` |
| </code></pre></div></div> |
| |
| <h2 id="creating-a-config-file-for-the-cache">Creating a config file for the cache</h2> |
| |
| <p>The last step in our implementation is to create a new Python config |
| script that uses our cache. We can use the outline from the |
last chapter &lt;memoryobject-chapter&gt; as a starting point. The only
| difference is we may want to set the parameters of this cache (e.g., set |
| the size of the cache to <code class="highlighter-rouge">1kB</code>) and instead of using the named ports |
| (<code class="highlighter-rouge">data_port</code> and <code class="highlighter-rouge">inst_port</code>), we just use the <code class="highlighter-rouge">cpu_side</code> port twice. |
| Since <code class="highlighter-rouge">cpu_side</code> is a <code class="highlighter-rouge">VectorPort</code>, it will automatically create |
| multiple port connections.</p> |
| |
| <p>``` {.sourceCode .python} |
| import m5 |
| from m5.objects import *</p> |
| |
| <p>…</p> |
| |
<p>system.cache = SimpleCache(size='1kB')</p>
| |
| <p>system.cpu.icache_port = system.cache.cpu_side |
| system.cpu.dcache_port = system.cache.cpu_side</p> |
| |
| <p>system.membus = SystemXBar()</p> |
| |
| <p>system.cache.mem_side = system.membus.slave</p> |
| |
| <p>…</p> |
| <div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code> |
| The Python config file can be downloaded |
here &lt;../_static/scripts/part2/simplecache/simple_cache.py&gt;
| |
| Running this script should produce the expected output from the hello |
| binary. |
| |
| gem5 Simulator System. http://gem5.org |
| gem5 is copyrighted software; use the --copyright option for details. |
| |
| gem5 compiled Jan 10 2017 17:38:15 |
| gem5 started Jan 10 2017 17:40:03 |
| gem5 executing on chinook, pid 29031 |
| command line: build/X86/gem5.opt configs/learning_gem5/part2/simple_cache.py |
| |
| Global frequency set at 1000000000000 ticks per second |
| warn: DRAM device capacity (8192 Mbytes) does not match the address range assigned (512 Mbytes) |
| 0: system.remote_gdb.listener: listening for remote gdb #0 on port 7000 |
| warn: CoherentXBar system.membus has no snooping ports attached! |
| warn: ClockedObject: More than one power state change request encountered within the same simulation tick |
| Beginning simulation! |
| info: Entering event queue @ 0. Starting simulation... |
| Hello world! |
| Exiting @ tick 56082000 because target called exit() |
| |
| Modifying the size of the cache, for instance to 128 KB, should improve |
| the performance of the system. |
| |
| gem5 Simulator System. http://gem5.org |
| gem5 is copyrighted software; use the --copyright option for details. |
| |
| gem5 compiled Jan 10 2017 17:38:15 |
| gem5 started Jan 10 2017 17:41:10 |
| gem5 executing on chinook, pid 29037 |
| command line: build/X86/gem5.opt configs/learning_gem5/part2/simple_cache.py |
| |
| Global frequency set at 1000000000000 ticks per second |
| warn: DRAM device capacity (8192 Mbytes) does not match the address range assigned (512 Mbytes) |
| 0: system.remote_gdb.listener: listening for remote gdb #0 on port 7000 |
| warn: CoherentXBar system.membus has no snooping ports attached! |
| warn: ClockedObject: More than one power state change request encountered within the same simulation tick |
| Beginning simulation! |
| info: Entering event queue @ 0. Starting simulation... |
| Hello world! |
| Exiting @ tick 32685000 because target called exit() |
| |
| Adding statistics to the cache |
| ------------------------------ |
| |
| Knowing the overall execution time of the system is one important |
| metric. However, you may want to include other statistics as well, such |
| as the hit and miss rates of the cache. To do this, we need to add some |
| statistics to the `SimpleCache` object. |
| |
| First, we need to declare the statistics in the `SimpleCache` object. |
| They are part of the `Stats` namespace. In this case, we'll make four |
| statistics. The number of `hits` and the number of `misses` are just |
| simple `Scalar` counts. We will also add a `missLatency` which is a |
| histogram of the time it takes to satisfy a miss. Finally, we'll add a |
| special statistic called a `Formula` for the `hitRatio` that is a |
| combination of other statistics (the number of hits and misses). |
| |
| ``` {.sourceCode .c++} |
| class SimpleCache : public MemObject |
| { |
| private: |
| ... |
| |
| Tick missTime; // To track the miss latency |
| |
| Stats::Scalar hits; |
| Stats::Scalar misses; |
| Stats::Histogram missLatency; |
| Stats::Formula hitRatio; |
| |
| public: |
| ... |
| |
| void regStats() override; |
| }; |
| </code></pre></div></div> |
| |
<p>Next, we have to override the <code class="highlighter-rouge">regStats</code> function
so the statistics are registered with gem5’s statistics infrastructure.
Here, for each statistic, we give it a name based on the “parent”
SimObject name and a description. For the histogram statistic, we also
need to initialize it with how many buckets we want in the histogram.
Finally, for the formula, we simply need to write the formula down in
code.</p>
| |
<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>void
SimpleCache::regStats()
{
    // If you don't do this you get errors about uninitialized stats.
    MemObject::regStats();

    hits.name(name() + ".hits")
        .desc("Number of hits")
        ;

    misses.name(name() + ".misses")
        .desc("Number of misses")
        ;

    missLatency.name(name() + ".missLatency")
        .desc("Ticks for misses to the cache")
        .init(16) // number of buckets
        ;

    hitRatio.name(name() + ".hitRatio")
        .desc("The ratio of hits to the total accesses to the cache")
        ;

    hitRatio = hits / (hits + misses);
}
</code></pre></div></div>
<p>Finally, we need to update the statistics in our code. In the
<code class="highlighter-rouge">accessTiming</code> function, we can increment the <code class="highlighter-rouge">hits</code> and <code class="highlighter-rouge">misses</code> on a hit
and miss respectively. Additionally, on a miss, we save the current time
so we can measure the latency.</p>

<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>void
SimpleCache::accessTiming(PacketPtr pkt)
{
    bool hit = accessFunctional(pkt);
    if (hit) {
        hits++; // update stats
        pkt-&gt;makeResponse();
        sendResponse(pkt);
    } else {
        misses++; // update stats
        missTime = curTick();
        ...
</code></pre></div></div>
| |
| <p>Then, when we get a response, we need to add the measured latency to our |
| histogram. For this, we use the <code class="highlighter-rouge">sample</code> function. This adds a single |
| point to the histogram. This histogram automatically resizes the buckets |
| to fit the data it receives.</p> |
| |
<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>bool
SimpleCache::handleResponse(PacketPtr pkt)
{
    insert(pkt);

    missLatency.sample(curTick() - missTime);
    ...
</code></pre></div></div>
| |
<p>The complete code for the <code class="highlighter-rouge">SimpleCache</code> header file can be downloaded
<a href="../_static/scripts/part2/simplecache/simple_cache.hh">here</a>, and the
complete code for the implementation of the <code class="highlighter-rouge">SimpleCache</code> can be
downloaded
<a href="../_static/scripts/part2/simplecache/simple_cache.cc">here</a>.</p>
| |
| <p>Now, if we run the above config file, we can check on the statistics in |
| the <code class="highlighter-rouge">stats.txt</code> file. For the 1 KB case, we get the following |
| statistics. 91% of the accesses are hits and the average miss latency is |
| 53334 ticks (or 53 ns).</p> |
| |
| <div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>system.cache.hits 8431 # Number of hits |
| system.cache.misses 877 # Number of misses |
| system.cache.missLatency::samples 877 # Ticks for misses to the cache |
| system.cache.missLatency::mean 53334.093501 # Ticks for misses to the cache |
| system.cache.missLatency::gmean 44506.409356 # Ticks for misses to the cache |
| system.cache.missLatency::stdev 36749.446469 # Ticks for misses to the cache |
| system.cache.missLatency::0-32767 305 34.78% 34.78% # Ticks for misses to the cache |
| system.cache.missLatency::32768-65535 365 41.62% 76.40% # Ticks for misses to the cache |
| system.cache.missLatency::65536-98303 164 18.70% 95.10% # Ticks for misses to the cache |
| system.cache.missLatency::98304-131071 12 1.37% 96.47% # Ticks for misses to the cache |
| system.cache.missLatency::131072-163839 17 1.94% 98.40% # Ticks for misses to the cache |
| system.cache.missLatency::163840-196607 7 0.80% 99.20% # Ticks for misses to the cache |
| system.cache.missLatency::196608-229375 0 0.00% 99.20% # Ticks for misses to the cache |
| system.cache.missLatency::229376-262143 0 0.00% 99.20% # Ticks for misses to the cache |
| system.cache.missLatency::262144-294911 2 0.23% 99.43% # Ticks for misses to the cache |
| system.cache.missLatency::294912-327679 4 0.46% 99.89% # Ticks for misses to the cache |
| system.cache.missLatency::327680-360447 1 0.11% 100.00% # Ticks for misses to the cache |
| system.cache.missLatency::360448-393215 0 0.00% 100.00% # Ticks for misses to the cache |
| system.cache.missLatency::393216-425983 0 0.00% 100.00% # Ticks for misses to the cache |
| system.cache.missLatency::425984-458751 0 0.00% 100.00% # Ticks for misses to the cache |
| system.cache.missLatency::458752-491519 0 0.00% 100.00% # Ticks for misses to the cache |
| system.cache.missLatency::491520-524287 0 0.00% 100.00% # Ticks for misses to the cache |
| system.cache.missLatency::total 877 # Ticks for misses to the cache |
| system.cache.hitRatio 0.905780 # The ratio of hits to the total access |
| </code></pre></div></div> |
| |
| <p>And when using a 128 KB cache, we get a slightly higher hit ratio. It |
| seems like our cache is working as expected!</p> |
| |
| <div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>system.cache.hits 8944 # Number of hits |
| system.cache.misses 364 # Number of misses |
| system.cache.missLatency::samples 364 # Ticks for misses to the cache |
| system.cache.missLatency::mean 64222.527473 # Ticks for misses to the cache |
| system.cache.missLatency::gmean 61837.584812 # Ticks for misses to the cache |
| system.cache.missLatency::stdev 27232.443748 # Ticks for misses to the cache |
| system.cache.missLatency::0-32767 0 0.00% 0.00% # Ticks for misses to the cache |
| system.cache.missLatency::32768-65535 254 69.78% 69.78% # Ticks for misses to the cache |
| system.cache.missLatency::65536-98303 106 29.12% 98.90% # Ticks for misses to the cache |
| system.cache.missLatency::98304-131071 0 0.00% 98.90% # Ticks for misses to the cache |
| system.cache.missLatency::131072-163839 0 0.00% 98.90% # Ticks for misses to the cache |
| system.cache.missLatency::163840-196607 0 0.00% 98.90% # Ticks for misses to the cache |
| system.cache.missLatency::196608-229375 0 0.00% 98.90% # Ticks for misses to the cache |
| system.cache.missLatency::229376-262143 0 0.00% 98.90% # Ticks for misses to the cache |
| system.cache.missLatency::262144-294911 2 0.55% 99.45% # Ticks for misses to the cache |
| system.cache.missLatency::294912-327679 1 0.27% 99.73% # Ticks for misses to the cache |
| system.cache.missLatency::327680-360447 1 0.27% 100.00% # Ticks for misses to the cache |
| system.cache.missLatency::360448-393215 0 0.00% 100.00% # Ticks for misses to the cache |
| system.cache.missLatency::393216-425983 0 0.00% 100.00% # Ticks for misses to the cache |
| system.cache.missLatency::425984-458751 0 0.00% 100.00% # Ticks for misses to the cache |
| system.cache.missLatency::458752-491519 0 0.00% 100.00% # Ticks for misses to the cache |
| system.cache.missLatency::491520-524287 0 0.00% 100.00% # Ticks for misses to the cache |
| system.cache.missLatency::total 364 # Ticks for misses to the cache |
| system.cache.hitRatio 0.960894 # The ratio of hits to the total access |
| </code></pre></div></div> |
| |
| <br> |
| |
<!-- RETRIEVE PREVIOUS PAGE LINK -->
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <!-- RETRIEVE NEXT PAGE LINK --> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <div class="navbuttons"> |
| |
| <a href="/memoryobject"><button type="button" class="btn btn-outline-primary">PREVIOUS</button></a> |
| |
| |
| |
| </div> |
| </div> |
| |
| </main> |
| |
| |
| <script src="https://code.jquery.com/jquery-3.3.1.slim.min.js" integrity="sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo" crossorigin="anonymous"></script> |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.3/umd/popper.min.js" integrity="sha384-ZMP7rVo3mIykV+2+9J3UJ46jBk0WLaUAdn689aCwoqbBJiSnjAK/l8WvCWPIPm49" crossorigin="anonymous"></script> |
| <script src="https://stackpath.bootstrapcdn.com/bootstrap/4.1.3/js/bootstrap.min.js" integrity="sha384-ChfqqxuZUCnJSK3+MXmPNIyE6ZbWh2IMqE241rYiqJxyMiZ6OW/JmZQ5stwEULTy" crossorigin="anonymous"></script> |
| |
| <script> |
| // When the user scrolls down 20px from the top of the document, show the button |
| window.onscroll = function() {scrollFunction()}; |
| |
| function scrollFunction() { |
| if (document.body.scrollTop > 100 || document.documentElement.scrollTop > 20) { |
| document.getElementById("myBtn").style.display = "block"; |
| } else { |
| document.getElementById("myBtn").style.display = "none"; |
| } |
| } |
| |
| // When the user clicks on the button, scroll to the top of the document |
| function topFunction() { |
| document.body.scrollTop = 0; |
| document.documentElement.scrollTop = 0; |
| } |
| </script> |
| |
| </body> |
| |
| |
| </html> |