mem: Fix DRAM controller to operate on its own address space

Typically, a memory controller is assigned an address range of the
form [start, end). This address range might be interleaved and
therefore only a non-continuous subset of the addresses in the address
range is handed by this controller.

Prior to this patch, the DRAM controller was unaware of the
interleaving and as a result the address range could affect the
mapping of addresses to DRAM ranks, rows and columns. This patch
changes the DRAM controller, to transform the input address to a
continuous range of the form [0, size). As a result the DRAM
controller always operates on a dense and continuous address range
regardlesss of the system configuration.

Change-Id: I7d273a630928421d1854658c9bb0ab34e9360851
Signed-off-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/19328
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Reviewed-by: Wendy Elsasser <wendy.elsasser@arm.com>
Reviewed-by: Jason Lowe-Power <jason@lowepower.com>
Maintainer: Jason Lowe-Power <jason@lowepower.com>
Tested-by: kokoro <noreply+kokoro@google.com>
diff --git a/configs/common/MemConfig.py b/configs/common/MemConfig.py
index 7f73776..0b5011c 100644
--- a/configs/common/MemConfig.py
+++ b/configs/common/MemConfig.py
@@ -66,10 +66,6 @@
 
     # Only do this for DRAMs
     if issubclass(cls, m5.objects.DRAMCtrl):
-        # Inform each controller how many channels to account
-        # for
-        ctrl.channels = nbr_mem_ctrls
-
         # If the channel bits are appearing after the column
         # bits, we need to add the appropriate number of bits
         # for the row buffer size
diff --git a/src/mem/DRAMCtrl.py b/src/mem/DRAMCtrl.py
index 14db3d3..4ed4e50 100644
--- a/src/mem/DRAMCtrl.py
+++ b/src/mem/DRAMCtrl.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2012-2018 ARM Limited
+# Copyright (c) 2012-2019 ARM Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -130,10 +130,6 @@
     # update per memory class when bank group architecture is supported
     bank_groups_per_rank = Param.Unsigned(0, "Number of bank groups per rank")
     banks_per_rank = Param.Unsigned("Number of banks per rank")
-    # only used for the address mapping as the controller by
-    # construction is a single channel and multiple controllers have
-    # to be instantiated for a multi-channel configuration
-    channels = Param.Unsigned(1, "Number of channels")
 
     # Enable DRAM powerdown states if True. This is False by default due to
     # performance being lower when enabled
diff --git a/src/mem/dram_ctrl.cc b/src/mem/dram_ctrl.cc
index 37db27c..896247e 100644
--- a/src/mem/dram_ctrl.cc
+++ b/src/mem/dram_ctrl.cc
@@ -76,7 +76,7 @@
     ranksPerChannel(p->ranks_per_channel),
     bankGroupsPerRank(p->bank_groups_per_rank),
     bankGroupArch(p->bank_groups_per_rank > 0),
-    banksPerRank(p->banks_per_rank), channels(p->channels), rowsPerBank(0),
+    banksPerRank(p->banks_per_rank), rowsPerBank(0),
     readBufferSize(p->read_buffer_size),
     writeBufferSize(p->write_buffer_size),
     writeHighThreshold(writeBufferSize * p->write_high_thresh_perc / 100.0),
@@ -201,10 +201,6 @@
     // a bit of sanity checks on the interleaving, save it for here to
     // ensure that the system pointer is initialised
     if (range.interleaved()) {
-        if (channels != range.stripes())
-            fatal("%s has %d interleaved address stripes but %d channel(s)\n",
-                  name(), range.stripes(), channels);
-
         if (addrMapping == Enums::RoRaBaChCo) {
             if (rowBufferSize != range.granularity()) {
                 fatal("Channel interleaving of %s doesn't match RoRaBaChCo "
@@ -323,14 +319,11 @@
 
     // we have removed the lowest order address bits that denote the
     // position within the column
-    if (addrMapping == Enums::RoRaBaChCo) {
+    if (addrMapping == Enums::RoRaBaChCo || addrMapping == Enums::RoRaBaCoCh) {
         // the lowest order bits denote the column to ensure that
         // sequential cache lines occupy the same row
         addr = addr / columnsPerRowBuffer;
 
-        // take out the channel part of the address
-        addr = addr / channels;
-
         // after the channel bits, get the bank bits to interleave
         // over the banks
         bank = addr % banksPerRank;
@@ -343,28 +336,6 @@
 
         // lastly, get the row bits, no need to remove them from addr
         row = addr % rowsPerBank;
-    } else if (addrMapping == Enums::RoRaBaCoCh) {
-        // take out the lower-order column bits
-        addr = addr / columnsPerStripe;
-
-        // take out the channel part of the address
-        addr = addr / channels;
-
-        // next, the higher-order column bites
-        addr = addr / (columnsPerRowBuffer / columnsPerStripe);
-
-        // after the column bits, we get the bank bits to interleave
-        // over the banks
-        bank = addr % banksPerRank;
-        addr = addr / banksPerRank;
-
-        // after the bank, we get the rank bits which thus interleaves
-        // over the ranks
-        rank = addr % ranksPerChannel;
-        addr = addr / ranksPerChannel;
-
-        // lastly, get the row bits, no need to remove them from addr
-        row = addr % rowsPerBank;
     } else if (addrMapping == Enums::RoCoRaBaCh) {
         // optimise for closed page mode and utilise maximum
         // parallelism of the DRAM (at the cost of power)
@@ -372,11 +343,6 @@
         // take out the lower-order column bits
         addr = addr / columnsPerStripe;
 
-        // take out the channel part of the address, not that this has
-        // to match with how accesses are interleaved between the
-        // controllers in the address mapping
-        addr = addr / channels;
-
         // start with the bank bits, as this provides the maximum
         // opportunity for parallelism between requests
         bank = addr % banksPerRank;
@@ -425,12 +391,13 @@
     // address of first DRAM packet is kept unaliged. Subsequent DRAM packets
     // are aligned to burst size boundaries. This is to ensure we accurately
     // check read packets against packets in write queue.
-    Addr addr = pkt->getAddr();
+    const Addr base_addr = getCtrlAddr(pkt->getAddr());
+    Addr addr = base_addr;
     unsigned pktsServicedByWrQ = 0;
     BurstHelper* burst_helper = NULL;
     for (int cnt = 0; cnt < pktCount; ++cnt) {
         unsigned size = std::min((addr | (burstSize - 1)) + 1,
-                        pkt->getAddr() + pkt->getSize()) - addr;
+                                 base_addr + pkt->getSize()) - addr;
         stats.readPktSize[ceilLog2(size)]++;
         stats.readBursts++;
         stats.masterReadAccesses[pkt->masterId()]++;
@@ -525,10 +492,11 @@
 
     // if the request size is larger than burst size, the pkt is split into
     // multiple DRAM packets
-    Addr addr = pkt->getAddr();
+    const Addr base_addr = getCtrlAddr(pkt->getAddr());
+    Addr addr = base_addr;
     for (int cnt = 0; cnt < pktCount; ++cnt) {
         unsigned size = std::min((addr | (burstSize - 1)) + 1,
-                        pkt->getAddr() + pkt->getSize()) - addr;
+                                 base_addr + pkt->getSize()) - addr;
         stats.writePktSize[ceilLog2(size)]++;
         stats.writeBursts++;
         stats.masterWriteAccesses[pkt->masterId()]++;
diff --git a/src/mem/dram_ctrl.hh b/src/mem/dram_ctrl.hh
index 8c8c245..ad2f051 100644
--- a/src/mem/dram_ctrl.hh
+++ b/src/mem/dram_ctrl.hh
@@ -819,6 +819,20 @@
                            unsigned int size, bool isRead) const;
 
     /**
+     * Get an address in a dense range which starts from 0. The input
+     * address is the physical address of the request in an address
+     * space that contains other SimObjects apart from this
+     * controller.
+     *
+     * @param addr The intput address which should be in the addrRange
+     * @return An address in the continues range [0, max)
+     */
+    Addr getCtrlAddr(Addr addr)
+    {
+        return range.getOffset(addr);
+    }
+
+    /**
      * The memory schduler/arbiter - picks which request needs to
      * go next, based on the specified policy such as FCFS or FR-FCFS
      * and moves it to the head of the queue.
@@ -946,7 +960,6 @@
     const uint32_t bankGroupsPerRank;
     const bool bankGroupArch;
     const uint32_t banksPerRank;
-    const uint32_t channels;
     uint32_t rowsPerBank;
     const uint32_t readBufferSize;
     const uint32_t writeBufferSize;