// ext/mcpat/memoryctrl.cc - public/gem5 - Git at Google

 /*****************************************************************************
  *                                McPAT
  *                      SOFTWARE LICENSE AGREEMENT
  *            Copyright 2012 Hewlett-Packard Development Company, L.P.
  *            Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
  *                          All Rights Reserved
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met: redistributions of source code must retain the above copyright
  * notice, this list of conditions and the following disclaimer;
  * redistributions in binary form must reproduce the above copyright
  * notice, this list of conditions and the following disclaimer in the
  * documentation and/or other materials provided with the distribution;
  * neither the name of the copyright holders nor the names of its
  * contributors may be used to endorse or promote products derived from
  * this software without specific prior written permission.

  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  ***************************************************************************/

 #include <algorithm>
 #include <cassert>
 #include <cmath>
 #include <iostream>
 #include <string>

 #include "basic_circuit.h"
 #include "basic_components.h"
 #include "common.h"
 #include "const.h"
 #include "io.h"
 #include "logic.h"
 #include "memoryctrl.h"
 #include "parameter.h"

 /* Overview of MC models:
  * McPAT memory controllers are modeled according to a large number of industrial data points.
  * The basic memory controller architecture is based on the Synopsys designs
  * (DesignWare DDR2/DDR3-Lite memory controllers and DDR2/DDR3-Lite protocol controllers)
  * as in the Cadence ChipEstimator Tool.
  *
  * An MC has 3 parts as shown in this design. McPAT models both a high-performance MC,
  * based on Niagara processor designs and curve fitting, and a low-power MC, based on
  * data points in the Cadence ChipEstimator Tool.
  *
  * The frontend is modeled analytically; the backend is modeled empirically according to
  * the DDR2/DDR3-Lite protocol controllers in the Cadence ChipEstimator Tool.
  * The PHY is modeled based on
  * "A 100mW 9.6Gb/s Transceiver in 90nm CMOS for next-generation memory interfaces," ISSCC 2006,
  * and "A 14mW 6.25Gb/s Transceiver in 90nm CMOS for Serial Chip-to-Chip Communication," ISSCC 2007.
  *
  * In the Cadence ChipEstimator Tool there are two types of memory controllers: full memory controllers
  * that include the frontend, such as the DesignWare DDR2/DDR3-Lite memory controllers, and backend-only
  * memory controllers, such as the DDR2/DDR3-Lite protocol controllers (except for the DesignWare DDR2/DDR3-Lite
  * memory controllers, all memory controller IP in the Cadence ChipEstimator Tool are backend memory controllers,
  * such as DDRC 1600A and DDRC 800A). Thus, to some extent, the area and power difference between the DesignWare
  * DDR2/DDR3-Lite memory controllers and the DDR2/DDR3-Lite protocol controllers can serve as an estimate of the
  * frontend power and area, which is very close to the analytically modeled results of the frontend for Niagara2@65nm.
  *
  */

 /**
  * Builds the memory-controller back end ("Transaction Engine").
  *
  * @param _xml_data     XML node forwarded to the McPATComponent base
  * @param interface_ip_ input parameters used to initialize l_ip
  * @param mcp_          memory-controller parameters, stored in mcp
  * @param mcs_          memory-controller statistics, stored in mcs
  */
 MCBackend::MCBackend(XMLNode* _xml_data, InputParameter* interface_ip_,
                      const MCParameters & mcp_, const MCStatistics & mcs_)
     : McPATComponent(_xml_data), l_ip(*interface_ip_), mcp(mcp_), mcs(mcs_) {
     name = "Transaction Engine";
     local_result = init_interface(&l_ip, name);

     // TDP stats: reads and writes are each given half of
     // (num_channels * clockRate).
     const double tdp_accesses_per_kind =
         0.5 * mcp.num_channels * mcp.clockRate;
     tdp_stats.reset();
     tdp_stats.readAc.access = tdp_accesses_per_kind;
     tdp_stats.writeAc.access = tdp_accesses_per_kind;

     // Runtime stats are taken directly from the supplied access counts.
     rtp_stats.reset();
     rtp_stats.readAc.access = mcs.reads;
     rtp_stats.writeAc.access = mcs.writes;
 }

 /**
  * Computes the back-end area and stores it in output_data.area.
  *
  * The value is consumed by computeEnergy(), which multiplies it by 1e6 to
  * obtain um^2, so the area stored here is in mm^2.
  *
  * Only mc_type == MC is modeled. Any other controller type prints
  * "Unknown memory controllers" and terminates the program.
  * NOTE(review): that error path exits with status 0.
  */
 void MCBackend::computeArea() {
     if (mcp.mc_type != MC) {
         // The old model for other controller types (Cadence ChipEstimator,
         // 8-bit bus: 0.243 * dataBusWidth / 8) is deliberately skipped; it
         // used to sit, unreachable, after the exit() call.
         cout << "Unknown memory controllers" << endl;
         exit(0);
     }

     if (mcp.type == 0) {
         // Logarithmic fit in the peak transfer rate, scaled by bus width
         // (relative to 128), feature size (relative to 0.09um) and the
         // number of channels.
         output_data.area = (2.7927 * log(mcp.peak_transfer_rate * 2) -
                             19.862) / 2.0 * mcp.dataBusWidth / 128.0 *
             (l_ip.F_sz_um / 0.09) * mcp.num_channels;
     } else {
         // Fixed base area scaled by bus width (relative to 72), the square
         // of the feature size (relative to 0.065um) and the channel count.
         output_data.area = 0.15 * mcp.dataBusWidth / 72.0 *
             (l_ip.F_sz_um / 0.065) * (l_ip.F_sz_um / 0.065) *
             mcp.num_channels;
     }
 }


 /**
  * Computes the back-end leakage, peak dynamic power and runtime dynamic
  * energy.
  *
  * Writes power.readOp.{dynamic, leakage, gate_leakage,
  * longer_channel_leakage} and the matching output_data fields. Reads
  * output_data.area (used for the type-0 leakage estimate), so the area
  * must have been computed beforehand.
  *
  * Only mc_type == MC is modeled. Any other controller type prints
  * "Unknown memory controllers" and terminates the program.
  * NOTE(review): that error path exits with status 0.
  */
 void MCBackend::computeEnergy() {
     double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
     double NMOS_sizing = g_tp.min_w_nmos_;
     double PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
     double area_um2 = output_data.area * 1e6;

     if (mcp.mc_type != MC) {
         // The old model for other controller types is skipped. It used to
         // sit, unreachable, after the exit() call and read an uninitialized
         // mc_power; it also scaled power by 1.2 because the flash
         // controller has about 20% more backend power (BCH ECC in flash is
         // complex and power hungry).
         cout<<"Unknown memory controllers"<<endl;
         exit(0);
     }

     if (mcp.type == 0) {
         //assuming the approximately same scaling factor as seen in processors.
         //Alternatives that are not used:
         //C_MCB = 1.6/200/1e6/144/1.2/1.2*g_ip.F_sz_um/0.19;//Based on Niagara power numbers.The base power (W) is divided by device frequency and vdd and scale to target process.
         //mc_power = 0.0291*2;//29.1mW@200MHz @130nm From Power Analysis of SystemLevel OnChip Communication Architectures by Lahiri et
         //4.32W@1GHz @65nm Cadence ChipEstimator, 10% for backend
         double mc_power = 4.32*0.1;
         double C_MCB = mc_power/1e9/72/1.1/1.1*l_ip.F_sz_um/0.065;
         //per access energy in memory controller
         power.readOp.dynamic = C_MCB * g_tp.peri_global.Vdd *
             g_tp.peri_global.Vdd *
             (mcp.dataBusWidth/*+mcp.addressBusWidth*/);
         power.readOp.leakage = area_um2 / 2 *
             (g_tp.scaling_factor.core_tx_density) *
             cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 1, inv) *
             g_tp.peri_global.Vdd;//unit W
         power.readOp.gate_leakage = area_um2 / 2 *
             (g_tp.scaling_factor.core_tx_density) *
             cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 1, inv) *
             g_tp.peri_global.Vdd;//unit W
     } else {
         //Average on DDR2/3 protocol controller and DDRC 1600/800A in
         //Cadence ChipEstimate
         //Scaling to technology and DIMM feature. The base IP support
         //DDR3-1600(PC3 12800)
         double backend_dyn = 0.9e-9 / 800e6 * mcp.clockRate / 12800 *
             mcp.peak_transfer_rate* mcp.dataBusWidth / 72.0 *
             g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd / 1.1 *
             (l_ip.F_sz_nm/65.0);
         //Gate count scaled by bus width relative to 64 bits.
         //NOTE(review): the code uses 50000, while the earlier comment said
         //"5000 is from Cadence ChipEstimator" - confirm which is intended.
         double backend_gates = 50000 * mcp.dataBusWidth / 64.0;

         power.readOp.dynamic = backend_dyn;
         power.readOp.leakage = (backend_gates) *
             cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
             g_tp.peri_global.Vdd;//unit W
         power.readOp.gate_leakage = (backend_gates) *
             cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
             g_tp.peri_global.Vdd;//unit W
     }

     double long_channel_device_reduction =
         longer_channel_device_reduction(Uncore_device);
     power.readOp.longer_channel_leakage = power.readOp.leakage *
         long_channel_device_reduction;

     // Output leakage power calculations
     output_data.subthreshold_leakage_power =
         longer_channel_device ? power.readOp.longer_channel_leakage :
         power.readOp.leakage;
     output_data.gate_leakage_power = power.readOp.gate_leakage;

     // Peak dynamic power calculation
     output_data.peak_dynamic_power = power.readOp.dynamic *
         (tdp_stats.readAc.access + tdp_stats.writeAc.access);

     // Runtime dynamic energy calculation
     output_data.runtime_dynamic_energy =
         power.readOp.dynamic *
         (rtp_stats.readAc.access + rtp_stats.writeAc.access) *
         mcp.llcBlockSize * BITS_PER_BYTE / mcp.dataBusWidth +
         // Original McPAT code: Assume 10% of peak power is consumed by routine
         // job including memory refreshing and scrubbing
         power.readOp.dynamic * 0.1 * execution_time;
 }

 /**
  * Builds the memory-controller physical interface (PHY) component.
  *
  * @param _xml_data     XML node forwarded to the McPATComponent base
  * @param interface_ip_ input parameters used to initialize l_ip
  * @param mcp_          memory-controller parameters, stored in mcp
  * @param mcs_          memory-controller statistics, stored in mcs
  */
 MCPHY::MCPHY(XMLNode* _xml_data, InputParameter* interface_ip_,
              const MCParameters & mcp_, const MCStatistics & mcs_)
     : McPATComponent(_xml_data), l_ip(*interface_ip_), mcp(mcp_), mcs(mcs_) {
     name = "Physical Interface (PHY)";
     local_result = init_interface(&l_ip, name);

     // TDP stats: reads and writes each get half of the channel count.
     // TODO: Figure out why TDP stats aren't used
     const double tdp_share_per_kind = 0.5 * mcp.num_channels;
     tdp_stats.reset();
     tdp_stats.readAc.access = tdp_share_per_kind;
     tdp_stats.writeAc.access = tdp_share_per_kind;

     // Runtime stats are taken directly from the supplied access counts.
     rtp_stats.reset();
     rtp_stats.readAc.access = mcs.reads;
     rtp_stats.writeAc.access = mcs.writes;
 }

 /**
  * Computes the PHY area and stores it in output_data.area.
  *
  * Three models are selected by mcp.mc_type and mcp.type:
  *   - mc_type != MC:         Cadence ChipEstimator number for an 8-bit bus
  *   - mc_type == MC, type 0: fit based on Niagara 1 and 2 die photos
  *   - mc_type == MC, other:  scaled from a DesignWare 16-bit DDR3 PHY
  */
 void MCPHY::computeArea() {
     if (mcp.mc_type != MC) {
         //area based on Cadence ChipEstimator for 8bit bus
         output_data.area = 0.4e6 / 2 * mcp.dataBusWidth / 8 / 1e6;
         return;
     }

     if (mcp.type != 0) {
         //Designware/synopsis 16bit DDR3 PHY is 1.3mm (WITH IOs) at 40nm
         //for upto DDR3 2133 (PC3 17066)
         //NOTE(review): the original trailing comment labels this result
         //um^2, but MCPHY::computeEnergy multiplies the area by 1e6 to get
         //um^2 - confirm the intended unit.
         double io_free_fraction = 0.2;
         output_data.area = 1.3 * io_free_fraction / 2133.0e6 *
             mcp.clockRate / 17066 * mcp.peak_transfer_rate *
             mcp.dataBusWidth / 16.0 * (l_ip.F_sz_um / 0.040)*
             (l_ip.F_sz_um / 0.040) * mcp.num_channels;
         return;
     }

     //Based on die photos from Niagara 1 and 2.
     //TODO merge this into undifferentiated core.PHY only achieves
     //square root of the ideal scaling.
     output_data.area = (6.4323 * log(mcp.peak_transfer_rate * 2) -
                         48.134) * mcp.dataBusWidth / 128.0 *
         (l_ip.F_sz_um / 0.09) * mcp.num_channels / 2;//TODO:/2
 }

 void MCPHY::computeEnergy() {
     //PHY uses internal data buswidth but the actuall off-chip datawidth is 64bits + ecc
     double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
     /*
      * according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS for next-generation memory interfaces ," ISSCC 2006;
      * From Cadence ChipEstimator for normal I/O around 0.4~0.8 mW/Gb/s
      */
     double power_per_gb_per_s, phy_dyn,phy_gates;
     double NMOS_sizing = g_tp.min_w_nmos_;
     double PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
     double area_um2 = output_data.area * 1e6;

     if (mcp.mc_type == MC) {
         if (mcp.type == 0) {
             power_per_gb_per_s = mcp.LVDS ? 0.01 : 0.04;
             //This is from curve fitting based on Niagara 1 and 2's PHY die photo.
             //This is power not energy, 10mw/Gb/s @90nm for each channel and scaling down
             //power.readOp.dynamic = 0.02*memAccesses*llcBlocksize*8;//change from Bytes to bits.
             power.readOp.dynamic = power_per_gb_per_s *
                 sqrt(l_ip.F_sz_um / 0.09) * g_tp.peri_global.Vdd / 1.2 *
                 g_tp.peri_global.Vdd / 1.2;
             power.readOp.leakage = area_um2 / 2 *
                 (g_tp.scaling_factor.core_tx_density) *
                 cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 1, inv) *
                 g_tp.peri_global.Vdd;//unit W
             power.readOp.gate_leakage = area_um2 / 2 *
                 (g_tp.scaling_factor.core_tx_density) *
                 cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 1, inv) *
                 g_tp.peri_global.Vdd;//unit W
         } else {
             phy_gates = 200000 * mcp.dataBusWidth / 64.0;
             power_per_gb_per_s = 0.01;
             //This is power not energy, 10mw/Gb/s @90nm for each channel and scaling down
             power.readOp.dynamic = power_per_gb_per_s * (l_ip.F_sz_um / 0.09) *
                 g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2;
             power.readOp.leakage = (mcp.withPHY ? phy_gates : 0) *
                 cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
                 g_tp.peri_global.Vdd;//unit W
             power.readOp.gate_leakage = (mcp.withPHY ? phy_gates : 0) *
                 cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
                 g_tp.peri_global.Vdd;//unit W
         }
     }

 //  double phy_factor = (int)ceil(mcp.dataBusWidth/72.0);//Previous phy power numbers are based on 72 bit DIMM interface
 //  power_t.readOp.dynamic *= phy_factor;
 //  power_t.readOp.leakage *= phy_factor;
 //  power_t.readOp.gate_leakage *= phy_factor;

     double long_channel_device_reduction =
         longer_channel_device_reduction(Uncore_device);
     power.readOp.longer_channel_leakage =
         power.readOp.leakage * long_channel_device_reduction;

     // Leakage power calculations
     output_data.subthreshold_leakage_power =
         longer_channel_device ? power.readOp.longer_channel_leakage :
         power.readOp.leakage;
     output_data.gate_leakage_power = power.readOp.gate_leakage;

     // Peak dynamic power calculation
     double data_transfer_unit = (mcp.mc_type == MC)? 72:16;/*DIMM data width*/
     output_data.peak_dynamic_power = power.readOp.dynamic *
         (mcp.peak_transfer_rate * BITS_PER_BYTE / 1e3) * mcp.dataBusWidth /
         data_transfer_unit * mcp.num_channels / mcp.clockRate;

     // Runtime dynamic energy calculation
     output_data.runtime_dynamic_energy =
         power.readOp.dynamic *
         (rtp_stats.readAc.access + rtp_stats.writeAc.access) *
         mcp.llcBlockSize * BITS_PER_BYTE / 1e9 +
         // Original McPAT code: Assume 10% of peak power is consumed by routine
         // job including memory refreshing and scrubbing
         power.readOp.dynamic * 0.1 * execution_time;
 }

 /**
  * Builds the memory-controller front end.
  *
  * Creates three CacheArray children (the request reorder buffer, the read
  * buffer and the write buffer) and sets their TDP and runtime access
  * statistics, then creates the arbitration logic (MC_arb), which is kept
  * out of the children list.
  *
  * interface_ip is reconfigured before each CacheArray is constructed, so
  * after construction it holds the write-buffer settings.
  *
  * @param _xml_data     XML node forwarded to the McPATComponent base
  * @param interface_ip_ input parameters used to initialize interface_ip
  * @param mcp_          memory-controller parameters, stored in mcp
  * @param mcs_          memory-controller statistics, stored in mcs
  */
 MCFrontEnd::MCFrontEnd(XMLNode* _xml_data, InputParameter* interface_ip_,
                        const MCParameters & mcp_, const MCStatistics & mcs_)
     : McPATComponent(_xml_data), frontendBuffer(NULL), readBuffer(NULL),
       writeBuffer(NULL), MC_arb(NULL), interface_ip(*interface_ip_),
     mcp(mcp_), mcs(mcs_) {
     int tag, data;
     bool is_default = true;//indication for default setup

     /* MC frontend engine channels share the same engines but logically partitioned
      * For all hardware inside MC. different channels do not share resources.
      * TODO: add decoding/mux stage to steer memory requests to different channels.
      */

     name = "Front End";

     // Memory Request Reorder Buffer
     tag = mcp.addressbus_width + EXTRA_TAG_BITS + mcp.opcodeW;
     data = int(ceil((physical_address_width + mcp.opcodeW) / BITS_PER_BYTE));

     interface_ip.cache_sz = data * mcp.req_window_size_per_channel;
     interface_ip.line_sz = data;
     interface_ip.assoc = mcp.reorder_buffer_assoc;
     interface_ip.nbanks = mcp.reorder_buffer_nbanks;
     interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
     interface_ip.specific_tag = tag > 0;
     interface_ip.tag_w = tag;
     interface_ip.access_mode = Normal;
     interface_ip.obj_func_dyn_energy = 0;
     interface_ip.obj_func_dyn_power = 0;
     interface_ip.obj_func_leak_power = 0;
     interface_ip.obj_func_cycle_t = 1;
     interface_ip.num_rw_ports = 0;
     interface_ip.num_rd_ports = mcp.num_channels;
     interface_ip.num_wr_ports = interface_ip.num_rd_ports;
     interface_ip.num_se_rd_ports = 0;
     interface_ip.num_search_ports = mcp.num_channels;
     interface_ip.is_cache = true;
     interface_ip.pure_cam = false;
     interface_ip.pure_ram = false;
     interface_ip.throughput = 1.0 / mcp.clockRate;
     interface_ip.latency = 1.0 / mcp.clockRate;
     frontendBuffer = new CacheArray(xml_data, &interface_ip, "Reorder Buffer",
                                     Uncore_device, mcp.clockRate);
     children.push_back(frontendBuffer);

     frontendBuffer->tdp_stats.reset();
     frontendBuffer->tdp_stats.readAc.access =
         frontendBuffer->l_ip.num_search_ports +
         frontendBuffer->l_ip.num_wr_ports;
     frontendBuffer->tdp_stats.writeAc.access =
         frontendBuffer->l_ip.num_search_ports;
     frontendBuffer->tdp_stats.searchAc.access =
         frontendBuffer->l_ip.num_wr_ports;
     frontendBuffer->rtp_stats.reset();
     // TODO: These stats assume that access power is calculated per buffer
     // bit, which requires the stats to take into account the number of
     // bits for each buffer slot. This should be revised...
     //For each channel, each memory word need to check the address data to
     //achieve best scheduling results.
     //and this need to be done on all physical DIMMs in each logical memory
     //DIMM *mcp.dataBusWidth/72
     frontendBuffer->rtp_stats.readAc.access = mcs.reads * mcp.llcBlockSize *
         BITS_PER_BYTE / mcp.dataBusWidth * mcp.dataBusWidth / 72;
     frontendBuffer->rtp_stats.writeAc.access = mcs.writes * mcp.llcBlockSize *
         BITS_PER_BYTE / mcp.dataBusWidth * mcp.dataBusWidth / 72;
     frontendBuffer->rtp_stats.searchAc.access =
         frontendBuffer->rtp_stats.readAc.access +
         frontendBuffer->rtp_stats.writeAc.access;

     // Read Buffers
     //Support key words first operation
     data = (int)ceil(mcp.dataBusWidth / BITS_PER_BYTE);

     interface_ip.cache_sz = data * mcp.IO_buffer_size_per_channel;
     interface_ip.line_sz = data;
     interface_ip.assoc = mcp.read_buffer_assoc;
     interface_ip.nbanks = mcp.read_buffer_nbanks;
     interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
     interface_ip.specific_tag = mcp.read_buffer_tag_width > 0;
     interface_ip.tag_w = mcp.read_buffer_tag_width;
     interface_ip.access_mode = Sequential;
     interface_ip.obj_func_dyn_energy = 0;
     interface_ip.obj_func_dyn_power = 0;
     interface_ip.obj_func_leak_power = 0;
     interface_ip.obj_func_cycle_t = 1;
     interface_ip.num_rw_ports = 0;
     interface_ip.num_rd_ports = mcp.num_channels;
     interface_ip.num_wr_ports = interface_ip.num_rd_ports;
     interface_ip.num_se_rd_ports = 0;
     interface_ip.num_search_ports = 0;
     interface_ip.is_cache = false;
     interface_ip.pure_cam = false;
     interface_ip.pure_ram = true;
     interface_ip.throughput = 1.0 / mcp.clockRate;
     interface_ip.latency = 1.0 / mcp.clockRate;
     readBuffer = new CacheArray(xml_data, &interface_ip, "Read Buffer",
                                 Uncore_device, mcp.clockRate);
     children.push_back(readBuffer);

     readBuffer->tdp_stats.reset();
     readBuffer->tdp_stats.readAc.access = readBuffer->l_ip.num_rd_ports *
         mcs.duty_cycle;
     readBuffer->tdp_stats.writeAc.access = readBuffer->l_ip.num_wr_ports *
         mcs.duty_cycle;
     readBuffer->rtp_stats.reset();
     // Both the read and the write count of the read buffer are derived
     // from the number of memory reads.
     readBuffer->rtp_stats.readAc.access = mcs.reads * mcp.llcBlockSize *
         BITS_PER_BYTE / mcp.dataBusWidth;
     readBuffer->rtp_stats.writeAc.access = mcs.reads * mcp.llcBlockSize *
         BITS_PER_BYTE / mcp.dataBusWidth;

     // Write Buffer
     //Support key words first operation
     data = (int)ceil(mcp.dataBusWidth / BITS_PER_BYTE);

     interface_ip.cache_sz = data * mcp.IO_buffer_size_per_channel;
     interface_ip.line_sz = data;
     interface_ip.assoc = mcp.write_buffer_assoc;
     interface_ip.nbanks = mcp.write_buffer_nbanks;
     interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
     interface_ip.specific_tag = mcp.write_buffer_tag_width > 0;
     interface_ip.tag_w = mcp.write_buffer_tag_width;
     interface_ip.access_mode = Normal;
     interface_ip.obj_func_dyn_energy = 0;
     interface_ip.obj_func_dyn_power = 0;
     interface_ip.obj_func_leak_power = 0;
     interface_ip.obj_func_cycle_t = 1;
     interface_ip.num_rw_ports = 0;
     interface_ip.num_rd_ports = mcp.num_channels;
     interface_ip.num_wr_ports = interface_ip.num_rd_ports;
     interface_ip.num_se_rd_ports = 0;
     interface_ip.num_search_ports = 0;
     interface_ip.is_cache = false;
     interface_ip.pure_cam = false;
     interface_ip.pure_ram = true;
     interface_ip.throughput = 1.0 / mcp.clockRate;
     interface_ip.latency = 1.0 / mcp.clockRate;
     writeBuffer = new CacheArray(xml_data, &interface_ip, "Write Buffer",
                                  Uncore_device, mcp.clockRate);
     children.push_back(writeBuffer);

     writeBuffer->tdp_stats.reset();
     writeBuffer->tdp_stats.readAc.access = writeBuffer->l_ip.num_rd_ports *
         mcs.duty_cycle;
     writeBuffer->tdp_stats.writeAc.access = writeBuffer->l_ip.num_wr_ports *
         mcs.duty_cycle;
     writeBuffer->rtp_stats.reset();
     // Mirror of the read buffer: both counts of the write buffer are
     // derived from the number of memory writes. (The read count was
     // previously derived from mcs.reads, which charged write-buffer reads
     // to unrelated memory reads.)
     writeBuffer->rtp_stats.readAc.access = mcs.writes * mcp.llcBlockSize *
         BITS_PER_BYTE / mcp.dataBusWidth;
     writeBuffer->rtp_stats.writeAc.access = mcs.writes * mcp.llcBlockSize *
         BITS_PER_BYTE / mcp.dataBusWidth;

     // TODO: Set up selection logic as a leaf node in tree
     //selection and arbitration logic
     MC_arb =
         new selection_logic(xml_data, is_default,
                             mcp.req_window_size_per_channel, 1, &interface_ip,
                             "Arbitration Logic", (mcs.reads + mcs.writes),
                             mcp.clockRate, Uncore_device);
     // MC_arb is not included in the roll-up due to the uninitialized area
     //children.push_back(MC_arb);
 }

 /**
  * Builds the top-level memory controller and its sub-components.
  *
  * Parameters and statistics are first loaded via set_mc_param(); the
  * front end and back end are always added as children, and a PHY is added
  * for type 0 controllers or for type 1 controllers with withPHY set.
  *
  * @param _xml_data     XML node forwarded to the McPATComponent base
  * @param interface_ip_ input parameters used to initialize interface_ip
  */
 MemoryController::MemoryController(XMLNode* _xml_data,
                                    InputParameter* interface_ip_)
     : McPATComponent(_xml_data), interface_ip(*interface_ip_) {
     name = "Memory Controller";
     set_mc_param();

     // TODO: Pass params and stats as pointers
     MCFrontEnd* front_end = new MCFrontEnd(xml_data, &interface_ip, mcp, mcs);
     children.push_back(front_end);

     MCBackend* back_end = new MCBackend(xml_data, &interface_ip, mcp, mcs);
     children.push_back(back_end);

     const bool needs_phy =
         (mcp.type == 0) || (mcp.type == 1 && mcp.withPHY);
     if (needs_phy) {
         MCPHY* phy = new MCPHY(xml_data, &interface_ip, mcp, mcs);
         children.push_back(phy);
     }
 }

 // Resets the controller parameters by setting every byte of mcp to zero.
 // Only mcp is cleared here; mcs is not touched by this function.
 // NOTE(review): a raw memset assumes MCParameters has no members with
 // non-trivial constructors (e.g. std::string) - confirm against the
 // declaration in memoryctrl.h.
 void MemoryController::initialize_params() {
     memset(&mcp, 0, sizeof(MCParameters));
 }

 /**
  * Loads the memory-controller parameters and statistics from xml_data.
  *
  * Steps:
  *   1. zero mcp via initialize_params();
  *   2. read every "param" child node into mcp / interface_ip;
  *   3. terminate the program if mc_type is not MC;
  *   4. convert the clock rate from MHz to Hz and copy the technology and
  *      wire material types into interface_ip;
  *   5. read every "stat" child node into mcs;
  *   6. derive llcBlockSize and dataBusWidth by adding the ECC overhead.
  *
  * tech_type and mat_type default to 0 when the XML does not provide
  * "tech_type" / "wire_mat_type"; they were previously left uninitialized
  * and then copied into interface_ip.
  */
 void MemoryController::set_mc_param() {
     initialize_params();

     int num_children = xml_data->nChildNode("param");
     int tech_type = 0;
     int mat_type = 0;
     int i;
     for (i = 0; i < num_children; i++) {
         XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
         XMLCSTR node_name = paramNode->getAttribute("name");
         XMLCSTR value = paramNode->getAttribute("value");

         // NOTE(review): node_name is still used by the ASSIGN_* macros
         // below after this warning; confirm that warnMissingParamName()
         // does not return, or that the macros tolerate a null name.
         if (!node_name)
             warnMissingParamName(paramNode->getAttribute("id"));

         ASSIGN_FP_IF("mc_clock", mcp.clockRate);
         ASSIGN_INT_IF("tech_type", tech_type);
         ASSIGN_ENUM_IF("mc_type", mcp.mc_type, MemoryCtrl_type);
         ASSIGN_FP_IF("num_mcs", mcp.num_mcs);
         ASSIGN_INT_IF("llc_line_length", mcp.llc_line_length);
         ASSIGN_INT_IF("databus_width", mcp.databus_width);
         ASSIGN_INT_IF("memory_channels_per_mc", mcp.num_channels);
         ASSIGN_INT_IF("req_window_size_per_channel",
                       mcp.req_window_size_per_channel);
         ASSIGN_INT_IF("IO_buffer_size_per_channel",
                       mcp.IO_buffer_size_per_channel);
         ASSIGN_INT_IF("addressbus_width", mcp.addressbus_width);
         ASSIGN_INT_IF("opcode_width", mcp.opcodeW);
         ASSIGN_INT_IF("type", mcp.type);
         ASSIGN_ENUM_IF("LVDS", mcp.LVDS, bool);
         ASSIGN_ENUM_IF("withPHY", mcp.withPHY, bool);
         ASSIGN_INT_IF("peak_transfer_rate", mcp.peak_transfer_rate);
         ASSIGN_INT_IF("number_ranks", mcp.number_ranks);
         ASSIGN_INT_IF("reorder_buffer_assoc", mcp.reorder_buffer_assoc);
         ASSIGN_INT_IF("reorder_buffer_nbanks", mcp.reorder_buffer_nbanks);
         ASSIGN_INT_IF("read_buffer_assoc", mcp.read_buffer_assoc);
         ASSIGN_INT_IF("read_buffer_nbanks", mcp.read_buffer_nbanks);
         ASSIGN_INT_IF("read_buffer_tag_width", mcp.read_buffer_tag_width);
         ASSIGN_INT_IF("write_buffer_assoc", mcp.write_buffer_assoc);
         ASSIGN_INT_IF("write_buffer_nbanks", mcp.write_buffer_nbanks);
         ASSIGN_INT_IF("write_buffer_tag_width", mcp.write_buffer_tag_width);
         ASSIGN_INT_IF("wire_mat_type", mat_type);
         ASSIGN_ENUM_IF("wire_type", interface_ip.wt, Wire_type);

         else {
             warnUnrecognizedParam(node_name);
         }
     }

     // NOTE(review): this error path exits with status 0.
     if (mcp.mc_type != MC) {
         cout << "Unknown memory controller type: Only DRAM controller is "
              << "supported for now" << endl;
         exit(0);
     }

     // Change from MHz to Hz
     mcp.clockRate *= 1e6;

     // The same technology type is applied to the data and tag arrays, and
     // the same material type to both wire settings.
     interface_ip.data_arr_ram_cell_tech_type    = tech_type;
     interface_ip.data_arr_peri_global_tech_type = tech_type;
     interface_ip.tag_arr_ram_cell_tech_type     = tech_type;
     interface_ip.tag_arr_peri_global_tech_type  = tech_type;
     interface_ip.wire_is_mat_type = mat_type;
     interface_ip.wire_os_mat_type = mat_type;

     num_children = xml_data->nChildNode("stat");
     for (i = 0; i < num_children; i++) {
         XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
         XMLCSTR node_name = statNode->getAttribute("name");
         XMLCSTR value = statNode->getAttribute("value");

         if (!node_name)
             warnMissingStatName(statNode->getAttribute("id"));

         ASSIGN_FP_IF("duty_cycle", mcs.duty_cycle);
         ASSIGN_FP_IF("perc_load", mcs.perc_load);
         ASSIGN_FP_IF("memory_reads", mcs.reads);
         ASSIGN_INT_IF("memory_writes", mcs.writes);

         else {
             warnUnrecognizedStat(node_name);
         }
     }

     // Add ECC overhead: each width grows by ceil(width / BITS_PER_BYTE)
     mcp.llcBlockSize = int(ceil(mcp.llc_line_length / BITS_PER_BYTE)) +
         mcp.llc_line_length;
     mcp.dataBusWidth = int(ceil(mcp.databus_width / BITS_PER_BYTE)) +
         mcp.databus_width;
 }

 /**
  * Releases the arbitration logic and the three buffers, then clears the
  * pointers.
  *
  * delete on a null pointer is a no-op, so no explicit null checks are
  * needed.
  *
  * NOTE(review): frontendBuffer, readBuffer and writeBuffer are also stored
  * in children by the constructor - confirm that the McPATComponent
  * destructor does not delete children as well.
  */
 MCFrontEnd::~MCFrontEnd() {
     delete MC_arb;
     MC_arb = NULL;

     delete frontendBuffer;
     frontendBuffer = NULL;

     delete readBuffer;
     readBuffer = NULL;

     delete writeBuffer;
     writeBuffer = NULL;
 }

 // Intentionally empty: nothing is released here, including the children
 // allocated with new in the constructor.
 MemoryController::~MemoryController() {
     // TODO: use the default (base-class) destructor to delete children
 }