| /***************************************************************************** |
| * McPAT/CACTI |
| * SOFTWARE LICENSE AGREEMENT |
| * Copyright 2012 Hewlett-Packard Development Company, L.P. |
| * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. |
| * All Rights Reserved |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are |
| * met: redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer; |
| * redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution; |
| * neither the name of the copyright holders nor the names of its |
| * contributors may be used to endorse or promote products derived from |
| * this software without specific prior written permission. |
| |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| * |
| ***************************************************************************/ |
| |
| |
| |
| #include <cmath> |
| #include <iostream> |
| |
| #include "uca.h" |
| |
| UCA::UCA(const DynamicParameter & dyn_p) |
| : dp(dyn_p), bank(dp), nbanks(g_ip->nbanks), refresh_power(0) { |
| int num_banks_ver_dir = 1 << ((bank.area.h > bank.area.w) ? _log2(nbanks) |
| / 2 : (_log2(nbanks) - _log2(nbanks) / 2)); |
| int num_banks_hor_dir = nbanks / num_banks_ver_dir; |
| |
| if (dp.use_inp_params) { |
| RWP = dp.num_rw_ports; |
| ERP = dp.num_rd_ports; |
| EWP = dp.num_wr_ports; |
| SCHP = dp.num_search_ports; |
| } else { |
| RWP = g_ip->num_rw_ports; |
| ERP = g_ip->num_rd_ports; |
| EWP = g_ip->num_wr_ports; |
| SCHP = g_ip->num_search_ports; |
| } |
| |
| num_addr_b_bank = (dp.number_addr_bits_mat + dp.number_subbanks_decode) * |
| (RWP + ERP + EWP); |
| num_di_b_bank = dp.num_di_b_bank_per_port * (RWP + EWP); |
| num_do_b_bank = dp.num_do_b_bank_per_port * (RWP + ERP); |
| num_si_b_bank = dp.num_si_b_bank_per_port * SCHP; |
| num_so_b_bank = dp.num_so_b_bank_per_port * SCHP; |
| |
| if (!dp.fully_assoc && !dp.pure_cam) { |
| |
| if (g_ip->fast_access && dp.is_tag == false) { |
| num_do_b_bank *= g_ip->data_assoc; |
| } |
| |
| htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h, |
| num_addr_b_bank, num_di_b_bank, 0, |
| num_do_b_bank, 0, num_banks_ver_dir * 2, |
| num_banks_hor_dir * 2, Add_htree, true); |
| htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, |
| num_addr_b_bank, num_di_b_bank, 0, |
| num_do_b_bank, 0, num_banks_ver_dir * 2, |
| num_banks_hor_dir * 2, Data_in_htree, true); |
| htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, |
| num_addr_b_bank, num_di_b_bank, 0, |
| num_do_b_bank, 0, num_banks_ver_dir * 2, |
| num_banks_hor_dir * 2, Data_out_htree, true); |
| } |
| |
| else { |
| |
| htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h, |
| num_addr_b_bank, num_di_b_bank, |
| num_si_b_bank, num_do_b_bank, num_so_b_bank, |
| num_banks_ver_dir * 2, num_banks_hor_dir * 2, |
| Add_htree, true); |
| htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, |
| num_addr_b_bank, num_di_b_bank, |
| num_si_b_bank, num_do_b_bank, num_so_b_bank, |
| num_banks_ver_dir * 2, num_banks_hor_dir * 2, |
| Data_in_htree, true); |
| htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, |
| num_addr_b_bank, num_di_b_bank, |
| num_si_b_bank, num_do_b_bank, |
| num_so_b_bank, num_banks_ver_dir * 2, |
| num_banks_hor_dir * 2, Data_out_htree, true); |
| htree_in_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h, |
| num_addr_b_bank, num_di_b_bank, |
| num_si_b_bank, num_do_b_bank, |
| num_so_b_bank, num_banks_ver_dir * 2, |
| num_banks_hor_dir * 2, Data_in_htree, true); |
| htree_out_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h, |
| num_addr_b_bank, num_di_b_bank, |
| num_si_b_bank, num_do_b_bank, |
| num_so_b_bank, num_banks_ver_dir * 2, |
| num_banks_hor_dir * 2, Data_out_htree, |
| true); |
| } |
| |
| area.w = htree_in_data->area.w; |
| area.h = htree_in_data->area.h; |
| |
| area_all_dataramcells = bank.mat.subarray.get_total_cell_area() * dp.num_subarrays * g_ip->nbanks; |
| // cout<<"area cell"<<area_all_dataramcells<<endl; |
| // cout<<area.get_area()<<endl; |
| // delay calculation |
| double inrisetime = 0.0; |
| compute_delays(inrisetime); |
| compute_power_energy(); |
| } |
| |
| |
| |
| UCA::~UCA() { |
| delete htree_in_add; |
| delete htree_in_data; |
| delete htree_out_data; |
| } |
| |
| |
| |
| double UCA::compute_delays(double inrisetime) { |
| double outrisetime = bank.compute_delays(inrisetime); |
| |
| double delay_array_to_mat = htree_in_add->delay + bank.htree_in_add->delay; |
| double max_delay_before_row_decoder = delay_array_to_mat + bank.mat.r_predec->delay; |
| delay_array_to_sa_mux_lev_1_decoder = delay_array_to_mat + |
| bank.mat.sa_mux_lev_1_predec->delay + |
| bank.mat.sa_mux_lev_1_dec->delay; |
| delay_array_to_sa_mux_lev_2_decoder = delay_array_to_mat + |
| bank.mat.sa_mux_lev_2_predec->delay + |
| bank.mat.sa_mux_lev_2_dec->delay; |
| double delay_inside_mat = bank.mat.row_dec->delay + bank.mat.delay_bitline + bank.mat.delay_sa; |
| |
| delay_before_subarray_output_driver = |
| MAX(MAX(max_delay_before_row_decoder + delay_inside_mat, // row_path |
| delay_array_to_mat + bank.mat.b_mux_predec->delay + bank.mat.bit_mux_dec->delay + bank.mat.delay_sa), // col_path |
| MAX(delay_array_to_sa_mux_lev_1_decoder, // sa_mux_lev_1_path |
| delay_array_to_sa_mux_lev_2_decoder)); // sa_mux_lev_2_path |
| delay_from_subarray_out_drv_to_out = bank.mat.delay_subarray_out_drv_htree + |
| bank.htree_out_data->delay + htree_out_data->delay; |
| access_time = bank.mat.delay_comparator; |
| |
| double ram_delay_inside_mat; |
| if (dp.fully_assoc) { |
| //delay of FA contains both CAM tag and RAM data |
| { //delay of CAM |
| ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline; |
| access_time = htree_in_add->delay + bank.htree_in_add->delay; |
| //delay of fully-associative data array |
| access_time += ram_delay_inside_mat + delay_from_subarray_out_drv_to_out; |
| } |
| } else { |
| access_time = delay_before_subarray_output_driver + delay_from_subarray_out_drv_to_out; //data_acc_path |
| } |
| |
| if (dp.is_main_mem) { |
| double t_rcd = max_delay_before_row_decoder + delay_inside_mat; |
| double cas_latency = MAX(delay_array_to_sa_mux_lev_1_decoder, delay_array_to_sa_mux_lev_2_decoder) + |
| delay_from_subarray_out_drv_to_out; |
| access_time = t_rcd + cas_latency; |
| } |
| |
| double temp; |
| |
| if (!dp.fully_assoc) { |
| temp = delay_inside_mat + bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;//TODO: Sheng: revisit |
| if (dp.is_dram) { |
| temp += bank.mat.delay_writeback; // temp stores random cycle time |
| } |
| |
| |
| temp = MAX(temp, bank.mat.r_predec->delay); |
| temp = MAX(temp, bank.mat.b_mux_predec->delay); |
| temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay); |
| temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay); |
| } else { |
| ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline; |
| temp = ram_delay_inside_mat + bank.mat.delay_cam_sl_restore + bank.mat.delay_cam_ml_reset + bank.mat.delay_bl_restore |
| + bank.mat.delay_hit_miss_reset + bank.mat.delay_wl_reset; |
| |
| temp = MAX(temp, bank.mat.b_mux_predec->delay);//TODO: Sheng revisit whether distinguish cam and ram bitline etc. |
| temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay); |
| temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay); |
| } |
| |
| // The following is true only if the input parameter "repeaters_in_htree" is set to false --Nav |
| if (g_ip->rpters_in_htree == false) { |
| temp = MAX(temp, bank.htree_in_add->max_unpipelined_link_delay); |
| } |
| cycle_time = temp; |
| |
| double delay_req_network = max_delay_before_row_decoder; |
| double delay_rep_network = delay_from_subarray_out_drv_to_out; |
| multisubbank_interleave_cycle_time = MAX(delay_req_network, delay_rep_network); |
| |
| if (dp.is_main_mem) { |
| multisubbank_interleave_cycle_time = htree_in_add->delay; |
| precharge_delay = htree_in_add->delay + |
| bank.htree_in_add->delay + bank.mat.delay_writeback + |
| bank.mat.delay_wl_reset + bank.mat.delay_bl_restore; |
| cycle_time = access_time + precharge_delay; |
| } else { |
| precharge_delay = 0; |
| } |
| |
| double dram_array_availability = 0; |
| if (dp.is_dram) { |
| dram_array_availability = (1 - dp.num_r_subarray * cycle_time / dp.dram_refresh_period) * 100; |
| } |
| |
| return outrisetime; |
| } |
| |
| |
| |
| // note: currently, power numbers are for a bank of an array |
| void UCA::compute_power_energy() { |
| bank.compute_power_energy(); |
| power = bank.power; |
| |
| power_routing_to_bank.readOp.dynamic = htree_in_add->power.readOp.dynamic + htree_out_data->power.readOp.dynamic; |
| power_routing_to_bank.writeOp.dynamic = htree_in_add->power.readOp.dynamic + htree_in_data->power.readOp.dynamic; |
| if (dp.fully_assoc || dp.pure_cam) |
| power_routing_to_bank.searchOp.dynamic = |
| htree_in_search->power.searchOp.dynamic + |
| htree_out_search->power.searchOp.dynamic; |
| |
| power_routing_to_bank.readOp.leakage += |
| htree_in_add->power.readOp.leakage + |
| htree_in_data->power.readOp.leakage + |
| htree_out_data->power.readOp.leakage; |
| |
| power_routing_to_bank.readOp.gate_leakage += |
| htree_in_add->power.readOp.gate_leakage + |
| htree_in_data->power.readOp.gate_leakage + |
| htree_out_data->power.readOp.gate_leakage; |
| if (dp.fully_assoc || dp.pure_cam) { |
| power_routing_to_bank.readOp.leakage += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage; |
| power_routing_to_bank.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage; |
| } |
| |
| power.searchOp.dynamic += power_routing_to_bank.searchOp.dynamic; |
| power.readOp.dynamic += power_routing_to_bank.readOp.dynamic; |
| power.readOp.leakage += power_routing_to_bank.readOp.leakage; |
| power.readOp.gate_leakage += power_routing_to_bank.readOp.gate_leakage; |
| |
| // calculate total write energy per access |
| power.writeOp.dynamic = power.readOp.dynamic |
| - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir |
| + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir |
| - power_routing_to_bank.readOp.dynamic |
| + power_routing_to_bank.writeOp.dynamic |
| + bank.htree_in_data->power.readOp.dynamic |
| - bank.htree_out_data->power.readOp.dynamic; |
| |
| if (dp.is_dram == false) { |
| power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; |
| } |
| |
| dyn_read_energy_from_closed_page = power.readOp.dynamic; |
| dyn_read_energy_from_open_page = power.readOp.dynamic - |
| (bank.mat.r_predec->power.readOp.dynamic + |
| bank.mat.power_row_decoders.readOp.dynamic + |
| bank.mat.power_bl_precharge_eq_drv.readOp.dynamic + |
| bank.mat.power_sa.readOp.dynamic + |
| bank.mat.power_bitline.readOp.dynamic) * dp.num_act_mats_hor_dir; |
| |
| dyn_read_energy_remaining_words_in_burst = |
| (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1) * |
| ((bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + |
| bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + |
| bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + |
| bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic + |
| bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir + |
| bank.htree_out_data->power.readOp.dynamic + |
| power_routing_to_bank.readOp.dynamic); |
| dyn_read_energy_from_closed_page += dyn_read_energy_remaining_words_in_burst; |
| dyn_read_energy_from_open_page += dyn_read_energy_remaining_words_in_burst; |
| |
| activate_energy = htree_in_add->power.readOp.dynamic + |
| bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_act + |
| (bank.mat.r_predec->power.readOp.dynamic + |
| bank.mat.power_row_decoders.readOp.dynamic + |
| bank.mat.power_sa.readOp.dynamic) * dp.num_act_mats_hor_dir; |
| read_energy = (htree_in_add->power.readOp.dynamic + |
| bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr + |
| (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + |
| bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + |
| bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + |
| bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic + |
| bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir + |
| bank.htree_out_data->power.readOp.dynamic + |
| htree_in_data->power.readOp.dynamic) * g_ip->burst_len; |
| write_energy = (htree_in_add->power.readOp.dynamic + |
| bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr + |
| htree_in_data->power.readOp.dynamic + |
| bank.htree_in_data->power.readOp.dynamic + |
| (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + |
| bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + |
| bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + |
| bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic) * dp.num_act_mats_hor_dir) * g_ip->burst_len; |
| precharge_energy = (bank.mat.power_bitline.readOp.dynamic + |
| bank.mat.power_bl_precharge_eq_drv.readOp.dynamic) * dp.num_act_mats_hor_dir; |
| |
| leak_power_subbank_closed_page = |
| (bank.mat.r_predec->power.readOp.leakage + |
| bank.mat.b_mux_predec->power.readOp.leakage + |
| bank.mat.sa_mux_lev_1_predec->power.readOp.leakage + |
| bank.mat.sa_mux_lev_2_predec->power.readOp.leakage + |
| bank.mat.power_row_decoders.readOp.leakage + |
| bank.mat.power_bit_mux_decoders.readOp.leakage + |
| bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage + |
| bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage + |
| bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir; |
| |
| leak_power_subbank_closed_page += |
| (bank.mat.r_predec->power.readOp.gate_leakage + |
| bank.mat.b_mux_predec->power.readOp.gate_leakage + |
| bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage + |
| bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage + |
| bank.mat.power_row_decoders.readOp.gate_leakage + |
| bank.mat.power_bit_mux_decoders.readOp.gate_leakage + |
| bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage + |
| bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage) * dp.num_act_mats_hor_dir; //+ |
| //bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir; |
| |
| leak_power_subbank_open_page = |
| (bank.mat.r_predec->power.readOp.leakage + |
| bank.mat.b_mux_predec->power.readOp.leakage + |
| bank.mat.sa_mux_lev_1_predec->power.readOp.leakage + |
| bank.mat.sa_mux_lev_2_predec->power.readOp.leakage + |
| bank.mat.power_row_decoders.readOp.leakage + |
| bank.mat.power_bit_mux_decoders.readOp.leakage + |
| bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage + |
| bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage + |
| bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir; |
| |
| leak_power_subbank_open_page += |
| (bank.mat.r_predec->power.readOp.gate_leakage + |
| bank.mat.b_mux_predec->power.readOp.gate_leakage + |
| bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage + |
| bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage + |
| bank.mat.power_row_decoders.readOp.gate_leakage + |
| bank.mat.power_bit_mux_decoders.readOp.gate_leakage + |
| bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage + |
| bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage ) * dp.num_act_mats_hor_dir; |
| //bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir; |
| |
| leak_power_request_and_reply_networks = |
| power_routing_to_bank.readOp.leakage + |
| bank.htree_in_add->power.readOp.leakage + |
| bank.htree_in_data->power.readOp.leakage + |
| bank.htree_out_data->power.readOp.leakage; |
| |
| leak_power_request_and_reply_networks += |
| power_routing_to_bank.readOp.gate_leakage + |
| bank.htree_in_add->power.readOp.gate_leakage + |
| bank.htree_in_data->power.readOp.gate_leakage + |
| bank.htree_out_data->power.readOp.gate_leakage; |
| |
| if (dp.fully_assoc || dp.pure_cam) { |
| leak_power_request_and_reply_networks += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage; |
| leak_power_request_and_reply_networks += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage; |
| } |
| |
| |
| // if DRAM, add contribution of power spent in row predecoder drivers, |
| // blocks and decoders to refresh power |
| if (dp.is_dram) { |
| refresh_power = (bank.mat.r_predec->power.readOp.dynamic * dp.num_act_mats_hor_dir + |
| bank.mat.row_dec->power.readOp.dynamic) * dp.num_r_subarray * dp.num_subarrays; |
| refresh_power += bank.mat.per_bitline_read_energy * dp.num_c_subarray * dp.num_r_subarray * dp.num_subarrays; |
| refresh_power += bank.mat.power_bl_precharge_eq_drv.readOp.dynamic * dp.num_act_mats_hor_dir; |
| refresh_power += bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; |
| refresh_power /= dp.dram_refresh_period; |
| } |
| |
| |
| if (dp.is_tag == false) { |
| power.readOp.dynamic = dyn_read_energy_from_closed_page; |
| power.writeOp.dynamic = dyn_read_energy_from_closed_page |
| - dyn_read_energy_remaining_words_in_burst |
| - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir |
| + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir |
| + (power_routing_to_bank.writeOp.dynamic - |
| power_routing_to_bank.readOp.dynamic - |
| bank.htree_out_data->power.readOp.dynamic + |
| bank.htree_in_data->power.readOp.dynamic) * |
| (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1); //FIXME |
| |
| if (dp.is_dram == false) { |
| power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; |
| } |
| } |
| |
| // if DRAM, add refresh power to total leakage |
| if (dp.is_dram) { |
| power.readOp.leakage += refresh_power; |
| } |
| |
| // TODO: below should be avoided. |
| /*if (dp.is_main_mem) |
| { |
| power.readOp.leakage += MAIN_MEM_PER_CHIP_STANDBY_CURRENT_mA * 1e-3 * g_tp.peri_global.Vdd / g_ip->nbanks; |
| }*/ |
| |
| assert(power.readOp.dynamic > 0); |
| assert(power.writeOp.dynamic > 0); |
| assert(power.readOp.leakage > 0); |
| } |
| |