/*****************************************************************************
 * McPAT/CACTI
 * SOFTWARE LICENSE AGREEMENT
 * Copyright 2012 Hewlett-Packard Development Company, L.P.
 * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
 * All Rights Reserved
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.

 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 ***************************************************************************/
| |
| |
| |
| #ifndef __CACTI_INTERFACE_H__ |
| #define __CACTI_INTERFACE_H__ |
| |
| #include <iostream> |
| #include <list> |
| #include <map> |
| #include <string> |
| #include <vector> |
| |
| #include "const.h" |
| |
| using namespace std; |
| |
| |
// Forward declarations. In this header min_values_t is only used through a
// pointer; mem_array and uca_org_t are fully defined further down.
class min_values_t;
class mem_array;
class uca_org_t;
| |
| |
/**
 * Power breakdown for a single kind of operation.
 *
 * Holds five independent double-valued components. A default-constructed
 * object has every component set to zero, and reset() restores that state.
 *
 * Copy construction and copy assignment are the compiler-generated
 * member-wise versions; they copy all five fields, which is exactly what a
 * hand-written version would have to do.
 */
class powerComponents {
  public:
    double dynamic;
    double leakage;
    double gate_leakage;
    double short_circuit;
    double longer_channel_leakage;

    /** Start with all components at zero. */
    powerComponents()
        : dynamic(0), leakage(0), gate_leakage(0), short_circuit(0),
          longer_channel_leakage(0) { }

    /** Set every component back to zero. */
    void reset() {
        dynamic = 0;
        leakage = 0;
        gate_leakage = 0;
        short_circuit = 0;
        longer_channel_leakage = 0;
    }

    // Arithmetic helpers: only declared here, defined elsewhere.
    // Note that operator* receives its right-hand operand through a
    // pointer to const double rather than by value.
    friend powerComponents operator+(const powerComponents & x,
                                     const powerComponents & y);
    friend powerComponents operator*(const powerComponents & x,
                                     double const * const y);
};
| |
| |
| |
| class powerDef { |
| public: |
| powerComponents readOp; |
| powerComponents writeOp; |
| powerComponents searchOp;//Sheng: for CAM and FA |
| |
| powerDef() : readOp(), writeOp(), searchOp() { } |
| void reset() { |
| readOp.reset(); |
| writeOp.reset(); |
| searchOp.reset(); |
| } |
| |
| friend powerDef operator+(const powerDef & x, const powerDef & y); |
| friend powerDef operator*(const powerDef & x, double const * const y); |
| }; |
| |
/**
 * Wire flavours. Enumerators take the default consecutive values starting
 * at 0; Invalid_wtype is listed last.
 */
enum Wire_type {
    Global,        // global wires with repeaters
    Global_5,      // 5% delay penalty
    Global_10,     // 10% delay penalty
    Global_20,     // 20% delay penalty
    Global_30,     // 30% delay penalty
    Low_swing,     // differential low power wires with high area overhead
    Semi_global,   // mid-level wires with repeaters
    Transmission,  // transmission lines with high area overhead
    Optical,       // optical wires
    Invalid_wtype
};
| |
| |
| |
| class InputParameter { |
| public: |
| void parse_cfg(const string & infile); |
| |
| // return false if the input parameters are problematic |
| bool error_checking(string name = "CACTI"); |
| void display_ip(); |
| |
| unsigned int cache_sz; // in bytes |
| unsigned int line_sz; |
| unsigned int assoc; |
| unsigned int nbanks; |
| unsigned int out_w;// == nr_bits_out |
| bool specific_tag; |
| unsigned int tag_w; |
| unsigned int access_mode; |
| unsigned int obj_func_dyn_energy; |
| unsigned int obj_func_dyn_power; |
| unsigned int obj_func_leak_power; |
| unsigned int obj_func_cycle_t; |
| |
| double F_sz_nm; // feature size in nm |
| double F_sz_um; // feature size in um |
| unsigned int num_rw_ports; |
| unsigned int num_rd_ports; |
| unsigned int num_wr_ports; |
| unsigned int num_se_rd_ports; // number of single ended read ports |
| unsigned int num_search_ports; // Sheng: number of search ports for CAM |
| bool is_main_mem; |
| bool is_cache; |
| bool pure_ram; |
| bool pure_cam; |
| bool rpters_in_htree; // if there are repeaters in htree segment |
| unsigned int ver_htree_wires_over_array; |
| unsigned int broadcast_addr_din_over_ver_htrees; |
| unsigned int temp; |
| |
| unsigned int ram_cell_tech_type; |
| unsigned int peri_global_tech_type; |
| unsigned int data_arr_ram_cell_tech_type; |
| unsigned int data_arr_peri_global_tech_type; |
| unsigned int tag_arr_ram_cell_tech_type; |
| unsigned int tag_arr_peri_global_tech_type; |
| |
| unsigned int burst_len; |
| unsigned int int_prefetch_w; |
| unsigned int page_sz_bits; |
| |
| unsigned int ic_proj_type; // interconnect_projection_type |
| unsigned int wire_is_mat_type; // wire_inside_mat_type |
| unsigned int wire_os_mat_type; // wire_outside_mat_type |
| enum Wire_type wt; |
| int force_wiretype; |
| bool print_input_args; |
| unsigned int nuca_cache_sz; // TODO |
| int ndbl, ndwl, nspd, ndsam1, ndsam2, ndcm; |
| bool force_cache_config; |
| |
| int cache_level; |
| int cores; |
| int nuca_bank_count; |
| int force_nuca_bank; |
| |
| int delay_wt, dynamic_power_wt, leakage_power_wt, |
| cycle_time_wt, area_wt; |
| int delay_wt_nuca, dynamic_power_wt_nuca, leakage_power_wt_nuca, |
| cycle_time_wt_nuca, area_wt_nuca; |
| |
| int delay_dev, dynamic_power_dev, leakage_power_dev, |
| cycle_time_dev, area_dev; |
| int delay_dev_nuca, dynamic_power_dev_nuca, leakage_power_dev_nuca, |
| cycle_time_dev_nuca, area_dev_nuca; |
| int ed; //ED or ED2 optimization |
| int nuca; |
| |
| bool fast_access; |
| unsigned int block_sz; // bytes |
| unsigned int tag_assoc; |
| unsigned int data_assoc; |
| bool is_seq_acc; |
| bool fully_assoc; |
| unsigned int nsets; // == number_of_sets |
| int print_detail; |
| |
| |
| bool add_ecc_b_; |
| //parameters for design constraint |
| double throughput; |
| double latency; |
| bool pipelinable; |
| int pipeline_stages; |
| int per_stage_vector; |
| bool with_clock_grid; |
| }; |
| |
| |
| typedef struct { |
| int Ndwl; |
| int Ndbl; |
| double Nspd; |
| int deg_bl_muxing; |
| int Ndsam_lev_1; |
| int Ndsam_lev_2; |
| int number_activated_mats_horizontal_direction; |
| int number_subbanks; |
| int page_size_in_bits; |
| double delay_route_to_bank; |
| double delay_crossbar; |
| double delay_addr_din_horizontal_htree; |
| double delay_addr_din_vertical_htree; |
| double delay_row_predecode_driver_and_block; |
| double delay_row_decoder; |
| double delay_bitlines; |
| double delay_sense_amp; |
| double delay_subarray_output_driver; |
| double delay_bit_mux_predecode_driver_and_block; |
| double delay_bit_mux_decoder; |
| double delay_senseamp_mux_lev_1_predecode_driver_and_block; |
| double delay_senseamp_mux_lev_1_decoder; |
| double delay_senseamp_mux_lev_2_predecode_driver_and_block; |
| double delay_senseamp_mux_lev_2_decoder; |
| double delay_input_htree; |
| double delay_output_htree; |
| double delay_dout_vertical_htree; |
| double delay_dout_horizontal_htree; |
| double delay_comparator; |
| double access_time; |
| double cycle_time; |
| double multisubbank_interleave_cycle_time; |
| double delay_request_network; |
| double delay_inside_mat; |
| double delay_reply_network; |
| double trcd; |
| double cas_latency; |
| double precharge_delay; |
| powerDef power_routing_to_bank; |
| powerDef power_addr_input_htree; |
| powerDef power_data_input_htree; |
| powerDef power_data_output_htree; |
| powerDef power_addr_horizontal_htree; |
| powerDef power_datain_horizontal_htree; |
| powerDef power_dataout_horizontal_htree; |
| powerDef power_addr_vertical_htree; |
| powerDef power_datain_vertical_htree; |
| powerDef power_row_predecoder_drivers; |
| powerDef power_row_predecoder_blocks; |
| powerDef power_row_decoders; |
| powerDef power_bit_mux_predecoder_drivers; |
| powerDef power_bit_mux_predecoder_blocks; |
| powerDef power_bit_mux_decoders; |
| powerDef power_senseamp_mux_lev_1_predecoder_drivers; |
| powerDef power_senseamp_mux_lev_1_predecoder_blocks; |
| powerDef power_senseamp_mux_lev_1_decoders; |
| powerDef power_senseamp_mux_lev_2_predecoder_drivers; |
| powerDef power_senseamp_mux_lev_2_predecoder_blocks; |
| powerDef power_senseamp_mux_lev_2_decoders; |
| powerDef power_bitlines; |
| powerDef power_sense_amps; |
| powerDef power_prechg_eq_drivers; |
| powerDef power_output_drivers_at_subarray; |
| powerDef power_dataout_vertical_htree; |
| powerDef power_comparators; |
| powerDef power_crossbar; |
| powerDef total_power; |
| double area; |
| double all_banks_height; |
| double all_banks_width; |
| double bank_height; |
| double bank_width; |
| double subarray_memory_cell_area_height; |
| double subarray_memory_cell_area_width; |
| double mat_height; |
| double mat_width; |
| double routing_area_height_within_bank; |
| double routing_area_width_within_bank; |
| double area_efficiency; |
| double refresh_power; |
| double dram_refresh_period; |
| double dram_array_availability; |
| double dyn_read_energy_from_closed_page; |
| double dyn_read_energy_from_open_page; |
| double leak_power_subbank_closed_page; |
| double leak_power_subbank_open_page; |
| double leak_power_request_and_reply_networks; |
| double activate_energy; |
| double read_energy; |
| double write_energy; |
| double precharge_energy; |
| } results_mem_array; |
| |
| |
| class uca_org_t { |
| public: |
| mem_array * tag_array2; |
| mem_array * data_array2; |
| double access_time; |
| double cycle_time; |
| double area; |
| double area_efficiency; |
| powerDef power; |
| double leak_power_with_sleep_transistors_in_mats; |
| double cache_ht; |
| double cache_len; |
| char file_n[100]; |
| double vdd_periph_global; |
| bool valid; |
| results_mem_array tag_array; |
| results_mem_array data_array; |
| |
| uca_org_t(); |
| void find_delay(); |
| void find_energy(); |
| void find_area(); |
| void find_cyc(); |
| void adjust_area();//for McPAT only to adjust routing overhead |
| void cleanup(); |
| ~uca_org_t() {}; |
| }; |
| |
| void reconfigure(InputParameter *local_interface, uca_org_t *fin_res); |
| |
| uca_org_t cacti_interface(const string & infile_name); |
| //McPAT's plain interface, please keep !!! |
| uca_org_t cacti_interface(InputParameter * const local_interface); |
| //McPAT's plain interface, please keep !!! |
| uca_org_t init_interface(InputParameter * const local_interface, |
| const string &name); |
| //McPAT's plain interface, please keep !!! |
| uca_org_t cacti_interface( |
| int cache_size, |
| int line_size, |
| int associativity, |
| int rw_ports, |
| int excl_read_ports, |
| int excl_write_ports, |
| int single_ended_read_ports, |
| int search_ports, |
| int banks, |
| double tech_node, |
| int output_width, |
| int specific_tag, |
| int tag_width, |
| int access_mode, |
| int cache, |
| int main_mem, |
| int obj_func_delay, |
| int obj_func_dynamic_power, |
| int obj_func_leakage_power, |
| int obj_func_cycle_time, |
| int obj_func_area, |
| int dev_func_delay, |
| int dev_func_dynamic_power, |
| int dev_func_leakage_power, |
| int dev_func_area, |
| int dev_func_cycle_time, |
| int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate |
| int temp, |
| int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing |
| int data_arr_ram_cell_tech_flavor_in, |
| int data_arr_peri_global_tech_flavor_in, |
| int tag_arr_ram_cell_tech_flavor_in, |
| int tag_arr_peri_global_tech_flavor_in, |
| int interconnect_projection_type_in, |
| int wire_inside_mat_type_in, |
| int wire_outside_mat_type_in, |
| int REPEATERS_IN_HTREE_SEGMENTS_in, |
| int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in, |
| int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in, |
| int PAGE_SIZE_BITS_in, |
| int BURST_LENGTH_in, |
| int INTERNAL_PREFETCH_WIDTH_in, |
| int force_wiretype, |
| int wiretype, |
| int force_config, |
| int ndwl, |
| int ndbl, |
| int nspd, |
| int ndcm, |
| int ndsam1, |
| int ndsam2, |
| int ecc); |
| |
| //Naveen's interface |
| uca_org_t cacti_interface( |
| int cache_size, |
| int line_size, |
| int associativity, |
| int rw_ports, |
| int excl_read_ports, |
| int excl_write_ports, |
| int single_ended_read_ports, |
| int banks, |
| double tech_node, |
| int page_sz, |
| int burst_length, |
| int pre_width, |
| int output_width, |
| int specific_tag, |
| int tag_width, |
| int access_mode, //0 normal, 1 seq, 2 fast |
| int cache, //scratch ram or cache |
| int main_mem, |
| int obj_func_delay, |
| int obj_func_dynamic_power, |
| int obj_func_leakage_power, |
| int obj_func_area, |
| int obj_func_cycle_time, |
| int dev_func_delay, |
| int dev_func_dynamic_power, |
| int dev_func_leakage_power, |
| int dev_func_area, |
| int dev_func_cycle_time, |
| int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate |
| int temp, |
| int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing |
| int data_arr_ram_cell_tech_flavor_in, |
| int data_arr_peri_global_tech_flavor_in, |
| int tag_arr_ram_cell_tech_flavor_in, |
| int tag_arr_peri_global_tech_flavor_in, |
| int interconnect_projection_type_in, // 0 - aggressive, 1 - normal |
| int wire_inside_mat_type_in, |
| int wire_outside_mat_type_in, |
| int is_nuca, // 0 - UCA, 1 - NUCA |
| int core_count, |
| int cache_level, // 0 - L2, 1 - L3 |
| int nuca_bank_count, |
| int nuca_obj_func_delay, |
| int nuca_obj_func_dynamic_power, |
| int nuca_obj_func_leakage_power, |
| int nuca_obj_func_area, |
| int nuca_obj_func_cycle_time, |
| int nuca_dev_func_delay, |
| int nuca_dev_func_dynamic_power, |
| int nuca_dev_func_leakage_power, |
| int nuca_dev_func_area, |
| int nuca_dev_func_cycle_time, |
| int REPEATERS_IN_HTREE_SEGMENTS_in,//TODO for now only wires with repeaters are supported |
| int p_input); |
| |
| class mem_array { |
| public: |
| int Ndcm; |
| int Ndwl; |
| int Ndbl; |
| double Nspd; |
| int deg_bl_muxing; |
| int Ndsam_lev_1; |
| int Ndsam_lev_2; |
| double access_time; |
| double cycle_time; |
| double multisubbank_interleave_cycle_time; |
| double area_ram_cells; |
| double area; |
| powerDef power; |
| double delay_senseamp_mux_decoder; |
| double delay_before_subarray_output_driver; |
| double delay_from_subarray_output_driver_to_output; |
| double height; |
| double width; |
| |
| double mat_height; |
| double mat_length; |
| double subarray_length; |
| double subarray_height; |
| |
| double delay_route_to_bank, |
| delay_input_htree, |
| delay_row_predecode_driver_and_block, |
| delay_row_decoder, |
| delay_bitlines, |
| delay_sense_amp, |
| delay_subarray_output_driver, |
| delay_dout_htree, |
| delay_comparator, |
| delay_matchlines; |
| |
| double all_banks_height, |
| all_banks_width, |
| area_efficiency; |
| |
| powerDef power_routing_to_bank; |
| powerDef power_addr_input_htree; |
| powerDef power_data_input_htree; |
| powerDef power_data_output_htree; |
| powerDef power_htree_in_search; |
| powerDef power_htree_out_search; |
| powerDef power_row_predecoder_drivers; |
| powerDef power_row_predecoder_blocks; |
| powerDef power_row_decoders; |
| powerDef power_bit_mux_predecoder_drivers; |
| powerDef power_bit_mux_predecoder_blocks; |
| powerDef power_bit_mux_decoders; |
| powerDef power_senseamp_mux_lev_1_predecoder_drivers; |
| powerDef power_senseamp_mux_lev_1_predecoder_blocks; |
| powerDef power_senseamp_mux_lev_1_decoders; |
| powerDef power_senseamp_mux_lev_2_predecoder_drivers; |
| powerDef power_senseamp_mux_lev_2_predecoder_blocks; |
| powerDef power_senseamp_mux_lev_2_decoders; |
| powerDef power_bitlines; |
| powerDef power_sense_amps; |
| powerDef power_prechg_eq_drivers; |
| powerDef power_output_drivers_at_subarray; |
| powerDef power_dataout_vertical_htree; |
| powerDef power_comparators; |
| |
| powerDef power_cam_bitline_precharge_eq_drv; |
| powerDef power_searchline; |
| powerDef power_searchline_precharge; |
| powerDef power_matchlines; |
| powerDef power_matchline_precharge; |
| powerDef power_matchline_to_wordline_drv; |
| |
| min_values_t *arr_min; |
| enum Wire_type wt; |
| |
| // dram stats |
| double activate_energy, read_energy, write_energy, precharge_energy, |
| refresh_power, leak_power_subbank_closed_page, leak_power_subbank_open_page, |
| leak_power_request_and_reply_networks; |
| |
| double precharge_delay; |
| |
| static bool lt(const mem_array * m1, const mem_array * m2); |
| }; |
| |
| |
| #endif |