| /***************************************************************************** |
| * McPAT |
| * SOFTWARE LICENSE AGREEMENT |
| * Copyright 2012 Hewlett-Packard Development Company, L.P. |
| * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. |
| * All Rights Reserved |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are |
| * met: redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer; |
| * redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution; |
| * neither the name of the copyright holders nor the names of its |
| * contributors may be used to endorse or promote products derived from |
| * this software without specific prior written permission. |
| |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| * |
| ***************************************************************************/ |
| |
| #include <algorithm> |
| #include <cassert> |
| #include <cmath> |
| #include <iostream> |
| #include <sstream> |
| #include <string> |
| |
| #include "basic_circuit.h" |
| #include "basic_components.h" |
| #include "common.h" |
| #include "const.h" |
| #include "core.h" |
| #include "io.h" |
| #include "parameter.h" |
| |
| int RegFU::RFWIN_ACCESS_MULTIPLIER = 16; |
| |
| // The five bits are: busy, Issued, Finished, speculative, valid |
| int SchedulerU::ROB_STATUS_BITS = 5; |
| |
| InstFetchU::InstFetchU(XMLNode* _xml_data, InputParameter* interface_ip_, |
| const CoreParameters & _core_params, |
| const CoreStatistics & _core_stats, bool exist_) |
| : McPATComponent(_xml_data), icache(NULL), IB(NULL), BTB(NULL), |
| BPT(NULL), ID_inst(NULL), ID_operand(NULL), ID_misc(NULL), |
| interface_ip(*interface_ip_), |
| core_params(_core_params), core_stats(_core_stats), exist(exist_) { |
| if (!exist) return; |
| int idx, tag, data, size, line, assoc, banks; |
| bool is_default = true; |
| |
| clockRate = core_params.clockRate; |
| name = "Instruction Fetch Unit"; |
| // Check if there is an icache child: |
| int i; |
| icache = NULL; |
| for( i = 0; i < xml_data->nChildNode("component"); i++ ) { |
| XMLNode* childXML = xml_data->getChildNodePtr("component", &i); |
| XMLCSTR type = childXML->getAttribute("type"); |
| |
| if (!type) |
| warnMissingComponentType(childXML->getAttribute("id")); |
| |
| STRCMP(type, "CacheUnit") { |
| XMLCSTR name = childXML->getAttribute("name"); |
| if (strcmp(name, "Instruction Cache") == 0 || |
| strcmp(name, "icache") == 0) { |
| icache = new CacheUnit(childXML, &interface_ip); |
| children.push_back(icache); |
| } |
| } |
| } |
| |
| set_params_stats(); |
| |
| //Instruction buffer |
| data = core_params.instruction_length * core_params.peak_issueW; |
| line = int(ceil(data / BITS_PER_BYTE)); |
| size = core_params.num_hthreads * core_params.instruction_buffer_size * |
| line; |
| if (size < MIN_BUFFER_SIZE) { |
| size = MIN_BUFFER_SIZE; |
| } |
| |
| interface_ip.cache_sz = size; |
| interface_ip.line_sz = line; |
| interface_ip.assoc = core_params.instruction_buffer_assoc; |
| interface_ip.nbanks = core_params.instruction_buffer_nbanks; |
| interface_ip.out_w = line * BITS_PER_BYTE; |
| interface_ip.specific_tag = core_params.instruction_buffer_tag_width > 0; |
| interface_ip.tag_w = core_params.instruction_buffer_tag_width; |
| interface_ip.access_mode = Normal; |
| interface_ip.obj_func_dyn_energy = 0; |
| interface_ip.obj_func_dyn_power = 0; |
| interface_ip.obj_func_leak_power = 0; |
| interface_ip.obj_func_cycle_t = 1; |
| interface_ip.num_rw_ports = |
| core_params.number_instruction_fetch_ports; |
| interface_ip.num_rd_ports = 0; |
| interface_ip.num_wr_ports = 0; |
| interface_ip.num_se_rd_ports = 0; |
| interface_ip.num_search_ports = 0; |
| interface_ip.is_cache = false; |
| interface_ip.pure_ram = true; |
| interface_ip.pure_cam = false; |
| interface_ip.throughput = 1.0 / clockRate; |
| interface_ip.latency = 1.0 / clockRate; |
| |
| IB = new ArrayST(xml_data, &interface_ip, "Instruction Buffer", |
| Core_device, clockRate, core_params.opt_local, |
| core_params.core_ty); |
| IB->area.set_area(IB->area.get_area() + IB->local_result.area); |
| area.set_area(area.get_area() + IB->local_result.area); |
| |
| if (core_params.predictionW > 0) { |
| /* |
| * BTB branch target buffer, accessed during IF stage. Virtually indexed and virtually tagged |
| * It is only a cache without all the buffers in the cache controller since it is more like a |
| * look up table than a cache with cache controller. When access miss, no load from other places |
| * such as main memory (not actively fill the misses), it is passively updated under two circumstances: |
| * 1) when BPT@ID stage finds out current is a taken branch while BTB missed |
| * 2) When BPT@ID stage predicts differently than BTB |
| * 3) When ID stage finds out current instruction is not a branch while BTB had a hit.(mark as invalid) |
| * 4) when EXEU find out wrong target has been provided from BTB. |
| * |
| */ |
| size = inst_fetch_params.btb_size; |
| line = inst_fetch_params.btb_block_size; |
| assoc = inst_fetch_params.btb_assoc; |
| banks = inst_fetch_params.btb_num_banks; |
| idx = int(ceil(log2(size / line / assoc))); |
| tag = virtual_address_width + int(ceil(log2(core_params.num_hthreads))) |
| + EXTRA_TAG_BITS; |
| |
| interface_ip.cache_sz = size; |
| interface_ip.line_sz = line; |
| interface_ip.assoc = assoc; |
| interface_ip.nbanks = banks; |
| interface_ip.out_w = line * BITS_PER_BYTE; |
| interface_ip.specific_tag = tag > 0; |
| interface_ip.tag_w = tag; |
| interface_ip.access_mode = Normal; |
| interface_ip.obj_func_dyn_energy = 0; |
| interface_ip.obj_func_dyn_power = 0; |
| interface_ip.obj_func_leak_power = 0; |
| interface_ip.obj_func_cycle_t = 1; |
| interface_ip.num_rw_ports = 1; |
| interface_ip.num_rd_ports = core_params.predictionW; |
| interface_ip.num_wr_ports = core_params.predictionW; |
| interface_ip.num_se_rd_ports = 0; |
| interface_ip.num_search_ports = 0; |
| interface_ip.is_cache = true; |
| interface_ip.pure_ram = false; |
| interface_ip.pure_cam = false; |
| interface_ip.throughput = inst_fetch_params.btb_throughput / clockRate; |
| interface_ip.latency = inst_fetch_params.btb_latency / clockRate; |
| |
| BTB = new ArrayST(xml_data, &interface_ip, "Branch Target Buffer", |
| Core_device, clockRate, core_params.opt_local, |
| core_params.core_ty); |
| area.set_area(area.get_area() + BTB->local_result.area); |
| |
| BPT = new BranchPredictor(xml_data, &interface_ip, |
| core_params, core_stats); |
| area.set_area(area.get_area() + BPT->area.get_area()); |
| } |
| |
| ID_inst = new InstructionDecoder(xml_data, "Instruction Opcode Decoder", |
| is_default, &interface_ip, |
| core_params.opcode_width, |
| core_params.decodeW, |
| core_params.x86, clockRate, |
| Core_device, core_params.core_ty); |
| |
| ID_operand = new InstructionDecoder(xml_data, |
| "Instruction Operand Decoder", |
| is_default, &interface_ip, |
| core_params.arch_ireg_width, |
| core_params.decodeW, |
| core_params.x86, clockRate, |
| Core_device, core_params.core_ty); |
| |
| ID_misc = new InstructionDecoder(xml_data, "Instruction Microcode Decoder", |
| is_default, &interface_ip, |
| core_params.micro_opcode_length, |
| core_params.decodeW, |
| core_params.x86, clockRate, |
| Core_device, core_params.core_ty); |
| area.set_area(area.get_area()+ (ID_inst->area.get_area() |
| + ID_operand->area.get_area() |
| + ID_misc->area.get_area()) |
| * core_params.decodeW); |
| } |
| |
| void |
| InstFetchU::set_params_stats() { |
| int num_children = xml_data->nChildNode("component"); |
| int i; |
| memset(&inst_fetch_params,0,sizeof(InstFetchParameters)); |
| for (i = 0; i < num_children; i++) { |
| XMLNode* child = xml_data->getChildNodePtr("component", &i); |
| XMLCSTR type = child->getAttribute("type"); |
| |
| if (!type) |
| warnMissingComponentType(child->getAttribute("id")); |
| |
| STRCMP(type, "BranchTargetBuffer") { |
| int sub_num_children = child->nChildNode("param"); |
| int j; |
| for (j = 0; j < sub_num_children; j++) { |
| XMLNode* paramNode = child->getChildNodePtr("param", &j); |
| XMLCSTR node_name = paramNode->getAttribute("name"); |
| XMLCSTR value = paramNode->getAttribute("value"); |
| |
| if (!node_name) |
| warnMissingParamName(paramNode->getAttribute("id")); |
| |
| ASSIGN_INT_IF("size", inst_fetch_params.btb_size); |
| ASSIGN_INT_IF("block_size", inst_fetch_params.btb_block_size); |
| ASSIGN_INT_IF("assoc", inst_fetch_params.btb_assoc); |
| ASSIGN_INT_IF("num_banks", inst_fetch_params.btb_num_banks); |
| ASSIGN_INT_IF("latency", inst_fetch_params.btb_latency); |
| ASSIGN_INT_IF("throughput", inst_fetch_params.btb_throughput); |
| ASSIGN_INT_IF("rw_ports", inst_fetch_params.btb_rw_ports); |
| |
| else { |
| warnUnrecognizedParam(node_name); |
| } |
| } |
| |
| sub_num_children = child->nChildNode("stat"); |
| for (j = 0; j < sub_num_children; j++) { |
| XMLNode* statNode = child->getChildNodePtr("stat", &j); |
| XMLCSTR node_name = statNode->getAttribute("name"); |
| XMLCSTR value = statNode->getAttribute("value"); |
| |
| if (!node_name) |
| warnMissingStatName(statNode->getAttribute("id")); |
| |
| ASSIGN_FP_IF("read_accesses", |
| inst_fetch_stats.btb_read_accesses); |
| ASSIGN_FP_IF("write_accesses", |
| inst_fetch_stats.btb_write_accesses); |
| else { |
| warnUnrecognizedStat(node_name); |
| } |
| } |
| } |
| } |
| |
| // Parameter sanity check |
| if (inst_fetch_params.btb_size <= 0) { |
| errorNonPositiveParam("size"); |
| } |
| |
| if (inst_fetch_params.btb_block_size <= 0) { |
| errorNonPositiveParam("block_size"); |
| } |
| |
| if (inst_fetch_params.btb_assoc <= 0) { |
| errorNonPositiveParam("assoc"); |
| } |
| |
| if (inst_fetch_params.btb_num_banks <= 0) { |
| errorNonPositiveParam("num_banks"); |
| } |
| } |
| |
| BranchPredictor::BranchPredictor(XMLNode* _xml_data, |
| InputParameter* interface_ip_, |
| const CoreParameters & _core_params, |
| const CoreStatistics & _core_stats, |
| bool exist_) |
| : McPATComponent(_xml_data), globalBPT(NULL), localBPT(NULL), |
| L1_localBPT(NULL), L2_localBPT(NULL), chooser(NULL), RAS(NULL), |
| interface_ip(*interface_ip_), |
| core_params(_core_params), core_stats(_core_stats), exist(exist_) { |
| if (!exist) return; |
| int tag; |
| int data; |
| int size; |
| |
| clockRate = core_params.clockRate; |
| name = "Branch Predictor"; |
| |
| // Common interface parameters for the branch predictor structures |
| interface_ip.pure_cam = false; |
| |
| if (core_params.multithreaded) { |
| tag = int(log2(core_params.num_hthreads) + EXTRA_TAG_BITS); |
| interface_ip.specific_tag = tag > 0; |
| interface_ip.tag_w = tag; |
| interface_ip.is_cache = true; |
| interface_ip.pure_ram = false; |
| } else { |
| interface_ip.specific_tag = 0; |
| interface_ip.tag_w = 0; |
| interface_ip.is_cache = false; |
| interface_ip.pure_ram = true; |
| } |
| |
| // Parse params and stats from XML |
| set_params_stats(); |
| |
| // Common interface parameters for the branch predictor structures |
| interface_ip.assoc = branch_pred_params.assoc; |
| interface_ip.nbanks = branch_pred_params.nbanks; |
| |
| //Global predictor |
| data = int(ceil(branch_pred_params.global_predictor_bits / BITS_PER_BYTE)); |
| size = data * branch_pred_params.global_predictor_entries; |
| |
| interface_ip.cache_sz = size; |
| interface_ip.line_sz = data; |
| interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; |
| interface_ip.access_mode = Fast; |
| interface_ip.obj_func_dyn_energy = 0; |
| interface_ip.obj_func_dyn_power = 0; |
| interface_ip.obj_func_leak_power = 0; |
| interface_ip.obj_func_cycle_t = 1; |
| interface_ip.num_rw_ports = 0; |
| interface_ip.num_rd_ports = core_params.predictionW; |
| interface_ip.num_wr_ports = core_params.predictionW; |
| interface_ip.num_se_rd_ports = 0; |
| interface_ip.num_search_ports = 0; |
| interface_ip.throughput = 1.0 / clockRate; |
| interface_ip.latency = 1.0 / clockRate; |
| globalBPT = new ArrayST(xml_data, &interface_ip, "Global Predictor", |
| Core_device, clockRate, core_params.opt_local, |
| core_params.core_ty); |
| area.set_area(area.get_area() + globalBPT->local_result.area); |
| |
| //Local BPT (Level 1) |
| data = int(ceil(branch_pred_params.local_l1_predictor_size / |
| BITS_PER_BYTE)); |
| size = data * branch_pred_params.local_predictor_entries; |
| |
| interface_ip.cache_sz = size; |
| interface_ip.line_sz = data; |
| interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; |
| interface_ip.access_mode = Fast; |
| interface_ip.obj_func_dyn_energy = 0; |
| interface_ip.obj_func_dyn_power = 0; |
| interface_ip.obj_func_leak_power = 0; |
| interface_ip.obj_func_cycle_t = 1; |
| interface_ip.num_rw_ports = 0; |
| interface_ip.num_rd_ports = core_params.predictionW; |
| interface_ip.num_wr_ports = core_params.predictionW; |
| interface_ip.num_se_rd_ports = 0; |
| interface_ip.num_search_ports = 0; |
| interface_ip.throughput = 1.0 / clockRate; |
| interface_ip.latency = 1.0 / clockRate; |
| L1_localBPT = new ArrayST(xml_data, &interface_ip, |
| "Local Predictor, Level 1", |
| Core_device, clockRate, core_params.opt_local, |
| core_params.core_ty); |
| L1_localBPT->area.set_area(L1_localBPT->area.get_area() + |
| L1_localBPT->local_result.area); |
| area.set_area(area.get_area()+ L1_localBPT->local_result.area); |
| |
| //Local BPT (Level 2) |
| data = int(ceil(branch_pred_params.local_l2_predictor_size / |
| BITS_PER_BYTE)); |
| size = data * branch_pred_params.local_predictor_entries; |
| |
| interface_ip.cache_sz = size; |
| interface_ip.line_sz = data; |
| interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; |
| interface_ip.access_mode = Fast; |
| interface_ip.obj_func_dyn_energy = 0; |
| interface_ip.obj_func_dyn_power = 0; |
| interface_ip.obj_func_leak_power = 0; |
| interface_ip.obj_func_cycle_t = 1; |
| interface_ip.num_rw_ports = 0; |
| interface_ip.num_rd_ports = core_params.predictionW; |
| interface_ip.num_wr_ports = core_params.predictionW; |
| interface_ip.num_se_rd_ports = 0; |
| interface_ip.num_search_ports = 0; |
| interface_ip.throughput = 1.0 / clockRate; |
| interface_ip.latency = 1.0 / clockRate; |
| L2_localBPT = new ArrayST(xml_data, &interface_ip, |
| "Local Predictor, Level 2", |
| Core_device, clockRate, core_params.opt_local, |
| core_params.core_ty); |
| area.set_area(area.get_area() + L2_localBPT->local_result.area); |
| |
| //Chooser |
| data = int(ceil(branch_pred_params.chooser_predictor_bits / |
| BITS_PER_BYTE)); |
| size = data * branch_pred_params.chooser_predictor_entries; |
| |
| interface_ip.cache_sz = size; |
| interface_ip.line_sz = data; |
| interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; |
| interface_ip.access_mode = Fast; |
| interface_ip.obj_func_dyn_energy = 0; |
| interface_ip.obj_func_dyn_power = 0; |
| interface_ip.obj_func_leak_power = 0; |
| interface_ip.obj_func_cycle_t = 1; |
| interface_ip.num_rw_ports = 0; |
| interface_ip.num_rd_ports = core_params.predictionW; |
| interface_ip.num_wr_ports = core_params.predictionW; |
| interface_ip.num_se_rd_ports = 0; |
| interface_ip.num_search_ports = 0; |
| interface_ip.throughput = 1.0 / clockRate; |
| interface_ip.latency = 1.0 / clockRate; |
| chooser = new ArrayST(xml_data, &interface_ip, "Predictor Chooser", |
| Core_device, clockRate, core_params.opt_local, |
| core_params.core_ty); |
| area.set_area(area.get_area() + chooser->local_result.area); |
| |
| //RAS return address stacks are Duplicated for each thread. |
| data = int(ceil(core_params.pc_width / BITS_PER_BYTE)); |
| size = data * core_params.RAS_size; |
| |
| interface_ip.cache_sz = size; |
| interface_ip.line_sz = data; |
| interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; |
| interface_ip.access_mode = Fast; |
| interface_ip.obj_func_dyn_energy = 0; |
| interface_ip.obj_func_dyn_power = 0; |
| interface_ip.obj_func_leak_power = 0; |
| interface_ip.obj_func_cycle_t = 1; |
| interface_ip.num_rw_ports = 0; |
| interface_ip.num_rd_ports = core_params.predictionW; |
| interface_ip.num_wr_ports = core_params.predictionW; |
| interface_ip.num_se_rd_ports = 0; |
| interface_ip.num_search_ports = 0; |
| interface_ip.is_cache = false; |
| interface_ip.pure_ram = true; |
| interface_ip.throughput = 1.0 / clockRate; |
| interface_ip.latency = 1.0 / clockRate; |
| RAS = new ArrayST(xml_data, &interface_ip, "RAS", Core_device, clockRate, |
| core_params.opt_local, core_params.core_ty); |
| RAS->output_data.area *= core_params.num_hthreads; |
| area.set_area(area.get_area() + RAS->local_result.area * |
| core_params.num_hthreads); |
| |
| } |
| |
| void |
| BranchPredictor::set_params_stats() { |
| int num_children = xml_data->nChildNode("component"); |
| int i; |
| for (i = 0; i < num_children; i++) { |
| XMLNode* child = xml_data->getChildNodePtr("component", &i); |
| XMLCSTR type = child->getAttribute("type"); |
| |
| if (!type) |
| warnMissingComponentType(child->getAttribute("id")); |
| |
| STRCMP(type, "BranchPredictor") { |
| int sub_num_children = child->nChildNode("param"); |
| int j; |
| for (j = 0; j < sub_num_children; j++) { |
| XMLNode* paramNode = child->getChildNodePtr("param", &j); |
| XMLCSTR node_name = paramNode->getAttribute("name"); |
| XMLCSTR value = paramNode->getAttribute("value"); |
| |
| if (!node_name) |
| warnMissingParamName(paramNode->getAttribute("id")); |
| |
| ASSIGN_INT_IF("assoc", branch_pred_params.assoc); |
| ASSIGN_INT_IF("nbanks", branch_pred_params.nbanks); |
| ASSIGN_INT_IF("local_l1_predictor_size", |
| branch_pred_params.local_l1_predictor_size); |
| ASSIGN_INT_IF("local_l2_predictor_size", |
| branch_pred_params.local_l2_predictor_size); |
| ASSIGN_INT_IF("local_predictor_entries", |
| branch_pred_params.local_predictor_entries); |
| ASSIGN_INT_IF("global_predictor_entries", |
| branch_pred_params.global_predictor_entries); |
| ASSIGN_INT_IF("global_predictor_bits", |
| branch_pred_params.global_predictor_bits); |
| ASSIGN_INT_IF("chooser_predictor_entries", |
| branch_pred_params.chooser_predictor_entries); |
| ASSIGN_INT_IF("chooser_predictor_bits", |
| branch_pred_params.chooser_predictor_bits); |
| |
| else { |
| warnUnrecognizedParam(node_name); |
| } |
| } |
| // The core reads in the number of branches and the number of |
| // function calls and these values are passed through the |
| // core_stats variable, so we don't need to read them in here |
| } |
| } |
| } |
| |
| SchedulerU::SchedulerU(XMLNode* _xml_data, InputParameter* interface_ip_, |
| const CoreParameters & _core_params, |
| const CoreStatistics & _core_stats, bool exist_) |
| : McPATComponent(_xml_data), int_inst_window(NULL), |
| fp_inst_window(NULL), ROB(NULL), int_instruction_selection(NULL), |
| fp_instruction_selection(NULL), |
| interface_ip(*interface_ip_), |
| core_params(_core_params), core_stats(_core_stats), exist(exist_) { |
| if (!exist) return; |
| int tag; |
| int data; |
| int size; |
| int line; |
| bool is_default = true; |
| string tmp_name; |
| |
| clockRate = core_params.clockRate; |
| name = "Instruction Scheduler"; |
| if ((core_params.core_ty == Inorder && core_params.multithreaded)) { |
| //Instruction issue queue, in-order multi-issue or multithreaded |
| //processor also has this structure. Unified window for Inorder |
| //processors |
| //This tag width is the normal thread state bits based on |
| //Niagara Design |
| tag = int(log2(core_params.num_hthreads) * core_params.perThreadState); |
| data = core_params.instruction_length; |
| line = int(ceil(data / BITS_PER_BYTE)); |
| size = core_params.instruction_window_size * line; |
| if (size < MIN_BUFFER_SIZE) { |
| size = MIN_BUFFER_SIZE; |
| } |
| |
| //NOTE: x86 inst can be very lengthy, up to 15B. |
| //Source: Intel® 64 and IA-32 Architectures |
| //Software Developer’s Manual |
| interface_ip.cache_sz = size; |
| interface_ip.line_sz = line; |
| interface_ip.assoc = core_params.scheduler_assoc; |
| interface_ip.nbanks = core_params.scheduler_nbanks; |
| interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; |
| interface_ip.specific_tag = tag > 0; |
| interface_ip.tag_w = tag; |
| interface_ip.access_mode = Sequential; |
| interface_ip.obj_func_dyn_energy = 0; |
| interface_ip.obj_func_dyn_power = 0; |
| interface_ip.obj_func_leak_power = 0; |
| interface_ip.obj_func_cycle_t = 1; |
| interface_ip.num_rw_ports = 0; |
| interface_ip.num_rd_ports = core_params.peak_issueW; |
| interface_ip.num_wr_ports = core_params.peak_issueW; |
| interface_ip.num_se_rd_ports = 0; |
| interface_ip.num_search_ports = core_params.peak_issueW; |
| interface_ip.is_cache = true; |
| interface_ip.pure_cam = false; |
| interface_ip.pure_ram = false; |
| interface_ip.throughput = 1.0 / clockRate; |
| interface_ip.latency = 1.0 / clockRate; |
| int_inst_window = new ArrayST(xml_data, &interface_ip, |
| "InstFetchQueue", Core_device, clockRate, |
| core_params.opt_local, |
| core_params.core_ty); |
| int_inst_window->output_data.area *= core_params.num_pipelines; |
| area.set_area(area.get_area() + int_inst_window->local_result.area * |
| core_params.num_pipelines); |
| Iw_height = int_inst_window->local_result.cache_ht; |
| |
| /* |
| * selection logic |
| * In a single-issue Inorder multithreaded processor like Niagara, issue width=1*number_of_threads since the processor does need to pick up |
| * instructions from multiple ready ones(although these ready ones are from different threads).While SMT processors do not distinguish which thread belongs to who |
| * at the issue stage. |
| */ |
| |
| int_instruction_selection = |
| new selection_logic(xml_data, is_default, |
| core_params.instruction_window_size, |
| core_params.peak_issueW * |
| core_params.num_hthreads, |
| &interface_ip, |
| "Int Instruction Selection Logic", |
| core_stats.inst_window_wakeup_accesses, |
| clockRate, Core_device, core_params.core_ty); |
| |
| if (core_params.fp_instruction_window_size > 0) { |
| fp_instruction_selection = |
| new selection_logic(xml_data, is_default, |
| core_params.fp_instruction_window_size, |
| core_params.fp_issueW * |
| core_params.num_hthreads, |
| &interface_ip, |
| "FP Instruction Selection Logic", |
| core_stats.fp_inst_window_wakeup_accesses, |
| clockRate, Core_device, |
| core_params.core_ty); |
| } |
| } |
| |
| if (core_params.core_ty == OOO) { |
| /* |
| * CAM based instruction window |
| * For physicalRegFilebased OOO it is the instruction issue queue, where only tags of phy regs are stored |
| * For RS based OOO it is the Reservation station, where both tags and values of phy regs are stored |
| * It is written once and read twice(two operands) before an instruction can be issued. |
| * X86 instruction can be very long up to 15B. add instruction length in XML |
| */ |
| if (core_params.scheu_ty == PhysicalRegFile) { |
| tag = core_params.phy_ireg_width; |
| data = int((ceil((core_params.instruction_length + |
| NUM_SOURCE_OPERANDS * |
| (core_params.phy_ireg_width - |
| core_params.arch_ireg_width)) / |
| (double)NUM_SOURCE_OPERANDS) / |
| BITS_PER_BYTE)); |
| tmp_name = "Integer Instruction Window"; |
| } else { |
| tag = core_params.phy_ireg_width; |
| data = int(ceil(((core_params.instruction_length + |
| NUM_SOURCE_OPERANDS * |
| (core_params.phy_ireg_width - |
| core_params.arch_ireg_width) + |
| 2 * core_params.int_data_width) / |
| (double)NUM_SOURCE_OPERANDS) / |
| BITS_PER_BYTE)); |
| tmp_name = "Integer Reservation Station"; |
| } |
| |
| size = data * core_params.instruction_window_size; |
| |
| interface_ip.cache_sz = size; |
| interface_ip.line_sz = data; |
| interface_ip.assoc = core_params.scheduler_assoc; |
| interface_ip.nbanks = core_params.scheduler_nbanks; |
| interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; |
| interface_ip.specific_tag = tag > 0; |
| interface_ip.tag_w = tag; |
| interface_ip.access_mode = Normal; |
| interface_ip.obj_func_dyn_energy = 0; |
| interface_ip.obj_func_dyn_power = 0; |
| interface_ip.obj_func_leak_power = 0; |
| interface_ip.obj_func_cycle_t = 1; |
| interface_ip.num_rw_ports = 0; |
| interface_ip.num_rd_ports = core_params.peak_issueW; |
| interface_ip.num_wr_ports = core_params.peak_issueW; |
| interface_ip.num_se_rd_ports = 0; |
| interface_ip.num_search_ports = core_params.peak_issueW; |
| interface_ip.is_cache = true; |
| interface_ip.pure_cam = false; |
| interface_ip.pure_ram = false; |
| interface_ip.throughput = NUM_SOURCE_OPERANDS * 1.0 / clockRate; |
| interface_ip.latency = NUM_SOURCE_OPERANDS * 1.0 / clockRate; |
| int_inst_window = new ArrayST(xml_data, &interface_ip, tmp_name, |
| Core_device, clockRate, |
| core_params.opt_local, |
| core_params.core_ty); |
| int_inst_window->output_data.area *= core_params.num_pipelines; |
| area.set_area(area.get_area() + int_inst_window->local_result.area * |
| core_params.num_pipelines); |
| Iw_height = int_inst_window->local_result.cache_ht; |
| |
| //FU inst window |
| if (core_params.scheu_ty == PhysicalRegFile) { |
| tag = NUM_SOURCE_OPERANDS * core_params.phy_freg_width; |
| data = int(ceil((core_params.instruction_length + |
| NUM_SOURCE_OPERANDS * |
| (core_params.phy_freg_width - |
| core_params.arch_freg_width)) / BITS_PER_BYTE)); |
| tmp_name = "FP Instruction Window"; |
| } else { |
| tag = NUM_SOURCE_OPERANDS * core_params.phy_ireg_width; |
| data = int(ceil((core_params.instruction_length + |
| NUM_SOURCE_OPERANDS * |
| (core_params.phy_freg_width - |
| core_params.arch_freg_width) + |
| NUM_SOURCE_OPERANDS * core_params.fp_data_width) / |
| BITS_PER_BYTE)); |
| tmp_name = "FP Reservation Station"; |
| } |
| |
| size = data * core_params.fp_instruction_window_size; |
| |
| interface_ip.cache_sz = size; |
| interface_ip.line_sz = data; |
| interface_ip.assoc = core_params.scheduler_assoc; |
| interface_ip.nbanks = core_params.scheduler_nbanks; |
| interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; |
| interface_ip.specific_tag = tag > 0; |
| interface_ip.tag_w = tag; |
| interface_ip.access_mode = Normal; |
| interface_ip.obj_func_dyn_energy = 0; |
| interface_ip.obj_func_dyn_power = 0; |
| interface_ip.obj_func_leak_power = 0; |
| interface_ip.obj_func_cycle_t = 1; |
| interface_ip.num_rw_ports = 0; |
| interface_ip.num_rd_ports = core_params.fp_issueW; |
| interface_ip.num_wr_ports = core_params.fp_issueW; |
| interface_ip.num_se_rd_ports = 0; |
| interface_ip.num_search_ports = core_params.fp_issueW; |
| interface_ip.is_cache = true; |
| interface_ip.pure_cam = false; |
| interface_ip.pure_ram = false; |
| interface_ip.throughput = 1.0 / clockRate; |
| interface_ip.latency = 1.0 / clockRate; |
| fp_inst_window = |
| new ArrayST(xml_data, &interface_ip, tmp_name, Core_device, |
| clockRate, core_params.opt_local, core_params.core_ty); |
| fp_inst_window->output_data.area *= core_params.num_fp_pipelines; |
| area.set_area(area.get_area() + fp_inst_window->local_result.area |
| *core_params.num_fp_pipelines); |
| fp_Iw_height = fp_inst_window->local_result.cache_ht; |
| |
| if (core_params.ROB_size > 0) { |
| /* |
| * if ROB_size = 0, then the target processor does not support hardware-based |
| * speculation, i.e. , the processor allow OOO issue as well as OOO completion, which |
| * means branch must be resolved before instruction issued into instruction window, since |
| * there is no change to flush miss-predict branch path after instructions are issued in this situation. |
| * |
| * ROB.ROB size = inflight inst. ROB is unified for int and fp inst. |
| * One old approach is to combine the RAT and ROB as a huge CAM structure as in AMD K7. |
| * However, this approach is abandoned due to its high power and poor scalablility. |
| * McPAT uses current implementation of ROB as circular buffer. |
| * ROB is written once when instruction is issued and read once when the instruction is committed. * |
| */ |
| int robExtra = int(ceil(ROB_STATUS_BITS + |
| log2(core_params.num_hthreads))); |
| |
| if (core_params.scheu_ty == PhysicalRegFile) { |
| //PC is to id the instruction for recover exception. |
| //inst is used to map the renamed dest. registers. so that |
| //commit stage can know which reg/RRAT to update |
| data = int(ceil((robExtra + core_params.pc_width + |
| core_params.phy_ireg_width) / BITS_PER_BYTE)); |
| } else { |
| //in RS based OOO, ROB also contains value of destination reg |
| data = int(ceil((robExtra + core_params.pc_width + |
| core_params.phy_ireg_width + |
| core_params.fp_data_width) / BITS_PER_BYTE)); |
| } |
| |
| interface_ip.cache_sz = data * core_params.ROB_size; |
| interface_ip.line_sz = data; |
| interface_ip.assoc = core_params.ROB_assoc; |
| interface_ip.nbanks = core_params.ROB_nbanks; |
| interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; |
| interface_ip.specific_tag = core_params.ROB_tag_width > 0; |
| interface_ip.tag_w = core_params.ROB_tag_width; |
| interface_ip.access_mode = Sequential; |
| interface_ip.obj_func_dyn_energy = 0; |
| interface_ip.obj_func_dyn_power = 0; |
| interface_ip.obj_func_leak_power = 0; |
| interface_ip.obj_func_cycle_t = 1; |
| interface_ip.num_rw_ports = 0; |
| interface_ip.num_rd_ports = core_params.peak_commitW; |
| interface_ip.num_wr_ports = core_params.peak_issueW; |
| interface_ip.num_se_rd_ports = 0; |
| interface_ip.num_search_ports = 0; |
| interface_ip.is_cache = false; |
| interface_ip.pure_cam = false; |
| interface_ip.pure_ram = true; |
| interface_ip.throughput = 1.0 / clockRate; |
| interface_ip.latency = 1.0 / clockRate; |
| ROB = new ArrayST(xml_data, &interface_ip, "Reorder Buffer", |
| Core_device, clockRate, core_params.opt_local, |
| core_params.core_ty); |
| ROB->output_data.area *= core_params.num_pipelines; |
| area.set_area(area.get_area() + ROB->local_result.area * |
| core_params.num_pipelines); |
| ROB_height = ROB->local_result.cache_ht; |
| } |
| |
| int_instruction_selection = |
| new selection_logic(xml_data, is_default, |
| core_params.instruction_window_size, |
| core_params.peak_issueW, &interface_ip, |
| "Int Instruction Selection Logic", |
| core_stats.inst_window_wakeup_accesses, |
| clockRate, Core_device, core_params.core_ty); |
| |
| if (core_params.fp_instruction_window_size > 0) { |
| fp_instruction_selection = |
| new selection_logic(xml_data, is_default, |
| core_params.fp_instruction_window_size, |
| core_params.fp_issueW, &interface_ip, |
| "FP Instruction Selection Logic", |
| core_stats.fp_inst_window_wakeup_accesses, |
| clockRate, Core_device, |
| core_params.core_ty); |
| } |
| |
| } |
| } |
| |
| /* |
|  * Constructs the load/store unit. |
|  * |
|  * When exist_ is false only the member initializers run. Otherwise the |
|  * constructor: |
|  *   1. attaches a CacheUnit for each child <component type="CacheUnit"> whose |
|  *      name attribute is "Data Cache" or "dcache"; |
|  *   2. always builds the "Store Queue" array (LSQ); |
|  *   3. for OOO cores with load_buffer_size > 0 also builds the "Load Queue" |
|  *      array (LoadQ). |
|  * |
|  * _xml_data      XML node for this unit, forwarded to McPATComponent |
|  * interface_ip_  array parameter template used to initialise interface_ip |
|  * _core_params   core configuration |
|  * _core_stats    core statistics |
|  * exist_         whether this unit should be modelled at all |
|  */ |
| LoadStoreU::LoadStoreU(XMLNode* _xml_data, InputParameter* interface_ip_, |
|                        const CoreParameters & _core_params, |
|                        const CoreStatistics & _core_stats, bool exist_) |
|     : McPATComponent(_xml_data), dcache(NULL), LSQ(NULL), LoadQ(NULL), |
|       interface_ip(*interface_ip_), |
|       core_params(_core_params), core_stats(_core_stats), exist(exist_) { |
|     if (!exist) return; |
|     int tag; |
|     int line; |
|     int size; |
|     int ldst_opcode = core_params.opcode_width; |
| |
|     clockRate = core_params.clockRate; |
|     name = "Load/Store Unit"; |
| |
|     // Check if there is a dcache child: |
|     int i; |
|     dcache = NULL; |
|     for( i = 0; i < xml_data->nChildNode("component"); i++ ) { |
|         XMLNode* childXML = xml_data->getChildNodePtr("component", &i); |
|         XMLCSTR type = childXML->getAttribute("type"); |
| |
|         // STRCMP is a project macro whose expansion is not visible here, so |
|         // this check and the STRCMP that follows are kept exactly adjacent. |
|         if (!type) |
|             warnMissingComponentType(childXML->getAttribute("id")); |
| |
|         STRCMP(type, "CacheUnit") { |
|             // The name attribute may be absent just like the type attribute; |
|             // strcmp() on a NULL pointer is undefined, so guard it. A |
|             // distinct local name also avoids shadowing the member `name`. |
|             XMLCSTR cache_name = childXML->getAttribute("name"); |
|             if (cache_name && |
|                 (strcmp(cache_name, "Data Cache") == 0 || |
|                  strcmp(cache_name, "dcache") == 0)) { |
|                 dcache = new CacheUnit(childXML, &interface_ip); |
|                 children.push_back(dcache); |
|             } |
|         } |
|     } |
| |
|     /* |
|      * LSU--in-order processors do not have a separate load queue: a unified |
|      * LSQ is partitioned among threads. |
|      * It is actually the store queue, but for in-order processors it serves |
|      * as both LoadQ and StoreQ. |
|      */ |
|     // Tag = opcode + virtual address + thread id bits + extra tag bits. |
|     tag = ldst_opcode + virtual_address_width + |
|           int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS; |
|     // One entry holds one data-path word, expressed in bytes. |
|     line = int(ceil(data_path_width / BITS_PER_BYTE)); |
|     size = core_params.store_buffer_size * line * core_params.num_hthreads; |
| |
|     interface_ip.cache_sz = size; |
|     interface_ip.line_sz = line; |
|     interface_ip.assoc = core_params.store_buffer_assoc; |
|     interface_ip.nbanks = core_params.store_buffer_nbanks; |
|     interface_ip.out_w = line * BITS_PER_BYTE; |
|     interface_ip.specific_tag = tag > 0; |
|     interface_ip.tag_w = tag; |
|     interface_ip.access_mode = Sequential; |
|     interface_ip.obj_func_dyn_energy = 0; |
|     interface_ip.obj_func_dyn_power = 0; |
|     interface_ip.obj_func_leak_power = 0; |
|     interface_ip.obj_func_cycle_t = 1; |
|     interface_ip.num_rw_ports = 0; |
|     interface_ip.num_rd_ports = core_params.memory_ports; |
|     interface_ip.num_wr_ports = core_params.memory_ports; |
|     interface_ip.num_se_rd_ports = 0; |
|     interface_ip.num_search_ports = core_params.memory_ports; |
|     interface_ip.is_cache = true; |
|     interface_ip.pure_ram = false; |
|     interface_ip.pure_cam = false; |
|     interface_ip.throughput = 1.0 / clockRate; |
|     interface_ip.latency = 1.0 / clockRate; |
|     LSQ = new ArrayST(xml_data, &interface_ip, "Store Queue", Core_device, |
|                       clockRate, core_params.opt_local, core_params.core_ty); |
|     // Add the queue's array area to this unit, then scale by cdb_overhead. |
|     area.set_area(area.get_area() + LSQ->local_result.area); |
|     area.set_area(area.get_area()*cdb_overhead); |
|     lsq_height = LSQ->local_result.cache_ht * sqrt(cdb_overhead); |
| |
|     if ((core_params.core_ty == OOO) && (core_params.load_buffer_size > 0)) { |
|         // Same tag and entry layout as the store queue; only the number of |
|         // entries, associativity and bank count come from the load buffer. |
|         tag = ldst_opcode + virtual_address_width + |
|               int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS; |
|         line = int(ceil(data_path_width / BITS_PER_BYTE)); |
|         size = core_params.load_buffer_size * line * core_params.num_hthreads; |
| |
|         interface_ip.cache_sz = size; |
|         interface_ip.line_sz = line; |
|         interface_ip.assoc = core_params.load_buffer_assoc; |
|         interface_ip.nbanks = core_params.load_buffer_nbanks; |
|         interface_ip.out_w = line * BITS_PER_BYTE; |
|         interface_ip.specific_tag = tag > 0; |
|         interface_ip.tag_w = tag; |
|         interface_ip.access_mode = Sequential; |
|         interface_ip.obj_func_dyn_energy = 0; |
|         interface_ip.obj_func_dyn_power = 0; |
|         interface_ip.obj_func_leak_power = 0; |
|         interface_ip.obj_func_cycle_t = 1; |
|         interface_ip.num_rw_ports = 0; |
|         interface_ip.num_rd_ports = core_params.memory_ports; |
|         interface_ip.num_wr_ports = core_params.memory_ports; |
|         interface_ip.num_se_rd_ports = 0; |
|         interface_ip.num_search_ports = core_params.memory_ports; |
|         interface_ip.is_cache = true; |
|         interface_ip.pure_ram = false; |
|         interface_ip.pure_cam = false; |
|         interface_ip.throughput = 1.0 / clockRate; |
|         interface_ip.latency = 1.0 / clockRate; |
|         LoadQ = new ArrayST(xml_data, &interface_ip, "Load Queue", Core_device, |
|                             clockRate, core_params.opt_local, |
|                             core_params.core_ty); |
|         // NOTE(review): unlike the store queue above, the load queue's array |
|         // area is accumulated on LoadQ->area rather than added to this |
|         // unit's area, and cdb_overhead is applied to the unit area a second |
|         // time. Left unchanged because it affects model output -- confirm |
|         // whether this is intended. |
|         LoadQ->area.set_area(LoadQ->area.get_area() + |
|                              LoadQ->local_result.area); |
|         area.set_area(area.get_area()*cdb_overhead); |
|         // With a separate load queue the LSQ height covers both arrays. |
|         lsq_height = (LSQ->local_result.cache_ht + |
|                       LoadQ->local_result.cache_ht) * sqrt(cdb_overhead); |
|     } |
| |
| } |
| |
| /* |
|  * Constructs the memory management unit, which is modelled as two arrays: |
|  * the "Instruction TLB" and the "Data TLB". |
|  * |
|  * When exist_ is false only the member initializers run. Otherwise the TLB |
|  * parameters are first loaded via set_params_stats(), each TLB array is |
|  * built from them, and each array's area is added to this unit's area. |
|  * |
|  * _xml_data      XML node for this unit, forwarded to McPATComponent |
|  * interface_ip_  array parameter template used to initialise interface_ip |
|  * _core_params   core configuration |
|  * _core_stats    core statistics |
|  * exist_         whether this unit should be modelled at all |
|  */ |
| MemManU::MemManU(XMLNode* _xml_data, InputParameter* interface_ip_, |
|                  const CoreParameters & _core_params, |
|                  const CoreStatistics & _core_stats, bool exist_) |
|     : McPATComponent(_xml_data), itlb(NULL), dtlb(NULL), |
|       interface_ip(*interface_ip_), |
|       core_params(_core_params), core_stats(_core_stats), exist(exist_) { |
|     if (!exist) { |
|         return; |
|     } |
| |
|     clockRate = core_params.clockRate; |
|     name = "Memory Management Unit"; |
| |
|     // Populate mem_man_params / mem_man_stats before sizing the arrays. |
|     set_params_stats(); |
| |
|     // Settings shared by the ITLB and the DTLB. |
|     interface_ip.pure_ram = false; |
|     interface_ip.pure_cam = false; |
|     interface_ip.is_cache = true; |
| |
|     // ---- Instruction TLB ---- |
|     // TLBs are partitioned among threads, according to Niagara and Nehalem. |
|     { |
|         // Tag: virtual page number + thread id bits + extra tag bits. |
|         const int tag_bits = |
|             virtual_address_width - |
|             int(floor(log2(virtual_memory_page_size))) + |
|             int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS; |
|         // Payload: physical page number, rounded to bytes per entry. |
|         const int entry_bits = |
|             physical_address_width - |
|             int(floor(log2(virtual_memory_page_size))); |
|         const int entry_bytes = int(ceil(entry_bits / BITS_PER_BYTE)); |
| |
|         // Geometry. |
|         interface_ip.line_sz = entry_bytes; |
|         interface_ip.cache_sz = |
|             mem_man_params.itlb_number_entries * entry_bytes; |
|         interface_ip.out_w = entry_bytes * BITS_PER_BYTE; |
|         interface_ip.assoc = mem_man_params.itlb_assoc; |
|         interface_ip.nbanks = mem_man_params.itlb_nbanks; |
|         interface_ip.tag_w = tag_bits; |
|         interface_ip.specific_tag = tag_bits > 0; |
|         interface_ip.access_mode = Normal; |
| |
|         // Optimisation objective weights. |
|         interface_ip.obj_func_dyn_energy = 0; |
|         interface_ip.obj_func_dyn_power = 0; |
|         interface_ip.obj_func_leak_power = 0; |
|         interface_ip.obj_func_cycle_t = 1; |
| |
|         // Ports: one read/write and one search port per fetch port. |
|         interface_ip.num_rw_ports = |
|             core_params.number_instruction_fetch_ports; |
|         interface_ip.num_search_ports = |
|             core_params.number_instruction_fetch_ports; |
|         interface_ip.num_rd_ports = 0; |
|         interface_ip.num_wr_ports = 0; |
|         interface_ip.num_se_rd_ports = 0; |
| |
|         // Timing, converted from cycles using the core clock rate. |
|         interface_ip.latency = mem_man_params.itlb_latency / clockRate; |
|         interface_ip.throughput = |
|             mem_man_params.itlb_throughput / clockRate; |
| |
|         itlb = new ArrayST(xml_data, &interface_ip, "Instruction TLB", |
|                            Core_device, clockRate, core_params.opt_local, |
|                            core_params.core_ty); |
|         area.set_area(area.get_area() + itlb->local_result.area); |
|     } |
| |
|     // ---- Data TLB ---- |
|     { |
|         // Same tag / payload layout as the ITLB. |
|         const int tag_bits = |
|             virtual_address_width - |
|             int(floor(log2(virtual_memory_page_size))) + |
|             int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS; |
|         const int entry_bits = |
|             physical_address_width - |
|             int(floor(log2(virtual_memory_page_size))); |
|         const int entry_bytes = int(ceil(entry_bits / BITS_PER_BYTE)); |
| |
|         // Geometry. |
|         interface_ip.line_sz = entry_bytes; |
|         interface_ip.cache_sz = |
|             mem_man_params.dtlb_number_entries * entry_bytes; |
|         interface_ip.out_w = entry_bytes * BITS_PER_BYTE; |
|         interface_ip.assoc = mem_man_params.dtlb_assoc; |
|         interface_ip.nbanks = mem_man_params.dtlb_nbanks; |
|         interface_ip.tag_w = tag_bits; |
|         interface_ip.specific_tag = tag_bits > 0; |
|         interface_ip.access_mode = Normal; |
| |
|         // Optimisation objective weights. |
|         interface_ip.obj_func_dyn_energy = 0; |
|         interface_ip.obj_func_dyn_power = 0; |
|         interface_ip.obj_func_leak_power = 0; |
|         interface_ip.obj_func_cycle_t = 1; |
| |
|         // Ports: separate read, write and search ports per memory port. |
|         interface_ip.num_rw_ports = 0; |
|         interface_ip.num_rd_ports = core_params.memory_ports; |
|         interface_ip.num_wr_ports = core_params.memory_ports; |
|         interface_ip.num_search_ports = core_params.memory_ports; |
|         interface_ip.num_se_rd_ports = 0; |
| |
|         // Timing, converted from cycles using the core clock rate. |
|         interface_ip.latency = mem_man_params.dtlb_latency / clockRate; |
|         interface_ip.throughput = |
|             mem_man_params.dtlb_throughput / clockRate; |
| |
|         dtlb = new ArrayST(xml_data, &interface_ip, "Data TLB", Core_device, |
|                            clockRate, core_params.opt_local, |
|                            core_params.core_ty); |
|         area.set_area(area.get_area() + dtlb->local_result.area); |
|     } |
| } |
| |
| /* |
|  * Loads the TLB parameters and statistics for this unit from its XML node. |
|  * |
|  * mem_man_params and mem_man_stats are zeroed first. Each child <component> |
|  * is then inspected by its "type" attribute: |
|  *   - "InstructionTLB": <param> names number_entries, latency, throughput, |
|  *     assoc, nbanks; <stat> names total_accesses, total_misses, conflicts. |
|  *   - "DataTLB": the same <param> names; <stat> names read_accesses, |
|  *     read_misses, write_accesses, write_misses, conflicts. |
|  * |
|  * NOTE(review): STRCMP and the ASSIGN_*_IF macros are defined elsewhere. They |
|  * appear to read the locals `node_name` and `value` and to chain with the |
|  * neighbouring if/else statements, so those names, the statement adjacency |
|  * and the `for` loop form must stay as they are -- confirm against the macro |
|  * definitions before restructuring. |
|  */ |
| void |
| MemManU::set_params_stats() { |
|     // Start from all-zero parameters and statistics. |
|     memset(&mem_man_stats, 0, sizeof(MemoryManagementStats)); |
|     memset(&mem_man_params, 0, sizeof(MemoryManagementParams)); |
| |
|     const int component_count = xml_data->nChildNode("component"); |
|     int i; |
|     for (i = 0; i < component_count; i++) { |
|         // The index is passed by address to the XML lookup. |
|         XMLNode* tlb_node = xml_data->getChildNodePtr("component", &i); |
|         XMLCSTR type = tlb_node->getAttribute("type"); |
| |
|         if (!type) |
|             warnMissingComponentType(tlb_node->getAttribute("id")); |
| |
|         STRCMP(type, "InstructionTLB") { |
|             int j; |
| |
|             // ITLB configuration parameters. |
|             const int param_count = tlb_node->nChildNode("param"); |
|             for (j = 0; j < param_count; j++) { |
|                 XMLNode* param_node = tlb_node->getChildNodePtr("param", &j); |
|                 XMLCSTR node_name = param_node->getAttribute("name"); |
|                 XMLCSTR value = param_node->getAttribute("value"); |
| |
|                 if (!node_name) |
|                     warnMissingParamName(param_node->getAttribute("id")); |
| |
|                 ASSIGN_INT_IF("number_entries", |
|                               mem_man_params.itlb_number_entries); |
|                 ASSIGN_FP_IF("latency", mem_man_params.itlb_latency); |
|                 ASSIGN_FP_IF("throughput", mem_man_params.itlb_throughput); |
|                 ASSIGN_FP_IF("assoc", mem_man_params.itlb_assoc); |
|                 ASSIGN_FP_IF("nbanks", mem_man_params.itlb_nbanks); |
| |
|                 else { |
|                     warnUnrecognizedParam(node_name); |
|                 } |
|             } |
| |
|             // ITLB activity statistics. |
|             const int stat_count = tlb_node->nChildNode("stat"); |
|             for (j = 0; j < stat_count; j++) { |
|                 XMLNode* stat_node = tlb_node->getChildNodePtr("stat", &j); |
|                 XMLCSTR node_name = stat_node->getAttribute("name"); |
|                 XMLCSTR value = stat_node->getAttribute("value"); |
| |
|                 if (!node_name) |
|                     warnMissingStatName(stat_node->getAttribute("id")); |
| |
|                 ASSIGN_FP_IF("total_accesses", |
|                              mem_man_stats.itlb_total_accesses); |
|                 ASSIGN_FP_IF("total_misses", mem_man_stats.itlb_total_misses); |
|                 ASSIGN_FP_IF("conflicts", mem_man_stats.itlb_conflicts); |
| |
|                 else { |
|                     warnUnrecognizedStat(node_name); |
|                 } |
|             } |
|         } STRCMP(type, "DataTLB") { |
|             int j; |
| |
|             // DTLB configuration parameters. |
|             const int param_count = tlb_node->nChildNode("param"); |
|             for (j = 0; j < param_count; j++) { |
|                 XMLNode* param_node = tlb_node->getChildNodePtr("param", &j); |
|                 XMLCSTR node_name = param_node->getAttribute("name"); |
|                 XMLCSTR value = param_node->getAttribute("value"); |
| |
|                 if (!node_name) |
|                     warnMissingParamName(param_node->getAttribute("id")); |
| |
|                 ASSIGN_INT_IF("number_entries", |
|                               mem_man_params.dtlb_number_entries); |
|                 ASSIGN_FP_IF("latency", mem_man_params.dtlb_latency); |
|                 ASSIGN_FP_IF("throughput", mem_man_params.dtlb_throughput); |
|                 ASSIGN_FP_IF("assoc", mem_man_params.dtlb_assoc); |
|                 ASSIGN_FP_IF("nbanks", mem_man_params.dtlb_nbanks); |
| |
|                 else { |
|                     warnUnrecognizedParam(node_name); |
|                 } |
|             } |
| |
|             // DTLB activity statistics. |
|             const int stat_count = tlb_node->nChildNode("stat"); |
|             for (j = 0; j < stat_count; j++) { |
|                 XMLNode* stat_node = tlb_node->getChildNodePtr("stat", &j); |
|                 XMLCSTR node_name = stat_node->getAttribute("name"); |
|                 XMLCSTR value = stat_node->getAttribute("value"); |
| |
|                 if (!node_name) |
|                     warnMissingStatName(stat_node->getAttribute("id")); |
| |
|                 ASSIGN_FP_IF("read_accesses", |
|                              mem_man_stats.dtlb_read_accesses); |
|                 ASSIGN_FP_IF("read_misses", mem_man_stats.dtlb_read_misses); |
|                 ASSIGN_FP_IF("write_accesses", |
|                              mem_man_stats.dtlb_write_accesses); |
|                 ASSIGN_FP_IF("write_misses", mem_man_stats.dtlb_write_misses); |
|                 ASSIGN_FP_IF("conflicts", mem_man_stats.dtlb_conflicts); |
| |
|                 else { |
|                     warnUnrecognizedStat(node_name); |
|                 } |
|             } |
|         } |
|     } |
| } |
| |
| /* |
|  * Constructs the register file unit: the "Integer Register File", the |
|  * "FP Register File" and, when core_params.regWindowing is set, a |
|  * "RegWindow" array. |
|  * |
|  * Processors have separate architectural register files for each thread, |
|  * therefore the bypass buses need to travel across all the register files. |
|  * |
|  * When exist_ is false only the member initializers run. |
|  * |
|  * _xml_data      XML node for this unit, forwarded to McPATComponent |
|  * interface_ip_  array parameter template used to initialise interface_ip |
|  * _core_params   core configuration |
|  * _core_stats    core statistics |
|  * exist_         whether this unit should be modelled at all |
|  */ |
| RegFU::RegFU(XMLNode* _xml_data, InputParameter* interface_ip_, |
|              const CoreParameters & _core_params, |
|              const CoreStatistics & _core_stats, bool exist_) |
|     : McPATComponent(_xml_data), IRF(NULL), FRF(NULL), RFWIN(NULL), |
|       interface_ip(*interface_ip_), |
|       core_params(_core_params), core_stats(_core_stats), exist(exist_) { |
|     if (!exist) { |
|         return; |
|     } |
| |
|     clockRate = core_params.clockRate; |
|     name = "Register File Unit"; |
| |
|     //**********************************IRF************************************ |
|     { |
|         const int reg_bits = core_params.int_data_width; |
|         const int reg_bytes = int(ceil(reg_bits / BITS_PER_BYTE)); |
| |
|         // Pure RAM array, no search ports. |
|         interface_ip.pure_ram = true; |
|         interface_ip.pure_cam = false; |
|         interface_ip.is_cache = false; |
| |
|         // Geometry. |
|         interface_ip.line_sz = reg_bytes; |
|         interface_ip.cache_sz = core_params.num_IRF_entry * reg_bytes; |
|         interface_ip.out_w = reg_bytes * BITS_PER_BYTE; |
|         interface_ip.assoc = core_params.phy_Regs_IRF_assoc; |
|         interface_ip.nbanks = core_params.phy_Regs_IRF_nbanks; |
|         interface_ip.tag_w = core_params.phy_Regs_IRF_tag_width; |
|         interface_ip.specific_tag = core_params.phy_Regs_IRF_tag_width > 0; |
|         interface_ip.access_mode = Sequential; |
| |
|         // Optimisation objective weights. |
|         interface_ip.obj_func_dyn_energy = 0; |
|         interface_ip.obj_func_dyn_power = 0; |
|         interface_ip.obj_func_leak_power = 0; |
|         interface_ip.obj_func_cycle_t = 1; |
| |
|         // Ports. |
|         interface_ip.num_rd_ports = core_params.phy_Regs_IRF_rd_ports; |
|         interface_ip.num_wr_ports = core_params.phy_Regs_IRF_wr_ports; |
|         interface_ip.num_rw_ports = 0; |
|         interface_ip.num_se_rd_ports = 0; |
|         interface_ip.num_search_ports = 0; |
| |
|         // Single-cycle timing. |
|         interface_ip.latency = 1.0 / clockRate; |
|         interface_ip.throughput = 1.0 / clockRate; |
| |
|         IRF = new ArrayST(xml_data, &interface_ip, "Integer Register File", |
|                           Core_device, clockRate, core_params.opt_local, |
|                           core_params.core_ty); |
|         // One copy per hardware thread and integer pipeline, plus CDB |
|         // overhead. |
|         IRF->output_data.area *= core_params.num_hthreads * |
|             core_params.num_pipelines * cdb_overhead; |
|         area.set_area(area.get_area() + IRF->local_result.area * |
|                       core_params.num_hthreads * core_params.num_pipelines * |
|                       cdb_overhead); |
|     } |
| |
|     //**********************************FRF************************************ |
|     { |
|         const int reg_bits = core_params.fp_data_width; |
|         const int reg_bytes = int(ceil(reg_bits / BITS_PER_BYTE)); |
| |
|         // Pure RAM array, no search ports. |
|         interface_ip.pure_ram = true; |
|         interface_ip.pure_cam = false; |
|         interface_ip.is_cache = false; |
| |
|         // Geometry. |
|         interface_ip.line_sz = reg_bytes; |
|         interface_ip.cache_sz = core_params.num_FRF_entry * reg_bytes; |
|         interface_ip.out_w = reg_bytes * BITS_PER_BYTE; |
|         interface_ip.assoc = core_params.phy_Regs_FRF_assoc; |
|         interface_ip.nbanks = core_params.phy_Regs_FRF_nbanks; |
|         interface_ip.tag_w = core_params.phy_Regs_FRF_tag_width; |
|         interface_ip.specific_tag = core_params.phy_Regs_FRF_tag_width > 0; |
|         interface_ip.access_mode = Sequential; |
| |
|         // Optimisation objective weights. |
|         interface_ip.obj_func_dyn_energy = 0; |
|         interface_ip.obj_func_dyn_power = 0; |
|         interface_ip.obj_func_leak_power = 0; |
|         interface_ip.obj_func_cycle_t = 1; |
| |
|         // Ports. |
|         interface_ip.num_rd_ports = core_params.phy_Regs_FRF_rd_ports; |
|         interface_ip.num_wr_ports = core_params.phy_Regs_FRF_wr_ports; |
|         interface_ip.num_rw_ports = 0; |
|         interface_ip.num_se_rd_ports = 0; |
|         interface_ip.num_search_ports = 0; |
| |
|         // Single-cycle timing. |
|         interface_ip.latency = 1.0 / clockRate; |
|         interface_ip.throughput = 1.0 / clockRate; |
| |
|         FRF = new ArrayST(xml_data, &interface_ip, "FP Register File", |
|                           Core_device, clockRate, core_params.opt_local, |
|                           core_params.core_ty); |
|         // One copy per hardware thread and FP pipeline, plus CDB overhead. |
|         FRF->output_data.area *= core_params.num_hthreads * |
|             core_params.num_fp_pipelines * cdb_overhead; |
|         area.set_area(area.get_area() + FRF->local_result.area * |
|                       core_params.num_hthreads * core_params.num_fp_pipelines * |
|                       cdb_overhead); |
|     } |
| |
|     // Since an EXU is associated with each pipeline, the CDB should not get |
|     // longer with the pipeline count: heights scale with threads only. |
|     int_regfile_height = IRF->local_result.cache_ht * |
|         core_params.num_hthreads * sqrt(cdb_overhead); |
|     fp_regfile_height = FRF->local_result.cache_ht * core_params.num_hthreads * |
|         sqrt(cdb_overhead); |
| |
|     if (core_params.regWindowing) { |
|         //*********************************REG_WIN***************************** |
|         // ECC, and usually 2 regs are transferred together during window |
|         // shifting. Niagara Mega cell. |
|         const int reg_bits = core_params.int_data_width; |
|         const int reg_bytes = int(ceil(reg_bits / BITS_PER_BYTE)); |
| |
|         // Pure RAM array, no search ports. |
|         interface_ip.pure_ram = true; |
|         interface_ip.pure_cam = false; |
|         interface_ip.is_cache = false; |
| |
|         // Geometry: sized from the integer register file's configured |
|         // capacity, the window count and the thread count. |
|         interface_ip.line_sz = reg_bytes; |
|         interface_ip.cache_sz = core_params.register_window_size * |
|             IRF->l_ip.cache_sz * core_params.num_hthreads; |
|         interface_ip.out_w = reg_bytes * BITS_PER_BYTE; |
|         interface_ip.assoc = core_params.register_window_assoc; |
|         interface_ip.nbanks = core_params.register_window_nbanks; |
|         interface_ip.tag_w = core_params.register_window_tag_width; |
|         interface_ip.specific_tag = core_params.register_window_tag_width > 0; |
|         interface_ip.access_mode = Sequential; |
| |
|         // Optimisation objective weights. |
|         interface_ip.obj_func_dyn_energy = 0; |
|         interface_ip.obj_func_dyn_power = 0; |
|         interface_ip.obj_func_leak_power = 0; |
|         interface_ip.obj_func_cycle_t = 1; |
| |
|         // Ports: shared read/write ports only. |
|         interface_ip.num_rw_ports = core_params.register_window_rw_ports; |
|         interface_ip.num_rd_ports = 0; |
|         interface_ip.num_wr_ports = 0; |
|         interface_ip.num_se_rd_ports = 0; |
|         interface_ip.num_search_ports = 0; |
| |
|         // Timing, converted using the core clock rate. |
|         interface_ip.latency = |
|             core_params.register_window_latency / clockRate; |
|         interface_ip.throughput = |
|             core_params.register_window_throughput / clockRate; |
| |
|         RFWIN = new ArrayST(xml_data, &interface_ip, "RegWindow", Core_device, |
|                             clockRate, core_params.opt_local, |
|                             core_params.core_ty); |
|         RFWIN->output_data.area *= core_params.num_pipelines; |
|         area.set_area(area.get_area() + RFWIN->local_result.area * |
|                       core_params.num_pipelines); |
|     } |
| } |
| |
| /* |
|  * Builds one execution-unit bypass Interconnect. |
|  * |
|  * Every bypass bus created by EXECU::EXECU uses the same device type, base |
|  * geometry, starting wiring level, route-over percentage, wire type and |
|  * optimisation settings, all taken from core_params. Only the name, the bus |
|  * width (data_w) and the bus length (len) differ between buses. |
|  */ |
| static Interconnect* |
| newExecuBypass(XMLNode* xml_data, const char* bypass_name, int data_w, |
|                double len, InputParameter* interface_ip, |
|                const CoreParameters & core_params, double clockRate) { |
|     double base_w = core_params.execu_bypass_base_width; |
|     double base_h = core_params.execu_bypass_base_height; |
|     int level = core_params.execu_bypass_start_wiring_level; |
|     double route_over_perc = core_params.execu_bypass_route_over_perc; |
|     Wire_type wire_type = core_params.execu_bypass_wire_type; |
| |
|     return new Interconnect(xml_data, bypass_name, Core_device, base_w, |
|                             base_h, data_w, len, interface_ip, level, |
|                             clockRate, false, route_over_perc, |
|                             core_params.opt_local, core_params.core_ty, |
|                             wire_type); |
| } |
| |
| /* |
|  * Constructs the execution unit: register files, an optional scheduler, the |
|  * ALU / FPU / MUL functional units and the bypass (broadcast) interconnects |
|  * between them. |
|  * |
|  * When exist_ is false only the member initializers run. |
|  * |
|  * _xml_data      XML node for this unit, forwarded to McPATComponent |
|  * interface_ip_  array parameter template used to initialise interface_ip |
|  * lsq_height_    load/store queue height, included in bypass bus lengths |
|  * _core_params   core configuration |
|  * _core_stats    core statistics |
|  * exist_         whether this unit should be modelled at all |
|  */ |
| EXECU::EXECU(XMLNode* _xml_data, |
|              InputParameter* interface_ip_, double lsq_height_, |
|              const CoreParameters & _core_params, |
|              const CoreStatistics & _core_stats, bool exist_) |
|     : McPATComponent(_xml_data), rfu(NULL), scheu(NULL), fp_u(NULL), |
|       exeu(NULL), mul(NULL), int_bypass(NULL), intTagBypass(NULL), |
|       int_mul_bypass(NULL), intTag_mul_Bypass(NULL), fp_bypass(NULL), |
|       fpTagBypass(NULL), interface_ip(*interface_ip_), |
|       lsq_height(lsq_height_), core_params(_core_params), |
|       core_stats(_core_stats), exist(exist_) { |
|     if (!exist) return; |
|     clockRate = core_params.clockRate; |
|     name = "Execution Unit"; |
|     rfu = new RegFU(xml_data, &interface_ip, core_params, core_stats); |
|     // A scheduler exists for OOO cores and for multithreaded in-order cores. |
|     if (core_params.core_ty == OOO || |
|         (core_params.core_ty == Inorder && core_params.multithreaded)) { |
|         scheu = new SchedulerU(xml_data, &interface_ip, core_params, |
|                                core_stats); |
|         area.set_area(area.get_area() + scheu->area.get_area() ); |
|     } |
|     exeu = new FunctionalUnit(xml_data, &interface_ip, core_params, |
|                               core_stats, ALU); |
|     area.set_area(area.get_area() + exeu->area.get_area() + |
|                   rfu->area.get_area()); |
|     if (core_params.num_fpus > 0) { |
|         fp_u = new FunctionalUnit(xml_data, &interface_ip, |
|                                   core_params, core_stats, FPU); |
|         area.set_area(area.get_area() + fp_u->area.get_area()); |
|     } |
|     if (core_params.num_muls > 0) { |
|         mul = new FunctionalUnit(xml_data, &interface_ip, |
|                                  core_params, core_stats, MUL); |
|         area.set_area(area.get_area() + mul->area.get_area()); |
|     } |
|     /* |
|      * Broadcast logic, including int-broadcast; int_tag-broadcast; |
|      * fp-broadcast; fp_tag-broadcast. |
|      * Integer bypass has two paths and fp has 3 paths. |
|      * On the same bus there are multiple tri-state drivers and muxes that go |
|      * to different components on the same bus. |
|      */ |
|     interface_ip.wt = core_params.execu_broadcast_wt; |
|     interface_ip.wire_is_mat_type = core_params.execu_wire_mat_type; |
|     interface_ip.wire_os_mat_type = core_params.execu_wire_mat_type; |
|     interface_ip.throughput = core_params.broadcast_numerator / clockRate; |
|     interface_ip.latency = core_params.broadcast_numerator / clockRate; |
| |
|     // Scheduler structure heights contribute to bus lengths only when a |
|     // scheduler was built; otherwise they count as zero. |
|     double scheu_Iw_height = 0.0; |
|     double scheu_ROB_height = 0.0; |
|     double scheu_fp_Iw_height = 0.0; |
|     if (scheu) { |
|         scheu_Iw_height = scheu->Iw_height; |
|         scheu_ROB_height = scheu->ROB_height; |
|         scheu_fp_Iw_height = scheu->fp_Iw_height; |
|     } |
| |
|     InputParameter* bypass_ip = &interface_ip; |
| |
|     if (core_params.core_ty == Inorder) { |
|         // Data buses are rounded up to a multiple of 32 bits; tag buses |
|         // carry the per-thread state width. |
|         int_bypass = newExecuBypass( |
|             xml_data, "Int Bypass Data", |
|             int(ceil(data_path_width / 32.0)*32), |
|             rfu->int_regfile_height + exeu->FU_height + lsq_height, |
|             bypass_ip, core_params, clockRate); |
| |
|         intTagBypass = newExecuBypass( |
|             xml_data, "Int Bypass Tag", |
|             core_params.perThreadState, |
|             rfu->int_regfile_height + exeu->FU_height + lsq_height + |
|             scheu_Iw_height, |
|             bypass_ip, core_params, clockRate); |
| |
|         if (core_params.num_muls > 0) { |
|             int_mul_bypass = newExecuBypass( |
|                 xml_data, "Mul Bypass Data", |
|                 int(ceil(data_path_width / 32.0)*32*1.5), |
|                 rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + |
|                 lsq_height, |
|                 bypass_ip, core_params, clockRate); |
| |
|             intTag_mul_Bypass = newExecuBypass( |
|                 xml_data, "Mul Bypass Tag", |
|                 core_params.perThreadState, |
|                 rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + |
|                 lsq_height + scheu_Iw_height, |
|                 bypass_ip, core_params, clockRate); |
|         } |
| |
|         if (core_params.num_fpus > 0) { |
|             fp_bypass = newExecuBypass( |
|                 xml_data, "FP Bypass Data", |
|                 int(ceil(data_path_width / 32.0)*32*1.5), |
|                 rfu->fp_regfile_height + fp_u->FU_height, |
|                 bypass_ip, core_params, clockRate); |
| |
|             fpTagBypass = newExecuBypass( |
|                 xml_data, "FP Bypass Tag", |
|                 core_params.perThreadState, |
|                 rfu->fp_regfile_height + fp_u->FU_height + lsq_height + |
|                 scheu_Iw_height, |
|                 bypass_ip, core_params, clockRate); |
|         } |
|     } else {//OOO |
|         if (core_params.scheu_ty == PhysicalRegFile) { |
|             /* For physical register based OOO, |
|              * data broadcast interconnects cover across functional units, lsq, |
|              * inst windows and register files, |
|              * while tag broadcast interconnects also cover across ROB |
|              */ |
|             int_bypass = newExecuBypass( |
|                 xml_data, "Int Bypass Data", |
|                 int(ceil(core_params.int_data_width)), |
|                 rfu->int_regfile_height + exeu->FU_height + lsq_height, |
|                 bypass_ip, core_params, clockRate); |
| |
|             intTagBypass = newExecuBypass( |
|                 xml_data, "Int Bypass Tag", |
|                 core_params.phy_ireg_width, |
|                 rfu->int_regfile_height + exeu->FU_height + lsq_height + |
|                 scheu_Iw_height + scheu_ROB_height, |
|                 bypass_ip, core_params, clockRate); |
| |
|             if (core_params.num_muls > 0) { |
|                 int_mul_bypass = newExecuBypass( |
|                     xml_data, "Mul Bypass Data", |
|                     int(ceil(core_params.int_data_width)), |
|                     rfu->int_regfile_height + exeu->FU_height + |
|                     mul->FU_height + lsq_height, |
|                     bypass_ip, core_params, clockRate); |
| |
|                 intTag_mul_Bypass = newExecuBypass( |
|                     xml_data, "Mul Bypass Tag", |
|                     core_params.phy_ireg_width, |
|                     rfu->int_regfile_height + exeu->FU_height + |
|                     mul->FU_height + lsq_height + scheu_Iw_height + |
|                     scheu_ROB_height, |
|                     bypass_ip, core_params, clockRate); |
|             } |
| |
|             if (core_params.num_fpus > 0) { |
|                 fp_bypass = newExecuBypass( |
|                     xml_data, "FP Bypass Data", |
|                     int(ceil(core_params.fp_data_width)), |
|                     rfu->fp_regfile_height + fp_u->FU_height, |
|                     bypass_ip, core_params, clockRate); |
| |
|                 fpTagBypass = newExecuBypass( |
|                     xml_data, "FP Bypass Tag", |
|                     core_params.phy_freg_width, |
|                     rfu->fp_regfile_height + fp_u->FU_height + lsq_height + |
|                     scheu_fp_Iw_height + scheu_ROB_height, |
|                     bypass_ip, core_params, clockRate); |
|             } |
|         } else { |
|             /* |
|              * In RS based processors both data and tag are broadcast |
|              * together, covering functional units, lsq, inst windows, |
|              * register files, and ROBs |
|              */ |
|             int_bypass = newExecuBypass( |
|                 xml_data, "Int Bypass Data", |
|                 int(ceil(core_params.int_data_width)), |
|                 rfu->int_regfile_height + exeu->FU_height + lsq_height + |
|                 scheu_Iw_height + scheu_ROB_height, |
|                 bypass_ip, core_params, clockRate); |
| |
|             intTagBypass = newExecuBypass( |
|                 xml_data, "Int Bypass Tag", |
|                 core_params.phy_ireg_width, |
|                 rfu->int_regfile_height + exeu->FU_height + lsq_height + |
|                 scheu_Iw_height + scheu_ROB_height, |
|                 bypass_ip, core_params, clockRate); |
| |
|             if (core_params.num_muls > 0) { |
|                 int_mul_bypass = newExecuBypass( |
|                     xml_data, "Mul Bypass Data", |
|                     int(ceil(core_params.int_data_width)), |
|                     rfu->int_regfile_height + exeu->FU_height + |
|                     mul->FU_height + lsq_height + scheu_Iw_height + |
|                     scheu_ROB_height, |
|                     bypass_ip, core_params, clockRate); |
| |
|                 intTag_mul_Bypass = newExecuBypass( |
|                     xml_data, "Mul Bypass Tag", |
|                     core_params.phy_ireg_width, |
|                     rfu->int_regfile_height + exeu->FU_height + |
|                     mul->FU_height + lsq_height + scheu_Iw_height + |
|                     scheu_ROB_height, |
|                     bypass_ip, core_params, clockRate); |
|             } |
| |
|             if (core_params.num_fpus > 0) { |
|                 fp_bypass = newExecuBypass( |
|                     xml_data, "FP Bypass Data", |
|                     int(ceil(core_params.fp_data_width)), |
|                     rfu->fp_regfile_height + fp_u->FU_height + lsq_height + |
|                     scheu_fp_Iw_height + scheu_ROB_height, |
|                     bypass_ip, core_params, clockRate); |
| |
|                 fpTagBypass = newExecuBypass( |
|                     xml_data, "FP Bypass Tag", |
|                     core_params.phy_freg_width, |
|                     rfu->fp_regfile_height + fp_u->FU_height + lsq_height + |
|                     scheu_fp_Iw_height + scheu_ROB_height, |
|                     bypass_ip, core_params, clockRate); |
|             } |
|         } |
|     } |
| |
|     // Register every bypass bus that was built as a child component. |
|     if (int_bypass) { |
|         children.push_back(int_bypass); |
|     } |
|     if (intTagBypass) { |
|         children.push_back(intTagBypass); |
|     } |
|     if (int_mul_bypass) { |
|         children.push_back(int_mul_bypass); |
|     } |
|     if (intTag_mul_Bypass) { |
|         children.push_back(intTag_mul_Bypass); |
|     } |
|     if (fp_bypass) { |
|         children.push_back(fp_bypass); |
|     } |
|     if (fpTagBypass) { |
|         children.push_back(fpTagBypass); |
|     } |
| |
|     // The integer buses are built on every path above; the mul and fp buses |
|     // exist exactly when the corresponding unit count is non-zero. |
|     area.set_area(area.get_area() + int_bypass->area.get_area() + |
|                   intTagBypass->area.get_area()); |
|     if (core_params.num_muls > 0) { |
|         area.set_area(area.get_area() + int_mul_bypass->area.get_area() + |
|                       intTag_mul_Bypass->area.get_area()); |
|     } |
|     if (core_params.num_fpus > 0) { |
|         area.set_area(area.get_area() + fp_bypass->area.get_area() + |
|                       fpTagBypass->area.get_area()); |
|     } |
| } |
| |
| RENAMINGU::RENAMINGU(XMLNode* _xml_data, InputParameter* interface_ip_, |
| const CoreParameters & _core_params, |
| const CoreStatistics & _core_stats, bool exist_) |
| : McPATComponent(_xml_data), iFRAT(NULL), fFRAT(NULL), iRRAT(NULL), |
| fRRAT(NULL), ifreeL(NULL), ffreeL(NULL), idcl(NULL), fdcl(NULL), |
| RAHT(NULL), interface_ip(*interface_ip_), |
| core_params(_core_params), core_stats(_core_stats), exist(exist_) { |
| if (!exist) return; |
| int tag; |
| int data; |
| int out_w; |
| int size; |
| |
| // Assumption: |
| // We make an implicit design assumption based on the specific structure |
| // that is being modeled. |
| // 1. RAM-based RATs are direct mapped. However, if the associated |
| // scheduler is a reservation station style, the RATs are fully |
| // associative. |
| // 2. Non-CAM based RATs and free lists do not have tags. |
| // 3. Free lists are direct mapped. |
| |
| const int RAM_BASED_RAT_ASSOC = 1; |
| const int RS_RAT_ASSOC = 0; |
| const int NON_CAM_BASED_TAG_WIDTH = 0; |
| const int FREELIST_ASSOC = 1; |
| |
| clockRate = core_params.clockRate; |
| name = "Rename Unit"; |
| if (core_params.core_ty == OOO) { |
| //integer pipeline |
| if (core_params.scheu_ty == PhysicalRegFile) { |
| if (core_params.rm_ty == RAMbased) { |
| //FRAT with global checkpointing (GCs); please see the paper/tech |
| //report for detailed explanations |
| |
| data = int(ceil(core_params.phy_ireg_width * |
| (1 + core_params.globalCheckpoint) / |
| BITS_PER_BYTE)); |
| out_w = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE)); |
| |
| size = data * core_params.archi_Regs_IRF_size; |
| |
| interface_ip.cache_sz = size; |
| interface_ip.line_sz = data; |
| interface_ip.assoc = RAM_BASED_RAT_ASSOC; |
| interface_ip.nbanks = core_params.front_rat_nbanks; |
| interface_ip.out_w = out_w * BITS_PER_BYTE; |
| interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; |
| interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; |
| interface_ip.access_mode = Fast; |
| interface_ip.obj_func_dyn_energy = 0; |
| interface_ip.obj_func_dyn_power = 0; |
| interface_ip.obj_func_leak_power = 0; |
| interface_ip.obj_func_cycle_t = 1; |
| interface_ip.num_rw_ports = core_params.front_rat_rw_ports; |
| interface_ip.num_rd_ports = |
| NUM_SOURCE_OPERANDS * core_params.decodeW; |
| interface_ip.num_wr_ports = core_params.decodeW; |
| interface_ip.num_se_rd_ports = 0; |
| interface_ip.num_search_ports = 0; |
| interface_ip.is_cache = false; |
| interface_ip.pure_cam = false; |
| interface_ip.pure_ram = true; |
| interface_ip.throughput = 1.0 / clockRate; |
| interface_ip.latency = 1.0 / clockRate; |
| iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT", |
| Core_device, clockRate, |
| core_params.opt_local, |
| core_params.core_ty); |
| iFRAT->output_data.area *= core_params.num_hthreads; |
| area.set_area(area.get_area() + iFRAT->area.get_area()); |
| |
| //FRAT floating point |
| data = int(ceil(core_params.phy_freg_width * |
| (1 + core_params.globalCheckpoint) / |
| BITS_PER_BYTE)); |
| out_w = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE)); |
| size = data * core_params.archi_Regs_FRF_size; |
| |
| interface_ip.cache_sz = size; |
| interface_ip.line_sz = data; |
| interface_ip.assoc = RAM_BASED_RAT_ASSOC; |
| interface_ip.nbanks = core_params.front_rat_nbanks; |
| interface_ip.out_w = out_w * BITS_PER_BYTE; |
| interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; |
| interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; |
| interface_ip.access_mode = Fast; |
| interface_ip.obj_func_dyn_energy = 0; |
| interface_ip.obj_func_dyn_power = 0; |
| interface_ip.obj_func_leak_power = 0; |
| interface_ip.obj_func_cycle_t = 1; |
| interface_ip.num_rw_ports = core_params.front_rat_rw_ports; |
| interface_ip.num_rd_ports = |
| NUM_SOURCE_OPERANDS * core_params.fp_decodeW; |
| interface_ip.num_wr_ports = core_params.fp_decodeW; |
| interface_ip.num_se_rd_ports = 0; |
| interface_ip.num_search_ports = 0; |
| interface_ip.is_cache = false; |
| interface_ip.pure_cam = false; |
| interface_ip.pure_ram = true; |
| interface_ip.throughput = 1.0 / clockRate; |
| interface_ip.latency = 1.0 / clockRate; |
| fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT", |
| Core_device, clockRate, |
| core_params.opt_local, |
| core_params.core_ty); |
| fFRAT->output_data.area *= core_params.num_hthreads; |
| area.set_area(area.get_area() + fFRAT->area.get_area()); |
| |
| } else if ((core_params.rm_ty == CAMbased)) { |
| //IRAT |
| tag = core_params.arch_ireg_width; |
| //the address of CAM needed to be sent out |
| data = int(ceil((core_params.arch_ireg_width + 1 * |
| core_params.globalCheckpoint) / |
| BITS_PER_BYTE)); |
| out_w = int(ceil(core_params.arch_ireg_width / BITS_PER_BYTE)); |
| size = data * core_params.phy_Regs_IRF_size; |
| |
| interface_ip.cache_sz = size; |
| interface_ip.line_sz = data; |
| interface_ip.assoc = CAM_ASSOC; |
| interface_ip.nbanks = core_params.front_rat_nbanks; |
| interface_ip.out_w = out_w * BITS_PER_BYTE; |
| interface_ip.specific_tag = tag > 0; |
| interface_ip.tag_w = tag; |
| interface_ip.access_mode = Fast; |
| interface_ip.obj_func_dyn_energy = 0; |
| interface_ip.obj_func_dyn_power = 0; |
| interface_ip.obj_func_leak_power = 0; |
| interface_ip.obj_func_cycle_t = 1; |
| interface_ip.num_rw_ports = core_params.front_rat_rw_ports; |
| interface_ip.num_rd_ports = core_params.decodeW; |
| interface_ip.num_wr_ports = core_params.decodeW; |
| interface_ip.num_se_rd_ports = 0; |
| interface_ip.num_search_ports = |
| NUM_SOURCE_OPERANDS * core_params.decodeW; |
| interface_ip.is_cache = true; |
| interface_ip.pure_cam = false; |
| interface_ip.pure_ram = false; |
| interface_ip.throughput = 1.0 / clockRate; |
| interface_ip.latency = 1.0 / clockRate; |
| iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT", |
| Core_device, clockRate, |
| core_params.opt_local, |
| core_params.core_ty); |
| iFRAT->output_data.area *= core_params.num_hthreads; |
| area.set_area(area.get_area() + iFRAT->area.get_area()); |
| |
| //FRAT for FP |
| tag = core_params.arch_freg_width; |
| //the address of CAM needed to be sent out |
| data = int(ceil((core_params.arch_freg_width + 1 * |
| core_params.globalCheckpoint) / |
| BITS_PER_BYTE)); |
| out_w = int(ceil(core_params.arch_freg_width / BITS_PER_BYTE)); |
| size = data * core_params.phy_Regs_FRF_size; |
| |
| interface_ip.cache_sz = size; |
| interface_ip.line_sz = data; |
| interface_ip.assoc = CAM_ASSOC; |
| interface_ip.nbanks = core_params.front_rat_nbanks; |
| interface_ip.out_w = out_w * BITS_PER_BYTE; |
| interface_ip.specific_tag = tag > 0; |
| interface_ip.tag_w = tag; |
| interface_ip.access_mode = Fast; |
| interface_ip.obj_func_dyn_energy = 0; |
| interface_ip.obj_func_dyn_power = 0; |
| interface_ip.obj_func_leak_power = 0; |
| interface_ip.obj_func_cycle_t = 1; |
| interface_ip.num_rw_ports = core_params.front_rat_rw_ports; |
| interface_ip.num_rd_ports = core_params.fp_decodeW; |
| interface_ip.num_wr_ports = core_params.fp_decodeW; |
| interface_ip.num_se_rd_ports = 0; |
| interface_ip.num_search_ports = |
| NUM_SOURCE_OPERANDS * core_params.fp_decodeW; |
| interface_ip.is_cache = true; |
| interface_ip.pure_cam = false; |
| interface_ip.pure_ram = false; |
| interface_ip.throughput = 1.0 / clockRate; |
| interface_ip.latency = 1.0 / clockRate; |
| fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT", |
| Core_device, clockRate, |
| core_params.opt_local, |
| core_params.core_ty); |
| fFRAT->output_data.area *= core_params.num_hthreads; |
| area.set_area(area.get_area() + fFRAT->area.get_area()); |
| } |
| |
| //The RRAT is always RAM based, does not have GCs, and is used only to |
| //record the latest non-speculative mapping. |
| data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE)); |
| size = data * core_params.archi_Regs_IRF_size * |
| NUM_SOURCE_OPERANDS; |
| |
| interface_ip.cache_sz = size; |
| interface_ip.line_sz = data; |
| interface_ip.assoc = RAM_BASED_RAT_ASSOC; |
| interface_ip.nbanks = core_params.retire_rat_nbanks; |
| interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; |
| interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; |
| interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; |
| interface_ip.access_mode = Sequential; |
| interface_ip.obj_func_dyn_energy = 0; |
| interface_ip.obj_func_dyn_power = 0; |
| interface_ip.obj_func_leak_power = 0; |
| interface_ip.obj_func_cycle_t = 1; |
| interface_ip.num_rw_ports = core_params.retire_rat_rw_ports; |
| interface_ip.num_rd_ports = core_params.commitW; |
| interface_ip.num_wr_ports = core_params.commitW; |
| interface_ip.num_se_rd_ports = 0; |
| interface_ip.num_search_ports = 0; |
| interface_ip.is_cache = false; |
| interface_ip.pure_cam = false; |
| interface_ip.pure_ram = true; |
| interface_ip.throughput = 1.0 / clockRate; |
| interface_ip.latency = 1.0 / clockRate; |
| iRRAT = new ArrayST(xml_data, &interface_ip, "Int Retire RAT", |
| Core_device, clockRate, core_params.opt_local, |
| core_params.core_ty); |
| iRRAT->output_data.area *= core_params.num_hthreads; |
| area.set_area(area.get_area() + iRRAT->area.get_area()); |
| |
| //RRAT for FP |
| data = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE)); |
| size = data * core_params.archi_Regs_FRF_size * |
| NUM_SOURCE_OPERANDS; |
| |
| interface_ip.cache_sz = size; |
| interface_ip.line_sz = data; |
| interface_ip.assoc = RAM_BASED_RAT_ASSOC; |
| interface_ip.nbanks = core_params.retire_rat_nbanks; |
| interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; |
| interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; |
| interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; |
| interface_ip.access_mode = Sequential; |
| interface_ip.obj_func_dyn_energy = 0; |
| interface_ip.obj_func_dyn_power = 0; |
| interface_ip.obj_func_leak_power = 0; |
| interface_ip.obj_func_cycle_t = 1; |
| interface_ip.num_rw_ports = core_params.retire_rat_rw_ports; |
| interface_ip.num_rd_ports = core_params.fp_decodeW; |
| interface_ip.num_wr_ports = core_params.fp_decodeW; |
| interface_ip.num_se_rd_ports = 0; |
| interface_ip.num_search_ports = 0; |
| interface_ip.is_cache = false; |
| interface_ip.pure_cam = false; |
| interface_ip.pure_ram = true; |
| interface_ip.throughput = 1.0 / clockRate; |
| interface_ip.latency = 1.0 / clockRate; |
| fRRAT = new ArrayST(xml_data, &interface_ip, "FP Retire RAT", |
| Core_device, clockRate, core_params.opt_local, |
| core_params.core_ty); |
| fRRAT->output_data.area *= core_params.num_hthreads; |
| area.set_area(area.get_area() + fRRAT->area.get_area()); |
| |
| //The free list of the renaming unit is always RAM based. |
| //Recycling happens in two places: 1) when the DCL detects a WAW hazard, the physical register/ROB entry is recycled directly into the free list; |
| // 2) when an instruction commits, its physical register/ROB entry needs to be recycled. |
| //Therefore num_wr_ports = decodeW - 1 (at least one physical register is used by the current renaming group) + commitW. |
| data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE)); |
| size = data * core_params.num_ifreelist_entries; |
| |
| interface_ip.cache_sz = size; |
| interface_ip.line_sz = data; |
| interface_ip.assoc = FREELIST_ASSOC; |
| interface_ip.nbanks = core_params.freelist_nbanks; |
| interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; |
| interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; |
| interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; |
| interface_ip.access_mode = Sequential; |
| interface_ip.obj_func_dyn_energy = 0; |
| interface_ip.obj_func_dyn_power = 0; |
| interface_ip.obj_func_leak_power = 0; |
| interface_ip.obj_func_cycle_t = 1; |
| interface_ip.num_rw_ports = core_params.freelist_rw_ports; |
| interface_ip.num_rd_ports = core_params.decodeW; |
| interface_ip.num_wr_ports = |
| core_params.decodeW - 1 + core_params.commitW; |
| interface_ip.num_se_rd_ports = 0; |
| interface_ip.num_search_ports = 0; |
| interface_ip.is_cache = false; |
| interface_ip.pure_cam = false; |
| interface_ip.pure_ram = true; |
| interface_ip.throughput = 1.0 / clockRate; |
| interface_ip.latency = 1.0 / clockRate; |
| ifreeL = new ArrayST(xml_data, &interface_ip, "Integer Free List", |
| Core_device, clockRate, core_params.opt_local, |
| core_params.core_ty); |
| ifreeL->output_data.area *= core_params.num_hthreads; |
| area.set_area(area.get_area() + ifreeL->area.get_area()); |
| |
| //freelist for FP |
| data = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE)); |
| size = data * core_params.num_ffreelist_entries; |
| |
| interface_ip.cache_sz = size; |
| interface_ip.line_sz = data; |
| interface_ip.assoc = FREELIST_ASSOC; |
| interface_ip.nbanks = core_params.freelist_nbanks; |
| interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; |
| interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; |
| interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; |
| interface_ip.access_mode = Sequential; |
| interface_ip.obj_func_dyn_energy = 0; |
| interface_ip.obj_func_dyn_power = 0; |
| interface_ip.obj_func_leak_power = 0; |
| interface_ip.obj_func_cycle_t = 1; |
| interface_ip.num_rw_ports = core_params.freelist_rw_ports; |
| interface_ip.num_rd_ports = core_params.fp_decodeW; |
| interface_ip.num_wr_ports = |
| core_params.fp_decodeW - 1 + core_params.commitW; |
| interface_ip.num_se_rd_ports = 0; |
| interface_ip.num_search_ports = 0; |
| interface_ip.is_cache = false; |
| interface_ip.pure_cam = false; |
| interface_ip.pure_ram = true; |
| interface_ip.throughput = 1.0 / clockRate; |
| interface_ip.latency = 1.0 / clockRate; |
| ffreeL = new ArrayST(xml_data, &interface_ip, "FP Free List", |
| Core_device, clockRate, core_params.opt_local, |
| core_params.core_ty); |
| ffreeL->output_data.area *= core_params.num_hthreads; |
| area.set_area(area.get_area() + ffreeL->area.get_area()); |
| |
| } else if (core_params.scheu_ty == ReservationStation) { |
| if (core_params.rm_ty == RAMbased) { |
| tag = core_params.phy_ireg_width; |
| data = int(ceil(core_params.phy_ireg_width * |
| (1 + core_params.globalCheckpoint) / |
| BITS_PER_BYTE)); |
| out_w = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE)); |
| size = data * core_params.archi_Regs_IRF_size; |
| |
| interface_ip.cache_sz = size; |
| interface_ip.line_sz = data; |
| interface_ip.assoc = RS_RAT_ASSOC; |
| interface_ip.nbanks = core_params.front_rat_nbanks; |
| interface_ip.out_w = out_w * BITS_PER_BYTE; |
| interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; |
| interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; |
| interface_ip.access_mode = Fast; |
| interface_ip.obj_func_dyn_energy = 0; |
| interface_ip.obj_func_dyn_power = 0; |
| interface_ip.obj_func_leak_power = 0; |
| interface_ip.obj_func_cycle_t = 1; |
| interface_ip.num_rw_ports = core_params.front_rat_rw_ports; |
| interface_ip.num_rd_ports = |
| NUM_SOURCE_OPERANDS * core_params.decodeW; |
| interface_ip.num_wr_ports = core_params.decodeW; |
| interface_ip.num_se_rd_ports = 0; |
| interface_ip.num_search_ports = core_params.commitW; |
| interface_ip.is_cache = true; |
| interface_ip.pure_cam = false; |
| interface_ip.pure_ram = false; |
| interface_ip.throughput = 1.0 / clockRate; |
| interface_ip.latency = 1.0 / clockRate; |
| iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT", |
| Core_device, clockRate, |
| core_params.opt_local, |
| core_params.core_ty); |
| iFRAT->local_result.adjust_area(); |
| iFRAT->output_data.area *= core_params.num_hthreads; |
| area.set_area(area.get_area() + iFRAT->area.get_area()); |
| |
| //FP |
| tag = core_params.phy_freg_width; |
| data = int(ceil(core_params.phy_freg_width * |
| (1 + core_params.globalCheckpoint) / |
| BITS_PER_BYTE)); |
| out_w = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE)); |
| size = data * core_params.archi_Regs_FRF_size; |
| |
| interface_ip.cache_sz = size; |
| interface_ip.line_sz = data; |
| interface_ip.assoc = RS_RAT_ASSOC; |
| interface_ip.nbanks = core_params.front_rat_nbanks; |
| interface_ip.out_w = out_w * BITS_PER_BYTE; |
| interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; |
| interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; |
| interface_ip.access_mode = Fast; |
| interface_ip.obj_func_dyn_energy = 0; |
| interface_ip.obj_func_dyn_power = 0; |
| interface_ip.obj_func_leak_power = 0; |
| interface_ip.obj_func_cycle_t = 1; |
| interface_ip.num_rw_ports = core_params.front_rat_rw_ports; |
| interface_ip.num_rd_ports = |
| NUM_SOURCE_OPERANDS * core_params.fp_decodeW; |
| interface_ip.num_wr_ports = core_params.fp_decodeW; |
| interface_ip.num_se_rd_ports = 0; |
| interface_ip.num_search_ports = core_params.fp_issueW; |
| interface_ip.is_cache = true; |
| interface_ip.pure_cam = false; |
| interface_ip.pure_ram = false; |
| interface_ip.throughput = 1.0 / clockRate; |
| interface_ip.latency = 1.0 / clockRate; |
| fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT", |
| Core_device, clockRate, |
| core_params.opt_local, |
| core_params.core_ty); |
| fFRAT->local_result.adjust_area(); |
| fFRAT->output_data.area *= core_params.num_hthreads; |
| area.set_area(area.get_area() + fFRAT->area.get_area()); |
| |
| } else if ((core_params.rm_ty == CAMbased)) { |
| //FRAT |
| //the address of CAM needed to be sent out |
| tag = core_params.arch_ireg_width; |
| data = int(ceil (core_params.arch_ireg_width + |
| 1 * core_params.globalCheckpoint / |
| BITS_PER_BYTE)); |
| out_w = int(ceil (core_params.arch_ireg_width / |
| BITS_PER_BYTE)); |
| size = data * core_params.phy_Regs_IRF_size; |
| |
| interface_ip.cache_sz = size; |
| interface_ip.line_sz = data; |
| interface_ip.assoc = CAM_ASSOC; |
| interface_ip.nbanks = core_params.front_rat_nbanks; |
| interface_ip.out_w = out_w * BITS_PER_BYTE; |
| interface_ip.specific_tag = tag > 0; |
| interface_ip.tag_w = tag; |
| interface_ip.access_mode = Fast; |
| interface_ip.obj_func_dyn_energy = 0; |
| interface_ip.obj_func_dyn_power = 0; |
| interface_ip.obj_func_leak_power = 0; |
| interface_ip.obj_func_cycle_t = 1; |
| interface_ip.num_rw_ports = core_params.front_rat_rw_ports; |
| interface_ip.num_rd_ports = core_params.decodeW; |
| interface_ip.num_wr_ports = core_params.decodeW; |
| interface_ip.num_se_rd_ports = 0; |
| interface_ip.num_search_ports = |
| NUM_SOURCE_OPERANDS * core_params.decodeW; |
| interface_ip.is_cache = true; |
| interface_ip.pure_cam = false; |
| interface_ip.pure_ram = false; |
| interface_ip.throughput = 1.0 / clockRate; |
| interface_ip.latency = 1.0 / clockRate; |
| iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT", |
| Core_device, clockRate, |
| core_params.opt_local, |
| core_params.core_ty); |
| iFRAT->output_data.area *= core_params.num_hthreads; |
| area.set_area(area.get_area() + iFRAT->area.get_area()); |
| |
| //FRAT |
| tag = core_params.arch_freg_width; |
| //the address of CAM needed to be sent out |
| data = int(ceil(core_params.arch_freg_width + |
| 1 * core_params.globalCheckpoint / |
| BITS_PER_BYTE)); |
| out_w = int(ceil(core_params.arch_freg_width / BITS_PER_BYTE)); |
| size = data * core_params.phy_Regs_FRF_size; |
| |
| interface_ip.cache_sz = size; |
| interface_ip.line_sz = data; |
| interface_ip.assoc = CAM_ASSOC; |
| interface_ip.nbanks = core_params.front_rat_nbanks; |
| interface_ip.out_w = out_w * BITS_PER_BYTE; |
| interface_ip.specific_tag = tag > 0; |
| interface_ip.tag_w = tag; |
| interface_ip.access_mode = Fast; |
| interface_ip.obj_func_dyn_energy = 0; |
| interface_ip.obj_func_dyn_power = 0; |
| interface_ip.obj_func_leak_power = 0; |
| interface_ip.obj_func_cycle_t = 1; |
| interface_ip.num_rw_ports = core_params.front_rat_rw_ports; |
| interface_ip.num_rd_ports = core_params.decodeW; |
| interface_ip.num_wr_ports = core_params.fp_decodeW; |
| interface_ip.num_se_rd_ports = 0; |
| interface_ip.num_search_ports = |
| NUM_SOURCE_OPERANDS * core_params.fp_decodeW; |
| interface_ip.is_cache = true; |
| interface_ip.pure_cam = false; |
| interface_ip.pure_ram = false; |
| interface_ip.throughput = 1.0 / clockRate; |
| interface_ip.latency = 1.0 / clockRate; |
| fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT", |
| Core_device, clockRate, |
| core_params.opt_local, |
| core_params.core_ty); |
| fFRAT->output_data.area *= core_params.num_hthreads; |
| area.set_area(area.get_area() + fFRAT->area.get_area()); |
| |
| } |
| //No RRAT for RS-based OOO. |
| //The free list of the renaming unit of an RS-based OOO core is unified for both the int and fp renaming units, since the ROB is unified. |
| data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE)); |
| size = data * core_params.num_ifreelist_entries; |
| |
| interface_ip.cache_sz = size; |
| interface_ip.line_sz = data; |
| interface_ip.assoc = FREELIST_ASSOC; |
| interface_ip.nbanks = core_params.freelist_nbanks; |
| interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; |
| interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; |
| interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; |
| interface_ip.access_mode = Fast; |
| interface_ip.obj_func_dyn_energy = 0; |
| interface_ip.obj_func_dyn_power = 0; |
| interface_ip.obj_func_leak_power = 0; |
| interface_ip.obj_func_cycle_t = 1; |
| interface_ip.num_rw_ports = core_params.freelist_rw_ports; |
| interface_ip.num_rd_ports = core_params.decodeW; |
| interface_ip.num_wr_ports = |
| core_params.decodeW - 1 + core_params.commitW; |
| interface_ip.num_se_rd_ports = 0; |
| interface_ip.num_search_ports = 0; |
| interface_ip.is_cache = false; |
| interface_ip.pure_cam = false; |
| interface_ip.pure_ram = true; |
| interface_ip.throughput = 1.0 / clockRate; |
| interface_ip.latency = 1.0 / clockRate; |
| ifreeL = new ArrayST(xml_data, &interface_ip, "Unified Free List", |
| Core_device, clockRate, core_params.opt_local, |
| core_params.core_ty); |
| ifreeL->output_data.area *= core_params.num_hthreads; |
| area.set_area(area.get_area() + ifreeL->area.get_area()); |
| } |
| |
| } |
| idcl = |
| new dep_resource_conflict_check(xml_data, |
| "Instruction Dependency Check?", |
| &interface_ip, core_params, |
| core_params.phy_ireg_width, |
| clockRate); |
| fdcl = |
| new dep_resource_conflict_check(xml_data, |
| "FP Dependency Check?", &interface_ip, |
| core_params, |
| core_params.phy_freg_width, clockRate); |
| } |
| |
| Core::Core(XMLNode* _xml_data, int _ithCore, InputParameter* interface_ip_) |
| : McPATComponent(_xml_data), ifu(NULL), lsu(NULL), mmu(NULL), |
| exu(NULL), rnu(NULL), corepipe (NULL), undiffCore(NULL), l2cache (NULL), |
| ithCore(_ithCore), interface_ip(*interface_ip_) { |
| |
| ostringstream os; |
| os << ithCore; |
| name = "Core " + os.str(); |
| |
| int i = 0; |
| XMLNode* childXML; |
| for (i = 0; i < xml_data->nChildNode("component"); i++) { |
| childXML = xml_data->getChildNodePtr("component", &i); |
| XMLCSTR type = childXML->getAttribute("type"); |
| if (!type) |
| warnMissingComponentType(childXML->getAttribute("id")); |
| |
| STRCMP(type, "CacheUnit") { |
| XMLCSTR comp_name = childXML->getAttribute("id"); |
| if (!comp_name) |
| continue; |
| |
| STRCMP(comp_name, "system.L20") { |
| l2cache = new CacheUnit(childXML, &interface_ip); |
| children.push_back(l2cache); |
| } |
| } |
| } |
| |
| set_core_param(); |
| clockRate = core_params.clockRate; |
| |
| ifu = new InstFetchU(xml_data, &interface_ip, core_params, |
| core_stats); |
| children.push_back(ifu); |
| lsu = new LoadStoreU(xml_data, &interface_ip, core_params, |
| core_stats); |
| children.push_back(lsu); |
| mmu = new MemManU(xml_data, &interface_ip, core_params, |
| core_stats); |
| children.push_back(mmu); |
| exu = new EXECU(xml_data, &interface_ip, lsu->lsq_height, |
| core_params, core_stats); |
| children.push_back(exu); |
| undiffCore = new UndiffCore(xml_data, &interface_ip, core_params); |
| children.push_back(undiffCore); |
| if (core_params.core_ty == OOO) { |
| rnu = new RENAMINGU(xml_data, &interface_ip, core_params, |
| core_stats); |
| children.push_back(rnu); |
| } |
| corepipe = new Pipeline(xml_data, &interface_ip, core_params); |
| children.push_back(corepipe); |
| |
| double pipeline_area_per_unit; |
| if (core_params.core_ty == OOO) { |
| pipeline_area_per_unit = (corepipe->area.get_area() * |
| core_params.num_pipelines) / 5.0; |
| if (rnu->exist) { |
| rnu->area.set_area(rnu->area.get_area() + pipeline_area_per_unit); |
| } |
| } else { |
| pipeline_area_per_unit = (corepipe->area.get_area() * |
| core_params.num_pipelines) / 4.0; |
| } |
| |
| // Move all of this to computeArea |
| //area.set_area(area.get_area()+ corepipe->area.get_area()); |
| if (ifu->exist) { |
| ifu->area.set_area(ifu->area.get_area() + pipeline_area_per_unit); |
| area.set_area(area.get_area() + ifu->area.get_area()); |
| } |
| if (lsu->exist) { |
| lsu->area.set_area(lsu->area.get_area() + pipeline_area_per_unit); |
| area.set_area(area.get_area() + lsu->area.get_area()); |
| } |
| if (exu->exist) { |
| exu->area.set_area(exu->area.get_area() + pipeline_area_per_unit); |
| area.set_area(area.get_area() + exu->area.get_area()); |
| } |
| if (mmu->exist) { |
| mmu->area.set_area(mmu->area.get_area() + pipeline_area_per_unit); |
| area.set_area(area.get_area() + mmu->area.get_area()); |
| } |
| |
| if (core_params.core_ty == OOO) { |
| if (rnu->exist) { |
| |
| area.set_area(area.get_area() + rnu->area.get_area()); |
| } |
| } |
| |
| if (undiffCore->exist) { |
| area.set_area(area.get_area() + undiffCore->area.get_area()); |
| } |
| |
| if (l2cache) { |
| area.set_area(area.get_area() + l2cache->area.get_area()); |
| } |
| } |
| |
| |
| void BranchPredictor::computeEnergy() { |
| if (!exist) return; |
| |
| // ASSUMPTION: All instructions access the branch predictors at Fetch and |
| // only branch instrucions update the predictors regardless |
| // of the correctness of the prediction. |
| double tdp_read_accesses = |
| core_params.predictionW * core_stats.BR_duty_cycle; |
| globalBPT->tdp_stats.reset(); |
| globalBPT->tdp_stats.readAc.access = tdp_read_accesses; |
| globalBPT->tdp_stats.writeAc.access = 0; |
| globalBPT->rtp_stats.reset(); |
| globalBPT->rtp_stats.readAc.access = core_stats.total_instructions; |
| globalBPT->rtp_stats.writeAc.access = core_stats.branch_instructions; |
| globalBPT->power_t.reset(); |
| globalBPT->power_t.readOp.dynamic += |
| globalBPT->local_result.power.readOp.dynamic * |
| globalBPT->tdp_stats.readAc.access + |
| globalBPT->local_result.power.writeOp.dynamic * |
| globalBPT->tdp_stats.writeAc.access; |
| globalBPT->power_t = globalBPT->power_t + |
| globalBPT->local_result.power * pppm_lkg; |
| globalBPT->rt_power.reset(); |
| globalBPT->rt_power.readOp.dynamic += |
| globalBPT->local_result.power.readOp.dynamic * |
| globalBPT->rtp_stats.readAc.access + |
| globalBPT->local_result.power.writeOp.dynamic * |
| globalBPT->rtp_stats.writeAc.access; |
| |
| L1_localBPT->tdp_stats.reset(); |
| L1_localBPT->tdp_stats.readAc.access = tdp_read_accesses; |
| L1_localBPT->tdp_stats.writeAc.access = 0; |
| L1_localBPT->rtp_stats.reset(); |
| L1_localBPT->rtp_stats.readAc.access = core_stats.total_instructions; |
| L1_localBPT->rtp_stats.writeAc.access = core_stats.branch_instructions; |
| L1_localBPT->power_t.reset(); |
| L1_localBPT->power_t.readOp.dynamic += |
| L1_localBPT->local_result.power.readOp.dynamic * |
| L1_localBPT->tdp_stats.readAc.access + |
| L1_localBPT->local_result.power.writeOp.dynamic * |
| L1_localBPT->tdp_stats.writeAc.access; |
| L1_localBPT->power_t = L1_localBPT->power_t + |
| L1_localBPT->local_result.power * pppm_lkg; |
| L1_localBPT->rt_power.reset(); |
| L1_localBPT->rt_power.readOp.dynamic += |
| L1_localBPT->local_result.power.readOp.dynamic * |
| L1_localBPT->rtp_stats.readAc.access + |
| L1_localBPT->local_result.power.writeOp.dynamic * |
| L1_localBPT->rtp_stats.writeAc.access; |
| |
| L2_localBPT->tdp_stats.reset(); |
| L2_localBPT->tdp_stats.readAc.access = tdp_read_accesses; |
| L2_localBPT->tdp_stats.writeAc.access = 0; |
| L2_localBPT->rtp_stats.reset(); |
| L2_localBPT->rtp_stats.readAc.access = core_stats.branch_instructions; |
| L2_localBPT->rtp_stats.writeAc.access = core_stats.branch_instructions; |
| L2_localBPT->power_t.reset(); |
| L2_localBPT->power_t.readOp.dynamic += |
| L2_localBPT->local_result.power.readOp.dynamic * |
| L2_localBPT->tdp_stats.readAc.access + |
| L2_localBPT->local_result.power.writeOp.dynamic * |
| L2_localBPT->tdp_stats.writeAc.access; |
| L2_localBPT->power_t = L2_localBPT->power_t + |
| L2_localBPT->local_result.power * pppm_lkg; |
| L2_localBPT->rt_power.reset(); |
| L2_localBPT->rt_power.readOp.dynamic += |
| L2_localBPT->local_result.power.readOp.dynamic * |
| L2_localBPT->rtp_stats.readAc.access + |
| L2_localBPT->local_result.power.writeOp.dynamic * |
| L2_localBPT->rtp_stats.writeAc.access; |
| |
| chooser->tdp_stats.reset(); |
| chooser->tdp_stats.readAc.access = tdp_read_accesses; |
| chooser->tdp_stats.writeAc.access = 0; |
| chooser->rtp_stats.reset(); |
| chooser->rtp_stats.readAc.access = core_stats.total_instructions; |
| chooser->rtp_stats.writeAc.access = core_stats.branch_instructions; |
| chooser->power_t.reset(); |
| chooser->power_t.readOp.dynamic += |
| chooser->local_result.power.readOp.dynamic * |
| chooser->tdp_stats.readAc.access + |
| chooser->local_result.power.writeOp.dynamic * |
| chooser->tdp_stats.writeAc.access; |
| chooser->power_t = |
| chooser->power_t + chooser->local_result.power * pppm_lkg; |
| chooser->rt_power.reset(); |
| chooser->rt_power.readOp.dynamic += |
| chooser->local_result.power.readOp.dynamic * |
| chooser->rtp_stats.readAc.access + |
| chooser->local_result.power.writeOp.dynamic * |
| chooser->rtp_stats.writeAc.access; |
| |
| RAS->tdp_stats.reset(); |
| RAS->tdp_stats.readAc.access = tdp_read_accesses; |
| RAS->tdp_stats.writeAc.access = 0; |
| RAS->rtp_stats.reset(); |
| RAS->rtp_stats.readAc.access = core_stats.function_calls; |
| RAS->rtp_stats.writeAc.access = core_stats.function_calls; |
| RAS->power_t.reset(); |
| RAS->power_t.readOp.dynamic += |
| RAS->local_result.power.readOp.dynamic * RAS->tdp_stats.readAc.access + |
| RAS->local_result.power.writeOp.dynamic * |
| RAS->tdp_stats.writeAc.access; |
| RAS->power_t = RAS->power_t + RAS->local_result.power * |
| core_params.pppm_lkg_multhread; |
| RAS->rt_power.reset(); |
| RAS->rt_power.readOp.dynamic += RAS->local_result.power.readOp.dynamic * |
| RAS->rtp_stats.readAc.access + |
| RAS->local_result.power.writeOp.dynamic * |
| RAS->rtp_stats.writeAc.access; |
| |
| output_data.reset(); |
| if (globalBPT) { |
| globalBPT->output_data.peak_dynamic_power = |
| globalBPT->power_t.readOp.dynamic * clockRate; |
| globalBPT->output_data.runtime_dynamic_energy = |
| globalBPT->rt_power.readOp.dynamic; |
| output_data += globalBPT->output_data; |
| } |
| if (L1_localBPT) { |
| L1_localBPT->output_data.peak_dynamic_power = |
| L1_localBPT->power_t.readOp.dynamic * clockRate; |
| L1_localBPT->output_data.runtime_dynamic_energy = |
| L1_localBPT->rt_power.readOp.dynamic; |
| output_data += L1_localBPT->output_data; |
| } |
| if (L2_localBPT) { |
| L2_localBPT->output_data.peak_dynamic_power = |
| L2_localBPT->power_t.readOp.dynamic * clockRate; |
| L2_localBPT->output_data.runtime_dynamic_energy = |
| L2_localBPT->rt_power.readOp.dynamic; |
| output_data += L2_localBPT->output_data; |
| } |
| if (chooser) { |
| chooser->output_data.peak_dynamic_power = |
| chooser->power_t.readOp.dynamic * clockRate; |
| chooser->output_data.runtime_dynamic_energy = |
| chooser->rt_power.readOp.dynamic; |
| output_data += chooser->output_data; |
| } |
| if (RAS) { |
| RAS->output_data.peak_dynamic_power = |
| RAS->power_t.readOp.dynamic * clockRate; |
| RAS->output_data.subthreshold_leakage_power = |
| RAS->power_t.readOp.leakage * core_params.num_hthreads; |
| RAS->output_data.gate_leakage_power = |
| RAS->power_t.readOp.gate_leakage * core_params.num_hthreads; |
| RAS->output_data.runtime_dynamic_energy = RAS->rt_power.readOp.dynamic; |
| output_data += RAS->output_data; |
| } |
| } |
| |
| void BranchPredictor::displayData(uint32_t indent, int plevel) { |
| if (!exist) return; |
| |
| McPATComponent::displayData(indent, plevel); |
| |
| globalBPT->displayData(indent + 4, plevel); |
| L1_localBPT->displayData(indent + 4, plevel); |
| L2_localBPT->displayData(indent + 4, plevel); |
| chooser->displayData(indent + 4, plevel); |
| RAS->displayData(indent + 4, plevel); |
| } |
| |
| void InstFetchU::computeEnergy() { |
| if (!exist) return; |
| |
| if (BPT) { |
| BPT->computeEnergy(); |
| } |
| |
| IB->tdp_stats.reset(); |
| IB->tdp_stats.readAc.access = core_params.peak_issueW; |
| IB->tdp_stats.writeAc.access = core_params.peak_issueW; |
| IB->rtp_stats.reset(); |
| IB->rtp_stats.readAc.access = core_stats.total_instructions; |
| IB->rtp_stats.writeAc.access = core_stats.total_instructions; |
| IB->power_t.reset(); |
| IB->power_t.readOp.dynamic += IB->local_result.power.readOp.dynamic * |
| IB->tdp_stats.readAc.access + |
| IB->local_result.power.writeOp.dynamic * IB->tdp_stats.writeAc.access; |
| IB->power_t = IB->power_t + IB->local_result.power * pppm_lkg; |
| IB->rt_power.reset(); |
| IB->rt_power.readOp.dynamic += IB->local_result.power.readOp.dynamic * |
| IB->rtp_stats.readAc.access + |
| IB->local_result.power.writeOp.dynamic * IB->rtp_stats.writeAc.access; |
| |
| if (core_params.predictionW > 0) { |
| BTB->tdp_stats.reset(); |
| BTB->tdp_stats.readAc.access = core_params.predictionW; |
| BTB->tdp_stats.writeAc.access = 0; |
| BTB->rtp_stats.reset(); |
| BTB->rtp_stats.readAc.access = inst_fetch_stats.btb_read_accesses; |
| BTB->rtp_stats.writeAc.access = inst_fetch_stats.btb_write_accesses; |
| BTB->power_t.reset(); |
| BTB->power_t.readOp.dynamic += BTB->local_result.power.readOp.dynamic * |
| BTB->tdp_stats.readAc.access + |
| BTB->local_result.power.writeOp.dynamic * |
| BTB->tdp_stats.writeAc.access; |
| BTB->rt_power.reset(); |
| BTB->rt_power.readOp.dynamic += |
| BTB->local_result.power.readOp.dynamic * |
| BTB->rtp_stats.readAc.access + |
| BTB->local_result.power.writeOp.dynamic * |
| BTB->rtp_stats.writeAc.access; |
| } |
| |
| ID_inst->tdp_stats.reset(); |
| ID_inst->tdp_stats.readAc.access = core_params.decodeW; |
| ID_inst->power_t.reset(); |
| ID_inst->power_t = ID_misc->power; |
| ID_inst->power_t.readOp.dynamic = ID_inst->power.readOp.dynamic * |
| ID_inst->tdp_stats.readAc.access; |
| ID_inst->rtp_stats.reset(); |
| ID_inst->rtp_stats.readAc.access = core_stats.total_instructions; |
| ID_inst->rt_power.reset(); |
| ID_inst->rt_power.readOp.dynamic = ID_inst->power.readOp.dynamic * |
| ID_inst->rtp_stats.readAc.access; |
| |
| ID_operand->tdp_stats.reset(); |
| ID_operand->tdp_stats.readAc.access = core_params.decodeW; |
| ID_operand->power_t.reset(); |
| ID_operand->power_t = ID_misc->power; |
| ID_operand->power_t.readOp.dynamic = ID_operand->power.readOp.dynamic * |
| ID_operand->tdp_stats.readAc.access; |
| ID_operand->rtp_stats.reset(); |
| ID_operand->rtp_stats.readAc.access = core_stats.total_instructions; |
| ID_operand->rt_power.reset(); |
| ID_operand->rt_power.readOp.dynamic = ID_operand->power.readOp.dynamic * |
| ID_operand->rtp_stats.readAc.access; |
| |
| ID_misc->tdp_stats.reset(); |
| ID_misc->tdp_stats.readAc.access = core_params.decodeW; |
| ID_misc->power_t.reset(); |
| ID_misc->power_t = ID_misc->power; |
| ID_misc->power_t.readOp.dynamic = ID_misc->power.readOp.dynamic * |
| ID_misc->tdp_stats.readAc.access; |
| ID_misc->rtp_stats.reset(); |
| ID_misc->rtp_stats.readAc.access = core_stats.total_instructions; |
| ID_misc->rt_power.reset(); |
| ID_misc->rt_power.readOp.dynamic = ID_misc->power.readOp.dynamic * |
| ID_misc->rtp_stats.readAc.access; |
| |
| power.reset(); |
| rt_power.reset(); |
| McPATComponent::computeEnergy(); |
| |
| output_data.reset(); |
| if (icache) { |
| output_data += icache->output_data; |
| } |
| if (IB) { |
| IB->output_data.peak_dynamic_power = |
| IB->power_t.readOp.dynamic * clockRate; |
| IB->output_data.runtime_dynamic_energy = IB->rt_power.readOp.dynamic; |
| output_data += IB->output_data; |
| } |
| if (BTB) { |
| BTB->output_data.peak_dynamic_power = |
| BTB->power_t.readOp.dynamic * clockRate; |
| BTB->output_data.runtime_dynamic_energy = BTB->rt_power.readOp.dynamic; |
| output_data += BTB->output_data; |
| } |
| if (BPT) { |
| output_data += BPT->output_data; |
| } |
| if (ID_inst) { |
| ID_inst->output_data.peak_dynamic_power = |
| ID_inst->power_t.readOp.dynamic * clockRate; |
| ID_inst->output_data.runtime_dynamic_energy = |
| ID_inst->rt_power.readOp.dynamic; |
| output_data += ID_inst->output_data; |
| } |
| if (ID_operand) { |
| ID_operand->output_data.peak_dynamic_power = |
| ID_operand->power_t.readOp.dynamic * clockRate; |
| ID_operand->output_data.runtime_dynamic_energy = |
| ID_operand->rt_power.readOp.dynamic; |
| output_data += ID_operand->output_data; |
| } |
| if (ID_misc) { |
| ID_misc->output_data.peak_dynamic_power = |
| ID_misc->power_t.readOp.dynamic * clockRate; |
| ID_misc->output_data.runtime_dynamic_energy = |
| ID_misc->rt_power.readOp.dynamic; |
| output_data += ID_misc->output_data; |
| } |
| } |
| |
| void InstFetchU::displayData(uint32_t indent, int plevel) { |
| if (!exist) return; |
| |
| McPATComponent::displayData(indent, plevel); |
| |
| if (core_params.predictionW > 0) { |
| BTB->displayData(indent + 4, plevel); |
| if (BPT->exist) { |
| BPT->displayData(indent + 4, plevel); |
| } |
| } |
| IB->displayData(indent + 4, plevel); |
| ID_inst->displayData(indent + 4, plevel); |
| ID_operand->displayData(indent + 4, plevel); |
| ID_misc->displayData(indent + 4, plevel); |
| } |
| |
| void RENAMINGU::computeEnergy() { |
| if (!exist) return; |
| |
| idcl->tdp_stats.reset(); |
| idcl->rtp_stats.reset(); |
| idcl->power_t.reset(); |
| idcl->rt_power.reset(); |
| if (core_params.core_ty == OOO) { |
| idcl->tdp_stats.readAc.access = core_params.decodeW; |
| idcl->rtp_stats.readAc.access = 3 * core_params.decodeW * |
| core_params.decodeW * core_stats.rename_reads; |
| } else if (core_params.issueW > 1) { |
| idcl->tdp_stats.readAc.access = core_params.decodeW; |
| idcl->rtp_stats.readAc.access = 2 * core_stats.int_instructions; |
| } |
| idcl->power_t.readOp.dynamic = idcl->tdp_stats.readAc.access * |
| idcl->power.readOp.dynamic; |
| idcl->power_t.readOp.leakage = idcl->power.readOp.leakage * |
| core_params.num_hthreads; |
| idcl->power_t.readOp.gate_leakage = idcl->power.readOp.gate_leakage * |
| core_params.num_hthreads; |
| idcl->rt_power.readOp.dynamic = idcl->rtp_stats.readAc.access * |
| idcl->power.readOp.dynamic; |
| |
| fdcl->tdp_stats.reset(); |
| fdcl->rtp_stats.reset(); |
| fdcl->power_t.reset(); |
| fdcl->rt_power.reset(); |
| if (core_params.core_ty == OOO) { |
| fdcl->tdp_stats.readAc.access = core_params.decodeW; |
| fdcl->rtp_stats.readAc.access = 3 * core_params.fp_issueW * |
| core_params.fp_issueW * core_stats.fp_rename_writes; |
| } else if (core_params.issueW > 1) { |
| fdcl->tdp_stats.readAc.access = core_params.decodeW; |
| fdcl->rtp_stats.readAc.access = core_stats.fp_instructions; |
| } |
| fdcl->power_t.readOp.dynamic = fdcl->tdp_stats.readAc.access * |
| fdcl->power.readOp.dynamic; |
| fdcl->power_t.readOp.leakage = fdcl->power.readOp.leakage * |
| core_params.num_hthreads; |
| fdcl->power_t.readOp.gate_leakage = fdcl->power.readOp.gate_leakage * |
| core_params.num_hthreads; |
| fdcl->rt_power.readOp.dynamic = fdcl->rtp_stats.readAc.access * |
| fdcl->power.readOp.dynamic; |
| |
| if (iRRAT) { |
| iRRAT->tdp_stats.reset(); |
| iRRAT->tdp_stats.readAc.access = iRRAT->l_ip.num_rd_ports; |
| iRRAT->tdp_stats.writeAc.access = iRRAT->l_ip.num_wr_ports; |
| iRRAT->rtp_stats.reset(); |
| iRRAT->rtp_stats.readAc.access = core_stats.rename_writes; |
| iRRAT->rtp_stats.writeAc.access = core_stats.rename_writes; |
| iRRAT->power_t.reset(); |
| iRRAT->power_t.readOp.dynamic += |
| iRRAT->tdp_stats.readAc.access * iRRAT->power.readOp.dynamic + |
| iRRAT->tdp_stats.writeAc.access * iRRAT->power.writeOp.dynamic; |
| iRRAT->rt_power.reset(); |
| iRRAT->rt_power.readOp.dynamic += |
| iRRAT->rtp_stats.readAc.access * iRRAT->power.readOp.dynamic + |
| iRRAT->rtp_stats.writeAc.access * iRRAT->power.writeOp.dynamic; |
| iRRAT->power_t.readOp.leakage = |
| iRRAT->power.readOp.leakage * core_params.num_hthreads; |
| iRRAT->power_t.readOp.gate_leakage = |
| iRRAT->power.readOp.gate_leakage * core_params.num_hthreads; |
| } |
| |
| if (ifreeL) { |
| ifreeL->tdp_stats.reset(); |
| ifreeL->tdp_stats.readAc.access = core_params.decodeW; |
| ifreeL->tdp_stats.writeAc.access = core_params.decodeW; |
| ifreeL->rtp_stats.reset(); |
| if (core_params.scheu_ty == PhysicalRegFile) { |
| ifreeL->rtp_stats.readAc.access = core_stats.rename_reads; |
| ifreeL->rtp_stats.writeAc.access = 2 * core_stats.rename_writes; |
| } else if (core_params.scheu_ty == ReservationStation) { |
| ifreeL->rtp_stats.readAc.access = |
| core_stats.rename_reads + core_stats.fp_rename_reads; |
| ifreeL->rtp_stats.writeAc.access = |
| 2 * (core_stats.rename_writes + core_stats.fp_rename_writes); |
| } |
| ifreeL->power_t.reset(); |
| ifreeL->power_t.readOp.dynamic += |
| ifreeL->tdp_stats.readAc.access * |