| <?xml version="1.0" ?> |
| <component id="root" name="root"> |
| <component id="system" name="system"> |
| <!--McPAT will skip the components if number is set to 0 --> |
| <!--Duty cycles in this file are set according to "ARM MPcore |
| ARchitecture performance Enhancement" in MPF Japan 2008 --> |
| <param name="number_of_cores" value="2"/> |
| <param name="number_of_L1Directories" value="2"/> |
| <param name="number_of_L2Directories" value="0"/> |
| <param name="number_of_L2s" value="0"/> <!-- This number means how many L2 clusters in each cluster there can be multiple banks/ports --> |
| <param name="Private_L2" value="0"/><!--1 Private, 0 shared/coherent --> |
| <param name="number_of_L3s" value="0"/> <!-- This number means how many L3 clusters --> |
| <param name="number_of_NoCs" value="1"/> |
| <param name="homogeneous_cores" value="1"/><!--1 means homo --> |
| <param name="homogeneous_L2s" value="1"/> |
| <param name="homogeneous_L1Directorys" value="1"/> |
| <param name="homogeneous_L2Directorys" value="1"/> |
| <param name="homogeneous_L3s" value="1"/> |
| <param name="homogeneous_ccs" value="1"/><!--cache coherece hardware --> |
| <param name="homogeneous_NoCs" value="1"/> |
| <param name="core_tech_node" value="22"/><!-- nm --> |
| <param name="target_core_clockrate" value="2000"/><!--MHz --> |
| <param name="temperature" value="340"/> <!-- Kelvin --> |
| <param name="number_cache_levels" value="2"/> |
| <param name="interconnect_projection_type" value="1"/><!--0: agressive wire technology; 1: conservative wire technology --> |
| <param name="device_type" value="2"/><!--0: HP(High Performance Type); 1: LSTP(Low standby power) 2: LOP (Low Operating Power) --> |
| <param name="longer_channel_device" value="1"/><!-- 0 no use; 1 use when approperiate --> |
| <param name="Embedded" value="1"/><!-- Embedded processor like ARM or general purpose processors? --> |
| <param name="opt_clockrate" value="1"/> |
| <param name="machine_bits" value="32"/> |
| <param name="virtual_address_width" value="32"/> |
| <param name="physical_address_width" value="32"/> |
| <param name="virtual_memory_page_size" value="4096"/> |
| <!-- address width determins the tag_width in Cache, LSQ and buffers in cache controller |
| default value is machine_bits, if not set --> |
| <stat name="total_cycles" value="100000"/> |
| <stat name="idle_cycles" value="0"/> |
| <stat name="busy_cycles" value="100000"/> |
| <!--This page size(B) is complete different from the page size in Main memo secction. this page size is the size of |
| virtual memory from OS/Archi perspective; the page size in Main memo secction is the actuall physical line in a DRAM bank --> |
| <!-- *********************** cores ******************* --> |
| <component id="system.core0" name="core0"> |
| <!-- Core property --> |
| <param name="clock_rate" value="2000"/> |
| <!-- for cores with unknow timing, set to 0 to force off the opt flag --> |
| <param name="opt_local" value="1"/> |
| <param name="instruction_length" value="32"/> |
| <param name="opcode_width" value="7"/> |
| <param name="x86" value="0"/> |
| <param name="micro_opcode_width" value="8"/> |
| <param name="machine_type" value="0"/> |
| <!-- inorder/OoO; 1 inorder; 0 OOO--> |
| <param name="number_hardware_threads" value="1"/> |
| <!-- number_instruction_fetch_ports(icache ports) is always 1 in single-thread processor, |
| it only may be more than one in SMT processors. BTB ports always equals to fetch ports since |
| branch information in consective branch instructions in the same fetch group can be read out from BTB once.--> |
| <param name="fetch_width" value="2"/> |
| <!-- fetch_width determins the size of cachelines of L1 cache block --> |
| <param name="number_instruction_fetch_ports" value="1"/> |
| <param name="decode_width" value="2"/> |
| <!-- decode_width determins the number of ports of the |
| renaming table (both RAM and CAM) scheme --> |
| <param name="issue_width" value="4"/> |
| <param name="peak_issue_width" value="7"/> |
| <!-- issue_width determins the number of ports of Issue window and other logic |
| as in the complexity effective proccessors paper; issue_width==dispatch_width --> |
| <param name="commit_width" value="4"/> |
| <!-- commit_width determins the number of ports of register files --> |
| <param name="fp_issue_width" value="1"/> |
| <param name="prediction_width" value="1"/> |
| <!-- number of branch instructions can be predicted simultannouesl--> |
| <!-- Current version of McPAT does not distinguish int and floating point pipelines |
| Theses parameters are reserved for future use.--> |
| <param name="pipelines_per_core" value="1,1"/> |
| <!--integer_pipeline and floating_pipelines, if the floating_pipelines is 0, then the pipeline is shared--> |
| <param name="pipeline_depth" value="8,8"/> |
| <!-- pipeline depth of int and fp, if pipeline is shared, the second number is the average cycles of fp ops --> |
| <!-- issue and exe unit--> |
| <param name="ALU_per_core" value="3"/> |
| <!-- contains an adder, a shifter, and a logical unit --> |
| <param name="MUL_per_core" value="1"/> |
| <!-- For MUL and Div --> |
| <param name="FPU_per_core" value="1"/> |
| <!-- buffer between IF and ID stage --> |
| <param name="instruction_buffer_size" value="32"/> |
| <!-- buffer between ID and sche/exe stage --> |
| <param name="decoded_stream_buffer_size" value="16"/> |
| <param name="instruction_window_scheme" value="0"/><!-- 0 PHYREG based, 1 RSBASED--> |
| <!-- McPAT support 2 types of OoO cores, RS based and physical reg based--> |
| <param name="instruction_window_size" value="20"/> |
| <param name="fp_instruction_window_size" value="15"/> |
| <!-- Numbers need to be confirmed --> |
| <!-- the instruction issue Q as in Alpha 21264; The RS as in Intel P6 --> |
| <param name="ROB_size" value="0"/> |
| <!-- each in-flight instruction has an entry in ROB --> |
| <!-- registers --> |
| <param name="archi_Regs_IRF_size" value="32"/> |
| <param name="archi_Regs_FRF_size" value="32"/> |
| <!-- if OoO processor, phy_reg number is needed for renaming logic, |
| renaming logic is for both integer and floating point insts. --> |
| <param name="phy_Regs_IRF_size" value="64"/> |
| <param name="phy_Regs_FRF_size" value="64"/> |
| <!-- rename logic --> |
| <param name="rename_scheme" value="0"/> |
| <!-- can be RAM based(0) or CAM based(1) rename scheme |
| RAM-based scheme will have free list, status table; |
| CAM-based scheme have the valid bit in the data field of the CAM |
| both RAM and CAM need RAM-based checkpoint table, checkpoint_depth=# of in_flight instructions; |
| Detailed RAT Implementation see TR --> |
| <param name="register_windows_size" value="0"/> |
| <!-- how many windows in the windowed register file, sun processors; |
| no register windowing is used when this number is 0 --> |
| <!-- In OoO cores, loads and stores can be issued whether inorder(Pentium Pro) or (OoO)out-of-order(Alpha), |
| They will always try to exeute out-of-order though. --> |
| <param name="LSU_order" value="inorder"/> |
| <param name="store_buffer_size" value="4"/> |
| <!-- By default, in-order cores do not have load buffers --> |
| <param name="load_buffer_size" value="0"/> |
| <!-- number of ports refer to sustainable concurrent memory accesses --> |
| <param name="memory_ports" value="1"/> |
| <!-- max_allowed_in_flight_memo_instructions determins the # of ports of load and store buffer |
| as well as the ports of Dcache which is connected to LSU --> |
| <!-- dual-pumped Dcache can be used to save the extra read/write ports --> |
| <param name="RAS_size" value="4"/> |
| <!-- general stats, defines simulation periods;require total, idle, and busy cycles for senity check --> |
| <!-- please note: if target architecture is X86, then all the instrucions refer to (fused) micro-ops --> |
| <stat name="total_instructions" value="400000"/> |
| <stat name="int_instructions" value="200000"/> |
| <stat name="fp_instructions" value="100000"/> |
| <stat name="branch_instructions" value="100000"/> |
| <stat name="branch_mispredictions" value="0"/> |
| <stat name="load_instructions" value="0"/> |
| <stat name="store_instructions" value="50000"/> |
| <stat name="committed_instructions" value="400000"/> |
| <stat name="committed_int_instructions" value="200000"/> |
| <stat name="committed_fp_instructions" value="100000"/> |
| <stat name="pipeline_duty_cycle" value="1"/><!--<=1, runtime_ipc/peak_ipc; averaged for all cores if homogenous --> |
| <!-- the following cycle stats are used for heterogeneouse cores only, |
| please ignore them if homogeneouse cores --> |
| <stat name="total_cycles" value="100000"/> |
| <stat name="idle_cycles" value="0"/> |
| <stat name="busy_cycles" value="100000"/> |
| <!-- instruction buffer stats --> |
| <!-- ROB stats, both RS and Phy based OoOs have ROB |
| performance simulator should capture the difference on accesses, |
| otherwise, McPAT has to guess based on number of commited instructions. --> |
| <stat name="ROB_reads" value="400000"/> |
| <stat name="ROB_writes" value="400000"/> |
| <!-- RAT accesses --> |
| <stat name="rename_reads" value="800000"/> <!--lookup in renaming logic --> |
| <stat name="rename_writes" value="400000"/><!--update dest regs. renaming logic --> |
| <stat name="fp_rename_reads" value="200000"/> |
| <stat name="fp_rename_writes" value="100000"/> |
| <!-- decode and rename stage use this, should be total ic - nop --> |
| <!-- Inst window stats --> |
| <stat name="inst_window_reads" value="400000"/> |
| <stat name="inst_window_writes" value="400000"/> |
| <stat name="inst_window_wakeup_accesses" value="800000"/> |
| <stat name="fp_inst_window_reads" value="200000"/> |
| <stat name="fp_inst_window_writes" value="200000"/> |
| <stat name="fp_inst_window_wakeup_accesses" value="400000"/> |
| <!-- RF accesses --> |
| <stat name="int_regfile_reads" value="600000"/> |
| <stat name="float_regfile_reads" value="100000"/> |
| <stat name="int_regfile_writes" value="300000"/> |
| <stat name="float_regfile_writes" value="50000"/> |
| <!-- accesses to the working reg --> |
| <stat name="function_calls" value="5"/> |
| <stat name="context_switches" value="260343"/> |
| <!-- Number of Windowes switches (number of function calls and returns)--> |
| <!-- Alu stats by default, the processor has one FPU that includes the divider and |
| multiplier. The fpu accesses should include accesses to multiplier and divider --> |
| <stat name="ialu_accesses" value="300000"/> |
| <stat name="fpu_accesses" value="100000"/> |
| <stat name="mul_accesses" value="200000"/> |
| <stat name="cdb_alu_accesses" value="300000"/> |
| <stat name="cdb_mul_accesses" value="200000"/> |
| <stat name="cdb_fpu_accesses" value="100000"/> |
| <!-- multiple cycle accesses should be counted multiple times, |
| otherwise, McPAT can use internal counter for different floating point instructions |
| to get final accesses. But that needs detailed info for floating point inst mix --> |
| <!-- currently the performance simulator should |
| make sure all the numbers are final numbers, |
| including the explicit read/write accesses, |
| and the implicite accesses such as replacements and etc. |
| Future versions of McPAT may be able to reason the implicite access |
| based on param and stats of last level cache |
| The same rule applies to all cache access stats too! --> |
| <!-- following is AF for max power computation. |
| Do not change them, unless you understand them--> |
| <stat name="IFU_duty_cycle" value="0.9"/> |
| <stat name="BR_duty_cycle" value="0.72"/><!--branch--> |
| <stat name="LSU_duty_cycle" value="0.71"/> |
| <stat name="MemManU_I_duty_cycle" value="0.9"/> |
| <stat name="MemManU_D_duty_cycle" value="0.71"/> |
| <stat name="ALU_duty_cycle" value="0.76"/> |
| <!-- (.78*2+.71)/3 --> |
| <stat name="MUL_duty_cycle" value="0.82"/> |
| <stat name="FPU_duty_cycle" value="0.0"/> |
| <stat name="ALU_cdb_duty_cycle" value="0.76"/> |
| <stat name="MUL_cdb_duty_cycle" value="0.82"/> |
| <stat name="FPU_cdb_duty_cycle" value="0.0"/> |
| <param name="number_of_BPT" value="2"/> |
| <component id="system.core0.predictor" name="PBT"> |
| <!-- branch predictor; tournament predictor see Alpha implementation --> |
| <param name="local_predictor_size" value="10,3"/> |
| <param name="local_predictor_entries" value="4"/> |
| <param name="global_predictor_entries" value="4096"/> |
| <param name="global_predictor_bits" value="2"/> |
| <param name="chooser_predictor_entries" value="4096"/> |
| <param name="chooser_predictor_bits" value="2"/> |
| <!-- These parameters can be combined like below in next version |
| <param name="load_predictor" value="10,3,1024"/> |
| <param name="global_predictor" value="4096,2"/> |
| <param name="predictor_chooser" value="4096,2"/> |
| --> |
| </component> |
| <component id="system.core0.itlb" name="itlb"> |
| <param name="number_entries" value="64"/> |
| <stat name="total_accesses" value="200000"/> |
| <stat name="total_misses" value="4"/> |
| <stat name="conflicts" value="0"/> |
| <!-- there is no write requests to itlb although writes happen to itlb after miss, |
| which is actually a replacement --> |
| </component> |
| <component id="system.core0.icache" name="icache"> |
| <!-- there is no write requests to itlb although writes happen to it after miss, |
| which is actually a replacement --> |
| <param name="icache_config" value="32768,8,4,1,10,10,32,0"/> |
| <!-- the parameters are capacity,block_width, associativity, bank, throughput w.r.t. core clock, latency w.r.t. core clock,output_width, cache policy, --> |
| <!-- cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate --> |
| <param name="buffer_sizes" value="4, 4, 4,0"/> |
| <!-- cache controller buffer sizes: miss_buffer_size(MSHR),fill_buffer_size,prefetch_buffer_size,wb_buffer_size--> |
| <stat name="read_accesses" value="200000"/> |
| <stat name="read_misses" value="0"/> |
| <stat name="conflicts" value="0"/> |
| </component> |
| <component id="system.core0.dtlb" name="dtlb"> |
| <param name="number_entries" value="64"/><!--dual threads--> |
| <stat name="total_accesses" value="400000"/> |
| <stat name="total_misses" value="4"/> |
| <stat name="conflicts" value="0"/> |
| </component> |
| <component id="system.core0.dcache" name="dcache"> |
| <!-- all the buffer related are optional --> |
| <param name="dcache_config" value="32768,8,4,1, 10,10, 32,1 "/> |
| <param name="buffer_sizes" value="4, 4, 4, 4"/> |
| <!-- cache controller buffer sizes: miss_buffer_size(MSHR),fill_buffer_size,prefetch_buffer_size,wb_buffer_size--> |
| <stat name="read_accesses" value="800000"/> |
| <stat name="write_accesses" value="27276"/> |
| <stat name="read_misses" value="1632"/> |
| <stat name="write_misses" value="183"/> |
| <stat name="conflicts" value="0"/> |
| </component> |
| <param name="number_of_BTB" value="2"/> |
| <component id="system.core0.BTB" name="BTB"> |
| <!-- all the buffer related are optional --> |
| <param name="BTB_config" value="4096,4,2, 2, 1,1"/> |
| <!-- the parameters are capacity,block_width,associativity,bank, throughput w.r.t. core clock, latency w.r.t. core clock,--> |
| <stat name="read_accesses" value="400000"/> <!--See IFU code for guideline --> |
| <stat name="write_accesses" value="0"/> |
| </component> |
| </component> |
| <component id="system.L1Directory0" name="L1Directory0"> |
| <param name="Directory_type" value="0"/> |
| <!--0 cam based shadowed tag. 1 directory cache --> |
| <param name="Dir_config" value="2048,1,0,1, 4, 4, 8"/> |
| <!-- the parameters are capacity,block_width, associativity,bank, throughput w.r.t. core clock, latency w.r.t. core clock,--> |
| <param name="buffer_sizes" value="8, 8, 8, 8"/> |
| <!-- all the buffer related are optional --> |
| <param name="clockrate" value="2000"/> |
| <param name="ports" value="1,1,1"/> |
| <!-- number of r, w, and rw search ports --> |
| <param name="device_type" value="2"/> |
| <!-- altough there are multiple access types, |
| Performance simulator needs to cast them into reads or writes |
| e.g. the invalidates can be considered as writes --> |
| <stat name="read_accesses" value="800000"/> |
| <stat name="write_accesses" value="27276"/> |
| <stat name="read_misses" value="1632"/> |
| <stat name="write_misses" value="183"/> |
| <stat name="conflicts" value="20"/> |
| <stat name="duty_cycle" value="0.1"/> |
| </component> |
| <component id="system.L2Directory0" name="L2Directory0"> |
| <param name="Directory_type" value="1"/> |
| <!--0 cam based shadowed tag. 1 directory cache --> |
| <param name="Dir_config" value="1048576,16,16,1,2, 100"/> |
| <!-- the parameters are capacity,block_width, associativity,bank, throughput w.r.t. core clock, latency w.r.t. core clock,--> |
| <param name="buffer_sizes" value="8, 8, 8, 8"/> |
| <!-- all the buffer related are optional --> |
| <param name="clockrate" value="3400"/> |
| <param name="ports" value="1,1,1"/> |
| <!-- number of r, w, and rw search ports --> |
| <param name="device_type" value="0"/> |
| <!-- altough there are multiple access types, |
| Performance simulator needs to cast them into reads or writes |
| e.g. the invalidates can be considered as writes --> |
| <stat name="read_accesses" value="58824"/> |
| <stat name="write_accesses" value="27276"/> |
| <stat name="read_misses" value="1632"/> |
| <stat name="write_misses" value="183"/> |
| <stat name="conflicts" value="100"/> |
| <stat name="duty_cycle" value="0.1"/> |
| </component> |
| <component id="system.L20" name="L20"> |
| <!-- all the buffer related are optional --> |
| <param name="L2_config" value="1048576,32, 8, 8, 8, 23, 32, 1"/> |
| <!-- the parameters are capacity,block_width, associativity, bank, throughput w.r.t. core clock, latency w.r.t. core clock,output_width, cache policy --> |
| <param name="buffer_sizes" value="16, 16, 16, 16"/> |
| <!-- cache controller buffer sizes: miss_buffer_size(MSHR),fill_buffer_size,prefetch_buffer_size,wb_buffer_size--> |
| <param name="clockrate" value="3400"/> |
| <param name="ports" value="1,1,1"/> |
| <!-- number of r, w, and rw ports --> |
| <param name="device_type" value="0"/> |
| <stat name="read_accesses" value="200000"/> |
| <stat name="write_accesses" value="27276"/> |
| <stat name="read_misses" value="1632"/> |
| <stat name="write_misses" value="183"/> |
| <stat name="conflicts" value="0"/> |
| <stat name="duty_cycle" value="1.0"/> |
| </component> |
| |
| <!--**********************************************************************--> |
| <component id="system.L30" name="L30"> |
| <param name="L3_config" value="16777216,64,16, 16, 16, 100,1"/> |
| <!-- the parameters are capacity,block_width, associativity,bank, throughput w.r.t. core clock, latency w.r.t. core clock,--> |
| <param name="clockrate" value="800"/> |
| <param name="ports" value="1,1,1"/> |
| <!-- number of r, w, and rw ports --> |
| <param name="device_type" value="0"/> |
| <param name="buffer_sizes" value="16, 16, 16, 16"/> |
| <!-- cache controller buffer sizes: miss_buffer_size(MSHR),fill_buffer_size,prefetch_buffer_size,wb_buffer_size--> |
| <stat name="read_accesses" value="11824"/> |
| <stat name="write_accesses" value="11276"/> |
| <stat name="read_misses" value="1632"/> |
| <stat name="write_misses" value="183"/> |
| <stat name="conflicts" value="0"/> |
| <stat name="duty_cycle" value="1.0"/> |
| </component> |
| <!--**********************************************************************--> |
| <component id="system.NoC0" name="noc0"> |
| <param name="clockrate" value="2000"/> |
| <param name="type" value="0"/> |
| <!--0:bus, 1:NoC , for bus no matter how many nodes sharing the bus |
| at each time only one node can send req --> |
| <param name="horizontal_nodes" value="1"/> |
| <param name="vertical_nodes" value="1"/> |
| <param name="has_global_link" value="0"/> |
| <!-- 1 has global link, 0 does not have global link --> |
| <param name="link_throughput" value="1"/><!--w.r.t clock --> |
| <param name="link_latency" value="1"/><!--w.r.t clock --> |
| <!-- througput >= latency --> |
| <!-- Router architecture --> |
| <param name="input_ports" value="1"/> |
| <param name="output_ports" value="1"/> |
| <!-- For bus the I/O ports should be 1 --> |
| <param name="flit_bits" value="64"/> |
| <param name="chip_coverage" value="1"/> |
| <!-- When multiple NOC present, one NOC will cover part of the whole chip. |
| chip_coverage <=1 --> |
| <param name="link_routing_over_percentage" value="0.5"/> |
| <!-- Links can route over other components or occupy whole area. |
| by default, 50% of the NoC global links routes over other |
| components --> |
| <stat name="total_accesses" value="100000"/> |
| <!-- This is the number of total accesses within the whole network not for each router --> |
| <stat name="duty_cycle" value="0.2"/> |
| </component> |
| <!--**********************************************************************--> |
| <component id="system.mem" name="mem"> |
| <!-- Main memory property --> |
| <param name="mem_tech_node" value="32"/> |
| <param name="device_clock" value="200"/><!--MHz, this is clock rate of the actual memory device, not the FSB --> |
| <param name="peak_transfer_rate" value="6400"/><!--MB/S--> |
| <param name="internal_prefetch_of_DRAM_chip" value="4"/> |
| <!-- 2 for DDR, 4 for DDR2, 8 for DDR3...--> |
| <!-- the device clock, peak_transfer_rate, and the internal prefetch decide the DIMM property --> |
| <!-- above numbers can be easily found from Wikipedia --> |
| <param name="capacity_per_channel" value="4096"/> <!-- MB --> |
| <!-- capacity_per_Dram_chip=capacity_per_channel/number_of_dimms/number_ranks/Dram_chips_per_rank |
| Current McPAT assumes single DIMMs are used.--> |
| <param name="number_ranks" value="2"/> |
| <param name="num_banks_of_DRAM_chip" value="8"/> |
| <param name="Block_width_of_DRAM_chip" value="64"/> <!-- B --> |
| <param name="output_width_of_DRAM_chip" value="8"/> |
| <!--number of Dram_chips_per_rank=" 72/output_width_of_DRAM_chip--> |
| <!--number of Dram_chips_per_rank=" 72/output_width_of_DRAM_chip--> |
| <param name="page_size_of_DRAM_chip" value="8"/> <!-- 8 or 16 --> |
| <param name="burstlength_of_DRAM_chip" value="8"/> |
| <stat name="memory_accesses" value="1052"/> |
| <stat name="memory_reads" value="1052"/> |
| <stat name="memory_writes" value="1052"/> |
| </component> |
| <component id="system.mc" name="mc"> |
| <!-- Memeory controllers are for DDR(2,3...) DIMMs --> |
| <!-- current version of McPAT uses published values for base parameters of memory controller |
| improvments on MC will be added in later versions. --> |
| <param name="type" value="1"/> <!-- 1: low power; 0 high performance --> |
| <param name="mc_clock" value="400"/><!--MHz--> |
| <param name="peak_transfer_rate" value="6400"/><!--MB/S--> |
| <param name="block_size" value="64"/><!--(B) the block size of last level cache, which is the unit for one memory burst transfer --> |
| <param name="number_mcs" value="1"/> |
| <!-- current McPAT only supports homogeneous memory controllers --> |
| <param name="memory_channels_per_mc" value="1"/> |
| <param name="number_ranks" value="0"/> |
| <!-- # of ranks of each channel--> |
| <param name="req_window_size_per_channel" value="32"/> |
| <param name="IO_buffer_size_per_channel" value="32"/> |
| <param name="databus_width" value="128"/> |
| <param name="addressbus_width" value="51"/> |
| <!-- McPAT will add the control bus width to the addressbus width automatically --> |
| <stat name="memory_accesses" value="66666"/> |
| <stat name="memory_reads" value="33333"/> |
| <stat name="memory_writes" value="33333"/> |
| <param name="withPHY" value="1"/> |
| <!-- McPAT does not track individual mc, instead, it takes the total accesses and calculate |
| the average power per MC or per channel. This is sufficent for most application. |
| Further trackdown can be easily added in later versions. --> |
| </component> |
| <!--**********************************************************************--> |
| <component id="system.niu" name="niu"> |
| <!-- On chip 10Gb Ethernet NIC, including XAUI Phy and MAC controller --> |
| <!-- For a minimum IP packet size of 84B at 10Gb/s, a new packet arrives every 67.2ns. |
| the low bound of clock rate of a 10Gb MAC is 150Mhz --> |
| <param name="type" value="1"/> <!-- 1: low power; 0 high performance --> |
| <param name="clockrate" value="350"/> |
| <param name="number_units" value="1"/> <!-- unlike PCIe and memory controllers, each Ethernet controller only have one port --> |
| <stat name="duty_cycle" value="1.0"/> <!-- achievable max load <= 1.0 --> |
| <stat name="total_load_perc" value="0.7"/> <!-- ratio of total achived load to total achivable bandwidth --> |
| <!-- McPAT does not track individual nic, instead, it takes the total accesses and calculate |
| the average power per nic or per channel. This is sufficent for most application. --> |
| </component> |
| <!--**********************************************************************--> |
| <component id="system.pcie" name="pcie"> |
| <!-- On chip PCIe controller, including Phy--> |
| <!-- For a minimum PCIe packet size of 84B at 8Gb/s per lane (PCIe 3.0), a new packet arrives every 84ns. |
| the low bound of clock rate of a PCIe per lane logic is 120Mhz --> |
| <param name="type" value="1"/> <!-- 1: low power; 0 high performance --> |
| <param name="withPHY" value="1"/> |
| <param name="clockrate" value="350"/> |
| <param name="number_units" value="1"/> |
| <param name="num_channels" value="8"/> <!-- 2 ,4 ,8 ,16 ,32 --> |
| <stat name="duty_cycle" value="1.0"/> <!-- achievable max load <= 1.0 --> |
| <stat name="total_load_perc" value="0.7"/> <!-- Percentage of total achived load to total achivable bandwidth --> |
| <!-- McPAT does not track individual pcie controllers, instead, it takes the total accesses and calculate |
| the average power per pcie controller or per channel. This is sufficent for most application. --> |
| </component> |
| <!--**********************************************************************--> |
| <component id="system.flashc" name="flashc"> |
| <param name="number_flashcs" value="1"/> |
| <param name="type" value="1"/> <!-- 1: low power; 0 high performance --> |
| <param name="withPHY" value="1"/> |
| <param name="peak_transfer_rate" value="200"/><!--Per controller sustainable reak rate MB/S --> |
| <stat name="duty_cycle" value="1.0"/> <!-- achievable max load <= 1.0 --> |
| <stat name="total_load_perc" value="0.7"/> <!-- Percentage of total achived load to total achivable bandwidth --> |
| <!-- McPAT does not track individual flash controller, instead, it takes the total accesses and calculate |
| the average power per fc or per channel. This is sufficent for most application --> |
| </component> |
| <!--**********************************************************************--> |
| |
| </component> |
| </component> |