misc: Merge the v22.1 release staging into stable
diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
index b94899f..ebbbb1a 100644
--- a/.git-blame-ignore-revs
+++ b/.git-blame-ignore-revs
@@ -22,3 +22,7 @@
c3bd8eb1214cbebbc92c7958b80aa06913bce3ba
488ded0c8d9e43deef531ad174937982b41f8e4b
26e888965d08486aeed7ebb3ef934ceb1a38cd6f
+
+# A commit which ran Python Black on all Python files.
+# https://gem5-review.googlesource.com/c/public/gem5/+/47024
+787204c92d876dd81357b75aede52d8ef5e053d3
diff --git a/.gitignore b/.gitignore
index 90a6bb2..229a0d5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -31,3 +31,4 @@
configs/example/memcheck.cfg
configs/dram/lowp_sweep.cfg
.pyenv
+.vscode
diff --git a/.mailmap b/.mailmap
index 3cc7825..49c438d 100644
--- a/.mailmap
+++ b/.mailmap
@@ -1,37 +1,43 @@
-ARM gem5 Developers <none@none>
Abdul Mutaal Ahmad <abdul.mutaal@gmail.com>
+adarshpatil <adarshpatil123@gmail.com>
+Adrià Armejach <adria.armejach@bsc.es> Adrià Armejach <adria.armejach@gmail.com>
Adrian Herrera <adrian.herrera@arm.com>
Adrien Pesle <adrien.pesle@arm.com>
-Adrià Armejach <adria.armejach@bsc.es> Adrià Armejach <adria.armejach@gmail.com>
Akash Bagdia <akash.bagdia@ARM.com> Akash Bagdia <akash.bagdia@arm.com>
Alec Roelke <alec.roelke@gmail.com> Alec Roelke <ar4jc@virginia.edu>
+Alexander Klimov <Alexander.Klimov@arm.com>
Alexandru Dutu <alexandru.dutu@amd.com> Alexandru <alexandru.dutu@amd.com>
+Alex Richardson <alexrichardson@google.com>
Ali Jafri <ali.jafri@arm.com>
-Ali Saidi <Ali.Saidi@arm.com> Ali Saidi <Ali.Saidi@ARM.com>
Ali Saidi <Ali.Saidi@arm.com> Ali Saidi <ali.saidi@arm.com>
+Ali Saidi <Ali.Saidi@arm.com> Ali Saidi <Ali.Saidi@ARM.com>
Ali Saidi <Ali.Saidi@arm.com> Ali Saidi <saidi@eecs.umich.edu>
+Alistair Delva <adelva@google.com>
Amin Farmahini <aminfar@gmail.com>
Anders Handler <s052838@student.dtu.dk>
-Andrea Mondelli <andrea.mondelli@ucf.edu> Andrea Mondelli <Andrea.Mondelli@ucf.edu>
+Andrea Mondelli <andrea.mondelli@huawei.com> Andrea Mondelli <andrea.mondelli@ucf.edu>
+Andrea Mondelli <andrea.mondelli@huawei.com> Andrea Mondelli <Andrea.Mondelli@ucf.edu>
Andrea Pellegrini <andrea.pellegrini@gmail.com>
-Andreas Hansson <andreas.hanson@arm.com> Andreas Hansson <Andreas.Hansson@ARM.com>
Andreas Hansson <andreas.hanson@arm.com> Andreas Hansson <andreas.hansson>
Andreas Hansson <andreas.hanson@arm.com> Andreas Hansson <andreas.hansson@arm.com>
+Andreas Hansson <andreas.hanson@arm.com> Andreas Hansson <Andreas.Hansson@ARM.com>
Andreas Hansson <andreas.hanson@arm.com> Andreas Hansson <andreas.hansson@armm.com>
-Andreas Sandberg <Andreas.Sandberg@arm.com> Andreas Sandberg <Andreas.Sandberg@ARM.com>
Andreas Sandberg <Andreas.Sandberg@arm.com> Andreas Sandberg <andreas.sandberg@arm.com>
+Andreas Sandberg <Andreas.Sandberg@arm.com> Andreas Sandberg <Andreas.Sandberg@ARM.com>
Andreas Sandberg <Andreas.Sandberg@arm.com> Andreas Sandberg <andreas@sandberg.pp.se>
Andrew Bardsley <Andrew.Bardsley@arm.com> Andrew Bardsley <Andreas.Bardsley@arm.com>
Andrew Lukefahr <lukefahr@umich.edu>
Andrew Schultz <alschult@umich.edu>
Andriani Mappoura <andriani.mappoura@arm.com>
-Ani Udipi <ani.udipi@arm.com>
+Angie Lee <peiyinglee@google.com>
Anis Peysieux <anis.peysieux@inria.fr>
+Ani Udipi <ani.udipi@arm.com>
Anouk Van Laer <anouk.vanlaer@arm.com>
-Arthur Perais <arthur.perais@inria.fr>
+ARM gem5 Developers <none@none>
+Arthur Perais <Arthur.Perais@univ-grenoble-alpes.fr> Arthur Perais <arthur.perais@inria.fr>
+Arun Rodrigues <afrodri@gmail.com>
Ashkan Tousi <ashkan.tousimojarad@arm.com>
-Austin Harris <austinharris@utexas.edu>
-Richard D. Strong <r.d.strong@gmail.com>
+Austin Harris <austinharris@utexas.edu> Austin Harris <mail@austin-harris.com>
Avishai Tvila <avishai.tvila@gmail.com>
Ayaz Akram <yazakram@ucdavis.edu>
Bagus Hanindhito <hanindhito@bagus.my.id>
@@ -41,80 +47,108 @@
Bjoern A. Zeeb <baz21@cam.ac.uk>
Blake Hechtman <bah13@duke.edu> Blake Hechtman <blake.hechtman@amd.com>
Blake Hechtman <bah13@duke.edu> Blake Hechtman ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <bah13@duke.edu>
-Bobby R. Bruce <bbruce@ucdavis.edu>
+Bobby R. Bruce <bbruce@ucdavis.edu> Bobby Bruce <bbruce@amarillo.cs.ucdavis.edu>
Boris Shingarov <shingarov@gmail.com> Boris Shingarov <shingarov@labware.com>
Brad Beckmann <brad.beckmann@amd.com> Brad Beckmann <Brad.Beckmann@amd.com>
Brad Beckmann <brad.beckmann@amd.com> Brad Beckmann ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <Brad.Beckmann@amd.com>
Brad Danofsky <bradley.danofsky@amd.com>
Bradley Wang <radwang@ucdavis.edu> Bradley <animalvgamer@gmail.com>
+Brandon Potter <brandon.potter@amd.com> BKP <brandon.potter@amd.com>
Brandon Potter <brandon.potter@amd.com> bpotter <brandon.potter@amd.com>
Brandon Potter <brandon.potter@amd.com> Brandon Potter <Brandon.Potter@amd.com>
-Brandon Potter <brandon.potter@amd.com> BKP <brandon.potter@amd.com>
Brian Grayson <b.grayson@samsung.com>
Cagdas Dirik <cdirik@micron.com> cdirik <cdirik@micron.com>
+Carlos Falquez <c.falquez@fz-juelich.de>
Chander Sudanthi <chander.sudanthi@arm.com> Chander Sudanthi <Chander.Sudanthi@arm.com>
Chander Sudanthi <chander.sudanthi@arm.com> Chander Sudanthi <Chander.Sudanthi@ARM.com>
+Charles Jamieson <cjamieson2@wisc.edu>
+CHEN Meng <tundriolaxy@gmail.com>
Chen Zou <chenzou@uchicago.edu>
+Chia-You Chen <hortune@google.com>
+Chow, Marcus <marcus.chow@amd.com>
Chris Adeniyi-Jones <Chris.Adeniyi-Jones@arm.com>
-Chris Emmons <chris.emmons@arm.com> Chris Emmons <Chris.Emmons@ARM.com>
Chris Emmons <chris.emmons@arm.com> Chris Emmons <Chris.Emmons@arm.com>
+Chris Emmons <chris.emmons@arm.com> Chris Emmons <Chris.Emmons@ARM.com>
+Chris January <chris.january@arm.com>
Christian Menard <christian.menard@tu-dresden.de> Christian Menard <Christian.Menard@tu-dresden.de>
-Christoph Pfister <pfistchr@student.ethz.ch>
Christopher Torng <clt67@cornell.edu>
+Christoph Pfister <pfistchr@student.ethz.ch>
Chuan Zhu <chuan.zhu@arm.com>
Chun-Chen Hsu <chunchenhsu@google.com> Chun-Chen TK Hsu <chunchenhsu@google.com>
Ciro Santilli <ciro.santilli@arm.com>
Clint Smullen <cws3k@cs.virginia.edu>
+Cui Jin <cuijinbird@gmail.com> Cui Jin <cuijin7@huawei.com>
Curtis Dunham <Curtis.Dunham@arm.com>
+Daecheol You <daecheol.you@samsung.com>
Dam Sunwoo <dam.sunwoo@arm.com>
Dan Gibson <gibson@cs.wisc.edu>
Daniel Carvalho <odanrc@yahoo.com.br> Daniel <odanrc@yahoo.com.br>
Daniel Carvalho <odanrc@yahoo.com.br> Daniel R. Carvalho <odanrc@yahoo.com.br>
+Daniel Gerzhoy <daniel.gerzhoy@gmail.com>
Daniel Johnson <daniel.johnson@arm.com>
Daniel Sanchez <sanchezd@stanford.edu>
+Davide Basilio Bartolini <davide.basilio.bartolini@huawei.com>
David Guillen-Fandos <david.guillen@arm.com> David Guillen <david.guillen@arm.com>
David Guillen-Fandos <david.guillen@arm.com> David Guillen Fandos <david.guillen@arm.com>
David Hashe <david.hashe@amd.com> David Hashe <david.j.hashe@gmail.com>
David Oehmke <doehmke@umich.edu>
+David Schall <david.schall2@arm.com>
+Derek Christ <dchrist@rhrk.uni-kl.de>
Derek Hower <drh5@cs.wisc.edu>
-Deyaun Guo <guodeyuan@tsinghua.org.cn> Deyuan Guo <guodeyuan@tsinghua.org.cn>
Deyaun Guo <guodeyuan@tsinghua.org.cn> Deyuan Guo ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <guodeyuan@tsinghua.org.cn>
+Deyaun Guo <guodeyuan@tsinghua.org.cn> Deyuan Guo <guodeyuan@tsinghua.org.cn>
Dibakar Gope <gope@wisc.edu> Dibakar Gope ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <gope@wisc.edu>
+Dimitrios Chasapis <k4s4s.heavener@gmail.com>
Djordje Kovacevic <djordje.kovacevic@arm.com> Djordje Kovacevic <Djordje.Kovacevic@arm.com>
-Dongxue Zhang <elta.era@gmail.com>
Doğukan Korkmaztürk <d.korkmazturk@gmail.com>
+Dongxue Zhang <elta.era@gmail.com>
Dylan Johnson <Dylan.Johnson@ARM.com>
Earl Ou <shunhsingou@google.com>
+eavivi <eavivi@ucdavis.edu>
+Éder F. Zulian <zulian@eit.uni-kl.de>
Edmund Grimley Evans <Edmund.Grimley-Evans@arm.com>
+Eduardo José Gómez Hernández <eduardojose.gomez@um.es>
+Eliot Moss <moss@cs.umass.edu>
Emilio Castillo <castilloe@unican.es> Emilio Castillo <ecastill@bsc.es>
Emilio Castillo <castilloe@unican.es> Emilio Castillo ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <castilloe@unican.es>
+Emily Brickey <esbrickey@ucdavis.edu>
Erfan Azarkhish <erfan.azarkhish@unibo.it>
+Erhu <fengerhu.ipads@gmail.com>
Eric Van Hensbergen <eric.vanhensbergen@arm.com> Eric Van Hensbergen <Eric.VanHensbergen@ARM.com>
+Eric Ye <ericye@google.com>
Erik Hallnor <ehallnor@umich.edu>
Erik Tomusk <E.Tomusk@sms.ed.ac.uk>
Faissal Sleiman <Faissal.Sleiman@arm.com> Faissal Sleiman <sleimanf@umich.edu>
Fernando Endo <fernando.endo2@gmail.com>
+Franklin He <franklinh@google.com>
Gabe Black <gabe.black@gmail.com> Gabe Black <gabeblack@google.com>
Gabe Black <gabe.black@gmail.com> Gabe Black <gblack@eecs.umich.edu>
+Gabe Loh <gabriel.loh@amd.com> gloh <none@none>
Gabor Dozsa <gabor.dozsa@arm.com>
+Gabriel Busnot <gabriel.busnot@arteris.com>
+gauravjain14 <gjain6@wisc.edu>
Gedare Bloom <gedare@rtems.org> Gedare Bloom <gedare@gwmail.gwu.edu>
Gene Wu <gene.wu@arm.com> Gene WU <gene.wu@arm.com>
Gene WU <gene.wu@arm.com> Gene Wu <Gene.Wu@arm.com>
-Geoffrey Blake <geoffrey.blake@arm.com> Geoffrey Blake <Geoffrey.Blake@arm.com>
Geoffrey Blake <geoffrey.blake@arm.com> Geoffrey Blake <blakeg@umich.edu>
+Geoffrey Blake <geoffrey.blake@arm.com> Geoffrey Blake <Geoffrey.Blake@arm.com>
Georg Kotheimer <georg.kotheimer@mailbox.tu-dresden.de>
Giacomo Gabrielli <giacomo.gabrielli@arm.com> Giacomo Gabrielli <Giacomo.Gabrielli@arm.com>
Giacomo Travaglini <giacomo.travaglini@arm.com>
Glenn Bergmans <glenn.bergmans@arm.com>
+GWDx <gwdx@mail.ustc.edu.cn>
Hamid Reza Khaleghzadeh <khaleghzadeh@gmail.com> Hamid Reza Khaleghzadeh ext:(%2C%20Lluc%20Alvarez%20%3Clluc.alvarez%40bsc.es%3E%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <khaleghzadeh@gmail.com>
+handsomeliu <handsomeliu@google.com>
Hanhwi Jang <jang.hanhwi@gmail.com>
Hoa Nguyen <hoanguyen@ucdavis.edu>
Hongil Yoon <ongal@cs.wisc.edu>
Hsuan Hsu <hsuan.hsu@mediatek.com>
+huangjs <jiasen.hjs@alibaba-inc.com>
Hussein Elnawawy <hussein.elnawawy@gmail.com>
Ian Jiang <ianjiang.ict@gmail.com>
IanJiangICT <ianjiang.ict@gmail.com>
Ilias Vougioukas <Ilias.Vougioukas@ARM.com>
+Iru Cai <mytbk920423@gmail.com>
Isaac Richter <isaac.richter@rochester.edu>
Isaac Sánchez Barrera <isaac.sanchez@bsc.es>
Ivan Pizarro <ivan.pizarro@metempsy.com>
@@ -123,104 +157,152 @@
Jakub Jermar <jakub@jermar.eu>
James Clarkson <james.clarkson@arm.com>
Jan-Peter Larsson <jan-peter.larsson@arm.com>
-Jason Lowe-Power <jason@lowepower.com> Jason Lowe-Power <power.jg@gmail.com>
+Jan Vrany <jan.vrany@labware.com>
+Jarvis Jia <jia44@wisc.edu>
+Jasjeet Rangi <jasrangi@ucdavis.edu>
Jason Lowe-Power <jason@lowepower.com> Jason Lowe-Power <powerjg@cs.wisc.edu>
-Jason Lowe-Power <jason@lowepower.com> Jason Power <power.jg@gmail.com>
-Jason Lowe-Power <jason@lowepower.com> Jason Power <powerjg@cs.wisc.edu>
+Jason Lowe-Power <jason@lowepower.com> Jason Lowe-Power <power.jg@gmail.com>
Jason Lowe-Power <jason@lowepower.com> Jason Power ext:(%2C%20Joel%20Hestness%20%3Chestness%40cs.wisc.edu%3E) <power.jg@gmail.com>
+Jason Lowe-Power <jason@lowepower.com> Jason Power <powerjg@cs.wisc.edu>
+Jason Lowe-Power <jason@lowepower.com> Jason Power <power.jg@gmail.com>
+Jason Yu <yuzhijingcheng1996@hotmail.com>
Javier Bueno Hedo <javier.bueno@metempsy.com> Javier Bueno <javier.bueno@metempsy.com>
Javier Cano-Cano <javier.cano555@gmail.com>
+Javier Garcia Hernandez <avefenixavefenix@gmail.com>
Javier Setoain <javier.setoain@arm.com>
Jayneel Gandhi <jayneel@cs.wisc.edu>
Jennifer Treichler <jtreichl@umich.edu>
-Jieming Yin <jieming.yin@amd.com>
+Jerin Joy <joy@rivosinc.com>
+Jiajie Chen <c@jia.je>
+Jiasen Huang <jiasen.hjs@alibaba-inc.com>
+Jiasen <jiasen.hjs@alibaba-inc.com>
+Jiayi Huang <jyhuang91@gmail.com>
+jiegec <noc@jiegec.ac.cn>
+Jieming Yin <jieming.yin@amd.com> jiemingyin <bjm419@gmail.com>
Jing Qu <jqu32@wisc.edu> JingQuJQ <jqu32@wisc.edu>
Jiuyue Ma <majiuyue@ncic.ac.cn>
Joe Gross <joe.gross@amd.com> Joe Gross <joseph.gross@amd.com>
+Joel Hestness <jthestness@gmail.com> Joel Hestness ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <jthestness@gmail.com>
Joel Hestness <jthestness@gmail.com> Joel Hestness <hestness@cs.utexas.edu>
Joel Hestness <jthestness@gmail.com> Joel Hestness <hestness@cs.wisc.edu>
-Joel Hestness <jthestness@gmail.com> Joel Hestness ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <jthestness@gmail.com>
+Joël Porquet-Lupine <joel@porquet.org>
John Alsop <johnathan.alsop@amd.com>
John Kalamatianos <john.kalamatianos@amd.com> jkalamat <john.kalamatianos@amd.com>
Jordi Vaquero <jordi.vaquero@metempsy.com>
Jose Marinho <jose.marinho@arm.com>
+Juan M. Cebrian <jm.cebriangonzalez@gmail.com>
Jui-min Lee <fcrh@google.com>
+kai.ren <kai.ren@streamcomputing.com> Kai Ren <binarystar2006@outlook.com>
Kanishk Sugand <kanishk.sugand@arm.com>
Karthik Sangaiah <karthik.sangaiah@arm.com>
+Kaustav Goswami <kggoswami@ucdavis.edu>
+Kelly Nguyen <klynguyen@ucdavis.edu>
Ke Meng <mengke97@hotmail.com>
Kevin Brodsky <kevin.brodsky@arm.com>
Kevin Lim <ktlim@umich.edu>
+Kevin Loughlin <kevlough@umich.edu>
Khalique <khalique913@gmail.com>
Koan-Sin Tan <koansin.tan@gmail.com>
Korey Sewell <ksewell@umich.edu>
Krishnendra Nathella <Krishnendra.Nathella@arm.com> Krishnendra Nathella <krinat01@arm.com>
+ksco <numbksco@gmail.com>
+kunpai <kunpai@ucdavis.edu>
+Kyle Roarty <kyleroarty1716@gmail.com> Kyle Roarty <Kyle.Roarty@amd.com>
+Laura Hinman <llhinman@ucdavis.edu>
Lena Olson <leolson@google.com> Lena Olson <lena@cs.wisc,edu>
Lena Olson <leolson@google.com> Lena Olson <lena@cs.wisc.edu>
Lisa Hsu <Lisa.Hsu@amd.com> Lisa Hsu <hsul@eecs.umich.edu>
Lluc Alvarez <lluc.alvarez@bsc.es>
Lluís Vilanova <vilanova@ac.upc.edu> Lluis Vilanova <vilanova@ac.upc.edu>
+Lukas Steiner <lsteiner@rhrk.uni-kl.de>
+Luming Wang <wlm199558@126.com>
+m5test <m5test@zizzer>
Mahyar Samani <msamani@ucdavis.edu>
+Majid Jalili <majid0jalili@gmail.com>
Malek Musleh <malek.musleh@gmail.com> Nilay Vaish ext:(%2C%20Malek%20Musleh%20%3Cmalek.musleh%40gmail.com%3E) <nilay@cs.wisc.edu>
Marc Mari Barcelo <marc.maribarcelo@arm.com>
-Marc Orr <marc.orr@gmail.com> Marc Orr <morr@cs.wisc.edu>
Marco Balboni <Marco.Balboni@ARM.com>
Marco Elver <Marco.Elver@ARM.com> Marco Elver <marco.elver@ed.ac.uk>
+Marc Orr <marc.orr@gmail.com> Marc Orr <morr@cs.wisc.edu>
+Marjan Fariborz <mfariborz@ucdavis.edu> marjanfariborz <mfariborz@ucdavis.edu>
+Mark Hildebrand <mhildebrand@ucdavis.edu>
+Marton Erdos <marton.erdos@arm.com>
+Maryam Babaie <mbabaie@ucdavis.edu>
Matt DeVuyst <mdevuyst@gmail.com>
-Matt Evans <matt.evans@arm.com> Matt Evans <Matt.Evans@arm.com>
-Matt Horsnell <matt.horsnell@arm.com>Matt Horsnell <Matt.Horsnell@ARM.com>
-Matt Horsnell <matt.horsnell@arm.com> Matt Horsnell <Matt.Horsnell@arm.com>
-Matt Horsnell <matt.horsnell@arm.com> Matt Horsnell <matt.horsnell@ARM.com>
-Matt Poremba <matthew.poremba@amd.com> Matt Poremba <Matthew.Poremba@amd.com>
Matteo Andreozzi <matteo.andreozzi@arm.com> Matteo Andreozzi <Matteo.Andreozzi@arm.com>
Matteo M. Fusi <matteo.fusi@bsc.es>
+Matt Evans <matt.evans@arm.com> Matt Evans <Matt.Evans@arm.com>
Matthew Poremba <matthew.poremba@amd.com> Matthew Poremba <Matthew.Poremba@amd.com>
-Matt Sinclair <mattdsinclair@gmail.com> Matthew Sinclair <matthew.sinclair@amd.com>
Matthias Hille <matthiashille8@gmail.com>
Matthias Jung <jungma@eit.uni-kl.de>
+Matthias Jung <matthias.jung@iese.fraunhofer.de>
+Matt Horsnell <matt.horsnell@arm.com> Matt Horsnell <matt.horsnell@ARM.com>
+Matt Horsnell <matt.horsnell@arm.com> Matt Horsnell <Matt.Horsnell@arm.com>
+Matt Horsnell <matt.horsnell@arm.com>Matt Horsnell <Matt.Horsnell@ARM.com>
+Matt Poremba <matthew.poremba@amd.com> Matt Poremba <Matthew.Poremba@amd.com>
+Matt Sinclair <mattdsinclair@gmail.com> Matthew Sinclair <matthew.sinclair@amd.com>
+Matt Sinclair <mattdsinclair.wisc@gmail.com> Matt Sinclair <Matthew.Sinclair@amd.com>
Maurice Becker <madnaurice@googlemail.com>
Maxime Martinasso <maxime.cscs@gmail.com>
-Maximilian Stein <maximilian.stein@tu-dresden.de>
+Maximilian Stein <maximilian.stein@tu-dresden.de>Maximilian Stein <m@steiny.biz>
Maximilien Breughe <maximilien.breughe@elis.ugent.be> Maximilien Breughe <Maximilien.Breughe@elis.ugent.be>
+Melissa Jost <melissakjost@gmail.com>
Michael Adler <Michael.Adler@intel.com>
+Michael Boyer <Michael.Boyer@amd.com>
Michael LeBeane <michael.lebeane@amd.com> Michael LeBeane <Michael.Lebeane@amd.com>
Michael LeBeane <michael.lebeane@amd.com> mlebeane <michael.lebeane@amd.com>
Michael Levenhagen <mjleven@sandia.gov>
-Michiel Van Tol <michiel.vantol@arm.com> Michiel W. van Tol <Michiel.VanTol@arm.com>
Michiel Van Tol <michiel.vantol@arm.com> Michiel van Tol <Michiel.VanTol@arm.com>
+Michiel Van Tol <michiel.vantol@arm.com> Michiel W. van Tol <Michiel.VanTol@arm.com>
Miguel Serrano <mserrano@umich.edu>
+Mike Upton <michaelupton@gmail.com>
Miles Kaufmann <milesck@eecs.umich.edu>
-Min Kyu Jeong <minkyu.jeong@arm.com> Min Kyu Jeong <MinKyu.Jeong@arm.com>
Mingyuan <xiang_my@outlook.com>
-Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga <Mitch.Hayenga@ARM.com>
-Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga <Mitch.Hayenga@arm.com>
-Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga <mitch.hayenga+gem5@gmail.com>
-Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga ext:(%2C%20Amin%20Farmahini%20%3Caminfar%40gmail.com%3E) <mitch.hayenga+gem5@gmail.com>
+Min Kyu Jeong <minkyu.jeong@arm.com> Min Kyu Jeong <MinKyu.Jeong@arm.com>
Mitch Hayenga <mitch.hayenga@arm.com> Mitchell Hayenga <Mitchell.Hayenga@ARM.com>
+Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga ext:(%2C%20Amin%20Farmahini%20%3Caminfar%40gmail.com%3E) <mitch.hayenga+gem5@gmail.com>
+Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga <Mitch.Hayenga@arm.com>
+Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga <Mitch.Hayenga@ARM.com>
+Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga <mitch.hayenga+gem5@gmail.com>
Mohammad Alian <m.alian1369@gmail.com>
Monir Mozumder <monir.mozumder@amd.com>
Moyang Wang <mw828@cornell.edu>
Mrinmoy Ghosh <mrinmoy.ghosh@arm.com> Mrinmoy Ghosh <Mrinmoy.Ghosh@arm.com>
-Nathan Binkert <nate@binkert.org> Nathan Binkert <binkertn@umich.edu>
+Muhammad Sarmad Saeed <mssaeed@ucdavis.edu>
+Nadia Etemadi <netemadi@ucdavis.edu>
Nathanael Premillieu <nathanael.premillieu@arm.com> Nathanael Premillieu <Nathanael.Premillieu@arm.com>
+Nathanael Premillieu <nathanael.premillieu@arm.com> Nathanael Premillieu <nathanael.premillieu@huawei.com>
Nathanael Premillieu <nathanael.premillieu@arm.com> Nathanael Premillieu <nathanael.premillieu@irisa.fr>
Nathanael Premillieu <nathanael.premillieu@arm.com> Nathanael Premillieu <nathananel.premillieu@arm.com>
Nathanael Premillieu <nathanael.premillieu@arm.com> Nathanael Premillieu <npremill@irisa.fr>
+Nathan Binkert <nate@binkert.org> Nathan Binkert <binkertn@umich.edu>
Nayan Deshmukh <nayan26deshmukh@gmail.com>
Neha Agarwal <neha.agarwal@arm.com>
+Neil Natekar <nanatekar@ucdavis.edu>
Nicholas Lindsay <nicholas.lindsay@arm.com>
+Nicolas Boichat <drinkcat@google.com>
Nicolas Derumigny <nderumigny@gmail.com>
Nicolas Zea <nicolas.zea@gmail.com>
Nikos Nikoleris <nikos.nikoleris@arm.com> Nikos Nikoleris <nikos.nikoleris@gmail.com>
+Nilay Vaish ext:(%2C%20Timothy%20Jones%20%3Ctimothy.jones%40cl.cam.ac.uk%3E) <nilay@cs.wisc.edu>
Nils Asmussen <nils.asmussen@barkhauseninstitut.org> Nils Asmussen <nilsasmussen7@gmail.com>
+Noah Katz <nkatz@rivosinc.com>
+ntampouratzis <ntampouratzis@isc.tuc.gr>
Nuwan Jayasena <Nuwan.Jayasena@amd.com>
Ola Jeppsson <ola.jeppsson@gmail.com>
Omar Naji <Omar.Naji@arm.com>
+Onur Kayiran <onur.kayiran@amd.com>
Pablo Prieto <pablo.prieto@unican.es>
+paikunal <kunpai@ucdavis.edu>
Palle Lyckegaard <palle@lyckegaard.dk>
Pau Cabre <pau.cabre@metempsy.com>
Paul Rosenfeld <prosenfeld@micron.com> Paul Rosenfeld <dramninjas@gmail.com>
Paul Rosenfeld <prosenfeld@micron.com> Paul Rosenfeld <prosenfeld@micon.com>
Peter Enns <Peter.Enns@arm.com> Pierre-Yves Péneau <pierre-yves.peneau@lirmm.fr>
+Peter <petery.hin@huawei.com>
+Peter Yuen <ppeetteerrsx@gmail.com>
+Philip Metzler <cpmetz@google.com>
+Pierre Ayoub <pierre.ayoub.pro@tutanota.com>
Pin-Yen Lin <treapking@google.com>
Po-Hao Su <supohaosu@gmail.com>
Polina Dudnik <pdudnik@cs.wisc.edu> Polina Dudnik <pdudnik@gmail.com>
@@ -229,23 +311,26 @@
Prakash Ramrakhyani <prakash.ramrakhyani@arm.com> Prakash Ramrakhani <Prakash.Ramrakhani@arm.com>
Prakash Ramrakhyani <prakash.ramrakhyani@arm.com> Prakash Ramrakhyani <Prakash.Ramrakhyani@arm.com>
Pritha Ghoshal <pritha9987@tamu.edu>
+Quentin Forcioli <quentin.forcioli@telecom-paris.fr>
Radhika Jagtap <radhika.jagtap@arm.com> Radhika Jagtap <radhika.jagtap@ARM.com>
Rahul Thakur <rjthakur@google.com>
Reiley Jeapaul <Reiley.Jeyapaul@arm.com>
-Rekai Gonzalez-Alberquilla <rekai.gonzalezalberquilla@arm.com> Rekai <Rekai.GonzalezAlberquilla@arm.com>
-Rekai Gonzalez-Alberquilla <rekai.gonzalezalberquilla@arm.com> Rekai Gonzalez Alberquilla <Rekai.GonzalezAlberquilla@arm.com>
Rekai Gonzalez-Alberquilla <rekai.gonzalezalberquilla@arm.com> Rekai Gonzalez Alberquilla <rekai.gonzalezalberquilla@arm.com>
+Rekai Gonzalez-Alberquilla <rekai.gonzalezalberquilla@arm.com> Rekai Gonzalez Alberquilla <Rekai.GonzalezAlberquilla@arm.com>
Rekai Gonzalez-Alberquilla <rekai.gonzalezalberquilla@arm.com> Rekai Gonzalez-Alberquilla <Rekai.GonzalezAlberquilla@arm.com>
+Rekai Gonzalez-Alberquilla <rekai.gonzalezalberquilla@arm.com> Rekai <Rekai.GonzalezAlberquilla@arm.com>
Rene de Jong <rene.dejong@arm.com>
Ricardo Alves <ricardo.alves@arm.com>
+Richard Cooper <richard.cooper@arm.com>
+Richard D. Strong <r.d.strong@gmail.com>
Richard Strong <rstrong@hp.com> Richard Strong <r.d.strong@gmail.com>
Richard Strong <rstrong@hp.com> Richard Strong <rstrong@cs.ucsd.edu>
Richard Strong <rstrong@hp.com> Rick Strong <rstrong@cs.ucsd.edu>
Rico Amslinger <rico.amslinger@informatik.uni-augsburg.de>
Riken Gohil <Riken.Gohil@arm.com>
Rizwana Begum <rb639@drexel.edu>
-Robert Scheffel <robert.scheffel1@tu-dresden.de> Robert <robert.scheffel1@tu-dresden.de>
Robert Kovacsics <rmk35@cl.cam.ac.uk>
+Robert Scheffel <robert.scheffel1@tu-dresden.de> Robert <robert.scheffel1@tu-dresden.de>
Rohit Kurup <rohit.kurup@arm.com>
Ron Dreslinski <rdreslin@umich.edu> Ronald Dreslinski <rdreslin@umich.edu>
Ruben Ayrapetyan <ruben.ayrapetyan@arm.com>
@@ -253,20 +338,27 @@
Ruslan Bukin <br@bsdpad.com> Ruslan Bukin ext:(%2C%20Zhang%20Guoye) <br@bsdpad.com>
Rutuja Oza <roza@ucdavis.edu>
Ryan Gambord <gambordr@oregonstate.edu>
+sacak32 <byrakocalan99@gmail.com>
+Sampad Mohapatra <sampad.mohapatra@gmail.com>
Samuel Grayson <sam@samgrayson.me>
-Sandipan Das <sandipan@linux.ibm.com>
+Samuel Stark <samuel.stark2@arm.com>
+Sandipan Das <31861871+sandip4n@users.noreply.github.com>
+Sandipan Das <sandipan@linux.ibm.com> Sandipan Das <31861871+sandip4n@users.noreply.github.com>
Santi Galan <santi.galan@metempsy.com>
-Sascha Bischoff <sascha.bischoff@arm.com> Sascha Bischoff <Sascha.Bischoff@ARM.com>
Sascha Bischoff <sascha.bischoff@arm.com> Sascha Bischoff <sascha.bischoff@ARM.com>
+Sascha Bischoff <sascha.bischoff@arm.com> Sascha Bischoff <Sascha.Bischoff@ARM.com>
Sean McGoogan <Sean.McGoogan@arm.com>
Sean Wilson <spwilson2@wisc.edu>
Sergei Trofimov <sergei.trofimov@arm.com>
Severin Wischmann <wiseveri@student.ethz.ch> Severin Wischmann ext:(%2C%20Ioannis%20Ilkos%20%3Cioannis.ilkos09%40imperial.ac.uk%3E) <wiseveri@student.ethz.ch>
Shawn Rosti <shawn.rosti@gmail.com>
Sherif Elhabbal <elhabbalsherif@gmail.com>
+Shivani Parekh <shparekh@ucdavis.edu>
+Shivani <shparekh@ucdavis.edu>
Siddhesh Poyarekar <siddhesh.poyarekar@gmail.com>
Somayeh Sardashti <somayeh@cs.wisc.edu>
Sooraj Puthoor <puthoorsooraj@gmail.com>
+Sooraj Puthoor <Sooraj.Puthoor@amd.com>
Sophiane Senni <sophiane.senni@gmail.com>
Soumyaroop Roy <sroy@cse.usf.edu>
Srikant Bharadwaj <srikant.bharadwaj@amd.com>
@@ -275,13 +367,14 @@
Stephan Diestelhorst <stephan.diestelhorst@arm.com> Stephan Diestelhorst <stephan.diestelhorst@ARM.com>
Stephen Hines <hines@cs.fsu.edu>
Steve Raasch <sraasch@umich.edu>
-Steve Reinhardt <stever@gmail.com> Steve Reinhardt <Steve.Reinhardt@amd.com>
-Steve Reinhardt <stever@gmail.com> Steve Reinhardt <steve.reinhardt@amd.com>
-Steve Reinhardt <stever@gmail.com> Steve Reinhardt <stever@eecs.umich.edu>
Steve Reinhardt <stever@gmail.com> Steve Reinhardt ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E%2C%20Ali%20Saidi%20%3CAli.Saidi%40ARM.com%3E) <stever@gmail.com>
+Steve Reinhardt <stever@gmail.com> Steve Reinhardt <stever@eecs.umich.edu>
+Steve Reinhardt <stever@gmail.com> Steve Reinhardt <steve.reinhardt@amd.com>
+Steve Reinhardt <stever@gmail.com> Steve Reinhardt <Steve.Reinhardt@amd.com>
Stian Hvatum <stian@dream-web.no>
Sudhanshu Jha <sudhanshu.jha@arm.com>
Sujay Phadke <electronicsguy123@gmail.com>
+Sungkeun Kim <ksungkeun84@tamu.edu>
Swapnil Haria <swapnilster@gmail.com> Swapnil Haria <swapnilh@cs.wisc.edu>
Taeho Kgil <tkgil@umich.edu>
Tao Zhang <tao.zhang.0924@gmail.com>
@@ -290,45 +383,50 @@
Tim Harris <tharris@microsoft.com>
Timothy Hayes <timothy.hayes@arm.com>
Timothy M. Jones <timothy.jones@arm.com> Timothy Jones <timothy.jones@cl.cam.ac.uk>
-Timothy M. Jones <timothy.jones@arm.com> Nilay Vaish ext:(%2C%20Timothy%20Jones%20%3Ctimothy.jones%40cl.cam.ac.uk%3E) <nilay@cs.wisc.edu>
Timothy M. Jones <timothy.jones@arm.com> Timothy M. Jones <timothy.jones@cl.cam.ac.uk>
Timothy M. Jones <timothy.jones@arm.com> Timothy M. Jones <tjones1@inf.ed.ac.uk>
Tom Jablin <tjablin@gmail.com>
Tommaso Marinelli <tommarin@ucm.es>
+Tom Rollet <tom.rollet@huawei.com>
+Tong Shen <endlessroad@google.com>
Tony Gutierrez <anthony.gutierrez@amd.com> Anthony Gutierrez <atgutier@umich.edu>
-Tuan Ta <qtt2@cornell.edu> Tuan Ta <taquangtuan1992@gmail.com>
-Tushar Krishna <tushar@ece.gatech.edu> Tushar Krishna <Tushar.Krishna@amd.com>
+Travis Boraten <travis.boraten@amd.com>
+Trivikram Reddy <tvreddy@ucdavis.edu> tv-reddy <tvreddy@ucdavis.edu>
+Tuan Ta <qtt2@cornell.edu> Tuan Ta <taquangtuan1992@gmail.com> Tuan Ta <tuan.ta@amd.com>
Tushar Krishna <tushar@ece.gatech.edu> Tushar Krishna <tushar@csail.mit.edu>
+Tushar Krishna <tushar@ece.gatech.edu> Tushar Krishna <Tushar.Krishna@amd.com>
Umesh Bhaskar <umesh.b2006@gmail.com>
Uri Wiener <uri.wiener@arm.com>
Victor Garcia <victor.garcia@arm.com>
Vilas Sridharan <vilas.sridharan@gmail.com>
-Vince Weaver <vince@csl.cornell.edu>
Vincentius Robby <acolyte@umich.edu>
+Vince Weaver <vince@csl.cornell.edu>
+vramadas95 <vramadas@wisc.edu>
+vsoria <victor.soria@bsc.es>
Wade Walker <wade.walker@arm.com>
+Wei-Han Chen <weihanchen@google.com>
Weiping Liao <weipingliao@google.com>
+Wende Tan <twd2@163.com>
Wendy Elsasser <wendy.elsasser@arm.com>
-William Wang <william.wang@arm.com> William Wang <William.Wang@ARM.com>
William Wang <william.wang@arm.com> William Wang <William.Wang@arm.com>
+William Wang <william.wang@arm.com> William Wang <William.Wang@ARM.com>
Willy Wolff <willy.mh.wolff.ml@gmail.com>
+Wing Li <wingers@google.com>
Xiangyu Dong <rioshering@gmail.com>
-Xianwei Zhang <xianwei.zhang@amd.com>
+Xianwei Zhang <xianwei.zhang.@amd.com> Xianwei Zhang <xianwei.zhang@amd.com>
Xiaoyu Ma <xiaoyuma@google.com>
Xin Ouyang <xin.ouyang@streamcomputing.com>
+Xiongfei <xiongfei.liao@gmail.com>
Yasuko Eckert <yasuko.eckert@amd.com>
-Yi Xiang <yix@colostate.edu>
+Yen-lin Lai <yenlinlai@google.com>
Yifei Liu <liu.ad2039@gmail.com>
-Yu-hsin Wang <yuhsingw@google.com>
+yiwkd2 <yiwkd2@gmail.com>
+Yi Xiang <yix@colostate.edu>
Yuan Yao <yuanyao@seas.harvard.edu>
Yuetsu Kodama <yuetsu.kodama@riken.jp> yuetsu.kodama <yuetsu.kodama@riken.jp>
+Yu-hsin Wang <yuhsingw@google.com>
Zhang Zheng <perise@gmail.com>
+Zhantong Qiu <ztqiu@ucdavis.edu>
+Zhengrong Wang <seanzw@ucla.edu> seanzw <seanyukigeek@gmail.com>
+zhongchengyong <zhongcy93@gmail.com>
Zicong Wang <wangzicong@nudt.edu.cn>
-Éder F. Zulian <zulian@eit.uni-kl.de>
-Gabe Loh <gabriel.loh@amd.com> gloh <none@none>
-jiegec <noc@jiegec.ac.cn>
-m5test <m5test@zizzer>
-Marjan Fariborz <mfariborz@ucdavis.edu> marjanfariborz <mfariborz@ucdavis.edu>
-Mike Upton <michaelupton@gmail.com>
-seanzw <seanyukigeek@gmail.com>
-Trivikram Reddy <tvreddy@ucdavis.edu> tv-reddy <tvreddy@ucdavis.edu>
-
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..8cbc6af
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,89 @@
+# Copyright (c) 2022 Arm Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder. You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+minimum_pre_commit_version: "2.18"
+
+default_language_version:
+ python: python3
+
+exclude: |
+ (?x)^(
+ ext/.*|
+ build/.*|
+ src/systemc/ext/.*|
+ src/systemc/tests/.*/.*|
+ src/python/m5/ext/pyfdt/.*|
+ tests/.*/ref/.*
+ )$
+
+default_stages: [commit]
+
+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v4.3.0
+ hooks:
+ - id: trailing-whitespace
+ - id: end-of-file-fixer
+ - id: check-json
+ - id: check-yaml
+ - id: check-added-large-files
+ - id: mixed-line-ending
+ args: [--fix=lf]
+ - id: check-case-conflict
+- repo: https://github.com/psf/black
+ rev: 22.6.0
+ hooks:
+ - id: black
+- repo: local
+ hooks:
+ - id: gem5-style-checker
+ name: gem5 style checker
+ entry: util/git-pre-commit.py
+ always_run: true
+ exclude: ".*"
+ language: system
+ description: 'The gem5 style checker hook.'
+ - id: gem5-commit-msg-checker
+ name: gem5 commit msg checker
+ entry: ext/git-commit-msg
+ language: system
+ stages: [commit-msg]
+ description: 'The gem5 commit message checker hook.'
+ - id: gerrit-commit-msg-job
+ name: gerrit commit message job
+ entry: util/gerrit-commit-msg-hook
+ language: system
+ stages: [commit-msg]
+ description: 'Adds Change-ID to the commit message. Needed by Gerrit.'
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 189b63f..ae771d3 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -136,6 +136,37 @@
[git book]: https://git-scm.com/book/en/v2/Git-Branching-Rebasing
+
+Setting up pre-commit
+---------------------
+
+To help ensure the gem5 style guide is maintained, we use [pre-commit](
+https://pre-commit.com) to run checks on changes to be contributed.
+
+To set up pre-commit, run the following in your gem5 directory to install the
+pre-commit and commit message hooks.
+
+```sh
+pip install pre-commit
+pre-commit install -t pre-commit -t commit-msg
+```
+
+The hooks are also automatically installed when gem5 is compiled.
+
+When you run a `git commit` command, the pre-commit hook will run checks on the
+code being committed. The commit will be blocked if a check fails.
+
+The same checks are run as part of Gerrit's CI tests (those required to obtain
+a Verified label, necessary for a change to be accepted to the develop branch).
+Therefore, setting up pre-commit in your local gem5 development environment is
+recommended.
+
+You can automatically format files to pass the pre-commit tests by running:
+
+```sh
+pre-commit run --files <files to format>
+```
+
Requirements for change descriptions
------------------------------------
To help reviewers and future contributors more easily understand and track
diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index 2353a96..931be69 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -1,3 +1,121 @@
+# Version 22.1.0.0
+
+This release has 500 contributions from 48 unique contributors and marks our second major release of 2022.
+This release incorporates several new features, improvements, and bug fixes for the computer architecture research community.
+
+See below for more details!
+
+## New features and improvements
+
+- The gem5 binary can now be compiled to include multiple ISA targets.
+A compilation of gem5 which includes all gem5 ISAs can be created using `scons build/ALL/gem5.opt`.
+This uses the Ruby `MESI_Two_Level` cache coherence protocol by default; to use another protocol, run `scons build/ALL/gem5.opt PROTOCOL=<other protocol>`.
+The classic cache system may continue to be used regardless of which Ruby cache coherence protocol is compiled.
+- The `m5` Python module now includes functions to set exit events at particular simulation ticks (see the sketch below):
+ - *setMaxTick(tick)* : Used to specify the maximum simulation tick.
+ - *getMaxTick()* : Used to obtain the maximum simulation tick value.
+ - *getTicksUntilMax()* : Used to get the number of ticks remaining until the maximum tick is reached.
+ - *scheduleTickExitFromCurrent(tick)* : Used to schedule an exit event a specified number of ticks in the future.
+ - *scheduleTickExitAbsolute(tick)* : Used to schedule an exit event at a specified tick.
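+
+A minimal sketch of how these functions might be used from a configuration script is shown below; it assumes the functions are exposed directly on the `m5` module, and the tick values are purely illustrative.
+
+```python
+# Sketch only: system/Root construction is elided, and the function names
+# are taken from the list above.
+import m5
+
+# ... configure a system and create a Root object here ...
+
+m5.instantiate()
+
+# Never simulate past tick 1,000,000,000 (illustrative value).
+m5.setMaxTick(1_000_000_000)
+
+# Additionally request an exit event 500,000 ticks from the current tick.
+m5.scheduleTickExitFromCurrent(500_000)
+
+event = m5.simulate()
+print(f"Exited at tick {m5.curTick()}: {event.getCause()}")
+print(f"Ticks remaining until the maximum tick: {m5.getTicksUntilMax()}")
+```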
+- We now include the `RiscvMatched` board as part of the gem5 stdlib.
+This board is modeled after the [HiFive Unmatched board](https://www.sifive.com/boards/hifive-unmatched) and may be used to emulate its behavior.
+See "configs/example/gem5_library/riscv-matched-fs.py" and "configs/example/gem5_library/riscv-matched-hello.py" for examples using this board.
+- An API for [SimPoints](https://doi.org/10.1145/885651.781076) has been added.
+SimPoints can substantially improve gem5 simulation time by simulating only representative parts of a simulation and then extrapolating statistical data accordingly.
+Examples of using SimPoints with gem5 can be found in "configs/example/gem5_library/checkpoints/simpoints-se-checkpoint.py" and "configs/example/gem5_library/checkpoints/simpoints-se-restore.py".
+- "Workloads" have been introduced to gem5.
+Workloads have been incorporated into the gem5 Standard library.
+They can be used to specify the software to be run on a simulated system, complete with input parameters and any other dependencies necessary to run a simulation on the target hardware.
+At the level of the gem5 configuration script, a user may specify a workload via a board's `set_workload` function.
+For example, `set_workload(Workload("x86-ubuntu-18.04-boot"))` sets the board to use the "x86-ubuntu-18.04-boot" workload.
+This workload boots the Linux 5.4.49 kernel with an Ubuntu 18.04 disk image and exits once the boot completes.
+Workloads are agnostic to the underlying gem5 design and, via the gem5-resources infrastructure, will automatically retrieve the kernels, disk images, etc. necessary to execute.
+Examples of using gem5 Workloads can be found in "configs/example/gem5_library/x86-ubuntu-ruby.py" and "configs/example/gem5_library/riscv-ubuntu-run.py"; a minimal sketch is also shown below.
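+
+The sketch below selects a prebuilt workload for a board; the `Workload` import path is assumed and the board construction is elided.
+
+```python
+# Sketch only: building a suitable board (processor, memory, cache
+# hierarchy) is elided; the import path below is an assumption.
+from gem5.resources.workload import Workload
+
+# ... construct an X86 board as in the example configurations above ...
+
+# Select the prebuilt workload; the kernel and disk image it requires are
+# fetched automatically via the gem5-resources infrastructure.
+board.set_workload(Workload("x86-ubuntu-18.04-boot"))
+```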
+- To aid gem5 developers, we have incorporated [pre-commit](https://pre-commit.com) checks into gem5.
+These checks automatically enforce the gem5 style guide on Python files and a subset of other requirements (such as line length) on altered code prior to a `git commit`.
+Users may install pre-commit by running `./util/pre-commit-install.sh`.
+Passing these checks is a requirement for submitting code to gem5, so installation is strongly advised.
+- A multiprocessing module has been added.
+This allows for multiple simulations to be run from a single gem5 execution via a single gem5 configuration script.
+An example of its usage can be found [in this commit message](https://gem5-review.googlesource.com/c/public/gem5/+/63432).
+**Note: This feature is still in development.
+While functional, it will be subject to substantial changes in future releases of gem5.**
+- The stdlib's `ArmBoard` now supports Ruby caches.
+- Due to numerous fixes and improvements, Ubuntu 22.04 can be booted as a gem5 workload in both FS and SE mode.
+- Substantial improvements have been made to gem5's GDB capabilities.
+- The `HBM2Stack` has been added to the gem5 stdlib as a memory component.
+- The `MinorCPU` has been fully incorporated into the gem5 Standard Library.
+- We now allow for full-system simulation of GPU applications.
+The introduction of GPU FS mode allows for the same use-cases as SE mode while reducing the need for specific host environments or a Docker container.
+GPU FS mode also improves simulation speed by functionally simulating memory copies, and provides an easier update path for gem5 developers.
+An X86 host and KVM are required to run GPU FS mode.
+
+## API (user facing) changes
+
+- The default CPU Vendor String has been updated to `HygonGenuine`.
+This is because newer versions of GLIBC are stricter about checking the current system's supported features.
+The previous value, `M5 Simulator`, is not recognized as a valid vendor string, and GLIBC therefore returns an error.
+- [The stdlib's `_connect_things` function call has been moved from the `AbstractBoard`'s constructor to run as part of the board pre-instantiation process](https://gem5-review.googlesource.com/c/public/gem5/+/65051).
+This is to overcome instances where stdlib components (memory, processor, and cache hierarchy) require board information known only after its construction.
+**This change breaks cases where a user utilizes the stdlib `AbstractBoard` but does not use the stdlib `Simulator` module. This can be fixed by calling the board's `_pre_instantiate` function before `m5.instantiate` (see the sketch below).**
+An exception which explains this fix has been added for when this error occurs.
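+
+A minimal sketch of this workaround, for scripts that call `m5.instantiate` directly rather than using the stdlib `Simulator` (the call placement follows the note above):
+
+```python
+# Sketch only: board construction, workload setup, and Root creation are
+# elided; applies when an AbstractBoard is used without the Simulator.
+import m5
+
+# ... build the board, set its workload, and create a Root object ...
+
+board._pre_instantiate()  # connect components (previously done in the constructor)
+m5.instantiate()
+m5.simulate()
+```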
+- The setting of checkpoints has been moved from the stdlib's "set_workload" functions to the `Simulator` module.
+Setting of checkpoints via the stdlib's "set_workload" functions is now deprecated and will be removed in future releases of gem5.
+- The gem5 namespace `Trace` has been renamed `trace` to conform to the gem5 style guide.
+- Because a single gem5 build can now include multiple ISAs, the `TARGET_ISA` variable has been replaced with per-ISA `USE_<ISA>_ISA` variables.
+For example, if a build contains both the X86 and ARM ISAs, the `USE_X86_ISA` and `USE_ARM_ISA` variables will be set.
+
+## Bug Fixes
+
+- Several compounding bugs were causing errors in floating-point operations within gem5 simulations.
+These have been fixed.
+- Certain emulated syscalls were behaving incorrectly when using RISC-V due to incorrect `open(2)` flag values.
+These values have been fixed.
+- The GICv3 List register mapping has been fixed.
+- Access permissions for GICv3 cpu registers have been fixed.
+- In previous releases of gem5, the `sim_quantum` value was set for all cores when using the Standard Library.
+This caused issues when setting exit events at a particular tick, as it resulted in the exit being off by `sim_quantum`.
+As such, the `sim_quantum` value is now only set when using KVM cores.
+- The PCI ranges in `VExpress_GEM5_Foundation` have been fixed.
+- The `SwitchableProcessor` has been fixed to allow switching to a KVM core.
+Previously the `SwitchableProcessor` only allowed a user to switch from a KVM core to a non-KVM core.
+- The Standard Library has been fixed to permit multicore simulations in SE mode.
+- [A bug was fixed in the rcr X86 instruction](https://gem5.atlassian.net/browse/GEM5-1265).
+
+## Build related changes
+
+- gem5 can now be compiled with the SCons 4 build system.
+- gem5 can now be compiled with Clang version 14 (minimum Clang version 6).
+- gem5 can now be compiled with GCC version 12 (minimum GCC version 7).
+
+
+## Other minor updates
+
+- The gem5 stdlib examples in "configs/example/gem5_library" have been updated to use the stdlib's Simulator module where appropriate.
+These example configurations can be used as a reference for how the `Simulator` module may be used in gem5.
+- Granulated SGPR computation has been added for gfx9 gpu-compute.
+- The stdlib statistics have been improved:
+ - A `get_simstats` function has been added to access statistics from the `Simulator` module.
+ - Statistics can be printed: `print(simstats.board.core.some_integer)`.
+- GDB ports are now specified for each workload, as opposed to per-simulation run.
+- The `m5` utility has been expanded to include "workbegin" and "workend" annotations.
+These can be added with `m5 workbegin` and `m5 workend`.
+- A `PrivateL1SharedL2CacheHierarchy` has been added to the Standard Library.
+- A `GEM5_USE_PROXY` environment variable has been added.
+This allows users to specify a SOCKS5 proxy server to use when obtaining gem5 resources and the resources.json file.
+It uses the format `<host>:<port>`.
+- The fastmodel support has been improved to function with Linux Kernel 5.x.
+- The `set_se_binary_workload` function now allows for the passing of input parameters to a binary workload (see the sketch below).
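+
+Below is a short sketch of passing input parameters to an SE-mode binary; the `arguments` keyword and the resource name are assumptions for illustration.
+
+```python
+# Sketch only: the `arguments` parameter name and the "x86-print-this"
+# resource are illustrative assumptions; board setup is elided.
+from gem5.resources.resource import Resource
+
+board.set_se_binary_workload(
+    binary=Resource("x86-print-this"),
+    arguments=["hello from gem5", 10],  # input parameters for the binary
+)
+```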
+- A functional CHI cache hierarchy has been added to the gem5 Standard Library: "src/python/gem5/components/cachehierarchies/chi/private_l1_cache_hierarchy.py".
+- The RISC-V K extension has been added.
+It includes the following instructions:
+ - Zbkx: xperm8, xperm4
+ - Zknd: aes64ds, aes64dsm, aes64im, aes64ks1i, aes64ks2
+ - Zkne: aes64es, aes64esm, aes64ks1i, aes64ks2
+ - Zknh: sha256sig0, sha256sig1, sha256sum0, sha256sum1, sha512sig0, sha512sig1, sha512sum0, sha512sum1
+ - Zksed: sm4ed, sm4ks
+ - Zksh: sm3p0, sm3p1
+
# Version 22.0.0.2
**[HOTFIX]** This hotfix contains a set of critical fixes to be applied to gem5 v22.0.
diff --git a/SConstruct b/SConstruct
index f1f1c64..e8107ea 100755
--- a/SConstruct
+++ b/SConstruct
@@ -319,7 +319,10 @@
if conf.TryAction(f'@{python_config} --embed')[0]:
cmd.append('--embed')
- def flag_filter(env, cmd_output):
+ def flag_filter(env, cmd_output, unique=True):
+ # Since this function does not use the `unique` param, one should not
+ # pass any value to this param.
+ assert(unique==True)
flags = cmd_output.split()
prefixes = ('-l', '-L', '-I')
is_useful = lambda x: any(x.startswith(prefix) for prefix in prefixes)
@@ -417,7 +420,6 @@
conf.CheckLinkFlag('-Wl,--threads')
conf.CheckLinkFlag(
'-Wl,--thread-count=%d' % GetOption('num_jobs'))
-
else:
error('\n'.join((
"Don't know what compiler options to use for your compiler.",
diff --git a/TESTING.md b/TESTING.md
index 88d1f29..2273e31 100644
--- a/TESTING.md
+++ b/TESTING.md
@@ -15,7 +15,7 @@
To build and run all the unit tests:
```shell
-scons build/NULL/unittests.opt
+scons build/ALL/unittests.opt
```
All unit tests should be run prior to posting a patch to
@@ -25,20 +25,20 @@
`src/base/bitunion.test.cc`):
```shell
-scons build/NULL/base/bitunion.test.opt
-./build/NULL/base/bitunion.test.opt
+scons build/ALL/base/bitunion.test.opt
+./build/ALL/base/bitunion.test.opt
```
To list the available test functions from a test file:
```shell
-./build/NULL/base/bitunion.test.opt --gtest_list_tests
+./build/ALL/base/bitunion.test.opt --gtest_list_tests
```
To run a specific test function (e.g., BitUnionData.NormalBitfield):
```shell
-./build/NULL/base/bitunion.test.opt --gtest_filter=BitUnionData.NormalBitfield
+./build/ALL/base/bitunion.test.opt --gtest_filter=BitUnionData.NormalBitfield
```
# Running system-level tests
@@ -246,10 +246,9 @@
## Running Tests in Parallel
Whimsy has support for parallel testing baked in. This system supports
-running multiple suites at the same time on the same computer. To run
+running multiple suites at the same time on the same computer. To run
suites in parallel, supply the `-t <number-tests>` flag to the run command.
For example, to run up to three test suites at the same time::
./main.py run --skip-build -t 3
-
diff --git a/build_opts/ALL b/build_opts/ALL
new file mode 100644
index 0000000..6e5ede2
--- /dev/null
+++ b/build_opts/ALL
@@ -0,0 +1,7 @@
+USE_ARM_ISA = True
+USE_MIPS_ISA = True
+USE_POWER_ISA = True
+USE_RISCV_ISA = True
+USE_SPARC_ISA = True
+USE_X86_ISA = True
+PROTOCOL = 'MESI_Two_Level'
diff --git a/build_opts/ARM b/build_opts/ARM
index 5b7da10..8c30c21 100644
--- a/build_opts/ARM
+++ b/build_opts/ARM
@@ -1,2 +1,2 @@
-TARGET_ISA = 'arm'
+USE_ARM_ISA = True
PROTOCOL = 'CHI'
diff --git a/build_opts/ARM_MESI_Three_Level b/build_opts/ARM_MESI_Three_Level
index 2ca31b6..3057bec 100644
--- a/build_opts/ARM_MESI_Three_Level
+++ b/build_opts/ARM_MESI_Three_Level
@@ -1,5 +1,5 @@
# Copyright (c) 2019 ARM Limited
# All rights reserved.
-TARGET_ISA = 'arm'
+USE_ARM_ISA = True
PROTOCOL = 'MESI_Three_Level'
diff --git a/build_opts/ARM_MESI_Three_Level_HTM b/build_opts/ARM_MESI_Three_Level_HTM
index 703398d..7f80c4e 100644
--- a/build_opts/ARM_MESI_Three_Level_HTM
+++ b/build_opts/ARM_MESI_Three_Level_HTM
@@ -1,5 +1,5 @@
# Copyright (c) 2019 ARM Limited
# All rights reserved.
-TARGET_ISA = 'arm'
+USE_ARM_ISA = True
PROTOCOL = 'MESI_Three_Level_HTM'
diff --git a/build_opts/ARM_MOESI_hammer b/build_opts/ARM_MOESI_hammer
index bd5c63f..5322fd9 100644
--- a/build_opts/ARM_MOESI_hammer
+++ b/build_opts/ARM_MOESI_hammer
@@ -1,5 +1,5 @@
# Copyright (c) 2019 ARM Limited
# All rights reserved.
-TARGET_ISA = 'arm'
+USE_ARM_ISA = True
PROTOCOL = 'MOESI_hammer'
diff --git a/build_opts/GCN3_X86 b/build_opts/GCN3_X86
index b396908..aca2f62 100644
--- a/build_opts/GCN3_X86
+++ b/build_opts/GCN3_X86
@@ -1,4 +1,4 @@
PROTOCOL = 'GPU_VIPER'
-TARGET_ISA = 'x86'
+USE_X86_ISA = True
TARGET_GPU_ISA = 'gcn3'
BUILD_GPU = True
diff --git a/build_opts/Garnet_standalone b/build_opts/Garnet_standalone
index fd730c3..2351c52 100644
--- a/build_opts/Garnet_standalone
+++ b/build_opts/Garnet_standalone
@@ -1,2 +1,2 @@
-TARGET_ISA = 'null'
+USE_NULL_ISA = True
PROTOCOL = 'Garnet_standalone'
diff --git a/build_opts/MIPS b/build_opts/MIPS
index 26cb23c..382e101 100644
--- a/build_opts/MIPS
+++ b/build_opts/MIPS
@@ -1,2 +1,2 @@
-TARGET_ISA = 'mips'
+USE_MIPS_ISA = True
PROTOCOL = 'MI_example'
diff --git a/build_opts/NULL b/build_opts/NULL
index b749729..51e287a 100644
--- a/build_opts/NULL
+++ b/build_opts/NULL
@@ -1,2 +1,2 @@
-TARGET_ISA = 'null'
+USE_NULL_ISA = True
PROTOCOL='MI_example'
diff --git a/build_opts/NULL_MESI_Two_Level b/build_opts/NULL_MESI_Two_Level
index 09147b2..bafb199 100644
--- a/build_opts/NULL_MESI_Two_Level
+++ b/build_opts/NULL_MESI_Two_Level
@@ -1,2 +1,2 @@
-TARGET_ISA = 'null'
+USE_NULL_ISA = True
PROTOCOL = 'MESI_Two_Level'
diff --git a/build_opts/NULL_MOESI_CMP_directory b/build_opts/NULL_MOESI_CMP_directory
index 466a268..3346964 100644
--- a/build_opts/NULL_MOESI_CMP_directory
+++ b/build_opts/NULL_MOESI_CMP_directory
@@ -1,2 +1,2 @@
-TARGET_ISA = 'null'
+USE_NULL_ISA = True
PROTOCOL='MOESI_CMP_directory'
diff --git a/build_opts/NULL_MOESI_CMP_token b/build_opts/NULL_MOESI_CMP_token
index 0cd0305..4ea9e70 100644
--- a/build_opts/NULL_MOESI_CMP_token
+++ b/build_opts/NULL_MOESI_CMP_token
@@ -1,2 +1,2 @@
-TARGET_ISA = 'null'
+USE_NULL_ISA = True
PROTOCOL='MOESI_CMP_token'
diff --git a/build_opts/NULL_MOESI_hammer b/build_opts/NULL_MOESI_hammer
index 39ebcae..e91b78d 100644
--- a/build_opts/NULL_MOESI_hammer
+++ b/build_opts/NULL_MOESI_hammer
@@ -1,2 +1,2 @@
-TARGET_ISA = 'null'
+USE_NULL_ISA = True
PROTOCOL='MOESI_hammer'
diff --git a/build_opts/POWER b/build_opts/POWER
index 35772a4..207356c 100644
--- a/build_opts/POWER
+++ b/build_opts/POWER
@@ -1,2 +1,2 @@
-TARGET_ISA = 'power'
+USE_POWER_ISA = True
PROTOCOL = 'MI_example'
diff --git a/build_opts/RISCV b/build_opts/RISCV
index 0bd069d..22097b0 100644
--- a/build_opts/RISCV
+++ b/build_opts/RISCV
@@ -1,2 +1,2 @@
-TARGET_ISA = 'riscv'
+USE_RISCV_ISA = True
PROTOCOL = 'MI_example'
diff --git a/build_opts/SPARC b/build_opts/SPARC
index 98acfe2..22dec5f 100644
--- a/build_opts/SPARC
+++ b/build_opts/SPARC
@@ -1,2 +1,2 @@
-TARGET_ISA = 'sparc'
+USE_SPARC_ISA = True
PROTOCOL = 'MI_example'
diff --git a/build_opts/VEGA_X86 b/build_opts/VEGA_X86
index 11e8232..437b048 100644
--- a/build_opts/VEGA_X86
+++ b/build_opts/VEGA_X86
@@ -1,4 +1,4 @@
PROTOCOL = 'GPU_VIPER'
-TARGET_ISA = 'x86'
+USE_X86_ISA = True
TARGET_GPU_ISA = 'vega'
BUILD_GPU = True
diff --git a/build_opts/X86 b/build_opts/X86
index 72b200a..259325b 100644
--- a/build_opts/X86
+++ b/build_opts/X86
@@ -1,3 +1,3 @@
-TARGET_ISA = 'x86'
+USE_X86_ISA = True
PROTOCOL = 'MESI_Two_Level'
NUMBER_BITS_PER_SET = '128'
diff --git a/build_opts/X86_MESI_Two_Level b/build_opts/X86_MESI_Two_Level
index 72b200a..259325b 100644
--- a/build_opts/X86_MESI_Two_Level
+++ b/build_opts/X86_MESI_Two_Level
@@ -1,3 +1,3 @@
-TARGET_ISA = 'x86'
+USE_X86_ISA = True
PROTOCOL = 'MESI_Two_Level'
NUMBER_BITS_PER_SET = '128'
diff --git a/build_opts/X86_MI_example b/build_opts/X86_MI_example
index 483cf04..71bc9a5 100644
--- a/build_opts/X86_MI_example
+++ b/build_opts/X86_MI_example
@@ -1,2 +1,2 @@
-TARGET_ISA = 'x86'
+USE_X86_ISA = True
PROTOCOL = 'MI_example'
diff --git a/build_opts/X86_MOESI_AMD_Base b/build_opts/X86_MOESI_AMD_Base
index 261bedb..f8f2ce7 100644
--- a/build_opts/X86_MOESI_AMD_Base
+++ b/build_opts/X86_MOESI_AMD_Base
@@ -1,2 +1,2 @@
PROTOCOL = 'MOESI_AMD_Base'
-TARGET_ISA = 'x86'
+USE_X86_ISA = True
diff --git a/build_tools/blob.py b/build_tools/blob.py
index 3d93c45..b3d2d0f 100644
--- a/build_tools/blob.py
+++ b/build_tools/blob.py
@@ -26,16 +26,17 @@
import array
import functools
+
def bytesToCppArray(code, symbol, data):
- '''
+ """
Output an array of bytes to a code formatter as a c++ array declaration.
- '''
- code('const std::uint8_t ${symbol}[] = {')
+ """
+ code("const std::uint8_t ${symbol}[] = {")
code.indent()
step = 16
for i in range(0, len(data), step):
- x = array.array('B', data[i:i+step])
- strs = map(lambda i: f'{i},', x)
+ x = array.array("B", data[i : i + step])
+ strs = map(lambda i: f"{i},", x)
code(functools.reduce(lambda x, y: x + y, strs))
code.dedent()
- code('};')
+ code("};")
diff --git a/build_tools/code_formatter.py b/build_tools/code_formatter.py
index 374e8cc..a2651c9 100644
--- a/build_tools/code_formatter.py
+++ b/build_tools/code_formatter.py
@@ -1,3 +1,15 @@
+# Copyright (c) 2022 Arm Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder. You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
# Copyright (c) 2006-2009 Nathan Binkert <nate@binkert.org>
# All rights reserved.
#
@@ -33,6 +45,7 @@
import os
import re
+
class lookup(object):
def __init__(self, formatter, frame, *args, **kwargs):
self.frame = frame
@@ -52,10 +65,10 @@
if item in self.kwargs:
return self.kwargs[item]
- if item == '__file__':
+ if item == "__file__":
return self.frame.f_code.co_filename
- if item == '__line__':
+ if item == "__line__":
return self.frame.f_lineno
if self.formatter.locals and item in self.frame.f_locals:
@@ -77,6 +90,7 @@
pass
raise IndexError("Could not find '%s'" % item)
+
class code_formatter_meta(type):
pattern = r"""
(?:
@@ -90,44 +104,48 @@
%(delim)s(?P<invalid>) # ill-formed delimiter exprs
)
"""
+
def __init__(cls, name, bases, dct):
super(code_formatter_meta, cls).__init__(name, bases, dct)
- if 'pattern' in dct:
+ if "pattern" in dct:
pat = cls.pattern
else:
# tuple expansion to ensure strings are proper length
- lb,rb = cls.braced
- lb1,lb2,rb2,rb1 = cls.double_braced
+ lb, rb = cls.braced
+ lb1, lb2, rb2, rb1 = cls.double_braced
pat = code_formatter_meta.pattern % {
- 'delim' : re.escape(cls.delim),
- 'ident' : cls.ident,
- 'pos' : cls.pos,
- 'lb' : re.escape(lb),
- 'rb' : re.escape(rb),
- 'ldb' : re.escape(lb1+lb2),
- 'rdb' : re.escape(rb2+rb1),
- }
+ "delim": re.escape(cls.delim),
+ "ident": cls.ident,
+ "pos": cls.pos,
+ "lb": re.escape(lb),
+ "rb": re.escape(rb),
+ "ldb": re.escape(lb1 + lb2),
+ "rdb": re.escape(rb2 + rb1),
+ }
cls.pattern = re.compile(pat, re.VERBOSE | re.DOTALL | re.MULTILINE)
+
class code_formatter(object, metaclass=code_formatter_meta):
- delim = r'$'
- ident = r'[_A-z]\w*'
- pos = r'[0-9]+'
- braced = r'{}'
- double_braced = r'{{}}'
+ delim = r"$"
+ ident = r"[_A-z]\w*"
+ pos = r"[0-9]+"
+ braced = r"{}"
+ double_braced = r"{{}}"
globals = True
locals = True
fix_newlines = True
+
def __init__(self, *args, **kwargs):
self._data = []
self._dict = {}
self._indent_level = 0
self._indent_spaces = 4
- self.globals = kwargs.pop('globals', type(self).globals)
- self.locals = kwargs.pop('locals', type(self).locals)
- self._fix_newlines = \
- kwargs.pop('fix_newlines', type(self).fix_newlines)
+ self.globals = kwargs.pop("globals", type(self).globals)
+ self.locals = kwargs.pop("locals", type(self).locals)
+ self._fix_newlines = kwargs.pop(
+ "fix_newlines", type(self).fix_newlines
+ )
if args:
self.__call__(args)
@@ -159,38 +177,44 @@
# Add a comment to inform which file generated the generated file
# to make it easier to backtrack and modify generated code
frame = inspect.currentframe().f_back
- if re.match('\.(cc|hh|c|h)', extension) is not None:
- f.write(f'''/**
+ if re.match(r"^\.(cc|hh|c|h)$", extension) is not None:
+ f.write(
+ f"""/**
* DO NOT EDIT THIS FILE!
* File automatically generated by
* {frame.f_code.co_filename}:{frame.f_lineno}
*/
-''')
- elif re.match('\.py', extension) is not None:
- f.write(f'''#
+"""
+ )
+ elif re.match(r"^\.py$", extension) is not None:
+ f.write(
+ f"""#
# DO NOT EDIT THIS FILE!
# File automatically generated by
# {frame.f_code.co_filename}:{frame.f_lineno}
#
-''')
- elif re.match('\.html', extension) is not None:
- f.write(f'''<!--
+"""
+ )
+ elif re.match(r"^\.html$", extension) is not None:
+ f.write(
+ f"""<!--
DO NOT EDIT THIS FILE!
File automatically generated by
{frame.f_code.co_filename}:{frame.f_lineno}
-->
-''')
+"""
+ )
for data in self._data:
f.write(data)
f.close()
def __str__(self):
- data = ''.join(self._data)
- self._data = [ data ]
+ data = "".join(self._data)
+ self._data = [data]
return data
def __getitem__(self, item):
@@ -219,21 +243,21 @@
self._data.append(data)
return
- initial_newline = not self._data or self._data[-1] == '\n'
+ initial_newline = not self._data or self._data[-1] == "\n"
for line in data.splitlines():
if line:
if self._indent_level:
- self._data.append(' ' * self._indent_level)
+ self._data.append(" " * self._indent_level)
self._data.append(line)
if line or not initial_newline:
- self._data.append('\n')
+ self._data.append("\n")
initial_newline = False
def __call__(self, *args, **kwargs):
if not args:
- self._data.append('\n')
+ self._data.append("\n")
return
format = args[0]
@@ -242,51 +266,56 @@
frame = inspect.currentframe().f_back
l = lookup(self, frame, *args, **kwargs)
+
def convert(match):
- ident = match.group('lone')
+ ident = match.group("lone")
# check for a lone identifier
if ident:
- indent = match.group('indent') # must be spaces
- lone = '%s' % (l[ident], )
+ indent = match.group("indent") # must be spaces
+ lone = "%s" % (l[ident],)
def indent_lines(gen):
for line in gen:
yield indent
yield line
- return ''.join(indent_lines(lone.splitlines(True)))
+
+ return "".join(indent_lines(lone.splitlines(True)))
# check for an identifier, braced or not
- ident = match.group('ident') or match.group('b_ident')
+ ident = match.group("ident") or match.group("b_ident")
if ident is not None:
- return '%s' % (l[ident], )
+ return "%s" % (l[ident],)
# check for a positional parameter, braced or not
- pos = match.group('pos') or match.group('b_pos')
+ pos = match.group("pos") or match.group("b_pos")
if pos is not None:
pos = int(pos)
if pos > len(args):
- raise ValueError \
- ('Positional parameter #%d not found in pattern' % pos,
- code_formatter.pattern)
- return '%s' % (args[int(pos)], )
+ raise ValueError(
+ "Positional parameter #%d not found in pattern" % pos,
+ code_formatter.pattern,
+ )
+ return "%s" % (args[int(pos)],)
# check for a double braced expression
- eval_expr = match.group('eval')
+ eval_expr = match.group("eval")
if eval_expr is not None:
result = eval(eval_expr, {}, l)
- return '%s' % (result, )
+ return "%s" % (result,)
# check for an escaped delimiter
- if match.group('escaped') is not None:
- return '$'
+ if match.group("escaped") is not None:
+ return "$"
# At this point, we have to match invalid
- if match.group('invalid') is None:
+ if match.group("invalid") is None:
# didn't match invalid!
- raise ValueError('Unrecognized named group in pattern',
- code_formatter.pattern)
+ raise ValueError(
+ "Unrecognized named group in pattern",
+ code_formatter.pattern,
+ )
- i = match.start('invalid')
+ i = match.start("invalid")
if i == 0:
colno = 1
lineno = 1
@@ -295,52 +324,64 @@
colno = i - sum(len(z) for z in lines)
lineno = len(lines)
- raise ValueError('Invalid format string: line %d, col %d' %
- (lineno, colno))
+ raise ValueError(
+ "Invalid format string: line %d, col %d" % (lineno, colno)
+ )
d = code_formatter.pattern.sub(convert, format)
self._append(d)
-__all__ = [ "code_formatter" ]
-if __name__ == '__main__':
+__all__ = ["code_formatter"]
+
+if __name__ == "__main__":
from .code_formatter import code_formatter
+
f = code_formatter()
class Foo(dict):
def __init__(self, **kwargs):
self.update(kwargs)
+
def __getattr__(self, attr):
return self[attr]
x = "this is a test"
- l = [ [Foo(x=[Foo(y=9)])] ]
+ l = [[Foo(x=[Foo(y=9)])]]
y = code_formatter()
- y('''
+ y(
+ """
{
this_is_a_test();
}
-''')
- f(' $y')
- f('''$__file__:$__line__
-{''')
+"""
+ )
+ f(" $y")
+ f(
+ """$__file__:$__line__
+{"""
+ )
f("${{', '.join(str(x) for x in range(4))}}")
- f('${x}')
- f('$x')
+ f("${x}")
+ f("$x")
f.indent()
for i in range(5):
- f('$x')
- f('$i')
- f('$0', "zero")
- f('$1 $0', "zero", "one")
- f('${0}', "he went")
- f('${0}asdf', "he went")
+ f("$x")
+ f("$i")
+ f("$0", "zero")
+ f("$1 $0", "zero", "one")
+ f("${0}", "he went")
+ f("${0}asdf", "he went")
f.dedent()
- f('''
+ f(
+ """
${{l[0][0]["x"][0].y}}
}
-''', 1, 9)
+""",
+ 1,
+ 9,
+ )
- print(f, end=' ')
+ print(f, end=" ")
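The self-test above exercises most of code_formatter's substitution syntax. As a quick reference (a minimal usage sketch, assuming build_tools/ is on sys.path): $name and ${name} substitute a variable looked up in the calling frame, ${{expr}} evaluates an arbitrary Python expression, $0/$1/... refer to extra positional arguments, and $$ emits a literal dollar sign.

# Minimal usage sketch of the "$" substitutions (illustrative only).
from code_formatter import code_formatter

code = code_formatter()
width = 64
code("typedef uint${width}_t word_t;")     # ${name}: variable from the caller
code("// size in bytes: ${{width // 8}}")  # ${{...}}: evaluated expression
code("// $0 then $1", "first", "second")   # positional parameters
code("// a literal dollar: $$PATH")        # $$ escapes the delimiter
code.write("example.hh")  # prepends the "DO NOT EDIT" banner for .hh files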
diff --git a/build_tools/cxx_config_cc.py b/build_tools/cxx_config_cc.py
index c4a2d89..a908aa8 100644
--- a/build_tools/cxx_config_cc.py
+++ b/build_tools/cxx_config_cc.py
@@ -46,8 +46,8 @@
from code_formatter import code_formatter
parser = argparse.ArgumentParser()
-parser.add_argument('modpath', help='module the simobject belongs to')
-parser.add_argument('cxx_config_cc', help='cxx config cc file to generate')
+parser.add_argument("modpath", help="module the simobject belongs to")
+parser.add_argument("cxx_config_cc", help="cxx config cc file to generate")
args = parser.parse_args()
@@ -63,22 +63,25 @@
code = code_formatter()
-entry_class = 'CxxConfigDirectoryEntry_%s' % sim_object_name
-param_class = '%sCxxConfigParams' % sim_object_name
+entry_class = "CxxConfigDirectoryEntry_%s" % sim_object_name
+param_class = "%sCxxConfigParams" % sim_object_name
+
def cxx_bool(b):
- return 'true' if b else 'false'
+ return "true" if b else "false"
+
code('#include "params/%s.hh"' % sim_object_name)
for param in sim_object._params.values():
if isSimObjectClass(param.ptype):
- code('#include "%s"' % param.ptype._value_dict['cxx_header'])
+ code('#include "%s"' % param.ptype._value_dict["cxx_header"])
code('#include "params/%s.hh"' % param.ptype.__name__)
else:
param.ptype.cxx_ini_predecls(code)
-code('''#include "${{sim_object._value_dict['cxx_header']}}"
+code(
+ """#include "${{sim_object._value_dict['cxx_header']}}"
#include "base/str.hh"
#include "cxx_config/${sim_object_name}.hh"
@@ -87,34 +90,39 @@
${param_class}::DirectoryEntry::DirectoryEntry()
{
-''')
+"""
+)
code.indent()
for param in sim_object._params.values():
is_vector = isinstance(param, m5.params.VectorParamDesc)
is_simobj = issubclass(param.ptype, m5.SimObject.SimObject)
- code('parameters["%s"] = new ParamDesc("%s", %s, %s);' %
- (param.name, param.name, cxx_bool(is_vector),
- cxx_bool(is_simobj)));
+ code(
+ 'parameters["%s"] = new ParamDesc("%s", %s, %s);'
+ % (param.name, param.name, cxx_bool(is_vector), cxx_bool(is_simobj))
+ )
for port in sim_object._ports.values():
is_vector = isinstance(port, m5.params.VectorPort)
- is_requestor = port.role == 'GEM5 REQUESTOR'
+ is_requestor = port.role == "GEM5 REQUESTOR"
- code('ports["%s"] = new PortDesc("%s", %s, %s);' %
- (port.name, port.name, cxx_bool(is_vector),
- cxx_bool(is_requestor)))
+ code(
+ 'ports["%s"] = new PortDesc("%s", %s, %s);'
+ % (port.name, port.name, cxx_bool(is_vector), cxx_bool(is_requestor))
+ )
code.dedent()
-code('''}
+code(
+ """}
bool
${param_class}::setSimObject(const std::string &name, SimObject *simObject)
{
bool ret = true;
if (false) {
-''')
+"""
+)
code.indent()
for param in sim_object._params.values():
@@ -124,14 +132,17 @@
if is_simobj and not is_vector:
code('} else if (name == "${{param.name}}") {')
code.indent()
- code('this->${{param.name}} = '
- 'dynamic_cast<${{param.ptype.cxx_type}}>(simObject);')
- code('if (simObject && !this->${{param.name}})')
- code(' ret = false;')
+ code(
+ "this->${{param.name}} = "
+ "dynamic_cast<${{param.ptype.cxx_type}}>(simObject);"
+ )
+ code("if (simObject && !this->${{param.name}})")
+ code(" ret = false;")
code.dedent()
code.dedent()
-code('''
+code(
+ """
} else {
ret = false;
}
@@ -146,7 +157,8 @@
bool ret = true;
if (false) {
-''')
+"""
+)
code.indent()
for param in sim_object._params.values():
@@ -156,23 +168,28 @@
if is_simobj and is_vector:
code('} else if (name == "${{param.name}}") {')
code.indent()
- code('this->${{param.name}}.clear();')
- code('for (auto i = simObjects.begin(); '
- 'ret && i != simObjects.end(); i ++)')
- code('{')
+ code("this->${{param.name}}.clear();")
+ code(
+ "for (auto i = simObjects.begin(); "
+ "ret && i != simObjects.end(); i ++)"
+ )
+ code("{")
code.indent()
- code('${{param.ptype.cxx_type}} object = '
- 'dynamic_cast<${{param.ptype.cxx_type}}>(*i);')
- code('if (*i && !object)')
- code(' ret = false;')
- code('else')
- code(' this->${{param.name}}.push_back(object);')
+ code(
+ "${{param.ptype.cxx_type}} object = "
+ "dynamic_cast<${{param.ptype.cxx_type}}>(*i);"
+ )
+ code("if (*i && !object)")
+ code(" ret = false;")
+ code("else")
+ code(" this->${{param.name}}.push_back(object);")
code.dedent()
- code('}')
+ code("}")
code.dedent()
code.dedent()
-code('''
+code(
+ """
} else {
ret = false;
}
@@ -193,7 +210,8 @@
bool ret = true;
if (false) {
-''')
+"""
+)
code.indent()
for param in sim_object._params.values():
@@ -203,12 +221,14 @@
if not is_simobj and not is_vector:
code('} else if (name == "${{param.name}}") {')
code.indent()
- param.ptype.cxx_ini_parse(code,
- 'value', 'this->%s' % param.name, 'ret =')
+ param.ptype.cxx_ini_parse(
+ code, "value", "this->%s" % param.name, "ret ="
+ )
code.dedent()
code.dedent()
-code('''
+code(
+ """
} else {
ret = false;
}
@@ -223,7 +243,8 @@
bool ret = true;
if (false) {
-''')
+"""
+)
code.indent()
for param in sim_object._params.values():
@@ -233,22 +254,23 @@
if not is_simobj and is_vector:
code('} else if (name == "${{param.name}}") {')
code.indent()
- code('${{param.name}}.clear();')
- code('for (auto i = values.begin(); '
- 'ret && i != values.end(); i ++)')
- code('{')
+ code("${{param.name}}.clear();")
+ code(
+ "for (auto i = values.begin(); " "ret && i != values.end(); i ++)"
+ )
+ code("{")
code.indent()
- code('${{param.ptype.cxx_type}} elem;')
- param.ptype.cxx_ini_parse(code,
- '*i', 'elem', 'ret =')
- code('if (ret)')
- code(' this->${{param.name}}.push_back(elem);')
+ code("${{param.ptype.cxx_type}} elem;")
+ param.ptype.cxx_ini_parse(code, "*i", "elem", "ret =")
+ code("if (ret)")
+ code(" this->${{param.name}}.push_back(elem);")
code.dedent()
- code('}')
+ code("}")
code.dedent()
code.dedent()
-code('''
+code(
+ """
} else {
ret = false;
}
@@ -263,15 +285,17 @@
bool ret = true;
if (false) {
-''')
+"""
+)
code.indent()
for port in sim_object._ports.values():
code('} else if (name == "${{port.name}}") {')
- code(' this->port_${{port.name}}_connection_count = count;')
+ code(" this->port_${{port.name}}_connection_count = count;")
code.dedent()
-code('''
+code(
+ """
} else {
ret = false;
}
@@ -282,18 +306,21 @@
SimObject *
${param_class}::simObjectCreate()
{
-''')
+"""
+)
code.indent()
-if hasattr(sim_object, 'abstract') and sim_object.abstract:
- code('return nullptr;')
+if hasattr(sim_object, "abstract") and sim_object.abstract:
+ code("return nullptr;")
else:
- code('return this->create();')
+ code("return this->create();")
code.dedent()
-code('''}
+code(
+ """}
} // namespace gem5
-''')
+"""
+)
code.write(args.cxx_config_cc)
diff --git a/build_tools/cxx_config_hh.py b/build_tools/cxx_config_hh.py
index 652c488..55828e3 100644
--- a/build_tools/cxx_config_hh.py
+++ b/build_tools/cxx_config_hh.py
@@ -46,8 +46,8 @@
from code_formatter import code_formatter
parser = argparse.ArgumentParser()
-parser.add_argument('modpath', help='module the simobject belongs to')
-parser.add_argument('cxx_config_hh', help='cxx config header file to generate')
+parser.add_argument("modpath", help="module the simobject belongs to")
+parser.add_argument("cxx_config_hh", help="cxx config header file to generate")
args = parser.parse_args()
@@ -60,10 +60,11 @@
code = code_formatter()
-entry_class = 'CxxConfigDirectoryEntry_%s' % sim_object_name
-param_class = '%sCxxConfigParams' % sim_object_name
+entry_class = "CxxConfigDirectoryEntry_%s" % sim_object_name
+param_class = "%sCxxConfigParams" % sim_object_name
-code('''#include "params/${sim_object_name}.hh"
+code(
+ """#include "params/${sim_object_name}.hh"
#include "sim/cxx_config.hh"
@@ -110,6 +111,7 @@
};
} // namespace gem5
-''')
+"""
+)
code.write(args.cxx_config_hh)
diff --git a/build_tools/debugflaghh.py b/build_tools/debugflaghh.py
index fc86cb0..2e861e2 100644
--- a/build_tools/debugflaghh.py
+++ b/build_tools/debugflaghh.py
@@ -44,35 +44,41 @@
parser.add_argument("hh", help="the path of the debug flag header file")
parser.add_argument("name", help="the name of the debug flag")
parser.add_argument("desc", help="a description of the debug flag")
-parser.add_argument("fmt",
- help="whether the flag is a format flag (True or False)")
-parser.add_argument("components",
- help="components of a compound flag, if applicable, joined with :")
+parser.add_argument(
+ "fmt", help="whether the flag is a format flag (True or False)"
+)
+parser.add_argument(
+ "components",
+ help="components of a compound flag, if applicable, joined with :",
+)
args = parser.parse_args()
fmt = args.fmt.lower()
-if fmt == 'true':
+if fmt == "true":
fmt = True
-elif fmt == 'false':
+elif fmt == "false":
fmt = False
else:
print(f'Unrecognized "FMT" value {fmt}', file=sys.stderr)
sys.exit(1)
-components = args.components.split(':') if args.components else []
+components = args.components.split(":") if args.components else []
code = code_formatter()
-code('''
+code(
+ """
#ifndef __DEBUG_${{args.name}}_HH__
#define __DEBUG_${{args.name}}_HH__
#include "base/compiler.hh" // For namespace deprecation
#include "base/debug.hh"
-''')
+"""
+)
for flag in components:
code('#include "debug/${flag}.hh"')
-code('''
+code(
+ """
namespace gem5
{
@@ -82,14 +88,16 @@
namespace unions
{
-''')
+"""
+)
# Use unions to prevent debug flags from being destructed. It's the
# responsibility of the programmer to handle object destruction for members
# of the union. We purposefully leave that destructor empty so that we can
# use debug flags even in the destructors of other objects.
if components:
- code('''
+ code(
+ """
inline union ${{args.name}}
{
~${{args.name}}() {}
@@ -100,9 +108,11 @@
}
};
} ${{args.name}};
-''')
+"""
+ )
else:
- code('''
+ code(
+ """
inline union ${{args.name}}
{
~${{args.name}}() {}
@@ -110,18 +120,21 @@
"${{args.name}}", "${{args.desc}}", ${{"true" if fmt else "false"}}
};
} ${{args.name}};
-''')
+"""
+ )
-code('''
+code(
+ """
} // namespace unions
-inline constexpr const auto& ${{args.name}} =
+inline constexpr const auto& ${{args.name}} =
::gem5::debug::unions::${{args.name}}.${{args.name}};
} // namespace debug
} // namespace gem5
#endif // __DEBUG_${{args.name}}_HH__
-''')
+"""
+)
code.write(args.hh)
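Each debug flag header comes from a separate invocation of this script; a compound flag lists its member flags joined with ':' in the components argument, and each member's header is pulled in through the ${flag}.hh includes above. A hypothetical hand-driven invocation (flag names and paths are made up; in the real build SCons drives this):

# Hypothetical invocation of the generator (illustrative only).
import subprocess

subprocess.run(
    [
        "python3", "build_tools/debugflaghh.py",
        "debug/MyCompound.hh",       # hh: header file to generate
        "MyCompound",                # name of the debug flag
        "An example compound flag",  # desc
        "False",                     # fmt: not a format flag
        "MyFlagA:MyFlagB",           # components joined with ':'
    ],
    check=True,
)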
diff --git a/build_tools/enum_cc.py b/build_tools/enum_cc.py
index c706ffe..476e49d 100644
--- a/build_tools/enum_cc.py
+++ b/build_tools/enum_cc.py
@@ -46,17 +46,18 @@
from code_formatter import code_formatter
parser = argparse.ArgumentParser()
-parser.add_argument('modpath', help='module the enum belongs to')
-parser.add_argument('enum_cc', help='enum cc file to generate')
-parser.add_argument('use_python',
- help='whether python is enabled in gem5 (True or False)')
+parser.add_argument("modpath", help="module the enum belongs to")
+parser.add_argument("enum_cc", help="enum cc file to generate")
+parser.add_argument(
+ "use_python", help="whether python is enabled in gem5 (True or False)"
+)
args = parser.parse_args()
use_python = args.use_python.lower()
-if use_python == 'true':
+if use_python == "true":
use_python = True
-elif use_python == 'false':
+elif use_python == "false":
use_python = False
else:
print(f'Unrecognized "use_python" value {use_python}', file=sys.stderr)
@@ -75,41 +76,46 @@
file_name = enum.__name__
name = enum.__name__ if enum.enum_name is None else enum.enum_name
-code('''#include "base/compiler.hh"
+code(
+ """#include "base/compiler.hh"
#include "enums/$file_name.hh"
namespace gem5
{
-''')
+"""
+)
if enum.wrapper_is_struct:
- code('const char *${wrapper_name}::${name}Strings'
- '[Num_${name}] =')
+ code("const char *${wrapper_name}::${name}Strings" "[Num_${name}] =")
else:
if enum.is_class:
- code('''\
+ code(
+ """\
const char *${name}Strings[static_cast<int>(${name}::Num_${name})] =
-''')
+"""
+ )
else:
- code('''GEM5_DEPRECATED_NAMESPACE(Enums, enums);
+ code(
+ """GEM5_DEPRECATED_NAMESPACE(Enums, enums);
namespace enums
-{''')
+{"""
+ )
code.indent(1)
- code('const char *${name}Strings[Num_${name}] =')
+ code("const char *${name}Strings[Num_${name}] =")
-code('{')
+code("{")
code.indent(1)
for val in enum.vals:
code('"$val",')
code.dedent(1)
-code('};')
+code("};")
if not enum.wrapper_is_struct and not enum.is_class:
code.dedent(1)
- code('} // namespace enums')
+ code("} // namespace enums")
-code('} // namespace gem5')
+code("} // namespace gem5")
if use_python:
@@ -118,7 +124,8 @@
enum_name = enum.__name__ if enum.enum_name is None else enum.enum_name
wrapper_name = enum_name if enum.is_class else enum.wrapper_name
- code('''#include "pybind11/pybind11.h"
+ code(
+ """#include "pybind11/pybind11.h"
#include "pybind11/stl.h"
#include <sim/init.hh>
@@ -133,7 +140,8 @@
{
py::module_ m = m_internal.def_submodule("enum_${name}");
-''')
+"""
+ )
if enum.is_class:
code('py::enum_<${enum_name}>(m, "enum_${name}")')
else:
@@ -145,16 +153,18 @@
code('.value("${val}", ${wrapper_name}::${val})')
code('.value("Num_${name}", ${wrapper_name}::Num_${enum_name})')
if not enum.is_class:
- code('.export_values()')
- code(';')
+ code(".export_values()")
+ code(";")
code.dedent()
- code('}')
+ code("}")
code.dedent()
- code('''
+ code(
+ """
static EmbeddedPyBind embed_enum("enum_${name}", module_init);
} // namespace gem5
- ''')
+ """
+ )
code.write(args.enum_cc)
diff --git a/build_tools/enum_hh.py b/build_tools/enum_hh.py
index 2c4a7bb..a5b9f42 100644
--- a/build_tools/enum_hh.py
+++ b/build_tools/enum_hh.py
@@ -46,8 +46,8 @@
from code_formatter import code_formatter
parser = argparse.ArgumentParser()
-parser.add_argument('modpath', help='module the enum belongs to')
-parser.add_argument('enum_hh', help='enum header file to generate')
+parser.add_argument("modpath", help="module the enum belongs to")
+parser.add_argument("enum_hh", help="enum header file to generate")
args = parser.parse_args()
@@ -64,53 +64,61 @@
# Note that we wrap the enum in a class/struct to act as a namespace,
# so that the enum strings can be brief w/o worrying about collisions.
wrapper_name = enum.wrapper_name
-wrapper = 'struct' if enum.wrapper_is_struct else 'namespace'
+wrapper = "struct" if enum.wrapper_is_struct else "namespace"
name = enum.__name__ if enum.enum_name is None else enum.enum_name
-idem_macro = '__ENUM__%s__%s__' % (wrapper_name, name)
+idem_macro = "__ENUM__%s__%s__" % (wrapper_name, name)
-code('''\
+code(
+ """\
#ifndef $idem_macro
#define $idem_macro
namespace gem5
{
-''')
+"""
+)
if enum.is_class:
- code('''\
+ code(
+ """\
enum class $name
{
-''')
+"""
+ )
else:
- code('''\
+ code(
+ """\
$wrapper $wrapper_name {
enum $name
{
-''')
+"""
+ )
code.indent(1)
code.indent(1)
for val in enum.vals:
- code('$val = ${{enum.map[val]}},')
-code('Num_$name = ${{len(enum.vals)}}')
+ code("$val = ${{enum.map[val]}},")
+code("Num_$name = ${{len(enum.vals)}}")
code.dedent(1)
-code('};')
+code("};")
if enum.is_class:
- code('''\
+ code(
+ """\
extern const char *${name}Strings[static_cast<int>(${name}::Num_${name})];
-''')
+"""
+ )
elif enum.wrapper_is_struct:
- code('static const char *${name}Strings[Num_${name}];')
+ code("static const char *${name}Strings[Num_${name}];")
else:
- code('extern const char *${name}Strings[Num_${name}];')
+ code("extern const char *${name}Strings[Num_${name}];")
if not enum.is_class:
code.dedent(1)
- code('}; // $wrapper_name')
+ code("}; // $wrapper_name")
code()
-code('} // namespace gem5')
+code("} // namespace gem5")
code()
-code('#endif // $idem_macro')
+code("#endif // $idem_macro")
code.write(args.enum_hh)
diff --git a/build_tools/grammar.py b/build_tools/grammar.py
index 9aba746..6ac638b 100644
--- a/build_tools/grammar.py
+++ b/build_tools/grammar.py
@@ -29,73 +29,77 @@
import ply.lex
import ply.yacc
+
class ParseError(Exception):
def __init__(self, message, token=None):
Exception.__init__(self, message)
self.token = token
+
class Grammar(object):
def setupLexerFactory(self, **kwargs):
- if 'module' in kwargs:
+ if "module" in kwargs:
raise AttributeError("module is an illegal attribute")
self.lex_kwargs = kwargs
def setupParserFactory(self, **kwargs):
- if 'module' in kwargs:
+ if "module" in kwargs:
raise AttributeError("module is an illegal attribute")
- if 'output' in kwargs:
- dir,tab = os.path.split(output)
- if not tab.endswith('.py'):
- raise AttributeError('The output file must end with .py')
- kwargs['outputdir'] = dir
- kwargs['tabmodule'] = tab[:-3]
+ if "output" in kwargs:
+            dir, tab = os.path.split(kwargs["output"])
+ if not tab.endswith(".py"):
+ raise AttributeError("The output file must end with .py")
+ kwargs["outputdir"] = dir
+ kwargs["tabmodule"] = tab[:-3]
self.yacc_kwargs = kwargs
def __getattr__(self, attr):
- if attr == 'lexers':
+ if attr == "lexers":
self.lexers = []
return self.lexers
- if attr == 'lex_kwargs':
+ if attr == "lex_kwargs":
self.setupLexerFactory()
return self.lex_kwargs
- if attr == 'yacc_kwargs':
+ if attr == "yacc_kwargs":
self.setupParserFactory()
return self.yacc_kwargs
- if attr == 'lex':
+ if attr == "lex":
self.lex = ply.lex.lex(module=self, **self.lex_kwargs)
return self.lex
- if attr == 'yacc':
+ if attr == "yacc":
self.yacc = ply.yacc.yacc(module=self, **self.yacc_kwargs)
return self.yacc
- if attr == 'current_lexer':
+ if attr == "current_lexer":
if not self.lexers:
return None
return self.lexers[-1][0]
- if attr == 'current_source':
+ if attr == "current_source":
if not self.lexers:
- return '<none>'
+ return "<none>"
return self.lexers[-1][1]
- if attr == 'current_line':
+ if attr == "current_line":
if not self.lexers:
return -1
return self.current_lexer.lineno
raise AttributeError(
- "'%s' object has no attribute '%s'" % (type(self), attr))
+ "'%s' object has no attribute '%s'" % (type(self), attr)
+ )
- def parse_string(self, data, source='<string>', debug=None, tracking=0):
+ def parse_string(self, data, source="<string>", debug=None, tracking=0):
if not isinstance(data, str):
raise AttributeError(
- "argument must be a string, was '%s'" % type(f))
+                "argument must be a string, was '%s'" % type(data)
+ )
lexer = self.lex.clone()
lexer.input(data)
@@ -114,24 +118,32 @@
def parse_file(self, f, **kwargs):
if isinstance(f, str):
source = f
- f = open(f, 'r')
+ f = open(f, "r")
elif isinstance(f, file):
source = f.name
else:
raise AttributeError(
- "argument must be either a string or file, was '%s'" % type(f))
+ "argument must be either a string or file, was '%s'" % type(f)
+ )
return self.parse_string(f.read(), source, **kwargs)
def p_error(self, t):
if t:
- msg = "Syntax error at %s:%d:%d\n>>%s<<" % \
- (self.current_source, t.lineno, t.lexpos + 1, t.value)
+ msg = "Syntax error at %s:%d:%d\n>>%s<<" % (
+ self.current_source,
+ t.lineno,
+ t.lexpos + 1,
+ t.value,
+ )
else:
- msg = "Syntax error at end of %s" % (self.current_source, )
+ msg = "Syntax error at end of %s" % (self.current_source,)
raise ParseError(msg, t)
def t_error(self, t):
- msg = "Illegal character %s @ %d:%d" % \
- (repr(t.value[0]), t.lineno, t.lexpos)
+ msg = "Illegal character %s @ %d:%d" % (
+ repr(t.value[0]),
+ t.lineno,
+ t.lexpos,
+ )
raise ParseError(msg, t)
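Grammar is meant to be subclassed with ply-style token and production rules; the lexer and parser are built lazily through __getattr__ the first time lex or yacc is touched, and t_error/p_error above turn failures into ParseError with source and position information. A hypothetical minimal subclass (rule and token names invented for illustration; assumes ply is installed and this module is importable):

# Hypothetical subclass sketch (illustrative only).
from grammar import Grammar

class SumGrammar(Grammar):
    tokens = ("NUMBER", "PLUS")

    t_PLUS = r"\+"
    t_ignore = " \t"

    def t_NUMBER(self, t):
        r"[0-9]+"
        t.value = int(t.value)
        return t

    def p_expr_sum(self, t):
        "expr : expr PLUS NUMBER"
        t[0] = t[1] + t[3]

    def p_expr_number(self, t):
        "expr : NUMBER"
        t[0] = t[1]

g = SumGrammar()
# parse_string feeds a cloned lexer and returns the parse result (6 here);
# malformed input raises ParseError via p_error above.
print(g.parse_string("1 + 2 + 3"))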
diff --git a/build_tools/infopy.py b/build_tools/infopy.py
index a58cf39..4f15f24 100644
--- a/build_tools/infopy.py
+++ b/build_tools/infopy.py
@@ -42,8 +42,8 @@
from code_formatter import code_formatter
parser = argparse.ArgumentParser()
-parser.add_argument('info_py', help='info.py file path')
-parser.add_argument('files', help='file to include in info.py', nargs='*')
+parser.add_argument("info_py", help="info.py file path")
+parser.add_argument("files", help="file to include in info.py", nargs="*")
args = parser.parse_args()
@@ -52,8 +52,8 @@
for source in args.files:
src = os.path.basename(source)
- with open(source, 'r') as f:
- data = ''.join(f)
- code('${src} = ${{repr(data)}}')
+ with open(source, "r") as f:
+ data = "".join(f)
+ code("${src} = ${{repr(data)}}")
code.write(args.info_py)
diff --git a/build_tools/marshal.py b/build_tools/marshal.py
index 9c2964b..18afe2c 100644
--- a/build_tools/marshal.py
+++ b/build_tools/marshal.py
@@ -67,16 +67,17 @@
_, cpp, python, modpath, abspath = sys.argv
-with open(python, 'r') as f:
+with open(python, "r") as f:
src = f.read()
-compiled = compile(src, python, 'exec')
+compiled = compile(src, python, "exec")
marshalled = marshal.dumps(compiled)
compressed = zlib.compress(marshalled)
code = code_formatter()
-code('''\
+code(
+ """\
#include "python/embedded.hh"
namespace gem5
@@ -84,14 +85,16 @@
namespace
{
-''')
+"""
+)
-bytesToCppArray(code, 'embedded_module_data', compressed)
+bytesToCppArray(code, "embedded_module_data", compressed)
# The name of the EmbeddedPython object doesn't matter since it's in an
# anonymous namespace, and its constructor takes care of installing it into a
# global list.
-code('''
+code(
+ """
EmbeddedPython embedded_module_info(
"${abspath}",
"${modpath}",
@@ -101,6 +104,7 @@
} // anonymous namespace
} // namespace gem5
-''')
+"""
+)
code.write(cpp)
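The embedding pipeline above is simply compile -> marshal.dumps -> zlib.compress, with bytesToCppArray emitting the compressed bytes as a C++ array next to the EmbeddedPython record. A standalone round-trip sketch (standard library only; the file name is made up) shows roughly what the loader has to undo at runtime:

# Standalone sketch of the marshal/zlib round trip (illustrative only).
import marshal
import zlib

src = "print('hello from an embedded module')\n"
compiled = compile(src, "example.py", "exec")

# What this script embeds into the generated .cc file:
compressed = zlib.compress(marshal.dumps(compiled))

# Roughly what must happen at load time to recover the code object:
code_obj = marshal.loads(zlib.decompress(compressed))
exec(code_obj, {})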
diff --git a/build_tools/sim_object_param_struct_cc.py b/build_tools/sim_object_param_struct_cc.py
index 1b72e3c..0384809 100644
--- a/build_tools/sim_object_param_struct_cc.py
+++ b/build_tools/sim_object_param_struct_cc.py
@@ -46,17 +46,18 @@
from code_formatter import code_formatter
parser = argparse.ArgumentParser()
-parser.add_argument('modpath', help='module the simobject belongs to')
-parser.add_argument('param_cc', help='parameter cc file to generate')
-parser.add_argument('use_python',
- help='whether python is enabled in gem5 (True or False)')
+parser.add_argument("modpath", help="module the simobject belongs to")
+parser.add_argument("param_cc", help="parameter cc file to generate")
+parser.add_argument(
+ "use_python", help="whether python is enabled in gem5 (True or False)"
+)
args = parser.parse_args()
use_python = args.use_python.lower()
-if use_python == 'true':
+if use_python == "true":
use_python = True
-elif use_python == 'false':
+elif use_python == "false":
use_python = False
else:
print(f'Unrecognized "use_python" value {use_python}', file=sys.stderr)
@@ -64,7 +65,7 @@
basename = os.path.basename(args.param_cc)
no_ext = os.path.splitext(basename)[0]
-sim_object_name = '_'.join(no_ext.split('_')[1:])
+sim_object_name = "_".join(no_ext.split("_")[1:])
importer.install()
module = importlib.import_module(args.modpath)
@@ -80,14 +81,16 @@
# the object itself, not including inherited params (which
# will also be inherited from the base class's param struct
# here). Sort the params based on their key
-params = list(map(lambda k_v: k_v[1],
- sorted(sim_object._params.local.items())))
+params = list(
+ map(lambda k_v: k_v[1], sorted(sim_object._params.local.items()))
+)
ports = sim_object._ports.local
# only include pybind if python is enabled in the build
if use_python:
- code('''#include "pybind11/pybind11.h"
+ code(
+ """#include "pybind11/pybind11.h"
#include "pybind11/stl.h"
#include <type_traits>
@@ -99,9 +102,11 @@
#include "${{sim_object.cxx_header}}"
-''')
+"""
+ )
else:
- code('''
+ code(
+ """
#include <type_traits>
#include "base/compiler.hh"
@@ -109,13 +114,15 @@
#include "${{sim_object.cxx_header}}"
-''')
+"""
+ )
# only include the python params code if python is enabled.
if use_python:
for param in params:
param.pybind_predecls(code)
- code('''namespace py = pybind11;
+ code(
+ """namespace py = pybind11;
namespace gem5
{
@@ -124,39 +131,48 @@
module_init(py::module_ &m_internal)
{
py::module_ m = m_internal.def_submodule("param_${sim_object}");
-''')
+"""
+ )
code.indent()
if sim_object._base:
- code('py::class_<${sim_object}Params, ' \
- '${{sim_object._base.type}}Params, ' \
- 'std::unique_ptr<${{sim_object}}Params, py::nodelete>>(' \
- 'm, "${sim_object}Params")')
+ code(
+ "py::class_<${sim_object}Params, "
+ "${{sim_object._base.type}}Params, "
+ "std::unique_ptr<${{sim_object}}Params, py::nodelete>>("
+ 'm, "${sim_object}Params")'
+ )
else:
- code('py::class_<${sim_object}Params, ' \
- 'std::unique_ptr<${sim_object}Params, py::nodelete>>(' \
- 'm, "${sim_object}Params")')
+ code(
+ "py::class_<${sim_object}Params, "
+ "std::unique_ptr<${sim_object}Params, py::nodelete>>("
+ 'm, "${sim_object}Params")'
+ )
code.indent()
- if not hasattr(sim_object, 'abstract') or not sim_object.abstract:
- code('.def(py::init<>())')
+ if not hasattr(sim_object, "abstract") or not sim_object.abstract:
+ code(".def(py::init<>())")
code('.def("create", &${sim_object}Params::create)')
- param_exports = sim_object.cxx_param_exports + [
- PyBindProperty(k)
- for k, v in sorted(sim_object._params.local.items())
- ] + [
- PyBindProperty(f"port_{port.name}_connection_count")
- for port in ports.values()
- ]
+ param_exports = (
+ sim_object.cxx_param_exports
+ + [
+ PyBindProperty(k)
+ for k, v in sorted(sim_object._params.local.items())
+ ]
+ + [
+ PyBindProperty(f"port_{port.name}_connection_count")
+ for port in ports.values()
+ ]
+ )
for exp in param_exports:
exp.export(code, f"{sim_object}Params")
- code(';')
+ code(";")
code()
code.dedent()
bases = []
- if 'cxx_base' in sim_object._value_dict:
+ if "cxx_base" in sim_object._value_dict:
# If the c++ base class implied by python inheritance was
# overridden, use that value.
if sim_object.cxx_base:
@@ -170,32 +186,39 @@
if bases:
base_str = ", ".join(bases)
- code('py::class_<${{sim_object.cxx_class}}, ${base_str}, ' \
- 'std::unique_ptr<${{sim_object.cxx_class}}, py::nodelete>>(' \
- 'm, "${py_class_name}")')
+ code(
+ "py::class_<${{sim_object.cxx_class}}, ${base_str}, "
+ "std::unique_ptr<${{sim_object.cxx_class}}, py::nodelete>>("
+ 'm, "${py_class_name}")'
+ )
else:
- code('py::class_<${{sim_object.cxx_class}}, ' \
- 'std::unique_ptr<${{sim_object.cxx_class}}, py::nodelete>>(' \
- 'm, "${py_class_name}")')
+ code(
+ "py::class_<${{sim_object.cxx_class}}, "
+ "std::unique_ptr<${{sim_object.cxx_class}}, py::nodelete>>("
+ 'm, "${py_class_name}")'
+ )
code.indent()
for exp in sim_object.cxx_exports:
exp.export(code, sim_object.cxx_class)
- code(';')
+ code(";")
code.dedent()
code()
code.dedent()
- code('}')
+ code("}")
code()
- code('static EmbeddedPyBind '
- 'embed_obj("${0}", module_init, "${1}");',
- sim_object, sim_object._base.type if sim_object._base else "")
+ code(
+ "static EmbeddedPyBind " 'embed_obj("${0}", module_init, "${1}");',
+ sim_object,
+ sim_object._base.type if sim_object._base else "",
+ )
code()
- code('} // namespace gem5')
+ code("} // namespace gem5")
# include the create() methods whether or not python is enabled.
-if not hasattr(sim_object, 'abstract') or not sim_object.abstract:
- if 'type' in sim_object.__dict__:
- code('''
+if not hasattr(sim_object, "abstract") or not sim_object.abstract:
+ if "type" in sim_object.__dict__:
+ code(
+ """
namespace gem5
{
@@ -268,6 +291,7 @@
}
} // namespace gem5
-''')
+"""
+ )
code.write(args.param_cc)
diff --git a/build_tools/sim_object_param_struct_hh.py b/build_tools/sim_object_param_struct_hh.py
index 261ac9b..bf37da2 100644
--- a/build_tools/sim_object_param_struct_hh.py
+++ b/build_tools/sim_object_param_struct_hh.py
@@ -46,8 +46,8 @@
from code_formatter import code_formatter
parser = argparse.ArgumentParser()
-parser.add_argument('modpath', help='module the simobject belongs to')
-parser.add_argument('param_hh', help='parameter header file to generate')
+parser.add_argument("modpath", help="module the simobject belongs to")
+parser.add_argument("param_hh", help="parameter header file to generate")
args = parser.parse_args()
@@ -67,8 +67,9 @@
# the object itself, not including inherited params (which
# will also be inherited from the base class's param struct
# here). Sort the params based on their key
-params = list(map(lambda k_v: k_v[1],
- sorted(sim_object._params.local.items())))
+params = list(
+ map(lambda k_v: k_v[1], sorted(sim_object._params.local.items()))
+)
ports = sim_object._ports.local
try:
ptypes = [p.ptype for p in params]
@@ -79,41 +80,44 @@
warned_about_nested_templates = False
+
class CxxClass(object):
def __init__(self, sig, template_params=[]):
# Split the signature into its constituent parts. This could
# potentially be done with regular expressions, but
        # it's simple enough to pick apart a class signature
# manually.
- parts = sig.split('<', 1)
+ parts = sig.split("<", 1)
base = parts[0]
t_args = []
if len(parts) > 1:
# The signature had template arguments.
- text = parts[1].rstrip(' \t\n>')
- arg = ''
+ text = parts[1].rstrip(" \t\n>")
+ arg = ""
# Keep track of nesting to avoid splitting on ","s embedded
# in the arguments themselves.
depth = 0
for c in text:
- if c == '<':
+ if c == "<":
depth = depth + 1
if depth > 0 and not warned_about_nested_templates:
warned_about_nested_templates = True
- print('Nested template argument in cxx_class.'
- ' This feature is largely untested and '
- ' may not work.')
- elif c == '>':
+ print(
+ "Nested template argument in cxx_class."
+ " This feature is largely untested and "
+ " may not work."
+ )
+ elif c == ">":
depth = depth - 1
- elif c == ',' and depth == 0:
+ elif c == "," and depth == 0:
t_args.append(arg.strip())
- arg = ''
+ arg = ""
else:
arg = arg + c
if arg:
t_args.append(arg.strip())
# Split the non-template part on :: boundaries.
- class_path = base.split('::')
+ class_path = base.split("::")
# The namespaces are everything except the last part of the class path.
self.namespaces = class_path[:-1]
@@ -125,7 +129,7 @@
# Iterate through the template arguments and their values. This
# will likely break if parameter packs are used.
for arg, param in zip(t_args, template_params):
- type_keys = ('class', 'typename')
+ type_keys = ("class", "typename")
# If a parameter is a type, parse it recursively. Otherwise
# assume it's a constant, and store it verbatim.
if any(param.strip().startswith(kw) for kw in type_keys):
@@ -140,21 +144,24 @@
arg.declare(code)
# Re-open the target namespace.
for ns in self.namespaces:
- code('namespace $ns {')
+ code("namespace $ns {")
# If this is a class template...
if self.template_params:
code('template <${{", ".join(self.template_params)}}>')
# The actual class declaration.
- code('class ${{self.name}};')
+ code("class ${{self.name}};")
# Close the target namespaces.
for ns in reversed(self.namespaces):
- code('} // namespace $ns')
+ code("} // namespace $ns")
-code('''\
+
+code(
+ """\
#ifndef __PARAMS__${sim_object}__
#define __PARAMS__${sim_object}__
-''')
+"""
+)
# The base SimObject has a couple of params that get
@@ -162,10 +169,12 @@
# the normal Param mechanism; we slip them in here (needed
# predecls now, actual declarations below)
if sim_object == SimObject:
- code('''#include <string>''')
+ code("""#include <string>""")
-cxx_class = CxxClass(sim_object._value_dict['cxx_class'],
- sim_object._value_dict['cxx_template_params'])
+cxx_class = CxxClass(
+ sim_object._value_dict["cxx_class"],
+ sim_object._value_dict["cxx_template_params"],
+)
# A forward class declaration is sufficient since we are just
# declaring a pointer.
@@ -186,27 +195,29 @@
code('#include "enums/${{ptype.__name__}}.hh"')
code()
-code('namespace gem5')
-code('{')
-code('')
+code("namespace gem5")
+code("{")
+code("")
# now generate the actual param struct
code("struct ${sim_object}Params")
if sim_object._base:
code(" : public ${{sim_object._base.type}}Params")
code("{")
-if not hasattr(sim_object, 'abstract') or not sim_object.abstract:
- if 'type' in sim_object.__dict__:
+if not hasattr(sim_object, "abstract") or not sim_object.abstract:
+ if "type" in sim_object.__dict__:
code(" ${{sim_object.cxx_type}} create() const;")
code.indent()
if sim_object == SimObject:
- code('''
+ code(
+ """
SimObjectParams() {}
virtual ~SimObjectParams() {}
std::string name;
- ''')
+ """
+ )
for param in params:
param.cxx_decl(code)
@@ -214,11 +225,11 @@
port.cxx_decl(code)
code.dedent()
-code('};')
+code("};")
code()
-code('} // namespace gem5')
+code("} // namespace gem5")
code()
-code('#endif // __PARAMS__${sim_object}__')
+code("#endif // __PARAMS__${sim_object}__")
code.write(args.param_hh)
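CxxClass above takes the cxx_class signature apart by hand, tracking '<'/'>' nesting depth so that commas inside nested template arguments do not split the argument list, then splits the non-template part on '::' to recover the namespaces. A simplified, self-contained sketch of that split (illustrative only; unlike the generator code it keeps the nested '<...>' text inside each argument):

# Simplified sketch of the nesting-aware signature split (illustrative only).
def split_signature(sig):
    base, sep, rest = sig.partition("<")
    t_args = []
    if sep:
        rest = rest.rstrip()
        if rest.endswith(">"):
            rest = rest[:-1]  # drop the signature's closing '>'
        arg, depth = "", 0
        for c in rest:
            if c == "," and depth == 0:
                t_args.append(arg.strip())
                arg = ""
                continue
            if c == "<":
                depth += 1
            elif c == ">":
                depth -= 1
            arg += c
        if arg:
            t_args.append(arg.strip())
    return base.split("::"), t_args

# (['gem5', 'ruby', 'FakeCache'], ['MyTags', 'std::pair<int, bool>'])
print(split_signature("gem5::ruby::FakeCache<MyTags, std::pair<int, bool>>"))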
diff --git a/configs/common/Benchmarks.py b/configs/common/Benchmarks.py
index 591c044..c90e78e 100644
--- a/configs/common/Benchmarks.py
+++ b/configs/common/Benchmarks.py
@@ -28,9 +28,11 @@
from os import environ as env
from m5.defines import buildEnv
+
class SysConfig:
- def __init__(self, script=None, mem=None, disks=None, rootdev=None,
- os_type='linux'):
+ def __init__(
+ self, script=None, mem=None, disks=None, rootdev=None, os_type="linux"
+ ):
self.scriptname = script
self.disknames = disks
self.memsize = mem
@@ -41,13 +43,13 @@
if self.scriptname:
return script(self.scriptname)
else:
- return ''
+ return ""
def mem(self):
if self.memsize:
return self.memsize
else:
- return '128MB'
+ return "128MB"
def disks(self):
if self.disknames:
@@ -59,72 +61,117 @@
if self.root:
return self.root
else:
- return '/dev/sda1'
+ return "/dev/sda1"
def os_type(self):
return self.ostype
+
# Benchmarks are defined as entries in a dict mapping a benchmark name to a list of SysConfigs.
# The first defined machine is the test system; the others are driving systems.
Benchmarks = {
- 'PovrayBench': [SysConfig('povray-bench.rcS', '512MB', ['povray.img'])],
- 'PovrayAutumn': [SysConfig('povray-autumn.rcS', '512MB', ['povray.img'])],
-
- 'NetperfStream': [SysConfig('netperf-stream-client.rcS'),
- SysConfig('netperf-server.rcS')],
- 'NetperfStreamUdp': [SysConfig('netperf-stream-udp-client.rcS'),
- SysConfig('netperf-server.rcS')],
- 'NetperfUdpLocal': [SysConfig('netperf-stream-udp-local.rcS')],
- 'NetperfStreamNT': [SysConfig('netperf-stream-nt-client.rcS'),
- SysConfig('netperf-server.rcS')],
- 'NetperfMaerts': [SysConfig('netperf-maerts-client.rcS'),
- SysConfig('netperf-server.rcS')],
- 'SurgeStandard': [SysConfig('surge-server.rcS', '512MB'),
- SysConfig('surge-client.rcS', '256MB')],
- 'SurgeSpecweb': [SysConfig('spec-surge-server.rcS', '512MB'),
- SysConfig('spec-surge-client.rcS', '256MB')],
- 'Nhfsstone': [SysConfig('nfs-server-nhfsstone.rcS', '512MB'),
- SysConfig('nfs-client-nhfsstone.rcS')],
- 'Nfs': [SysConfig('nfs-server.rcS', '900MB'),
- SysConfig('nfs-client-dbench.rcS')],
- 'NfsTcp': [SysConfig('nfs-server.rcS', '900MB'),
- SysConfig('nfs-client-tcp.rcS')],
- 'IScsiInitiator': [SysConfig('iscsi-client.rcS', '512MB'),
- SysConfig('iscsi-server.rcS', '512MB')],
- 'IScsiTarget': [SysConfig('iscsi-server.rcS', '512MB'),
- SysConfig('iscsi-client.rcS', '512MB')],
- 'Validation': [SysConfig('iscsi-server.rcS', '512MB'),
- SysConfig('iscsi-client.rcS', '512MB')],
- 'Ping': [SysConfig('ping-server.rcS',),
- SysConfig('ping-client.rcS')],
-
- 'ValAccDelay': [SysConfig('devtime.rcS', '512MB')],
- 'ValAccDelay2': [SysConfig('devtimewmr.rcS', '512MB')],
- 'ValMemLat': [SysConfig('micro_memlat.rcS', '512MB')],
- 'ValMemLat2MB': [SysConfig('micro_memlat2mb.rcS', '512MB')],
- 'ValMemLat8MB': [SysConfig('micro_memlat8mb.rcS', '512MB')],
- 'ValMemLat': [SysConfig('micro_memlat8.rcS', '512MB')],
- 'ValTlbLat': [SysConfig('micro_tlblat.rcS', '512MB')],
- 'ValSysLat': [SysConfig('micro_syscall.rcS', '512MB')],
- 'ValCtxLat': [SysConfig('micro_ctx.rcS', '512MB')],
- 'ValStream': [SysConfig('micro_stream.rcS', '512MB')],
- 'ValStreamScale': [SysConfig('micro_streamscale.rcS', '512MB')],
- 'ValStreamCopy': [SysConfig('micro_streamcopy.rcS', '512MB')],
-
- 'MutexTest': [SysConfig('mutex-test.rcS', '128MB')],
- 'ArmAndroid-GB': [SysConfig('null.rcS', '256MB',
- ['ARMv7a-Gingerbread-Android.SMP.mouse.nolock.clean.img'],
- None, 'android-gingerbread')],
- 'bbench-gb': [SysConfig('bbench-gb.rcS', '256MB',
- ['ARMv7a-Gingerbread-Android.SMP.mouse.nolock.img'],
- None, 'android-gingerbread')],
- 'ArmAndroid-ICS': [SysConfig('null.rcS', '256MB',
- ['ARMv7a-ICS-Android.SMP.nolock.clean.img'],
- None, 'android-ics')],
- 'bbench-ics': [SysConfig('bbench-ics.rcS', '256MB',
- ['ARMv7a-ICS-Android.SMP.nolock.img'],
- None, 'android-ics')]
+ "PovrayBench": [SysConfig("povray-bench.rcS", "512MB", ["povray.img"])],
+ "PovrayAutumn": [SysConfig("povray-autumn.rcS", "512MB", ["povray.img"])],
+ "NetperfStream": [
+ SysConfig("netperf-stream-client.rcS"),
+ SysConfig("netperf-server.rcS"),
+ ],
+ "NetperfStreamUdp": [
+ SysConfig("netperf-stream-udp-client.rcS"),
+ SysConfig("netperf-server.rcS"),
+ ],
+ "NetperfUdpLocal": [SysConfig("netperf-stream-udp-local.rcS")],
+ "NetperfStreamNT": [
+ SysConfig("netperf-stream-nt-client.rcS"),
+ SysConfig("netperf-server.rcS"),
+ ],
+ "NetperfMaerts": [
+ SysConfig("netperf-maerts-client.rcS"),
+ SysConfig("netperf-server.rcS"),
+ ],
+ "SurgeStandard": [
+ SysConfig("surge-server.rcS", "512MB"),
+ SysConfig("surge-client.rcS", "256MB"),
+ ],
+ "SurgeSpecweb": [
+ SysConfig("spec-surge-server.rcS", "512MB"),
+ SysConfig("spec-surge-client.rcS", "256MB"),
+ ],
+ "Nhfsstone": [
+ SysConfig("nfs-server-nhfsstone.rcS", "512MB"),
+ SysConfig("nfs-client-nhfsstone.rcS"),
+ ],
+ "Nfs": [
+ SysConfig("nfs-server.rcS", "900MB"),
+ SysConfig("nfs-client-dbench.rcS"),
+ ],
+ "NfsTcp": [
+ SysConfig("nfs-server.rcS", "900MB"),
+ SysConfig("nfs-client-tcp.rcS"),
+ ],
+ "IScsiInitiator": [
+ SysConfig("iscsi-client.rcS", "512MB"),
+ SysConfig("iscsi-server.rcS", "512MB"),
+ ],
+ "IScsiTarget": [
+ SysConfig("iscsi-server.rcS", "512MB"),
+ SysConfig("iscsi-client.rcS", "512MB"),
+ ],
+ "Validation": [
+ SysConfig("iscsi-server.rcS", "512MB"),
+ SysConfig("iscsi-client.rcS", "512MB"),
+ ],
+ "Ping": [SysConfig("ping-server.rcS"), SysConfig("ping-client.rcS")],
+ "ValAccDelay": [SysConfig("devtime.rcS", "512MB")],
+ "ValAccDelay2": [SysConfig("devtimewmr.rcS", "512MB")],
+ "ValMemLat": [SysConfig("micro_memlat.rcS", "512MB")],
+ "ValMemLat2MB": [SysConfig("micro_memlat2mb.rcS", "512MB")],
+ "ValMemLat8MB": [SysConfig("micro_memlat8mb.rcS", "512MB")],
+ "ValMemLat": [SysConfig("micro_memlat8.rcS", "512MB")],
+ "ValTlbLat": [SysConfig("micro_tlblat.rcS", "512MB")],
+ "ValSysLat": [SysConfig("micro_syscall.rcS", "512MB")],
+ "ValCtxLat": [SysConfig("micro_ctx.rcS", "512MB")],
+ "ValStream": [SysConfig("micro_stream.rcS", "512MB")],
+ "ValStreamScale": [SysConfig("micro_streamscale.rcS", "512MB")],
+ "ValStreamCopy": [SysConfig("micro_streamcopy.rcS", "512MB")],
+ "MutexTest": [SysConfig("mutex-test.rcS", "128MB")],
+ "ArmAndroid-GB": [
+ SysConfig(
+ "null.rcS",
+ "256MB",
+ ["ARMv7a-Gingerbread-Android.SMP.mouse.nolock.clean.img"],
+ None,
+ "android-gingerbread",
+ )
+ ],
+ "bbench-gb": [
+ SysConfig(
+ "bbench-gb.rcS",
+ "256MB",
+ ["ARMv7a-Gingerbread-Android.SMP.mouse.nolock.img"],
+ None,
+ "android-gingerbread",
+ )
+ ],
+ "ArmAndroid-ICS": [
+ SysConfig(
+ "null.rcS",
+ "256MB",
+ ["ARMv7a-ICS-Android.SMP.nolock.clean.img"],
+ None,
+ "android-ics",
+ )
+ ],
+ "bbench-ics": [
+ SysConfig(
+ "bbench-ics.rcS",
+ "256MB",
+ ["ARMv7a-ICS-Android.SMP.nolock.img"],
+ None,
+ "android-ics",
+ )
+ ],
}
benchs = list(Benchmarks.keys())
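Each value therefore describes one complete setup: the first SysConfig is the system under test and any further entries are driving systems. For example (illustrative lookup using entries from the table above):

# Illustrative only: inspect one of the dual-system benchmarks defined above.
test_sys, drive_sys = Benchmarks["NetperfStream"]

print(test_sys.scriptname)   # netperf-stream-client.rcS (system under test)
print(drive_sys.scriptname)  # netperf-server.rcS (driving system)
print(test_sys.mem())        # "128MB" -- the default when no size is given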
diff --git a/configs/common/CacheConfig.py b/configs/common/CacheConfig.py
index 61c6a30..63ffe67 100644
--- a/configs/common/CacheConfig.py
+++ b/configs/common/CacheConfig.py
@@ -42,9 +42,13 @@
import m5
from m5.objects import *
+from gem5.isas import ISA
+from gem5.runtime import get_runtime_isa
+
from common.Caches import *
from common import ObjectList
+
def _get_hwp(hwp_option):
if hwp_option == None:
return NULL
@@ -52,23 +56,25 @@
hwpClass = ObjectList.hwp_list.get(hwp_option)
return hwpClass()
+
def _get_cache_opts(level, options):
opts = {}
- size_attr = '{}_size'.format(level)
+ size_attr = "{}_size".format(level)
if hasattr(options, size_attr):
- opts['size'] = getattr(options, size_attr)
+ opts["size"] = getattr(options, size_attr)
- assoc_attr = '{}_assoc'.format(level)
+ assoc_attr = "{}_assoc".format(level)
if hasattr(options, assoc_attr):
- opts['assoc'] = getattr(options, assoc_attr)
+ opts["assoc"] = getattr(options, assoc_attr)
- prefetcher_attr = '{}_hwp_type'.format(level)
+ prefetcher_attr = "{}_hwp_type".format(level)
if hasattr(options, prefetcher_attr):
- opts['prefetcher'] = _get_hwp(getattr(options, prefetcher_attr))
+ opts["prefetcher"] = _get_hwp(getattr(options, prefetcher_attr))
return opts
+
def config_cache(options, system):
if options.external_memory_system and (options.caches or options.l2cache):
print("External caches and internal caches are exclusive options.\n")
@@ -84,10 +90,12 @@
print("O3_ARM_v7a_3 is unavailable. Did you compile the O3 model?")
sys.exit(1)
- dcache_class, icache_class, l2_cache_class, walk_cache_class = \
- core.O3_ARM_v7a_DCache, core.O3_ARM_v7a_ICache, \
- core.O3_ARM_v7aL2, \
- None
+ dcache_class, icache_class, l2_cache_class, walk_cache_class = (
+ core.O3_ARM_v7a_DCache,
+ core.O3_ARM_v7a_ICache,
+ core.O3_ARM_v7aL2,
+ None,
+ )
elif options.cpu_type == "HPI":
try:
import cores.arm.HPI as core
@@ -95,13 +103,21 @@
print("HPI is unavailable.")
sys.exit(1)
- dcache_class, icache_class, l2_cache_class, walk_cache_class = \
- core.HPI_DCache, core.HPI_ICache, core.HPI_L2, None
+ dcache_class, icache_class, l2_cache_class, walk_cache_class = (
+ core.HPI_DCache,
+ core.HPI_ICache,
+ core.HPI_L2,
+ None,
+ )
else:
- dcache_class, icache_class, l2_cache_class, walk_cache_class = \
- L1_DCache, L1_ICache, L2Cache, None
+ dcache_class, icache_class, l2_cache_class, walk_cache_class = (
+ L1_DCache,
+ L1_ICache,
+ L2Cache,
+ None,
+ )
- if buildEnv['TARGET_ISA'] in ['x86', 'riscv']:
+ if get_runtime_isa() in [ISA.X86, ISA.RISCV]:
walk_cache_class = PageTableWalkerCache
# Set the cache line size of the system
@@ -118,10 +134,11 @@
# Provide a clock for the L2 and the L1-to-L2 bus here as they
# are not connected using addTwoLevelCacheHierarchy. Use the
# same clock as the CPUs.
- system.l2 = l2_cache_class(clk_domain=system.cpu_clk_domain,
- **_get_cache_opts('l2', options))
+ system.l2 = l2_cache_class(
+ clk_domain=system.cpu_clk_domain, **_get_cache_opts("l2", options)
+ )
- system.tol2bus = L2XBar(clk_domain = system.cpu_clk_domain)
+ system.tol2bus = L2XBar(clk_domain=system.cpu_clk_domain)
system.l2.cpu_side = system.tol2bus.mem_side_ports
system.l2.mem_side = system.membus.cpu_side_ports
@@ -130,8 +147,8 @@
for i in range(options.num_cpus):
if options.caches:
- icache = icache_class(**_get_cache_opts('l1i', options))
- dcache = dcache_class(**_get_cache_opts('l1d', options))
+ icache = icache_class(**_get_cache_opts("l1i", options))
+ dcache = dcache_class(**_get_cache_opts("l1d", options))
# If we have a walker cache specified, instantiate two
# instances here
@@ -159,8 +176,9 @@
# When connecting the caches, the clock is also inherited
# from the CPU in question
- system.cpu[i].addPrivateSplitL1Caches(icache, dcache,
- iwalkcache, dwalkcache)
+ system.cpu[i].addPrivateSplitL1Caches(
+ icache, dcache, iwalkcache, dwalkcache
+ )
if options.memchecker:
# The mem_side ports of the caches haven't been connected yet.
@@ -174,47 +192,56 @@
# on these names. For simplicity, we would advise configuring
# it to use this naming scheme; if this isn't possible, change
# the names below.
- if buildEnv['TARGET_ISA'] in ['x86', 'arm', 'riscv']:
+ if get_runtime_isa() in [ISA.X86, ISA.ARM, ISA.RISCV]:
system.cpu[i].addPrivateSplitL1Caches(
- ExternalCache("cpu%d.icache" % i),
- ExternalCache("cpu%d.dcache" % i),
- ExternalCache("cpu%d.itb_walker_cache" % i),
- ExternalCache("cpu%d.dtb_walker_cache" % i))
+ ExternalCache("cpu%d.icache" % i),
+ ExternalCache("cpu%d.dcache" % i),
+ ExternalCache("cpu%d.itb_walker_cache" % i),
+ ExternalCache("cpu%d.dtb_walker_cache" % i),
+ )
else:
system.cpu[i].addPrivateSplitL1Caches(
- ExternalCache("cpu%d.icache" % i),
- ExternalCache("cpu%d.dcache" % i))
+ ExternalCache("cpu%d.icache" % i),
+ ExternalCache("cpu%d.dcache" % i),
+ )
system.cpu[i].createInterruptController()
if options.l2cache:
system.cpu[i].connectAllPorts(
system.tol2bus.cpu_side_ports,
- system.membus.cpu_side_ports, system.membus.mem_side_ports)
+ system.membus.cpu_side_ports,
+ system.membus.mem_side_ports,
+ )
elif options.external_memory_system:
system.cpu[i].connectUncachedPorts(
- system.membus.cpu_side_ports, system.membus.mem_side_ports)
+ system.membus.cpu_side_ports, system.membus.mem_side_ports
+ )
else:
system.cpu[i].connectBus(system.membus)
return system
+
# ExternalSlave provides a "port", but when that port connects to a cache,
# the connecting CPU SimObject wants to refer to its "cpu_side".
# The 'ExternalCache' class provides this adaptation by rewriting the name,
# eliminating distracting changes elsewhere in the config code.
class ExternalCache(ExternalSlave):
def __getattr__(cls, attr):
- if (attr == "cpu_side"):
+ if attr == "cpu_side":
attr = "port"
return super(ExternalSlave, cls).__getattr__(attr)
def __setattr__(cls, attr, value):
- if (attr == "cpu_side"):
+ if attr == "cpu_side":
attr = "port"
return super(ExternalSlave, cls).__setattr__(attr, value)
+
def ExternalCacheFactory(port_type):
def make(name):
- return ExternalCache(port_data=name, port_type=port_type,
- addr_ranges=[AllMemory])
+ return ExternalCache(
+ port_data=name, port_type=port_type, addr_ranges=[AllMemory]
+ )
+
return make
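_get_cache_opts above simply harvests the <level>_size, <level>_assoc and <level>_hwp_type attributes from the options namespace into constructor keyword arguments, skipping whichever options are absent. A rough illustration (attribute values invented; assumes it runs in the context of this module, since the helper is module-local):

# Illustration only: mimic the argparse options namespace config_cache sees.
from types import SimpleNamespace

options = SimpleNamespace(l1d_size="64kB", l1d_assoc=4, l2_size="2MB")

print(_get_cache_opts("l1d", options))  # {'size': '64kB', 'assoc': 4}
print(_get_cache_opts("l2", options))   # {'size': '2MB'} -- no l2_assoc given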
diff --git a/configs/common/Caches.py b/configs/common/Caches.py
index 1468b95..e25d16c 100644
--- a/configs/common/Caches.py
+++ b/configs/common/Caches.py
@@ -39,6 +39,8 @@
from m5.defines import buildEnv
from m5.objects import *
+from gem5.isas import ISA
+from gem5.runtime import get_runtime_isa
# Base implementations of L1, L2, IO and TLB-walker caches. These are
# used in the regressions and also as base components in the
@@ -46,6 +48,7 @@
# starting point, and specific parameters can be overridden in the
# specific instantiations.
+
class L1Cache(Cache):
assoc = 2
tag_latency = 2
@@ -54,14 +57,17 @@
mshrs = 4
tgts_per_mshr = 20
+
class L1_ICache(L1Cache):
is_read_only = True
# Writeback clean lines as well
writeback_clean = True
+
class L1_DCache(L1Cache):
pass
+
class L2Cache(Cache):
assoc = 8
tag_latency = 20
@@ -71,26 +77,28 @@
tgts_per_mshr = 12
write_buffers = 8
+
class IOCache(Cache):
assoc = 8
tag_latency = 50
data_latency = 50
response_latency = 50
mshrs = 20
- size = '1kB'
+ size = "1kB"
tgts_per_mshr = 12
+
class PageTableWalkerCache(Cache):
assoc = 2
tag_latency = 2
data_latency = 2
response_latency = 2
mshrs = 10
- size = '1kB'
+ size = "1kB"
tgts_per_mshr = 12
# the x86 table walker actually writes to the table-walker cache
- if buildEnv['TARGET_ISA'] in ['x86', 'riscv']:
+ if get_runtime_isa() in [ISA.X86, ISA.RISCV]:
is_read_only = False
else:
is_read_only = True
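As the comment above notes, these classes are meant as starting points; specific configurations override parameters either by subclassing or per instance. A small sketch (values arbitrary, for illustration only):

# Illustration only: override base cache parameters in a config script.
class MyL1D(L1_DCache):
    size = "64kB"
    assoc = 8

dcache = MyL1D(tag_latency=3)  # per-instance parameter overrides also work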
diff --git a/configs/common/CpuConfig.py b/configs/common/CpuConfig.py
index d34143c..1672d43 100644
--- a/configs/common/CpuConfig.py
+++ b/configs/common/CpuConfig.py
@@ -36,6 +36,7 @@
from m5 import fatal
import m5.objects
+
def config_etrace(cpu_cls, cpu_list, options):
if issubclass(cpu_cls, m5.objects.DerivO3CPU):
# Assign the same file name to all cpus for now. This must be
@@ -45,17 +46,21 @@
# file names. Set the dependency window size equal to the cpu it
# is attached to.
cpu.traceListener = m5.objects.ElasticTrace(
- instFetchTraceFile = options.inst_trace_file,
- dataDepTraceFile = options.data_trace_file,
- depWindowSize = 3 * cpu.numROBEntries)
+ instFetchTraceFile=options.inst_trace_file,
+ dataDepTraceFile=options.data_trace_file,
+ depWindowSize=3 * cpu.numROBEntries,
+ )
# Make the number of entries in the ROB, LQ and SQ very
# large so that there are no stalls due to resource
# limitation as such stalls will get captured in the trace
# as compute delay. For replay, ROB, LQ and SQ sizes are
# modelled in the Trace CPU.
- cpu.numROBEntries = 512;
- cpu.LQEntries = 128;
- cpu.SQEntries = 128;
+ cpu.numROBEntries = 512
+ cpu.LQEntries = 128
+ cpu.SQEntries = 128
else:
- fatal("%s does not support data dependency tracing. Use a CPU model of"
- " type or inherited from DerivO3CPU.", cpu_cls)
+ fatal(
+ "%s does not support data dependency tracing. Use a CPU model of"
+ " type or inherited from DerivO3CPU.",
+ cpu_cls,
+ )
diff --git a/configs/common/FSConfig.py b/configs/common/FSConfig.py
index febe146..5da951c 100644
--- a/configs/common/FSConfig.py
+++ b/configs/common/FSConfig.py
@@ -39,69 +39,87 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import m5
+import m5.defines
from m5.objects import *
from m5.util import *
from common.Benchmarks import *
from common import ObjectList
# Populate to reflect supported os types per target ISA
-os_types = { 'mips' : [ 'linux' ],
- 'riscv' : [ 'linux' ], # TODO that's a lie
- 'sparc' : [ 'linux' ],
- 'x86' : [ 'linux' ],
- 'arm' : [ 'linux',
- 'android-gingerbread',
- 'android-ics',
- 'android-jellybean',
- 'android-kitkat',
- 'android-nougat', ],
- }
+os_types = set()
+if m5.defines.buildEnv["USE_ARM_ISA"]:
+ os_types.update(
+ [
+ "linux",
+ "android-gingerbread",
+ "android-ics",
+ "android-jellybean",
+ "android-kitkat",
+ "android-nougat",
+ ]
+ )
+if m5.defines.buildEnv["USE_MIPS_ISA"]:
+ os_types.add("linux")
+if m5.defines.buildEnv["USE_POWER_ISA"]:
+ os_types.add("linux")
+if m5.defines.buildEnv["USE_RISCV_ISA"]:
+ os_types.add("linux") # TODO that's a lie
+if m5.defines.buildEnv["USE_SPARC_ISA"]:
+ os_types.add("linux")
+if m5.defines.buildEnv["USE_X86_ISA"]:
+ os_types.add("linux")
+
class CowIdeDisk(IdeDisk):
- image = CowDiskImage(child=RawDiskImage(read_only=True),
- read_only=False)
+ image = CowDiskImage(child=RawDiskImage(read_only=True), read_only=False)
def childImage(self, ci):
self.image.child.image_file = ci
+
class MemBus(SystemXBar):
badaddr_responder = BadAddr()
default = Self.badaddr_responder.pio
+
def attach_9p(parent, bus):
viopci = PciVirtIO()
viopci.vio = VirtIO9PDiod()
- viodir = os.path.realpath(os.path.join(m5.options.outdir, '9p'))
- viopci.vio.root = os.path.join(viodir, 'share')
- viopci.vio.socketPath = os.path.join(viodir, 'socket')
+ viodir = os.path.realpath(os.path.join(m5.options.outdir, "9p"))
+ viopci.vio.root = os.path.join(viodir, "share")
+ viopci.vio.socketPath = os.path.join(viodir, "socket")
os.makedirs(viopci.vio.root, exist_ok=True)
if os.path.exists(viopci.vio.socketPath):
os.remove(viopci.vio.socketPath)
parent.viopci = viopci
parent.attachPciDevice(viopci, bus)
+
def fillInCmdline(mdesc, template, **kwargs):
- kwargs.setdefault('rootdev', mdesc.rootdev())
- kwargs.setdefault('mem', mdesc.mem())
- kwargs.setdefault('script', mdesc.script())
+ kwargs.setdefault("rootdev", mdesc.rootdev())
+ kwargs.setdefault("mem", mdesc.mem())
+ kwargs.setdefault("script", mdesc.script())
return template % kwargs
+
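fillInCmdline above fills a %-style kernel command line template with the machine description's rootdev/mem/script values, with any explicit keyword arguments taking precedence over those defaults. For example, with an all-default SysConfig from Benchmarks.py (illustrative):

# Illustrative only: SysConfig's defaults are "128MB" and "/dev/sda1".
mdesc = SysConfig()
template = "console=ttyAMA0 mem=%(mem)s root=%(rootdev)s"
print(fillInCmdline(mdesc, template))
# -> console=ttyAMA0 mem=128MB root=/dev/sda1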
def makeCowDisks(disk_paths):
disks = []
for disk_path in disk_paths:
- disk = CowIdeDisk(driveID='device0')
- disk.childImage(disk_path);
+ disk = CowIdeDisk(driveID="device0")
+ disk.childImage(disk_path)
disks.append(disk)
return disks
+
def makeSparcSystem(mem_mode, mdesc=None, cmdline=None):
# Constants from iob.cc and uart8250.cc
iob_man_addr = 0x9800000000
uart_pio_size = 8
class CowMmDisk(MmDisk):
- image = CowDiskImage(child=RawDiskImage(read_only=True),
- read_only=False)
+ image = CowDiskImage(
+ child=RawDiskImage(read_only=True), read_only=False
+ )
def childImage(self, ci):
self.image.child.image_file = ci
@@ -113,12 +131,14 @@
self.readfile = mdesc.script()
self.iobus = IOXBar()
self.membus = MemBus()
- self.bridge = Bridge(delay='50ns')
+ self.bridge = Bridge(delay="50ns")
self.t1000 = T1000()
self.t1000.attachOnChipIO(self.membus)
self.t1000.attachIO(self.iobus)
- self.mem_ranges = [AddrRange(Addr('1MB'), size = '64MB'),
- AddrRange(Addr('2GB'), size ='256MB')]
+ self.mem_ranges = [
+ AddrRange(Addr("1MB"), size="64MB"),
+ AddrRange(Addr("2GB"), size="256MB"),
+ ]
self.bridge.mem_side_port = self.iobus.cpu_side_ports
self.bridge.cpu_side_port = self.membus.mem_side_ports
self.disk0 = CowMmDisk()
@@ -128,36 +148,47 @@
# The puart0 and hvuart are placed on the IO bus, so create ranges
# for them. The remaining IO range is rather fragmented, so poke
# holes for the iob and partition descriptors etc.
- self.bridge.ranges = \
- [
- AddrRange(self.t1000.puart0.pio_addr,
- self.t1000.puart0.pio_addr + uart_pio_size - 1),
- AddrRange(self.disk0.pio_addr,
- self.t1000.fake_jbi.pio_addr +
- self.t1000.fake_jbi.pio_size - 1),
- AddrRange(self.t1000.fake_clk.pio_addr,
- iob_man_addr - 1),
- AddrRange(self.t1000.fake_l2_1.pio_addr,
- self.t1000.fake_ssi.pio_addr +
- self.t1000.fake_ssi.pio_size - 1),
- AddrRange(self.t1000.hvuart.pio_addr,
- self.t1000.hvuart.pio_addr + uart_pio_size - 1)
- ]
+ self.bridge.ranges = [
+ AddrRange(
+ self.t1000.puart0.pio_addr,
+ self.t1000.puart0.pio_addr + uart_pio_size - 1,
+ ),
+ AddrRange(
+ self.disk0.pio_addr,
+ self.t1000.fake_jbi.pio_addr + self.t1000.fake_jbi.pio_size - 1,
+ ),
+ AddrRange(self.t1000.fake_clk.pio_addr, iob_man_addr - 1),
+ AddrRange(
+ self.t1000.fake_l2_1.pio_addr,
+ self.t1000.fake_ssi.pio_addr + self.t1000.fake_ssi.pio_size - 1,
+ ),
+ AddrRange(
+ self.t1000.hvuart.pio_addr,
+ self.t1000.hvuart.pio_addr + uart_pio_size - 1,
+ ),
+ ]
workload = SparcFsWorkload()
# ROM for OBP/Reset/Hypervisor
- self.rom = SimpleMemory(image_file=binary('t1000_rom.bin'),
- range=AddrRange(0xfff0000000, size='8MB'))
+ self.rom = SimpleMemory(
+ image_file=binary("t1000_rom.bin"),
+ range=AddrRange(0xFFF0000000, size="8MB"),
+ )
# nvram
- self.nvram = SimpleMemory(image_file=binary('nvram1'),
- range=AddrRange(0x1f11000000, size='8kB'))
+ self.nvram = SimpleMemory(
+ image_file=binary("nvram1"), range=AddrRange(0x1F11000000, size="8kB")
+ )
# hypervisor description
- self.hypervisor_desc = SimpleMemory(image_file=binary('1up-hv.bin'),
- range=AddrRange(0x1f12080000, size='8kB'))
+ self.hypervisor_desc = SimpleMemory(
+ image_file=binary("1up-hv.bin"),
+ range=AddrRange(0x1F12080000, size="8kB"),
+ )
# partition description
- self.partition_desc = SimpleMemory(image_file=binary('1up-md.bin'),
- range=AddrRange(0x1f12000000, size='8kB'))
+ self.partition_desc = SimpleMemory(
+ image_file=binary("1up-md.bin"),
+ range=AddrRange(0x1F12000000, size="8kB"),
+ )
self.rom.port = self.membus.mem_side_ports
self.nvram.port = self.membus.mem_side_ports
@@ -170,10 +201,20 @@
return self
-def makeArmSystem(mem_mode, machine_type, num_cpus=1, mdesc=None,
- dtb_filename=None, bare_metal=False, cmdline=None,
- external_memory="", ruby=False,
- vio_9p=None, bootloader=None):
+
+def makeArmSystem(
+ mem_mode,
+ machine_type,
+ num_cpus=1,
+ mdesc=None,
+ dtb_filename=None,
+ bare_metal=False,
+ cmdline=None,
+ external_memory="",
+ ruby=False,
+ vio_9p=None,
+ bootloader=None,
+):
assert machine_type
pci_devices = []
@@ -187,7 +228,7 @@
self.readfile = mdesc.script()
self.iobus = IOXBar()
if not ruby:
- self.bridge = Bridge(delay='50ns')
+ self.bridge = Bridge(delay="50ns")
self.bridge.mem_side_port = self.iobus.cpu_side_ports
self.membus = MemBus()
self.membus.badaddr_responder.warn_access = "warn"
@@ -227,13 +268,17 @@
self.mem_ranges.append(AddrRange(region.start, size=size_remain))
size_remain = 0
break
- warn("Memory size specified spans more than one region. Creating" \
- " another memory controller for that range.")
+ warn(
+ "Memory size specified spans more than one region. Creating"
+ " another memory controller for that range."
+ )
if size_remain > 0:
- fatal("The currently selected ARM platforms doesn't support" \
- " the amount of DRAM you've selected. Please try" \
- " another platform")
+ fatal(
+                "The currently selected ARM platform doesn't support"
+ " the amount of DRAM you've selected. Please try"
+ " another platform"
+ )
if bare_metal:
# EOT character on UART will end the simulation
@@ -245,16 +290,19 @@
if dtb_filename:
workload.dtb_filename = binary(dtb_filename)
- workload.machine_type = \
+ workload.machine_type = (
machine_type if machine_type in ArmMachineType.map else "DTOnly"
+ )
# Ensure that writes to the UART actually go out early in the boot
if not cmdline:
- cmdline = 'earlyprintk=pl011,0x1c090000 console=ttyAMA0 ' + \
- 'lpj=19988480 norandmaps rw loglevel=8 ' + \
- 'mem=%(mem)s root=%(rootdev)s'
+ cmdline = (
+ "earlyprintk=pl011,0x1c090000 console=ttyAMA0 "
+ + "lpj=19988480 norandmaps rw loglevel=8 "
+ + "mem=%(mem)s root=%(rootdev)s"
+ )
- if hasattr(self.realview.gic, 'cpu_addr'):
+ if hasattr(self.realview.gic, "cpu_addr"):
self.gic_cpu_addr = self.realview.gic.cpu_addr
# This check is for users who have previously put 'android' in
@@ -263,30 +311,37 @@
# behavior has been replaced with a more explicit option per
# the error message below. The disk can have any name now and
# doesn't need to include 'android' substring.
- if (mdesc.disks() and
- os.path.split(mdesc.disks()[0])[-1].lower().count('android')):
- if 'android' not in mdesc.os_type():
- fatal("It looks like you are trying to boot an Android " \
- "platform. To boot Android, you must specify " \
- "--os-type with an appropriate Android release on " \
- "the command line.")
+ if mdesc.disks() and os.path.split(mdesc.disks()[0])[-1].lower().count(
+ "android"
+ ):
+ if "android" not in mdesc.os_type():
+ fatal(
+ "It looks like you are trying to boot an Android "
+ "platform. To boot Android, you must specify "
+ "--os-type with an appropriate Android release on "
+ "the command line."
+ )
# android-specific tweaks
- if 'android' in mdesc.os_type():
+ if "android" in mdesc.os_type():
# generic tweaks
cmdline += " init=/init"
# release-specific tweaks
- if 'kitkat' in mdesc.os_type():
- cmdline += " androidboot.hardware=gem5 qemu=1 qemu.gles=0 " + \
- "android.bootanim=0 "
- elif 'nougat' in mdesc.os_type():
- cmdline += " androidboot.hardware=gem5 qemu=1 qemu.gles=0 " + \
- "android.bootanim=0 " + \
- "vmalloc=640MB " + \
- "android.early.fstab=/fstab.gem5 " + \
- "androidboot.selinux=permissive " + \
- "video=Virtual-1:1920x1080-16"
+ if "kitkat" in mdesc.os_type():
+ cmdline += (
+ " androidboot.hardware=gem5 qemu=1 qemu.gles=0 "
+ + "android.bootanim=0 "
+ )
+ elif "nougat" in mdesc.os_type():
+ cmdline += (
+ " androidboot.hardware=gem5 qemu=1 qemu.gles=0 "
+ + "android.bootanim=0 "
+ + "vmalloc=640MB "
+ + "android.early.fstab=/fstab.gem5 "
+ + "androidboot.selinux=permissive "
+ + "video=Virtual-1:1920x1080-16"
+ )
workload.command_line = fillInCmdline(mdesc, cmdline)
@@ -296,14 +351,17 @@
if external_memory:
# I/O traffic enters iobus
- self.external_io = ExternalMaster(port_data="external_io",
- port_type=external_memory)
+ self.external_io = ExternalMaster(
+ port_data="external_io", port_type=external_memory
+ )
self.external_io.port = self.iobus.cpu_side_ports
# Ensure iocache only receives traffic destined for (actual) memory.
- self.iocache = ExternalSlave(port_data="iocache",
- port_type=external_memory,
- addr_ranges=self.mem_ranges)
+ self.iocache = ExternalSlave(
+ port_data="iocache",
+ port_type=external_memory,
+ addr_ranges=self.mem_ranges,
+ )
self.iocache.port = self.iobus.mem_side_ports
# Let system_port get to nvmem and nothing else.
@@ -313,10 +371,11 @@
# Attach off-chip devices
self.realview.attachIO(self.iobus)
elif ruby:
- self._dma_ports = [ ]
- self._mem_ports = [ ]
- self.realview.attachOnChipIO(self.iobus,
- dma_ports=self._dma_ports, mem_ports=self._mem_ports)
+ self._dma_ports = []
+ self._mem_ports = []
+ self.realview.attachOnChipIO(
+ self.iobus, dma_ports=self._dma_ports, mem_ports=self._mem_ports
+ )
self.realview.attachIO(self.iobus, dma_ports=self._dma_ports)
else:
self.realview.attachOnChipIO(self.membus, self.bridge)
@@ -325,8 +384,8 @@
for dev in pci_devices:
self.realview.attachPciDevice(
- dev, self.iobus,
- dma_ports=self._dma_ports if ruby else None)
+ dev, self.iobus, dma_ports=self._dma_ports if ruby else None
+ )
self.terminal = Terminal()
self.vncserver = VncServer()
@@ -338,10 +397,12 @@
self.system_port = self.membus.cpu_side_ports
if ruby:
- if buildEnv['PROTOCOL'] == 'MI_example' and num_cpus > 1:
- fatal("The MI_example protocol cannot implement Load/Store "
- "Exclusive operations. Multicore ARM systems configured "
- "with the MI_example protocol will not work properly.")
+ if buildEnv["PROTOCOL"] == "MI_example" and num_cpus > 1:
+ fatal(
+ "The MI_example protocol cannot implement Load/Store "
+ "Exclusive operations. Multicore ARM systems configured "
+ "with the MI_example protocol will not work properly."
+ )
return self
@@ -349,8 +410,9 @@
def makeLinuxMipsSystem(mem_mode, mdesc=None, cmdline=None):
class BaseMalta(Malta):
ethernet = NSGigE(pci_bus=0, pci_dev=1, pci_func=0)
- ide = IdeController(disks=Parent.disks,
- pci_func=0, pci_dev=0, pci_bus=0)
+ ide = IdeController(
+ disks=Parent.disks, pci_func=0, pci_dev=0, pci_bus=0
+ )
self = System()
if not mdesc:
@@ -359,8 +421,8 @@
self.readfile = mdesc.script()
self.iobus = IOXBar()
self.membus = MemBus()
- self.bridge = Bridge(delay='50ns')
- self.mem_ranges = [AddrRange('1GB')]
+ self.bridge = Bridge(delay="50ns")
+ self.mem_ranges = [AddrRange("1GB")]
self.bridge.mem_side_port = self.iobus.cpu_side_ports
self.bridge.cpu_side_port = self.membus.mem_side_ports
self.disks = makeCowDisks(mdesc.disks())
@@ -370,35 +432,38 @@
self.malta.ide.dma = self.iobus.cpu_side_ports
self.malta.ethernet.pio = self.iobus.mem_side_ports
self.malta.ethernet.dma = self.iobus.cpu_side_ports
- self.simple_disk = SimpleDisk(disk=RawDiskImage(
- image_file = mdesc.disks()[0], read_only = True))
+ self.simple_disk = SimpleDisk(
+ disk=RawDiskImage(image_file=mdesc.disks()[0], read_only=True)
+ )
self.mem_mode = mem_mode
self.terminal = Terminal()
- self.console = binary('mips/console')
+ self.console = binary("mips/console")
if not cmdline:
- cmdline = 'root=/dev/hda1 console=ttyS0'
+ cmdline = "root=/dev/hda1 console=ttyS0"
self.workload = KernelWorkload(command_line=fillInCmdline(mdesc, cmdline))
self.system_port = self.membus.cpu_side_ports
return self
+
def x86IOAddress(port):
IO_address_space_base = 0x8000000000000000
return IO_address_space_base + port
+
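# A minimal usage sketch of x86IOAddress() above (assuming the conventional
# COM1 data port, 0x3F8, purely for illustration): legacy port numbers are
# simply offset into gem5's dedicated 64-bit port-IO address window.
example_com1 = x86IOAddress(0x3F8)
assert example_com1 == 0x80000000000003F8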
def connectX86ClassicSystem(x86_sys, numCPUs):
# Constants similar to x86_traits.hh
IO_address_space_base = 0x8000000000000000
- pci_config_address_space_base = 0xc000000000000000
- interrupts_address_space_base = 0xa000000000000000
- APIC_range_size = 1 << 12;
+ pci_config_address_space_base = 0xC000000000000000
+ interrupts_address_space_base = 0xA000000000000000
+ APIC_range_size = 1 << 12
x86_sys.membus = MemBus()
# North Bridge
x86_sys.iobus = IOXBar()
- x86_sys.bridge = Bridge(delay='50ns')
+ x86_sys.bridge = Bridge(delay="50ns")
x86_sys.bridge.mem_side_port = x86_sys.iobus.cpu_side_ports
x86_sys.bridge.cpu_side_port = x86_sys.membus.mem_side_ports
# Allow the bridge to pass through:
@@ -407,30 +472,30 @@
# 2) the bridge to pass through the IO APIC (two pages, already contained in 1),
# 3) everything in the IO address range up to the local APIC, and
# 4) then the entire PCI address space and beyond.
- x86_sys.bridge.ranges = \
- [
+ x86_sys.bridge.ranges = [
AddrRange(0xC0000000, 0xFFFF0000),
- AddrRange(IO_address_space_base,
- interrupts_address_space_base - 1),
- AddrRange(pci_config_address_space_base,
- Addr.max)
- ]
+ AddrRange(IO_address_space_base, interrupts_address_space_base - 1),
+ AddrRange(pci_config_address_space_base, Addr.max),
+ ]
# Create a bridge from the IO bus to the memory bus to allow access to
# the local APIC (two pages)
- x86_sys.apicbridge = Bridge(delay='50ns')
+ x86_sys.apicbridge = Bridge(delay="50ns")
x86_sys.apicbridge.cpu_side_port = x86_sys.iobus.mem_side_ports
x86_sys.apicbridge.mem_side_port = x86_sys.membus.cpu_side_ports
- x86_sys.apicbridge.ranges = [AddrRange(interrupts_address_space_base,
- interrupts_address_space_base +
- numCPUs * APIC_range_size
- - 1)]
+ x86_sys.apicbridge.ranges = [
+ AddrRange(
+ interrupts_address_space_base,
+ interrupts_address_space_base + numCPUs * APIC_range_size - 1,
+ )
+ ]
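# A small sanity check of the APIC window arithmetic above, assuming an
# example system with 4 CPUs: APIC_range_size is 1 << 12 bytes per CPU, so
# the apicbridge range spans numCPUs * 4 KiB starting at the interrupts
# address space base.
example_num_cpus = 4
example_window = example_num_cpus * (1 << 12)
assert example_window == 0x4000  # 16 KiB covers four per-CPU APIC slots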
# connect the io bus
x86_sys.pc.attachIO(x86_sys.iobus)
x86_sys.system_port = x86_sys.membus.cpu_side_ports
+
def connectX86RubySystem(x86_sys):
# North Bridge
x86_sys.iobus = IOXBar()
@@ -444,7 +509,7 @@
def makeX86System(mem_mode, numCPUs=1, mdesc=None, workload=None, Ruby=False):
self = System()
- self.m5ops_base = 0xffff0000
+ self.m5ops_base = 0xFFFF0000
if workload is None:
workload = X86FsWorkload()
@@ -461,17 +526,22 @@
# On the PC platform, the memory region 0xC0000000-0xFFFFFFFF is reserved
# for various devices. Hence, if the physical memory size is greater than
# 3GB, we need to split it into two parts.
- excess_mem_size = \
- convert.toMemorySize(mdesc.mem()) - convert.toMemorySize('3GB')
+ excess_mem_size = convert.toMemorySize(mdesc.mem()) - convert.toMemorySize(
+ "3GB"
+ )
if excess_mem_size <= 0:
self.mem_ranges = [AddrRange(mdesc.mem())]
else:
- warn("Physical memory size specified is %s which is greater than " \
- "3GB. Twice the number of memory controllers would be " \
- "created." % (mdesc.mem()))
+ warn(
+ "Physical memory size specified is %s which is greater than "
+ "3GB. Twice the number of memory controllers would be "
+ "created." % (mdesc.mem())
+ )
- self.mem_ranges = [AddrRange('3GB'),
- AddrRange(Addr('4GB'), size = excess_mem_size)]
+ self.mem_ranges = [
+ AddrRange("3GB"),
+ AddrRange(Addr("4GB"), size=excess_mem_size),
+ ]
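# Illustration of the split above, assuming 6GB of guest memory (gem5 size
# strings are treated as binary here, so GB means GiB): the excess over 3GB
# is relocated to start at 4GB, leaving [3GB, 4GB) free for the device and
# PCI window.
GiB = 1 << 30
example_excess = 6 * GiB - 3 * GiB
example_split = [(0, 3 * GiB), (4 * GiB, 4 * GiB + example_excess)]
assert example_split[1] == (4 * GiB, 7 * GiB)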
# Platform
self.pc = Pc()
@@ -496,78 +566,78 @@
madt_records = []
for i in range(numCPUs):
bp = X86IntelMPProcessor(
- local_apic_id = i,
- local_apic_version = 0x14,
- enable = True,
- bootstrap = (i == 0))
+ local_apic_id=i,
+ local_apic_version=0x14,
+ enable=True,
+ bootstrap=(i == 0),
+ )
base_entries.append(bp)
- lapic = X86ACPIMadtLAPIC(
- acpi_processor_id=i,
- apic_id=i,
- flags=1)
+ lapic = X86ACPIMadtLAPIC(acpi_processor_id=i, apic_id=i, flags=1)
madt_records.append(lapic)
io_apic = X86IntelMPIOAPIC(
- id = numCPUs,
- version = 0x11,
- enable = True,
- address = 0xfec00000)
+ id=numCPUs, version=0x11, enable=True, address=0xFEC00000
+ )
self.pc.south_bridge.io_apic.apic_id = io_apic.id
base_entries.append(io_apic)
- madt_records.append(X86ACPIMadtIOAPIC(id=io_apic.id,
- address=io_apic.address, int_base=0))
+ madt_records.append(
+ X86ACPIMadtIOAPIC(id=io_apic.id, address=io_apic.address, int_base=0)
+ )
    # In gem5's Pc::calcPciConfigAddr() there is an "assert(bus==0)", but the
    # Linux kernel cannot configure a PCI device unless it is connected to a
    # PCI bus, so we fix the PCI bus id to 0 and the ISA bus id to 1.
- pci_bus = X86IntelMPBus(bus_id = 0, bus_type='PCI ')
+ pci_bus = X86IntelMPBus(bus_id=0, bus_type="PCI ")
base_entries.append(pci_bus)
- isa_bus = X86IntelMPBus(bus_id = 1, bus_type='ISA ')
+ isa_bus = X86IntelMPBus(bus_id=1, bus_type="ISA ")
base_entries.append(isa_bus)
- connect_busses = X86IntelMPBusHierarchy(bus_id=1,
- subtractive_decode=True, parent_bus=0)
+ connect_busses = X86IntelMPBusHierarchy(
+ bus_id=1, subtractive_decode=True, parent_bus=0
+ )
ext_entries.append(connect_busses)
pci_dev4_inta = X86IntelMPIOIntAssignment(
- interrupt_type = 'INT',
- polarity = 'ConformPolarity',
- trigger = 'ConformTrigger',
- source_bus_id = 0,
- source_bus_irq = 0 + (4 << 2),
- dest_io_apic_id = io_apic.id,
- dest_io_apic_intin = 16)
+ interrupt_type="INT",
+ polarity="ConformPolarity",
+ trigger="ConformTrigger",
+ source_bus_id=0,
+ source_bus_irq=0 + (4 << 2),
+ dest_io_apic_id=io_apic.id,
+ dest_io_apic_intin=16,
+ )
base_entries.append(pci_dev4_inta)
pci_dev4_inta_madt = X86ACPIMadtIntSourceOverride(
- bus_source = pci_dev4_inta.source_bus_id,
- irq_source = pci_dev4_inta.source_bus_irq,
- sys_int = pci_dev4_inta.dest_io_apic_intin,
- flags = 0
- )
+ bus_source=pci_dev4_inta.source_bus_id,
+ irq_source=pci_dev4_inta.source_bus_irq,
+ sys_int=pci_dev4_inta.dest_io_apic_intin,
+ flags=0,
+ )
madt_records.append(pci_dev4_inta_madt)
+
def assignISAInt(irq, apicPin):
assign_8259_to_apic = X86IntelMPIOIntAssignment(
- interrupt_type = 'ExtInt',
- polarity = 'ConformPolarity',
- trigger = 'ConformTrigger',
- source_bus_id = 1,
- source_bus_irq = irq,
- dest_io_apic_id = io_apic.id,
- dest_io_apic_intin = 0)
+ interrupt_type="ExtInt",
+ polarity="ConformPolarity",
+ trigger="ConformTrigger",
+ source_bus_id=1,
+ source_bus_irq=irq,
+ dest_io_apic_id=io_apic.id,
+ dest_io_apic_intin=0,
+ )
base_entries.append(assign_8259_to_apic)
assign_to_apic = X86IntelMPIOIntAssignment(
- interrupt_type = 'INT',
- polarity = 'ConformPolarity',
- trigger = 'ConformTrigger',
- source_bus_id = 1,
- source_bus_irq = irq,
- dest_io_apic_id = io_apic.id,
- dest_io_apic_intin = apicPin)
+ interrupt_type="INT",
+ polarity="ConformPolarity",
+ trigger="ConformTrigger",
+ source_bus_id=1,
+ source_bus_irq=irq,
+ dest_io_apic_id=io_apic.id,
+ dest_io_apic_intin=apicPin,
+ )
base_entries.append(assign_to_apic)
# acpi
assign_to_apic_acpi = X86ACPIMadtIntSourceOverride(
- bus_source = 1,
- irq_source = irq,
- sys_int = apicPin,
- flags = 0
- )
+ bus_source=1, irq_source=irq, sys_int=apicPin, flags=0
+ )
madt_records.append(assign_to_apic_acpi)
+
assignISAInt(0, 2)
assignISAInt(1, 1)
for i in range(3, 15):
@@ -575,64 +645,78 @@
workload.intel_mp_table.base_entries = base_entries
workload.intel_mp_table.ext_entries = ext_entries
- madt = X86ACPIMadt(local_apic_address=0,
- records=madt_records, oem_id='madt')
+ madt = X86ACPIMadt(
+ local_apic_address=0, records=madt_records, oem_id="madt"
+ )
workload.acpi_description_table_pointer.rsdt.entries.append(madt)
workload.acpi_description_table_pointer.xsdt.entries.append(madt)
- workload.acpi_description_table_pointer.oem_id = 'gem5'
- workload.acpi_description_table_pointer.rsdt.oem_id='gem5'
- workload.acpi_description_table_pointer.xsdt.oem_id='gem5'
+ workload.acpi_description_table_pointer.oem_id = "gem5"
+ workload.acpi_description_table_pointer.rsdt.oem_id = "gem5"
+ workload.acpi_description_table_pointer.xsdt.oem_id = "gem5"
return self
-def makeLinuxX86System(mem_mode, numCPUs=1, mdesc=None, Ruby=False,
- cmdline=None):
+
+def makeLinuxX86System(
+ mem_mode, numCPUs=1, mdesc=None, Ruby=False, cmdline=None
+):
# Build up the x86 system and then specialize it for Linux
self = makeX86System(mem_mode, numCPUs, mdesc, X86FsLinux(), Ruby)
# We assume below that there's at least 1MB of memory. We'll require 2
# just to avoid corner cases.
phys_mem_size = sum([r.size() for r in self.mem_ranges])
- assert(phys_mem_size >= 0x200000)
- assert(len(self.mem_ranges) <= 2)
+ assert phys_mem_size >= 0x200000
+ assert len(self.mem_ranges) <= 2
- entries = \
- [
+ entries = [
# Mark the first megabyte of memory as reserved
- X86E820Entry(addr = 0, size = '639kB', range_type = 1),
- X86E820Entry(addr = 0x9fc00, size = '385kB', range_type = 2),
+ X86E820Entry(addr=0, size="639kB", range_type=1),
+ X86E820Entry(addr=0x9FC00, size="385kB", range_type=2),
# Mark the rest of physical memory as available
- X86E820Entry(addr = 0x100000,
- size = '%dB' % (self.mem_ranges[0].size() - 0x100000),
- range_type = 1),
- ]
+ X86E820Entry(
+ addr=0x100000,
+ size="%dB" % (self.mem_ranges[0].size() - 0x100000),
+ range_type=1,
+ ),
+ ]
    # Mark [mem_size, 3GB) as reserved if memory is less than 3GB; this forces
    # IO devices to be mapped to [0xC0000000, 0xFFFF0000). Requests to this
    # specific range can pass through the bridge to the iobus.
if len(self.mem_ranges) == 1:
- entries.append(X86E820Entry(addr = self.mem_ranges[0].size(),
- size='%dB' % (0xC0000000 - self.mem_ranges[0].size()),
- range_type=2))
+ entries.append(
+ X86E820Entry(
+ addr=self.mem_ranges[0].size(),
+ size="%dB" % (0xC0000000 - self.mem_ranges[0].size()),
+ range_type=2,
+ )
+ )
    # Reserve the last 64kB of the 32-bit address space for the m5op interface
- entries.append(X86E820Entry(addr=0xFFFF0000, size='64kB', range_type=2))
+ entries.append(X86E820Entry(addr=0xFFFF0000, size="64kB", range_type=2))
# In case the physical memory is greater than 3GB, we split it into two
# parts and add a separate e820 entry for the second part. This entry
# starts at 0x100000000, which is the first address after the space
# reserved for devices.
if len(self.mem_ranges) == 2:
- entries.append(X86E820Entry(addr = 0x100000000,
- size = '%dB' % (self.mem_ranges[1].size()), range_type = 1))
+ entries.append(
+ X86E820Entry(
+ addr=0x100000000,
+ size="%dB" % (self.mem_ranges[1].size()),
+ range_type=1,
+ )
+ )
self.workload.e820_table.entries = entries
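# Rough sketch of the table built above for an assumed 2GiB guest, using
# plain tuples of (addr, size_bytes, range_type), where type 1 is usable RAM
# and type 2 is reserved, just to make the layout concrete:
example_mem = 2 * (1 << 30)
example_e820 = [
    (0x0, 639 * 1024, 1),                        # low conventional memory
    (0x9FC00, 385 * 1024, 2),                    # legacy BIOS/EBDA hole up to 1MiB
    (0x100000, example_mem - 0x100000, 1),       # remaining RAM above 1MiB
    (example_mem, 0xC0000000 - example_mem, 2),  # hole up to the device window
    (0xFFFF0000, 64 * 1024, 2),                  # m5op interface
]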
# Command line
if not cmdline:
- cmdline = 'earlyprintk=ttyS0 console=ttyS0 lpj=7999923 root=/dev/hda1'
+ cmdline = "earlyprintk=ttyS0 console=ttyS0 lpj=7999923 root=/dev/hda1"
self.workload.command_line = fillInCmdline(mdesc, cmdline)
return self
+
def makeBareMetalRiscvSystem(mem_mode, mdesc=None, cmdline=None):
self = System()
if not mdesc:
@@ -646,7 +730,7 @@
self.iobus = IOXBar()
self.membus = MemBus()
- self.bridge = Bridge(delay='50ns')
+ self.bridge = Bridge(delay="50ns")
self.bridge.mem_side_port = self.iobus.cpu_side_ports
self.bridge.cpu_side_port = self.membus.mem_side_ports
# Sv39 has 56 bit physical addresses; use the upper 8 bit for the IO space
@@ -656,16 +740,17 @@
self.system_port = self.membus.cpu_side_ports
return self
+
def makeDualRoot(full_system, testSystem, driveSystem, dumpfile):
- self = Root(full_system = full_system)
+ self = Root(full_system=full_system)
self.testsys = testSystem
self.drivesys = driveSystem
self.etherlink = EtherLink()
- if hasattr(testSystem, 'realview'):
+ if hasattr(testSystem, "realview"):
self.etherlink.int0 = Parent.testsys.realview.ethernet.interface
self.etherlink.int1 = Parent.drivesys.realview.ethernet.interface
- elif hasattr(testSystem, 'tsunami'):
+ elif hasattr(testSystem, "tsunami"):
self.etherlink.int0 = Parent.testsys.tsunami.ethernet.interface
self.etherlink.int1 = Parent.drivesys.tsunami.ethernet.interface
else:
@@ -678,31 +763,35 @@
return self
-def makeDistRoot(testSystem,
- rank,
- size,
- server_name,
- server_port,
- sync_repeat,
- sync_start,
- linkspeed,
- linkdelay,
- dumpfile):
- self = Root(full_system = True)
+def makeDistRoot(
+ testSystem,
+ rank,
+ size,
+ server_name,
+ server_port,
+ sync_repeat,
+ sync_start,
+ linkspeed,
+ linkdelay,
+ dumpfile,
+):
+ self = Root(full_system=True)
self.testsys = testSystem
- self.etherlink = DistEtherLink(speed = linkspeed,
- delay = linkdelay,
- dist_rank = rank,
- dist_size = size,
- server_name = server_name,
- server_port = server_port,
- sync_start = sync_start,
- sync_repeat = sync_repeat)
+ self.etherlink = DistEtherLink(
+ speed=linkspeed,
+ delay=linkdelay,
+ dist_rank=rank,
+ dist_size=size,
+ server_name=server_name,
+ server_port=server_port,
+ sync_start=sync_start,
+ sync_repeat=sync_repeat,
+ )
- if hasattr(testSystem, 'realview'):
+ if hasattr(testSystem, "realview"):
self.etherlink.int0 = Parent.testsys.realview.ethernet.interface
- elif hasattr(testSystem, 'tsunami'):
+ elif hasattr(testSystem, "tsunami"):
self.etherlink.int0 = Parent.testsys.tsunami.ethernet.interface
else:
fatal("Don't know how to connect DistEtherLink to this system")
diff --git a/configs/common/FileSystemConfig.py b/configs/common/FileSystemConfig.py
index f60bf23..066eb9a 100644
--- a/configs/common/FileSystemConfig.py
+++ b/configs/common/FileSystemConfig.py
@@ -48,21 +48,25 @@
from os.path import isdir
from shutil import rmtree, copyfile
+
def hex_mask(terms):
dec_mask = reduce(operator.or_, [2**i for i in terms], 0)
return "%08x" % dec_mask
+
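# Quick illustration of hex_mask() above: each term sets one bit, so a list
# of CPU ids becomes the zero-padded hex affinity mask Linux expects in
# procfs/sysfs (example values only).
assert hex_mask([0, 1, 2, 3]) == "0000000f"
assert hex_mask([8]) == "00000100"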
def file_append(path, contents):
- with open(joinpath(*path), 'a') as f:
+ with open(joinpath(*path), "a") as f:
f.write(str(contents))
+
def replace_tree(path):
if isdir(path):
rmtree(path)
mkdir(path)
-def config_filesystem(system, options = None):
- """ This function parses the system object to create the pseudo file system
+
+def config_filesystem(system, options=None):
+ """This function parses the system object to create the pseudo file system
@param system: The system to create the config for
@param options: An optional argument which contains an Options.py options
object. This is useful if when use se.py and will set the L2 cache
@@ -79,167 +83,200 @@
These files are created in the `fs` directory in the outdir path.
"""
- fsdir = joinpath(m5.options.outdir, 'fs')
+ fsdir = joinpath(m5.options.outdir, "fs")
replace_tree(fsdir)
# Set up /proc
- procdir = joinpath(fsdir, 'proc')
+ procdir = joinpath(fsdir, "proc")
mkdir(procdir)
try:
- cpus = \
- [obj for obj in system.descendants() if isinstance(obj, BaseCPU)]
+ cpus = [
+ obj for obj in system.descendants() if isinstance(obj, BaseCPU)
+ ]
except NameError:
# BaseCPU is not defined for the NULL ISA
cpus = []
cpu_clock = 0
- if hasattr(options, 'cpu_clock'):
+ if hasattr(options, "cpu_clock"):
cpu_clock = toFrequency(options.cpu_clock) / mega
l2_size = 0
- if hasattr(options, 'l2_size'):
+ if hasattr(options, "l2_size"):
l2_size = toMemorySize(options.l2_size) / kibi
- for i,cpu in enumerate(cpus):
- one_cpu = 'processor : {proc}\n' + \
- 'vendor_id : Generic\n' + \
- 'cpu family : 0\n' + \
- 'model : 0\n' + \
- 'model name : Generic\n' + \
- 'stepping : 0\n' + \
- 'cpu MHz : {clock:0.3f}\n' + \
- 'cache size: : {l2_size}K\n' + \
- 'physical id : 0\n' + \
- 'siblings : {num_cpus}\n' + \
- 'core id : {proc}\n' + \
- 'cpu cores : {num_cpus}\n' + \
- 'fpu : yes\n' + \
- 'fpu exception : yes\n' + \
- 'cpuid level : 1\n' + \
- 'wp : yes\n' + \
- 'flags : fpu\n' + \
- 'cache alignment : {cacheline_size}\n' + \
- '\n'
- one_cpu = one_cpu.format(proc = i, num_cpus = len(cpus),
- # Note: it would be nice to use cpu.clock, but it hasn't
- # been finalized yet since m5.instantiate() isn't done.
- clock = cpu_clock,
- # Note: this assumes the L2 is private to each core
- l2_size = l2_size,
- cacheline_size=system.cache_line_size.getValue())
- file_append((procdir, 'cpuinfo'), one_cpu)
+ for i, cpu in enumerate(cpus):
+ one_cpu = (
+ "processor : {proc}\n"
+ + "vendor_id : Generic\n"
+ + "cpu family : 0\n"
+ + "model : 0\n"
+ + "model name : Generic\n"
+ + "stepping : 0\n"
+ + "cpu MHz : {clock:0.3f}\n"
+ + "cache size: : {l2_size}K\n"
+ + "physical id : 0\n"
+ + "siblings : {num_cpus}\n"
+ + "core id : {proc}\n"
+ + "cpu cores : {num_cpus}\n"
+ + "fpu : yes\n"
+ + "fpu exception : yes\n"
+ + "cpuid level : 1\n"
+ + "wp : yes\n"
+ + "flags : fpu\n"
+ + "cache alignment : {cacheline_size}\n"
+ + "\n"
+ )
+ one_cpu = one_cpu.format(
+ proc=i,
+ num_cpus=len(cpus),
+ # Note: it would be nice to use cpu.clock, but it hasn't
+ # been finalized yet since m5.instantiate() isn't done.
+ clock=cpu_clock,
+ # Note: this assumes the L2 is private to each core
+ l2_size=l2_size,
+ cacheline_size=system.cache_line_size.getValue(),
+ )
+ file_append((procdir, "cpuinfo"), one_cpu)
- file_append((procdir, 'stat'), 'cpu 0 0 0 0 0 0 0\n')
+ file_append((procdir, "stat"), "cpu 0 0 0 0 0 0 0\n")
for i in range(len(cpus)):
- file_append((procdir, 'stat'), 'cpu%d 0 0 0 0 0 0 0\n' % i)
+ file_append((procdir, "stat"), "cpu%d 0 0 0 0 0 0 0\n" % i)
# Set up /sys
- sysdir = joinpath(fsdir, 'sys')
+ sysdir = joinpath(fsdir, "sys")
mkdir(sysdir)
# Set up /sys/devices/system/cpu
- cpudir = joinpath(sysdir, 'devices', 'system', 'cpu')
+ cpudir = joinpath(sysdir, "devices", "system", "cpu")
makedirs(cpudir, exist_ok=True)
- file_append((cpudir, 'online'), '0-%d' % (len(cpus) - 1))
- file_append((cpudir, 'possible'), '0-%d' % (len(cpus) - 1))
+ file_append((cpudir, "online"), "0-%d" % (len(cpus) - 1))
+ file_append((cpudir, "possible"), "0-%d" % (len(cpus) - 1))
# Set up /tmp
- tmpdir = joinpath(fsdir, 'tmp')
+ tmpdir = joinpath(fsdir, "tmp")
replace_tree(tmpdir)
system.redirect_paths = _redirect_paths(options)
# Setting the interpreter path. This is used to load the
# guest dynamic linker itself from the elf file.
- interp = getattr(options, 'interp_dir', None)
+ interp = getattr(options, "interp_dir", None)
if interp:
from m5.core import setInterpDir
+
setInterpDir(interp)
- print("Setting the interpreter path to:", interp,
- "\nFor dynamically linked applications you might still "
- "need to setup the --redirects so that libraries are "
- "found\n")
+ print(
+ "Setting the interpreter path to:",
+ interp,
+ "\nFor dynamically linked applications you might still "
+ "need to setup the --redirects so that libraries are "
+ "found\n",
+ )
+
def register_node(cpu_list, mem, node_number):
- nodebasedir = joinpath(m5.options.outdir, 'fs', 'sys', 'devices',
- 'system', 'node')
+ nodebasedir = joinpath(
+ m5.options.outdir, "fs", "sys", "devices", "system", "node"
+ )
- nodedir = joinpath(nodebasedir,'node%d' % node_number)
+ nodedir = joinpath(nodebasedir, "node%d" % node_number)
makedirs(nodedir, exist_ok=True)
- file_append((nodedir, 'cpumap'), hex_mask(cpu_list))
- file_append((nodedir, 'meminfo'),
- 'Node %d MemTotal: %dkB' % (node_number,
- toMemorySize(str(mem))/kibi))
+ file_append((nodedir, "cpumap"), hex_mask(cpu_list))
+ file_append(
+ (nodedir, "meminfo"),
+ "Node %d MemTotal: %dkB"
+ % (node_number, toMemorySize(str(mem)) / kibi),
+ )
-def register_cpu(physical_package_id, core_siblings,
- core_id, thread_siblings):
- cpudir = joinpath(m5.options.outdir, 'fs', 'sys', 'devices', 'system',
- 'cpu', 'cpu%d' % core_id)
- makedirs(joinpath(cpudir, 'topology'), exist_ok=True)
- makedirs(joinpath(cpudir, 'cache'))
+def register_cpu(physical_package_id, core_siblings, core_id, thread_siblings):
+ cpudir = joinpath(
+ m5.options.outdir,
+ "fs",
+ "sys",
+ "devices",
+ "system",
+ "cpu",
+ "cpu%d" % core_id,
+ )
- file_append((cpudir, 'online'), '1')
- file_append((cpudir, 'topology', 'physical_package_id'),
- physical_package_id)
- file_append((cpudir, 'topology', 'core_siblings'),
- hex_mask(core_siblings))
- file_append((cpudir, 'topology', 'core_id'), core_id)
- file_append((cpudir, 'topology', 'thread_siblings'),
- hex_mask(thread_siblings))
+ makedirs(joinpath(cpudir, "topology"), exist_ok=True)
+ makedirs(joinpath(cpudir, "cache"))
+
+ file_append((cpudir, "online"), "1")
+ file_append(
+ (cpudir, "topology", "physical_package_id"), physical_package_id
+ )
+ file_append((cpudir, "topology", "core_siblings"), hex_mask(core_siblings))
+ file_append((cpudir, "topology", "core_id"), core_id)
+ file_append(
+ (cpudir, "topology", "thread_siblings"), hex_mask(thread_siblings)
+ )
+
def register_cache(level, idu_type, size, line_size, assoc, cpus):
- fsdir = joinpath(m5.options.outdir, 'fs')
+ fsdir = joinpath(m5.options.outdir, "fs")
for i in cpus:
- cachedir = joinpath(fsdir, 'sys', 'devices', 'system', 'cpu',
- 'cpu%d' % i, 'cache')
+ cachedir = joinpath(
+ fsdir, "sys", "devices", "system", "cpu", "cpu%d" % i, "cache"
+ )
j = 0
- while isdir(joinpath(cachedir, 'index%d' % j)):
+ while isdir(joinpath(cachedir, "index%d" % j)):
j += 1
- indexdir = joinpath(cachedir, 'index%d' % j)
+ indexdir = joinpath(cachedir, "index%d" % j)
makedirs(indexdir, exist_ok=True)
- file_append((indexdir, 'level'), level)
- file_append((indexdir, 'type'), idu_type)
- file_append((indexdir, 'size'), "%dK" % (toMemorySize(size)/kibi))
- file_append((indexdir, 'coherency_line_size'), line_size)
+ file_append((indexdir, "level"), level)
+ file_append((indexdir, "type"), idu_type)
+ file_append((indexdir, "size"), "%dK" % (toMemorySize(size) / kibi))
+ file_append((indexdir, "coherency_line_size"), line_size)
# Since cache size = number of indices * associativity * block size
num_sets = toMemorySize(size) / int(assoc) * int(line_size)
- file_append((indexdir, 'number_of_sets'), num_sets)
- file_append((indexdir, 'physical_line_partition'), '1')
- file_append((indexdir, 'shared_cpu_map'), hex_mask(cpus))
- file_append((indexdir, 'shared_cpu_list'),
- ','.join(str(cpu) for cpu in cpus))
+ file_append((indexdir, "number_of_sets"), num_sets)
+ file_append((indexdir, "physical_line_partition"), "1")
+ file_append((indexdir, "shared_cpu_map"), hex_mask(cpus))
+ file_append(
+ (indexdir, "shared_cpu_list"), ",".join(str(cpu) for cpu in cpus)
+ )
+
def _redirect_paths(options):
# Redirect filesystem syscalls from src to the first matching dests
- redirect_paths = [RedirectPath(app_path = "/proc",
- host_paths = ["%s/fs/proc" % m5.options.outdir]),
- RedirectPath(app_path = "/sys",
- host_paths = ["%s/fs/sys" % m5.options.outdir]),
- RedirectPath(app_path = "/tmp",
- host_paths = ["%s/fs/tmp" % m5.options.outdir])]
+ redirect_paths = [
+ RedirectPath(
+ app_path="/proc", host_paths=["%s/fs/proc" % m5.options.outdir]
+ ),
+ RedirectPath(
+ app_path="/sys", host_paths=["%s/fs/sys" % m5.options.outdir]
+ ),
+ RedirectPath(
+ app_path="/tmp", host_paths=["%s/fs/tmp" % m5.options.outdir]
+ ),
+ ]
# Setting the redirect paths so that the guest dynamic linker
# can point to the proper /lib collection (e.g. to load libc)
- redirects = getattr(options, 'redirects', [])
+ redirects = getattr(options, "redirects", [])
for redirect in redirects:
app_path, host_path = redirect.split("=")
redirect_paths.append(
- RedirectPath(app_path = app_path, host_paths = [ host_path ]))
+ RedirectPath(app_path=app_path, host_paths=[host_path])
+ )
- chroot = getattr(options, 'chroot', None)
+ chroot = getattr(options, "chroot", None)
if chroot:
redirect_paths.append(
RedirectPath(
- app_path = "/",
- host_paths = ["%s" % os.path.expanduser(chroot)]))
+ app_path="/", host_paths=["%s" % os.path.expanduser(chroot)]
+ )
+ )
return redirect_paths
diff --git a/configs/common/GPUTLBConfig.py b/configs/common/GPUTLBConfig.py
index 740c748..b70d6c5 100644
--- a/configs/common/GPUTLBConfig.py
+++ b/configs/common/GPUTLBConfig.py
@@ -34,10 +34,12 @@
import m5
from m5.objects import *
+
def TLB_constructor(options, level, gpu_ctrl=None, full_system=False):
if full_system:
- constructor_call = "VegaGPUTLB(\
+ constructor_call = (
+ "VegaGPUTLB(\
gpu_device = gpu_ctrl, \
size = options.L%(level)dTLBentries, \
assoc = options.L%(level)dTLBassoc, \
@@ -48,9 +50,12 @@
clk_domain = SrcClockDomain(\
clock = options.gpu_clock,\
voltage_domain = VoltageDomain(\
- voltage = options.gpu_voltage)))" % locals()
+ voltage = options.gpu_voltage)))"
+ % locals()
+ )
else:
- constructor_call = "X86GPUTLB(size = options.L%(level)dTLBentries, \
+ constructor_call = (
+ "X86GPUTLB(size = options.L%(level)dTLBentries, \
assoc = options.L%(level)dTLBassoc, \
hitLatency = options.L%(level)dAccessLatency,\
missLatency2 = options.L%(level)dMissLatency,\
@@ -59,13 +64,17 @@
clk_domain = SrcClockDomain(\
clock = options.gpu_clock,\
voltage_domain = VoltageDomain(\
- voltage = options.gpu_voltage)))" % locals()
+ voltage = options.gpu_voltage)))"
+ % locals()
+ )
return constructor_call
+
def Coalescer_constructor(options, level, full_system):
if full_system:
- constructor_call = "VegaTLBCoalescer(probesPerCycle = \
+ constructor_call = (
+ "VegaTLBCoalescer(probesPerCycle = \
options.L%(level)dProbesPerCycle, \
tlb_level = %(level)d ,\
coalescingWindow = options.L%(level)dCoalescingWindow,\
@@ -73,30 +82,47 @@
clk_domain = SrcClockDomain(\
clock = options.gpu_clock,\
voltage_domain = VoltageDomain(\
- voltage = options.gpu_voltage)))" % locals()
+ voltage = options.gpu_voltage)))"
+ % locals()
+ )
else:
- constructor_call = "TLBCoalescer(probesPerCycle = \
+ constructor_call = (
+ "TLBCoalescer(probesPerCycle = \
options.L%(level)dProbesPerCycle, \
coalescingWindow = options.L%(level)dCoalescingWindow,\
disableCoalescing = options.L%(level)dDisableCoalescing,\
clk_domain = SrcClockDomain(\
clock = options.gpu_clock,\
voltage_domain = VoltageDomain(\
- voltage = options.gpu_voltage)))" % locals()
+ voltage = options.gpu_voltage)))"
+ % locals()
+ )
return constructor_call
-def create_TLB_Coalescer(options, my_level, my_index, tlb_name,
- coalescer_name, gpu_ctrl=None, full_system=False):
+
+def create_TLB_Coalescer(
+ options,
+ my_level,
+ my_index,
+ tlb_name,
+ coalescer_name,
+ gpu_ctrl=None,
+ full_system=False,
+):
# arguments: options, TLB level, number of private structures for this
# Level, TLB name and Coalescer name
for i in range(my_index):
tlb_name.append(
- eval(TLB_constructor(options, my_level, gpu_ctrl, full_system)))
+ eval(TLB_constructor(options, my_level, gpu_ctrl, full_system))
+ )
coalescer_name.append(
- eval(Coalescer_constructor(options, my_level, full_system)))
+ eval(Coalescer_constructor(options, my_level, full_system))
+ )
-def config_tlb_hierarchy(options, system, shader_idx, gpu_ctrl=None,
- full_system=False):
+
+def config_tlb_hierarchy(
+ options, system, shader_idx, gpu_ctrl=None, full_system=False
+):
n_cu = options.num_compute_units
if options.TLB_config == "perLane":
@@ -111,36 +137,50 @@
print("Bad option for TLB Configuration.")
sys.exit(1)
- #-------------------------------------------------------------------------
+ # -------------------------------------------------------------------------
# A visual representation of the TLB hierarchy
# for ease of configuration
# < Modify here the width and the number of levels if you want a different
# configuration >
# width is the number of TLBs of the given type (i.e., D-TLB, I-TLB etc)
# for this level
- L1 = [{'name': 'sqc', 'width': options.num_sqc, 'TLBarray': [],
- 'CoalescerArray': []},
- {'name': 'scalar', 'width' : options.num_scalar_cache,
- 'TLBarray': [], 'CoalescerArray': []},
- {'name': 'l1', 'width': num_TLBs, 'TLBarray': [],
- 'CoalescerArray': []}]
+ L1 = [
+ {
+ "name": "sqc",
+ "width": options.num_sqc,
+ "TLBarray": [],
+ "CoalescerArray": [],
+ },
+ {
+ "name": "scalar",
+ "width": options.num_scalar_cache,
+ "TLBarray": [],
+ "CoalescerArray": [],
+ },
+ {
+ "name": "l1",
+ "width": num_TLBs,
+ "TLBarray": [],
+ "CoalescerArray": [],
+ },
+ ]
- L2 = [{'name': 'l2', 'width': 1, 'TLBarray': [], 'CoalescerArray': []}]
- L3 = [{'name': 'l3', 'width': 1, 'TLBarray': [], 'CoalescerArray': []}]
+ L2 = [{"name": "l2", "width": 1, "TLBarray": [], "CoalescerArray": []}]
+ L3 = [{"name": "l3", "width": 1, "TLBarray": [], "CoalescerArray": []}]
TLB_hierarchy = [L1, L2, L3]
- #-------------------------------------------------------------------------
+ # -------------------------------------------------------------------------
    # Create the hierarchy
# Call the appropriate constructors and add objects to the system
for i in range(len(TLB_hierarchy)):
hierarchy_level = TLB_hierarchy[i]
- level = i+1
+ level = i + 1
for TLB_type in hierarchy_level:
- TLB_index = TLB_type['width']
- TLB_array = TLB_type['TLBarray']
- Coalescer_array = TLB_type['CoalescerArray']
+ TLB_index = TLB_type["width"]
+ TLB_array = TLB_type["TLBarray"]
+ Coalescer_array = TLB_type["CoalescerArray"]
# If the sim calls for a fixed L1 TLB size across CUs,
# override the TLB entries option
if options.tot_L1TLB_size:
@@ -148,71 +188,96 @@
if options.L1TLBassoc > options.L1TLBentries:
options.L1TLBassoc = options.L1TLBentries
# call the constructors for the TLB and the Coalescer
- create_TLB_Coalescer(options, level, TLB_index,\
- TLB_array, Coalescer_array, gpu_ctrl, full_system)
+ create_TLB_Coalescer(
+ options,
+ level,
+ TLB_index,
+ TLB_array,
+ Coalescer_array,
+ gpu_ctrl,
+ full_system,
+ )
- system_TLB_name = TLB_type['name'] + '_tlb'
- system_Coalescer_name = TLB_type['name'] + '_coalescer'
+ system_TLB_name = TLB_type["name"] + "_tlb"
+ system_Coalescer_name = TLB_type["name"] + "_coalescer"
# add the different TLB levels to the system
# Modify here if you want to make the TLB hierarchy a child of
# the shader.
- exec('system.%s = TLB_array' % system_TLB_name)
- exec('system.%s = Coalescer_array' % system_Coalescer_name)
+ exec("system.%s = TLB_array" % system_TLB_name)
+ exec("system.%s = Coalescer_array" % system_Coalescer_name)
- #===========================================================
+ # ===========================================================
# Specify the TLB hierarchy (i.e., port connections)
# All TLBs but the last level TLB need to have a memSidePort
- #===========================================================
+ # ===========================================================
# Each TLB is connected with its Coalescer through a single port.
# There is a one-to-one mapping of TLBs to Coalescers at a given level
# This won't be modified no matter what the hierarchy looks like.
for i in range(len(TLB_hierarchy)):
hierarchy_level = TLB_hierarchy[i]
- level = i+1
+ level = i + 1
for TLB_type in hierarchy_level:
- name = TLB_type['name']
- for index in range(TLB_type['width']):
- exec('system.%s_coalescer[%d].mem_side_ports[0] = \
- system.%s_tlb[%d].cpu_side_ports[0]' % \
- (name, index, name, index))
+ name = TLB_type["name"]
+ for index in range(TLB_type["width"]):
+ exec(
+ "system.%s_coalescer[%d].mem_side_ports[0] = \
+ system.%s_tlb[%d].cpu_side_ports[0]"
+ % (name, index, name, index)
+ )
# Connect the cpuSidePort of all the coalescers in level 1
# < Modify here if you want a different configuration >
for TLB_type in L1:
- name = TLB_type['name']
- num_TLBs = TLB_type['width']
- if name == 'l1': # L1 D-TLBs
+ name = TLB_type["name"]
+ num_TLBs = TLB_type["width"]
+ if name == "l1": # L1 D-TLBs
tlb_per_cu = num_TLBs // n_cu
for cu_idx in range(n_cu):
if tlb_per_cu:
for tlb in range(tlb_per_cu):
- exec('system.cpu[%d].CUs[%d].translation_port[%d] = \
- system.l1_coalescer[%d].cpu_side_ports[%d]' % \
- (shader_idx, cu_idx, tlb,
- cu_idx*tlb_per_cu+tlb, 0))
+ exec(
+ "system.cpu[%d].CUs[%d].translation_port[%d] = \
+ system.l1_coalescer[%d].cpu_side_ports[%d]"
+ % (
+ shader_idx,
+ cu_idx,
+ tlb,
+ cu_idx * tlb_per_cu + tlb,
+ 0,
+ )
+ )
else:
- exec('system.cpu[%d].CUs[%d].translation_port[%d] = \
- system.l1_coalescer[%d].cpu_side_ports[%d]' % \
- (shader_idx, cu_idx, tlb_per_cu,
- cu_idx / (n_cu / num_TLBs),
- cu_idx % (n_cu / num_TLBs)))
- elif name == 'sqc': # I-TLB
+ exec(
+ "system.cpu[%d].CUs[%d].translation_port[%d] = \
+ system.l1_coalescer[%d].cpu_side_ports[%d]"
+ % (
+ shader_idx,
+ cu_idx,
+ tlb_per_cu,
+ cu_idx / (n_cu / num_TLBs),
+ cu_idx % (n_cu / num_TLBs),
+ )
+ )
+ elif name == "sqc": # I-TLB
for index in range(n_cu):
sqc_tlb_index = index / options.cu_per_sqc
sqc_tlb_port_id = index % options.cu_per_sqc
- exec('system.cpu[%d].CUs[%d].sqc_tlb_port = \
- system.sqc_coalescer[%d].cpu_side_ports[%d]' % \
- (shader_idx, index, sqc_tlb_index, sqc_tlb_port_id))
- elif name == 'scalar': # Scalar D-TLB
+ exec(
+ "system.cpu[%d].CUs[%d].sqc_tlb_port = \
+ system.sqc_coalescer[%d].cpu_side_ports[%d]"
+ % (shader_idx, index, sqc_tlb_index, sqc_tlb_port_id)
+ )
+ elif name == "scalar": # Scalar D-TLB
for index in range(n_cu):
scalar_tlb_index = index / options.cu_per_scalar_cache
scalar_tlb_port_id = index % options.cu_per_scalar_cache
- exec('system.cpu[%d].CUs[%d].scalar_tlb_port = \
- system.scalar_coalescer[%d].cpu_side_ports[%d]' % \
- (shader_idx, index, scalar_tlb_index,
- scalar_tlb_port_id))
+ exec(
+ "system.cpu[%d].CUs[%d].scalar_tlb_port = \
+ system.scalar_coalescer[%d].cpu_side_ports[%d]"
+ % (shader_idx, index, scalar_tlb_index, scalar_tlb_port_id)
+ )
# Connect the memSidePorts of all the TLBs with the
# cpuSidePorts of the Coalescers of the next level
@@ -220,23 +285,28 @@
# L1 <-> L2
l2_coalescer_index = 0
for TLB_type in L1:
- name = TLB_type['name']
- for index in range(TLB_type['width']):
- exec('system.%s_tlb[%d].mem_side_ports[0] = \
- system.l2_coalescer[0].cpu_side_ports[%d]' % \
- (name, index, l2_coalescer_index))
+ name = TLB_type["name"]
+ for index in range(TLB_type["width"]):
+ exec(
+ "system.%s_tlb[%d].mem_side_ports[0] = \
+ system.l2_coalescer[0].cpu_side_ports[%d]"
+ % (name, index, l2_coalescer_index)
+ )
l2_coalescer_index += 1
# L2 <-> L3
- system.l2_tlb[0].mem_side_ports[0] = \
- system.l3_coalescer[0].cpu_side_ports[0]
+ system.l2_tlb[0].mem_side_ports[0] = system.l3_coalescer[0].cpu_side_ports[
+ 0
+ ]
# L3 TLB Vega page table walker to memory for full system only
if full_system:
for TLB_type in L3:
- name = TLB_type['name']
- for index in range(TLB_type['width']):
- exec('system._dma_ports.append(system.%s_tlb[%d].walker)' % \
- (name, index))
+ name = TLB_type["name"]
+ for index in range(TLB_type["width"]):
+ exec(
+ "system._dma_ports.append(system.%s_tlb[%d].walker)"
+ % (name, index)
+ )
return system
diff --git a/configs/common/GPUTLBOptions.py b/configs/common/GPUTLBOptions.py
index 3a1f9ad..1a77a2c 100644
--- a/configs/common/GPUTLBOptions.py
+++ b/configs/common/GPUTLBOptions.py
@@ -27,77 +27,105 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
+
def tlb_options(parser):
- #===================================================================
+ # ===================================================================
# TLB Configuration
- #===================================================================
+ # ===================================================================
parser.add_argument(
- "--TLB-config", type=str, default="perCU",
- help="Options are: perCU (default), mono, 2CU, or perLane")
+ "--TLB-config",
+ type=str,
+ default="perCU",
+ help="Options are: perCU (default), mono, 2CU, or perLane",
+ )
- #===================================================================
+ # ===================================================================
# L1 TLB Options (D-TLB, I-TLB, Dispatcher-TLB)
- #===================================================================
+ # ===================================================================
parser.add_argument("--L1TLBentries", type=int, default="32")
parser.add_argument("--L1TLBassoc", type=int, default="32")
- parser.add_argument("--L1AccessLatency", type=int, default="1",
- help="latency in gpu cycles")
- parser.add_argument("--L1MissLatency", type=int, default="750",
- help="latency (in gpu cycles) of a page walk, "
- "if this is a last level TLB")
+ parser.add_argument(
+ "--L1AccessLatency",
+ type=int,
+ default="1",
+ help="latency in gpu cycles",
+ )
+ parser.add_argument(
+ "--L1MissLatency",
+ type=int,
+ default="750",
+ help="latency (in gpu cycles) of a page walk, "
+ "if this is a last level TLB",
+ )
parser.add_argument("--L1MaxOutstandingReqs", type=int, default="64")
parser.add_argument("--L1AccessDistanceStat", action="store_true")
parser.add_argument("--tot-L1TLB-size", type=int, default="0")
- #===================================================================
+ # ===================================================================
# L2 TLB Options
- #===================================================================
+ # ===================================================================
parser.add_argument("--L2TLBentries", type=int, default="4096")
parser.add_argument("--L2TLBassoc", type=int, default="32")
- parser.add_argument("--L2AccessLatency", type=int, default="69",
- help="latency in gpu cycles")
- parser.add_argument("--L2MissLatency", type=int, default="750",
- help="latency (in gpu cycles) of a page walk, "
- "if this is a last level TLB")
+ parser.add_argument(
+ "--L2AccessLatency",
+ type=int,
+ default="69",
+ help="latency in gpu cycles",
+ )
+ parser.add_argument(
+ "--L2MissLatency",
+ type=int,
+ default="750",
+ help="latency (in gpu cycles) of a page walk, "
+ "if this is a last level TLB",
+ )
parser.add_argument("--L2MaxOutstandingReqs", type=int, default="64")
parser.add_argument("--L2AccessDistanceStat", action="store_true")
- #===================================================================
+ # ===================================================================
# L3 TLB Options
- #===================================================================
+ # ===================================================================
parser.add_argument("--L3TLBentries", type=int, default="8192")
parser.add_argument("--L3TLBassoc", type=int, default="32")
- parser.add_argument("--L3AccessLatency", type=int, default="150",
- help="latency in gpu cycles")
- parser.add_argument("--L3MissLatency", type=int, default="750",
- help="latency (in gpu cycles) of a page walk")
+ parser.add_argument(
+ "--L3AccessLatency",
+ type=int,
+ default="150",
+ help="latency in gpu cycles",
+ )
+ parser.add_argument(
+ "--L3MissLatency",
+ type=int,
+ default="750",
+ help="latency (in gpu cycles) of a page walk",
+ )
parser.add_argument("--L3MaxOutstandingReqs", type=int, default="64")
parser.add_argument("--L3AccessDistanceStat", action="store_true")
- #===================================================================
+ # ===================================================================
# L1 TLBCoalescer Options
- #===================================================================
+ # ===================================================================
parser.add_argument("--L1ProbesPerCycle", type=int, default="2")
parser.add_argument("--L1CoalescingWindow", type=int, default="1")
parser.add_argument("--L1DisableCoalescing", action="store_true")
- #===================================================================
+ # ===================================================================
# L2 TLBCoalescer Options
- #===================================================================
+ # ===================================================================
parser.add_argument("--L2ProbesPerCycle", type=int, default="2")
parser.add_argument("--L2CoalescingWindow", type=int, default="1")
parser.add_argument("--L2DisableCoalescing", action="store_true")
- #===================================================================
+ # ===================================================================
# L3 TLBCoalescer Options
- #===================================================================
+ # ===================================================================
parser.add_argument("--L3ProbesPerCycle", type=int, default="2")
parser.add_argument("--L3CoalescingWindow", type=int, default="1")
diff --git a/configs/common/HMC.py b/configs/common/HMC.py
index b12bd0a..f8321f3 100644
--- a/configs/common/HMC.py
+++ b/configs/common/HMC.py
@@ -129,159 +129,303 @@
def add_options(parser):
# *****************************CROSSBAR PARAMETERS*************************
# Flit size of the main interconnect [1]
- parser.add_argument("--xbar-width", default=32, action="store", type=int,
- help="Data width of the main XBar (Bytes)")
+ parser.add_argument(
+ "--xbar-width",
+ default=32,
+ action="store",
+ type=int,
+ help="Data width of the main XBar (Bytes)",
+ )
# Clock frequency of the main interconnect [1]
# This crossbar, is placed on the logic-based of the HMC and it has its
# own voltage and clock domains, different from the DRAM dies or from the
# host.
- parser.add_argument("--xbar-frequency", default='1GHz', type=str,
- help="Clock Frequency of the main XBar")
+ parser.add_argument(
+ "--xbar-frequency",
+ default="1GHz",
+ type=str,
+ help="Clock Frequency of the main XBar",
+ )
# Arbitration latency of the HMC XBar [1]
- parser.add_argument("--xbar-frontend-latency", default=1, action="store",
- type=int, help="Arbitration latency of the XBar")
+ parser.add_argument(
+ "--xbar-frontend-latency",
+ default=1,
+ action="store",
+ type=int,
+ help="Arbitration latency of the XBar",
+ )
# Latency to forward a packet via the interconnect [1](two levels of FIFOs
    # at the input and output of the interconnect)
- parser.add_argument("--xbar-forward-latency", default=2, action="store",
- type=int, help="Forward latency of the XBar")
+ parser.add_argument(
+ "--xbar-forward-latency",
+ default=2,
+ action="store",
+ type=int,
+ help="Forward latency of the XBar",
+ )
# Latency to forward a response via the interconnect [1](two levels of
    # FIFOs at the input and output of the interconnect)
- parser.add_argument("--xbar-response-latency", default=2, action="store",
- type=int, help="Response latency of the XBar")
+ parser.add_argument(
+ "--xbar-response-latency",
+ default=2,
+ action="store",
+ type=int,
+ help="Response latency of the XBar",
+ )
    # number of crossbars which connect the 16 vaults to the serial links [7]
- parser.add_argument("--number-mem-crossbar", default=4, action="store",
- type=int, help="Number of crossbar in HMC")
+ parser.add_argument(
+ "--number-mem-crossbar",
+ default=4,
+ action="store",
+ type=int,
+ help="Number of crossbar in HMC",
+ )
# *****************************SERIAL LINK PARAMETERS**********************
# Number of serial links controllers [1]
- parser.add_argument("--num-links-controllers", default=4, action="store",
- type=int, help="Number of serial links")
+ parser.add_argument(
+ "--num-links-controllers",
+ default=4,
+ action="store",
+ type=int,
+ help="Number of serial links",
+ )
# Number of packets (not flits) to store at the request side of the serial
    # link. This number should be adjusted to achieve the required bandwidth
- parser.add_argument("--link-buffer-size-req", default=10, action="store",
- type=int, help="Number of packets to buffer at the\
- request side of the serial link")
+ parser.add_argument(
+ "--link-buffer-size-req",
+ default=10,
+ action="store",
+ type=int,
+ help="Number of packets to buffer at the\
+ request side of the serial link",
+ )
# Number of packets (not flits) to store at the response side of the serial
    # link. This number should be adjusted to achieve the required bandwidth
- parser.add_argument("--link-buffer-size-rsp", default=10, action="store",
- type=int, help="Number of packets to buffer at the\
- response side of the serial link")
+ parser.add_argument(
+ "--link-buffer-size-rsp",
+ default=10,
+ action="store",
+ type=int,
+ help="Number of packets to buffer at the\
+ response side of the serial link",
+ )
# Latency of the serial link composed by SER/DES latency (1.6ns [4]) plus
# the PCB trace latency (3ns Estimated based on [5])
- parser.add_argument("--link-latency", default='4.6ns', type=str,
- help="Latency of the serial links")
+ parser.add_argument(
+ "--link-latency",
+ default="4.6ns",
+ type=str,
+ help="Latency of the serial links",
+ )
# Clock frequency of the each serial link(SerDes) [1]
- parser.add_argument("--link-frequency", default='10GHz', type=str,
- help="Clock Frequency of the serial links")
+ parser.add_argument(
+ "--link-frequency",
+ default="10GHz",
+ type=str,
+ help="Clock Frequency of the serial links",
+ )
# Clock frequency of serial link Controller[6]
# clk_hmc[Mhz]= num_lanes_per_link * lane_speed [Gbits/s] /
# data_path_width * 10^6
# clk_hmc[Mhz]= 16 * 10 Gbps / 256 * 10^6 = 625 Mhz
- parser.add_argument("--link-controller-frequency", default='625MHz',
- type=str, help="Clock Frequency of the link\
- controller")
+ parser.add_argument(
+ "--link-controller-frequency",
+ default="625MHz",
+ type=str,
+ help="Clock Frequency of the link\
+ controller",
+ )
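# Worked check of the clock derivation quoted in the comment above, using
# the defaults from this file (16 lanes at 10 Gb/s over a 256-wide data path):
example_clk_mhz = 16 * 10 * 1000 / 256
assert example_clk_mhz == 625.0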
# Latency of the serial link controller to process the packets[1][6]
# (ClockDomain = 625 Mhz )
# used here for calculations only
- parser.add_argument("--link-ctrl-latency", default=4, action="store",
- type=int, help="The number of cycles required for the\
- controller to process the packet")
+ parser.add_argument(
+ "--link-ctrl-latency",
+ default=4,
+ action="store",
+ type=int,
+ help="The number of cycles required for the\
+ controller to process the packet",
+ )
# total_ctrl_latency = link_ctrl_latency + link_latency
# total_ctrl_latency = 4(Cycles) * 1.6 ns + 4.6 ns
- parser.add_argument("--total-ctrl-latency", default='11ns', type=str,
- help="The latency experienced by every packet\
- regardless of size of packet")
+ parser.add_argument(
+ "--total-ctrl-latency",
+ default="11ns",
+ type=str,
+ help="The latency experienced by every packet\
+ regardless of size of packet",
+ )
# Number of parallel lanes in each serial link [1]
- parser.add_argument("--num-lanes-per-link", default=16, action="store",
- type=int, help="Number of lanes per each link")
+ parser.add_argument(
+ "--num-lanes-per-link",
+ default=16,
+ action="store",
+ type=int,
+ help="Number of lanes per each link",
+ )
# Number of serial links [1]
- parser.add_argument("--num-serial-links", default=4, action="store",
- type=int, help="Number of serial links")
+ parser.add_argument(
+ "--num-serial-links",
+ default=4,
+ action="store",
+ type=int,
+ help="Number of serial links",
+ )
# speed of each lane of serial link - SerDes serial interface 10 Gb/s
- parser.add_argument("--serial-link-speed", default=10, action="store",
- type=int, help="Gbs/s speed of each lane of serial\
- link")
+ parser.add_argument(
+ "--serial-link-speed",
+ default=10,
+ action="store",
+ type=int,
+ help="Gbs/s speed of each lane of serial\
+ link",
+ )
# address range for each of the serial links
- parser.add_argument("--serial-link-addr-range", default='1GB', type=str,
- help="memory range for each of the serial links.\
- Default: 1GB")
+ parser.add_argument(
+ "--serial-link-addr-range",
+ default="1GB",
+ type=str,
+ help="memory range for each of the serial links.\
+ Default: 1GB",
+ )
# *****************************PERFORMANCE MONITORING*********************
# The main monitor behind the HMC Controller
- parser.add_argument("--enable-global-monitor", action="store_true",
- help="The main monitor behind the HMC Controller")
+ parser.add_argument(
+ "--enable-global-monitor",
+ action="store_true",
+ help="The main monitor behind the HMC Controller",
+ )
# The link performance monitors
- parser.add_argument("--enable-link-monitor", action="store_true",
- help="The link monitors")
+ parser.add_argument(
+ "--enable-link-monitor", action="store_true", help="The link monitors"
+ )
# link aggregator enable - put a cross between buffers & links
- parser.add_argument("--enable-link-aggr", action="store_true", help="The\
- crossbar between port and Link Controller")
+ parser.add_argument(
+ "--enable-link-aggr",
+ action="store_true",
+ help="The\
+ crossbar between port and Link Controller",
+ )
- parser.add_argument("--enable-buff-div", action="store_true",
- help="Memory Range of Buffer is ivided between total\
- range")
+ parser.add_argument(
+ "--enable-buff-div",
+ action="store_true",
+        help="Memory Range of Buffer is divided between total\
+ range",
+ )
# *****************************HMC ARCHITECTURE **************************
    # Memory chunk for 16 vaults - number of vaults / number of crossbars
- parser.add_argument("--mem-chunk", default=4, action="store", type=int,
- help="Chunk of memory range for each cross bar in\
- arch 0")
+ parser.add_argument(
+ "--mem-chunk",
+ default=4,
+ action="store",
+ type=int,
+ help="Chunk of memory range for each cross bar in\
+ arch 0",
+ )
# size of req buffer within crossbar, used for modelling extra latency
    # when the request goes to a non-local vault
- parser.add_argument("--xbar-buffer-size-req", default=10, action="store",
- type=int, help="Number of packets to buffer at the\
- request side of the crossbar")
+ parser.add_argument(
+ "--xbar-buffer-size-req",
+ default=10,
+ action="store",
+ type=int,
+ help="Number of packets to buffer at the\
+ request side of the crossbar",
+ )
# size of response buffer within crossbar, used for modelling extra latency
    # when the response is received from a non-local vault
- parser.add_argument("--xbar-buffer-size-resp", default=10, action="store",
- type=int, help="Number of packets to buffer at the\
- response side of the crossbar")
+ parser.add_argument(
+ "--xbar-buffer-size-resp",
+ default=10,
+ action="store",
+ type=int,
+ help="Number of packets to buffer at the\
+ response side of the crossbar",
+ )
# HMC device architecture. It affects the HMC host controller as well
- parser.add_argument("--arch", type=str, choices=["same", "distributed",
- "mixed"], default="distributed", help="same: HMC with\
+ parser.add_argument(
+ "--arch",
+ type=str,
+ choices=["same", "distributed", "mixed"],
+ default="distributed",
+ help="same: HMC with\
4 links, all with same range.\ndistributed: HMC with\
4 links with distributed range.\nmixed: mixed with\
- same and distributed range.\nDefault: distributed")
+ same and distributed range.\nDefault: distributed",
+ )
# HMC device - number of vaults
- parser.add_argument("--hmc-dev-num-vaults", default=16, action="store",
- type=int, help="number of independent vaults within\
+ parser.add_argument(
+ "--hmc-dev-num-vaults",
+ default=16,
+ action="store",
+ type=int,
+ help="number of independent vaults within\
the HMC device. Note: each vault has a memory\
- controller (valut controller)\nDefault: 16")
+ controller (vault controller)\nDefault: 16",
+ )
# HMC device - vault capacity or size
- parser.add_argument("--hmc-dev-vault-size", default='256MB', type=str,
- help="vault storage capacity in bytes. Default:\
- 256MB")
- parser.add_argument("--mem-type", type=str, choices=["HMC_2500_1x32"],
- default="HMC_2500_1x32", help="type of HMC memory to\
- use. Default: HMC_2500_1x32")
- parser.add_argument("--mem-channels", default=1, action="store", type=int,
- help="Number of memory channels")
- parser.add_argument("--mem-ranks", default=1, action="store", type=int,
- help="Number of ranks to iterate across")
- parser.add_argument("--burst-length", default=256, action="store",
- type=int, help="burst length in bytes. Note: the\
+ parser.add_argument(
+ "--hmc-dev-vault-size",
+ default="256MB",
+ type=str,
+ help="vault storage capacity in bytes. Default:\
+ 256MB",
+ )
+ parser.add_argument(
+ "--mem-type",
+ type=str,
+ choices=["HMC_2500_1x32"],
+ default="HMC_2500_1x32",
+ help="type of HMC memory to\
+ use. Default: HMC_2500_1x32",
+ )
+ parser.add_argument(
+ "--mem-channels",
+ default=1,
+ action="store",
+ type=int,
+ help="Number of memory channels",
+ )
+ parser.add_argument(
+ "--mem-ranks",
+ default=1,
+ action="store",
+ type=int,
+ help="Number of ranks to iterate across",
+ )
+ parser.add_argument(
+ "--burst-length",
+ default=256,
+ action="store",
+ type=int,
+ help="burst length in bytes. Note: the\
cache line size will be set to this value.\nDefault:\
- 256")
+ 256",
+ )
# configure HMC host controller
@@ -292,8 +436,8 @@
# Create additional crossbar for arch1
if opt.arch == "distributed" or opt.arch == "mixed":
- clk = '100GHz'
- vd = VoltageDomain(voltage='1V')
+ clk = "100GHz"
+ vd = VoltageDomain(voltage="1V")
# Create additional crossbar for arch1
system.membus = NoncoherentXBar(width=8)
system.membus.badaddr_responder = BadAddr()
@@ -310,42 +454,50 @@
    # Memory ranges of serial link for arch-0. Same as the ranges of vault
# controllers (4 vaults to 1 serial link)
if opt.arch == "same":
- ser_ranges = [AddrRange(0, (4*slar)-1) for i in
- range(opt.num_serial_links)]
+ ser_ranges = [
+ AddrRange(0, (4 * slar) - 1) for i in range(opt.num_serial_links)
+ ]
    # Memory ranges of serial link for arch-1. Distributed range across
# links
if opt.arch == "distributed":
- ser_ranges = [AddrRange(i*slar, ((i+1)*slar)-1) for i in
- range(opt.num_serial_links)]
+ ser_ranges = [
+ AddrRange(i * slar, ((i + 1) * slar) - 1)
+ for i in range(opt.num_serial_links)
+ ]
    # Memory ranges of serial link for arch-2 'Mixed' address distribution
# over links
if opt.arch == "mixed":
- ser_range0 = AddrRange(0, (1*slar)-1)
- ser_range1 = AddrRange(1*slar, 2*slar-1)
- ser_range2 = AddrRange(0, (4*slar)-1)
- ser_range3 = AddrRange(0, (4*slar)-1)
+ ser_range0 = AddrRange(0, (1 * slar) - 1)
+ ser_range1 = AddrRange(1 * slar, 2 * slar - 1)
+ ser_range2 = AddrRange(0, (4 * slar) - 1)
+ ser_range3 = AddrRange(0, (4 * slar) - 1)
ser_ranges = [ser_range0, ser_range1, ser_range2, ser_range3]
# Serial link Controller with 16 SerDes links at 10 Gbps with serial link
    # ranges w.r.t. the architecture
- sl = [SerialLink(ranges=ser_ranges[i],
- req_size=opt.link_buffer_size_req,
- resp_size=opt.link_buffer_size_rsp,
- num_lanes=opt.num_lanes_per_link,
- link_speed=opt.serial_link_speed,
- delay=opt.total_ctrl_latency) for i in
- range(opt.num_serial_links)]
+ sl = [
+ SerialLink(
+ ranges=ser_ranges[i],
+ req_size=opt.link_buffer_size_req,
+ resp_size=opt.link_buffer_size_rsp,
+ num_lanes=opt.num_lanes_per_link,
+ link_speed=opt.serial_link_speed,
+ delay=opt.total_ctrl_latency,
+ )
+ for i in range(opt.num_serial_links)
+ ]
system.hmc_host.seriallink = sl
# enable global monitor
if opt.enable_global_monitor:
- system.hmc_host.lmonitor = [CommMonitor() for i in
- range(opt.num_serial_links)]
+ system.hmc_host.lmonitor = [
+ CommMonitor() for i in range(opt.num_serial_links)
+ ]
# set the clock frequency for serial link
for i in range(opt.num_serial_links):
clk = opt.link_controller_frequency
- vd = VoltageDomain(voltage='1V')
+ vd = VoltageDomain(voltage="1V")
scd = SrcClockDomain(clock=clk, voltage_domain=vd)
system.hmc_host.seriallink[i].clk_domain = scd
@@ -387,8 +539,10 @@
# create memory ranges for the vault controllers
arv = convert.toMemorySize(opt.hmc_dev_vault_size)
- addr_ranges_vaults = [AddrRange(i*arv, ((i+1)*arv-1)) for i in
- range(opt.hmc_dev_num_vaults)]
+ addr_ranges_vaults = [
+ AddrRange(i * arv, ((i + 1) * arv - 1))
+ for i in range(opt.hmc_dev_num_vaults)
+ ]
system.mem_ranges = addr_ranges_vaults
if opt.enable_link_monitor:
@@ -396,29 +550,36 @@
system.hmc_dev.lmonitor = lm
# 4 HMC Crossbars located in its logic-base (LoB)
- xb = [NoncoherentXBar(width=opt.xbar_width,
- frontend_latency=opt.xbar_frontend_latency,
- forward_latency=opt.xbar_forward_latency,
- response_latency=opt.xbar_response_latency) for i in
- range(opt.number_mem_crossbar)]
+ xb = [
+ NoncoherentXBar(
+ width=opt.xbar_width,
+ frontend_latency=opt.xbar_frontend_latency,
+ forward_latency=opt.xbar_forward_latency,
+ response_latency=opt.xbar_response_latency,
+ )
+ for i in range(opt.number_mem_crossbar)
+ ]
system.hmc_dev.xbar = xb
for i in range(opt.number_mem_crossbar):
clk = opt.xbar_frequency
- vd = VoltageDomain(voltage='1V')
+ vd = VoltageDomain(voltage="1V")
scd = SrcClockDomain(clock=clk, voltage_domain=vd)
system.hmc_dev.xbar[i].clk_domain = scd
    # Attach 4 serial links to 4 crossbars
for i in range(opt.num_serial_links):
if opt.enable_link_monitor:
- system.hmc_host.seriallink[i].mem_side_port = \
- system.hmc_dev.lmonitor[i].cpu_side_port
- system.hmc_dev.lmonitor[i].mem_side_port = \
- system.hmc_dev.xbar[i].cpu_side_ports
+ system.hmc_host.seriallink[
+ i
+ ].mem_side_port = system.hmc_dev.lmonitor[i].cpu_side_port
+ system.hmc_dev.lmonitor[i].mem_side_port = system.hmc_dev.xbar[
+ i
+ ].cpu_side_ports
else:
- system.hmc_host.seriallink[i].mem_side_port = \
- system.hmc_dev.xbar[i].cpu_side_ports
+ system.hmc_host.seriallink[i].mem_side_port = system.hmc_dev.xbar[
+ i
+ ].cpu_side_ports
    # Connect the xbars to each other so that a request arriving at the wrong
    # xbar is forwarded to the correct one. Bridges are used to connect the xbars
@@ -426,9 +587,13 @@
numx = len(system.hmc_dev.xbar)
# create a list of buffers
- system.hmc_dev.buffers = [Bridge(req_size=opt.xbar_buffer_size_req,
- resp_size=opt.xbar_buffer_size_resp)
- for i in range(numx*(opt.mem_chunk-1))]
+ system.hmc_dev.buffers = [
+ Bridge(
+ req_size=opt.xbar_buffer_size_req,
+ resp_size=opt.xbar_buffer_size_resp,
+ )
+ for i in range(numx * (opt.mem_chunk - 1))
+ ]
# Buffer iterator
it = iter(list(range(len(system.hmc_dev.buffers))))
@@ -446,14 +611,18 @@
# Change the default values for ranges of bridge
system.hmc_dev.buffers[index].ranges = system.mem_ranges[
- j * int(opt.mem_chunk):
- (j + 1) * int(opt.mem_chunk)]
+ j * int(opt.mem_chunk) : (j + 1) * int(opt.mem_chunk)
+ ]
    # Connect the bridge between crossbars
- system.hmc_dev.xbar[i].mem_side_ports = \
- system.hmc_dev.buffers[index].cpu_side_port
- system.hmc_dev.buffers[index].mem_side_port = \
- system.hmc_dev.xbar[j].cpu_side_ports
+ system.hmc_dev.xbar[
+ i
+ ].mem_side_ports = system.hmc_dev.buffers[
+ index
+ ].cpu_side_port
+ system.hmc_dev.buffers[
+ index
+ ].mem_side_port = system.hmc_dev.xbar[j].cpu_side_ports
else:
# Don't connect the xbar to itself
pass
@@ -462,37 +631,49 @@
    # can only direct traffic to its local vaults
if opt.arch == "mixed":
system.hmc_dev.buffer30 = Bridge(ranges=system.mem_ranges[0:4])
- system.hmc_dev.xbar[3].mem_side_ports = \
- system.hmc_dev.buffer30.cpu_side_port
- system.hmc_dev.buffer30.mem_side_port = \
- system.hmc_dev.xbar[0].cpu_side_ports
+ system.hmc_dev.xbar[
+ 3
+ ].mem_side_ports = system.hmc_dev.buffer30.cpu_side_port
+ system.hmc_dev.buffer30.mem_side_port = system.hmc_dev.xbar[
+ 0
+ ].cpu_side_ports
system.hmc_dev.buffer31 = Bridge(ranges=system.mem_ranges[4:8])
- system.hmc_dev.xbar[3].mem_side_ports = \
- system.hmc_dev.buffer31.cpu_side_port
- system.hmc_dev.buffer31.mem_side_port = \
- system.hmc_dev.xbar[1].cpu_side_ports
+ system.hmc_dev.xbar[
+ 3
+ ].mem_side_ports = system.hmc_dev.buffer31.cpu_side_port
+ system.hmc_dev.buffer31.mem_side_port = system.hmc_dev.xbar[
+ 1
+ ].cpu_side_ports
system.hmc_dev.buffer32 = Bridge(ranges=system.mem_ranges[8:12])
- system.hmc_dev.xbar[3].mem_side_ports = \
- system.hmc_dev.buffer32.cpu_side_port
- system.hmc_dev.buffer32.mem_side_port = \
- system.hmc_dev.xbar[2].cpu_side_ports
+ system.hmc_dev.xbar[
+ 3
+ ].mem_side_ports = system.hmc_dev.buffer32.cpu_side_port
+ system.hmc_dev.buffer32.mem_side_port = system.hmc_dev.xbar[
+ 2
+ ].cpu_side_ports
system.hmc_dev.buffer20 = Bridge(ranges=system.mem_ranges[0:4])
- system.hmc_dev.xbar[2].mem_side_ports = \
- system.hmc_dev.buffer20.cpu_side_port
- system.hmc_dev.buffer20.mem_side_port = \
- system.hmc_dev.xbar[0].cpu_side_ports
+ system.hmc_dev.xbar[
+ 2
+ ].mem_side_ports = system.hmc_dev.buffer20.cpu_side_port
+ system.hmc_dev.buffer20.mem_side_port = system.hmc_dev.xbar[
+ 0
+ ].cpu_side_ports
system.hmc_dev.buffer21 = Bridge(ranges=system.mem_ranges[4:8])
- system.hmc_dev.xbar[2].mem_side_ports = \
- system.hmc_dev.buffer21.cpu_side_port
- system.hmc_dev.buffer21.mem_side_port = \
- system.hmc_dev.xbar[1].cpu_side_ports
+ system.hmc_dev.xbar[
+ 2
+ ].mem_side_ports = system.hmc_dev.buffer21.cpu_side_port
+ system.hmc_dev.buffer21.mem_side_port = system.hmc_dev.xbar[
+ 1
+ ].cpu_side_ports
system.hmc_dev.buffer23 = Bridge(ranges=system.mem_ranges[12:16])
- system.hmc_dev.xbar[2].mem_side_ports = \
- system.hmc_dev.buffer23.cpu_side_port
- system.hmc_dev.buffer23.mem_side_port = \
- system.hmc_dev.xbar[3].cpu_side_ports
+ system.hmc_dev.xbar[
+ 2
+ ].mem_side_ports = system.hmc_dev.buffer23.cpu_side_port
+ system.hmc_dev.buffer23.mem_side_port = system.hmc_dev.xbar[
+ 3
+ ].cpu_side_ports
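
(Aside, not part of the patch: the serial-link wiring above splits the HMC address space differently per --arch setting. A minimal standalone sketch of the three layouts, assuming the default 1GB --serial-link-addr-range and four serial links; names and values here are illustrative only.)

    GiB = 1 << 30
    slar = 1 * GiB  # assumed per-link range (--serial-link-addr-range default)
    num_links = 4   # assumed --num-serial-links

    # "same": every link is given the whole 4GB device range.
    same = [(0, 4 * slar - 1) for _ in range(num_links)]

    # "distributed": each link owns a disjoint 1GB slice.
    distributed = [(i * slar, (i + 1) * slar - 1) for i in range(num_links)]

    # "mixed": links 0 and 1 own slices, links 2 and 3 see the whole device.
    mixed = [(0, slar - 1), (slar, 2 * slar - 1),
             (0, 4 * slar - 1), (0, 4 * slar - 1)]

    for name, ranges in (("same", same), ("distributed", distributed), ("mixed", mixed)):
        print(name, [(hex(lo), hex(hi)) for lo, hi in ranges])
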
diff --git a/configs/common/MemConfig.py b/configs/common/MemConfig.py
index 332fd6b..baa0d23 100644
--- a/configs/common/MemConfig.py
+++ b/configs/common/MemConfig.py
@@ -37,8 +37,8 @@
from common import ObjectList
from common import HMC
-def create_mem_intf(intf, r, i, intlv_bits, intlv_size,
- xor_low_bit):
+
+def create_mem_intf(intf, r, i, intlv_bits, intlv_size, xor_low_bit):
"""
    Helper function for creating a single memory controller from the given
options. This function is invoked multiple times in config_mem function
@@ -46,6 +46,7 @@
"""
import math
+
intlv_low_bit = int(math.log(intlv_size, 2))
# Use basic hashing for the channel selection, and preferably use
@@ -53,7 +54,7 @@
# the details of the caches here, make an educated guess. 4 MByte
# 4-way associative with 64 byte cache lines is 6 offset bits and
# 14 index bits.
- if (xor_low_bit):
+ if xor_low_bit:
xor_high_bit = xor_low_bit + intlv_bits - 1
else:
xor_high_bit = 0
@@ -67,13 +68,15 @@
# If the channel bits are appearing after the column
# bits, we need to add the appropriate number of bits
# for the row buffer size
- if interface.addr_mapping.value == 'RoRaBaChCo':
+ if interface.addr_mapping.value == "RoRaBaChCo":
# This computation only really needs to happen
# once, but as we rely on having an instance we
# end up having to repeat it for each and every
# one
- rowbuffer_size = interface.device_rowbuffer_size.value * \
- interface.devices_per_rank.value
+ rowbuffer_size = (
+ interface.device_rowbuffer_size.value
+ * interface.devices_per_rank.value
+ )
intlv_low_bit = int(math.log(rowbuffer_size, 2))
@@ -83,7 +86,7 @@
# If the channel bits are appearing after the low order
# address bits (buffer bits), we need to add the appropriate
# number of bits for the buffer size
- if interface.addr_mapping.value == 'RoRaBaChCo':
+ if interface.addr_mapping.value == "RoRaBaChCo":
# This computation only really needs to happen
# once, but as we rely on having an instance we
# end up having to repeat it for each and every
@@ -94,14 +97,17 @@
# We got all we need to configure the appropriate address
# range
- interface.range = m5.objects.AddrRange(r.start, size = r.size(),
- intlvHighBit = \
- intlv_low_bit + intlv_bits - 1,
- xorHighBit = xor_high_bit,
- intlvBits = intlv_bits,
- intlvMatch = i)
+ interface.range = m5.objects.AddrRange(
+ r.start,
+ size=r.size(),
+ intlvHighBit=intlv_low_bit + intlv_bits - 1,
+ xorHighBit=xor_high_bit,
+ intlvBits=intlv_bits,
+ intlvMatch=i,
+ )
return interface
+
def config_mem(options, system):
"""
Create the memory controllers based on the options and attach them.
@@ -125,8 +131,9 @@
# Optional options
opt_tlm_memory = getattr(options, "tlm_memory", None)
- opt_external_memory_system = getattr(options, "external_memory_system",
- None)
+ opt_external_memory_system = getattr(
+ options, "external_memory_system", None
+ )
opt_elastic_trace_en = getattr(options, "elastic_trace_en", False)
opt_mem_ranks = getattr(options, "mem_ranks", None)
opt_nvm_ranks = getattr(options, "nvm_ranks", None)
@@ -149,15 +156,18 @@
port_type="tlm_slave",
port_data=opt_tlm_memory,
port=system.membus.mem_side_ports,
- addr_ranges=system.mem_ranges)
+ addr_ranges=system.mem_ranges,
+ )
system.workload.addr_check = False
return
if opt_external_memory_system:
subsystem.external_memory = m5.objects.ExternalSlave(
port_type=opt_external_memory_system,
- port_data="init_mem0", port=xbar.mem_side_ports,
- addr_ranges=system.mem_ranges)
+ port_data="init_mem0",
+ port=xbar.mem_side_ports,
+ addr_ranges=system.mem_ranges,
+ )
subsystem.workload.addr_check = False
return
@@ -165,8 +175,9 @@
import math
from m5.util import fatal
+
intlv_bits = int(math.log(nbr_mem_ctrls, 2))
- if 2 ** intlv_bits != nbr_mem_ctrls:
+ if 2**intlv_bits != nbr_mem_ctrls:
fatal("Number of memory channels must be a power of 2")
if opt_mem_type:
@@ -178,8 +189,10 @@
mem_ctrls = []
if opt_elastic_trace_en and not issubclass(intf, m5.objects.SimpleMemory):
- fatal("When elastic trace is enabled, configure mem-type as "
- "simple-mem.")
+ fatal(
+ "When elastic trace is enabled, configure mem-type as "
+ "simple-mem."
+ )
# The default behaviour is to interleave memory channels on 128
# byte granularity, or cache line granularity if larger than 128
@@ -199,13 +212,16 @@
for i in range(nbr_mem_ctrls):
if opt_mem_type and (not opt_nvm_type or range_iter % 2 != 0):
# Create the DRAM interface
- dram_intf = create_mem_intf(intf, r, i,
- intlv_bits, intlv_size, opt_xor_low_bit)
+ dram_intf = create_mem_intf(
+ intf, r, i, intlv_bits, intlv_size, opt_xor_low_bit
+ )
# Set the number of ranks based on the command-line
# options if it was explicitly set
- if issubclass(intf, m5.objects.DRAMInterface) and \
- opt_mem_ranks:
+ if (
+ issubclass(intf, m5.objects.DRAMInterface)
+ and opt_mem_ranks
+ ):
dram_intf.ranks_per_channel = opt_mem_ranks
# Enable low-power DRAM states if option is set
@@ -213,9 +229,11 @@
dram_intf.enable_dram_powerdown = opt_dram_powerdown
if opt_elastic_trace_en:
- dram_intf.latency = '1ns'
- print("For elastic trace, over-riding Simple Memory "
- "latency to 1ns.")
+ dram_intf.latency = "1ns"
+ print(
+ "For elastic trace, over-riding Simple Memory "
+ "latency to 1ns."
+ )
# Create the controller that will drive the interface
mem_ctrl = dram_intf.controller()
@@ -223,13 +241,16 @@
mem_ctrls.append(mem_ctrl)
elif opt_nvm_type and (not opt_mem_type or range_iter % 2 == 0):
- nvm_intf = create_mem_intf(n_intf, r, i,
- intlv_bits, intlv_size, opt_xor_low_bit)
+ nvm_intf = create_mem_intf(
+ n_intf, r, i, intlv_bits, intlv_size, opt_xor_low_bit
+ )
# Set the number of ranks based on the command-line
# options if it was explicitly set
- if issubclass(n_intf, m5.objects.NVMInterface) and \
- opt_nvm_ranks:
+ if (
+ issubclass(n_intf, m5.objects.NVMInterface)
+ and opt_nvm_ranks
+ ):
nvm_intf.ranks_per_channel = opt_nvm_ranks
# Create a controller if not sharing a channel with DRAM
@@ -244,13 +265,13 @@
# hook up NVM interface when channel is shared with DRAM + NVM
for i in range(len(nvm_intfs)):
- mem_ctrls[i].nvm = nvm_intfs[i];
+ mem_ctrls[i].nvm = nvm_intfs[i]
# Connect the controller to the xbar port
for i in range(len(mem_ctrls)):
if opt_mem_type == "HMC_2500_1x32":
# Connect the controllers to the membus
- mem_ctrls[i].port = xbar[i//4].mem_side_ports
+ mem_ctrls[i].port = xbar[i // 4].mem_side_ports
# Set memory device size. There is an independent controller
    # for each vault. All vaults are the same size.
mem_ctrls[i].dram.device_size = options.hmc_dev_vault_size
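
(Aside, not part of the patch: create_mem_intf() above derives the channel-select bits from the channel count and the interleaving granularity. A minimal standalone sketch of that arithmetic, assuming four channels, 128-byte granularity, and an XOR hash starting at bit 20; all values are illustrative.)

    import math

    nbr_mem_ctrls = 4   # assumed number of channels (must be a power of 2)
    intlv_size = 128    # assumed interleaving granularity in bytes
    xor_low_bit = 20    # assumed start of the XOR hashing range (0 disables it)

    intlv_bits = int(math.log(nbr_mem_ctrls, 2))     # 2 channel-select bits
    intlv_low_bit = int(math.log(intlv_size, 2))     # bit 7 for 128B granularity
    intlv_high_bit = intlv_low_bit + intlv_bits - 1  # bits [8:7] pick the channel
    xor_high_bit = xor_low_bit + intlv_bits - 1 if xor_low_bit else 0

    print(intlv_bits, intlv_low_bit, intlv_high_bit, xor_high_bit)  # 2 7 8 21
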
diff --git a/configs/common/ObjectList.py b/configs/common/ObjectList.py
index 685dbc1..ce52967 100644
--- a/configs/common/ObjectList.py
+++ b/configs/common/ObjectList.py
@@ -34,18 +34,20 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+from gem5.runtime import get_supported_isas
import m5.objects
import m5.internal.params
import inspect
import sys
from textwrap import TextWrapper
+
class ObjectList(object):
- """ Creates a list of objects that are sub-classes of a given class. """
+ """Creates a list of objects that are sub-classes of a given class."""
def _is_obj_class(self, cls):
"""Determine if a class is a a sub class of the provided base class
- that can be instantiated.
+ that can be instantiated.
"""
# We can't use the normal inspect.isclass because the ParamFactory
@@ -63,16 +65,20 @@
sub_cls = self._sub_classes[real_name]
return sub_cls
except KeyError:
- print("{} is not a valid sub-class of {}.".format(name, \
- self.base_cls))
+ print(
+ "{} is not a valid sub-class of {}.".format(
+ name, self.base_cls
+ )
+ )
raise
def print(self):
"""Print a list of available sub-classes and aliases."""
print("Available {} classes:".format(self.base_cls))
- doc_wrapper = TextWrapper(initial_indent="\t\t",
- subsequent_indent="\t\t")
+ doc_wrapper = TextWrapper(
+ initial_indent="\t\t", subsequent_indent="\t\t"
+ )
for name, cls in list(self._sub_classes.items()):
print("\t{}".format(name))
@@ -117,6 +123,7 @@
self._aliases = {}
self._add_aliases(aliases)
+
class CPUList(ObjectList):
def _is_obj_class(self, cls):
"""Determine if a class is a CPU that can be instantiated"""
@@ -124,35 +131,42 @@
# We can't use the normal inspect.isclass because the ParamFactory
# and ProxyFactory classes have a tendency to confuse it.
try:
- return super(CPUList, self)._is_obj_class(cls) and \
- not issubclass(cls, m5.objects.CheckerCPU)
+ return super(CPUList, self)._is_obj_class(cls) and not issubclass(
+ cls, m5.objects.CheckerCPU
+ )
except (TypeError, AttributeError):
return False
def _add_objects(self):
super(CPUList, self)._add_objects()
- from m5.defines import buildEnv
from importlib import import_module
- for package in [ "generic", buildEnv['TARGET_ISA']]:
+
+ for isa in {
+ "generic",
+ } | {isa.name.lower() for isa in get_supported_isas()}:
try:
- package = import_module(".cores." + package,
- package=__name__.rpartition('.')[0])
+ package = import_module(
+ ".cores." + isa, package=__name__.rpartition(".")[0]
+ )
except ImportError:
# No timing models for this ISA
continue
- for mod_name, module in \
- inspect.getmembers(package, inspect.ismodule):
- for name, cls in inspect.getmembers(module,
- self._is_obj_class):
+ for mod_name, module in inspect.getmembers(
+ package, inspect.ismodule
+ ):
+ for name, cls in inspect.getmembers(
+ module, self._is_obj_class
+ ):
self._sub_classes[name] = cls
+
class EnumList(ObjectList):
- """ Creates a list of possible values for a given enum class. """
+ """Creates a list of possible values for a given enum class."""
def _add_objects(self):
- """ Add all enum values to the ObjectList """
+ """Add all enum values to the ObjectList"""
self._sub_classes = {}
for (key, value) in list(self.base_cls.__members__.items()):
# All Enums have a value Num_NAME at the end which we
@@ -160,31 +174,37 @@
if not key.startswith("Num_"):
self._sub_classes[key] = value
-rp_list = ObjectList(getattr(m5.objects, 'BaseReplacementPolicy', None))
-bp_list = ObjectList(getattr(m5.objects, 'BranchPredictor', None))
-cpu_list = CPUList(getattr(m5.objects, 'BaseCPU', None))
-hwp_list = ObjectList(getattr(m5.objects, 'BasePrefetcher', None))
-indirect_bp_list = ObjectList(getattr(m5.objects, 'IndirectPredictor', None))
-mem_list = ObjectList(getattr(m5.objects, 'AbstractMemory', None))
-dram_addr_map_list = EnumList(getattr(m5.internal.params, 'enum_AddrMap',
- None))
+
+rp_list = ObjectList(getattr(m5.objects, "BaseReplacementPolicy", None))
+bp_list = ObjectList(getattr(m5.objects, "BranchPredictor", None))
+cpu_list = CPUList(getattr(m5.objects, "BaseCPU", None))
+hwp_list = ObjectList(getattr(m5.objects, "BasePrefetcher", None))
+indirect_bp_list = ObjectList(getattr(m5.objects, "IndirectPredictor", None))
+mem_list = ObjectList(getattr(m5.objects, "AbstractMemory", None))
+dram_addr_map_list = EnumList(
+ getattr(m5.internal.params, "enum_AddrMap", None)
+)
# Platform aliases. The platforms listed here might not be compiled,
# so we make sure they exist before we add them to the platform list.
-_platform_aliases_all = [
- ("VExpress_GEM5", "VExpress_GEM5_V1"),
- ]
-platform_list = ObjectList(getattr(m5.objects, 'Platform', None), \
- _platform_aliases_all)
+_platform_aliases_all = [("VExpress_GEM5", "VExpress_GEM5_V1")]
+platform_list = ObjectList(
+ getattr(m5.objects, "Platform", None), _platform_aliases_all
+)
+
def _subclass_tester(name):
sub_class = getattr(m5.objects, name, None)
def tester(cls):
- return sub_class is not None and cls is not None and \
- issubclass(cls, sub_class)
+ return (
+ sub_class is not None
+ and cls is not None
+ and issubclass(cls, sub_class)
+ )
return tester
+
is_kvm_cpu = _subclass_tester("BaseKvmCPU")
is_noncaching_cpu = _subclass_tester("NonCachingSimpleCPU")
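
(Aside, not part of the patch: _subclass_tester() above returns a predicate closed over a SimObject class that may not be compiled into a given build. A minimal standalone sketch of the same closure pattern, using hypothetical plain-Python classes in place of m5.objects.)

    class BaseCPU:              # hypothetical stand-in for m5.objects.BaseCPU
        pass

    class BaseKvmCPU(BaseCPU):  # hypothetical stand-in for m5.objects.BaseKvmCPU
        pass

    _objects = {"BaseKvmCPU": BaseKvmCPU}  # illustrative lookup table

    def _subclass_tester(name):
        sub_class = _objects.get(name)  # None if the class is not available
        def tester(cls):
            return (
                sub_class is not None
                and cls is not None
                and issubclass(cls, sub_class)
            )
        return tester

    is_kvm_cpu = _subclass_tester("BaseKvmCPU")
    print(is_kvm_cpu(BaseKvmCPU), is_kvm_cpu(BaseCPU))  # True False
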
diff --git a/configs/common/Options.py b/configs/common/Options.py
index a63cc7b..81d7791 100644
--- a/configs/common/Options.py
+++ b/configs/common/Options.py
@@ -97,6 +97,7 @@
ObjectList.platform_list.print()
sys.exit(0)
+
# Add the very basic options that work also in the case of the no ISA
# being used, and consequently no CPUs, but rather various types of
# testers and traffic generators.
@@ -104,41 +105,77 @@
def addNoISAOptions(parser):
parser.add_argument("-n", "--num-cpus", type=int, default=1)
- parser.add_argument("--sys-voltage", action="store", type=str,
- default='1.0V',
- help="""Top-level voltage for blocks running at system
- power supply""")
- parser.add_argument("--sys-clock", action="store", type=str,
- default='1GHz',
- help="""Top-level clock for blocks running at system
- speed""")
+ parser.add_argument(
+ "--sys-voltage",
+ action="store",
+ type=str,
+ default="1.0V",
+ help="""Top-level voltage for blocks running at system
+ power supply""",
+ )
+ parser.add_argument(
+ "--sys-clock",
+ action="store",
+ type=str,
+ default="1GHz",
+ help="""Top-level clock for blocks running at system
+ speed""",
+ )
# Memory Options
- parser.add_argument("--list-mem-types",
- action=ListMem, nargs=0,
- help="List available memory types")
- parser.add_argument("--mem-type", default="DDR3_1600_8x8",
- choices=ObjectList.mem_list.get_names(),
- help="type of memory to use")
- parser.add_argument("--mem-channels", type=int, default=1,
- help="number of memory channels")
- parser.add_argument("--mem-ranks", type=int, default=None,
- help="number of memory ranks per channel")
parser.add_argument(
- "--mem-size", action="store", type=str, default="512MB",
- help="Specify the physical memory size (single memory)")
- parser.add_argument("--enable-dram-powerdown", action="store_true",
- help="Enable low-power states in DRAMInterface")
- parser.add_argument("--mem-channels-intlv", type=int, default=0,
- help="Memory channels interleave")
+ "--list-mem-types",
+ action=ListMem,
+ nargs=0,
+ help="List available memory types",
+ )
+ parser.add_argument(
+ "--mem-type",
+ default="DDR3_1600_8x8",
+ choices=ObjectList.mem_list.get_names(),
+ help="type of memory to use",
+ )
+ parser.add_argument(
+ "--mem-channels", type=int, default=1, help="number of memory channels"
+ )
+ parser.add_argument(
+ "--mem-ranks",
+ type=int,
+ default=None,
+ help="number of memory ranks per channel",
+ )
+ parser.add_argument(
+ "--mem-size",
+ action="store",
+ type=str,
+ default="512MB",
+ help="Specify the physical memory size (single memory)",
+ )
+ parser.add_argument(
+ "--enable-dram-powerdown",
+ action="store_true",
+ help="Enable low-power states in DRAMInterface",
+ )
+ parser.add_argument(
+ "--mem-channels-intlv",
+ type=int,
+ default=0,
+ help="Memory channels interleave",
+ )
parser.add_argument("--memchecker", action="store_true")
# Cache Options
- parser.add_argument("--external-memory-system", type=str,
- help="use external ports of this port_type for caches")
- parser.add_argument("--tlm-memory", type=str,
- help="use external port for SystemC TLM cosimulation")
+ parser.add_argument(
+ "--external-memory-system",
+ type=str,
+ help="use external ports of this port_type for caches",
+ )
+ parser.add_argument(
+ "--tlm-memory",
+ type=str,
+ help="use external port for SystemC TLM cosimulation",
+ )
parser.add_argument("--caches", action="store_true")
parser.add_argument("--l2cache", action="store_true")
parser.add_argument("--num-dirs", type=int, default=1)
@@ -158,26 +195,44 @@
parser.add_argument("--ruby", action="store_true")
# Run duration options
- parser.add_argument("-m", "--abs-max-tick", type=int, default=m5.MaxTick,
- metavar="TICKS", help="Run to absolute simulated tick "
- "specified including ticks from a restored checkpoint")
parser.add_argument(
- "--rel-max-tick", type=int, default=None, metavar="TICKS",
+ "-m",
+ "--abs-max-tick",
+ type=int,
+ default=m5.MaxTick,
+ metavar="TICKS",
+ help="Run to absolute simulated tick "
+ "specified including ticks from a restored checkpoint",
+ )
+ parser.add_argument(
+ "--rel-max-tick",
+ type=int,
+ default=None,
+ metavar="TICKS",
help="Simulate for specified number of"
" ticks relative to the simulation start tick (e.g. if "
- "restoring a checkpoint)")
- parser.add_argument("--maxtime", type=float, default=None,
- help="Run to the specified absolute simulated time in "
- "seconds")
+ "restoring a checkpoint)",
+ )
parser.add_argument(
- "-P", "--param", action="append", default=[],
+ "--maxtime",
+ type=float,
+ default=None,
+ help="Run to the specified absolute simulated time in " "seconds",
+ )
+ parser.add_argument(
+ "-P",
+ "--param",
+ action="append",
+ default=[],
help="Set a SimObject parameter relative to the root node. "
"An extended Python multi range slicing syntax can be used "
"for arrays. For example: "
"'system.cpu[0,1,3:8:2].max_insts_all_threads = 42' "
"sets max_insts_all_threads for cpus 0, 1, 3, 5 and 7 "
"Direct parameters of the root object are not accessible, "
- "only parameters of its children.")
+ "only parameters of its children.",
+ )
+
# Add common options that assume a non-NULL ISA.
@@ -187,273 +242,519 @@
addNoISAOptions(parser)
# system options
- parser.add_argument("--list-cpu-types",
- action=ListCpu, nargs=0,
- help="List available CPU types")
- parser.add_argument("--cpu-type", default="AtomicSimpleCPU",
- choices=ObjectList.cpu_list.get_names(),
- help="type of cpu to run with")
- parser.add_argument("--list-bp-types",
- action=ListBp, nargs=0,
- help="List available branch predictor types")
- parser.add_argument("--list-indirect-bp-types",
- action=ListIndirectBP, nargs=0,
- help="List available indirect branch predictor types")
- parser.add_argument("--bp-type", default=None,
- choices=ObjectList.bp_list.get_names(),
- help="""
+ parser.add_argument(
+ "--list-cpu-types",
+ action=ListCpu,
+ nargs=0,
+ help="List available CPU types",
+ )
+ parser.add_argument(
+ "--cpu-type",
+ default="AtomicSimpleCPU",
+ choices=ObjectList.cpu_list.get_names(),
+ help="type of cpu to run with",
+ )
+ parser.add_argument(
+ "--list-bp-types",
+ action=ListBp,
+ nargs=0,
+ help="List available branch predictor types",
+ )
+ parser.add_argument(
+ "--list-indirect-bp-types",
+ action=ListIndirectBP,
+ nargs=0,
+ help="List available indirect branch predictor types",
+ )
+ parser.add_argument(
+ "--bp-type",
+ default=None,
+ choices=ObjectList.bp_list.get_names(),
+ help="""
type of branch predictor to run with
(if not set, use the default branch predictor of
- the selected CPU)""")
- parser.add_argument("--indirect-bp-type", default=None,
- choices=ObjectList.indirect_bp_list.get_names(),
- help="type of indirect branch predictor to run with")
+ the selected CPU)""",
+ )
+ parser.add_argument(
+ "--indirect-bp-type",
+ default=None,
+ choices=ObjectList.indirect_bp_list.get_names(),
+ help="type of indirect branch predictor to run with",
+ )
- parser.add_argument("--list-rp-types",
- action=ListRP, nargs=0,
- help="List available replacement policy types")
+ parser.add_argument(
+ "--list-rp-types",
+ action=ListRP,
+ nargs=0,
+ help="List available replacement policy types",
+ )
- parser.add_argument("--list-hwp-types",
- action=ListHWP, nargs=0,
- help="List available hardware prefetcher types")
- parser.add_argument("--l1i-hwp-type", default=None,
- choices=ObjectList.hwp_list.get_names(),
- help="""
+ parser.add_argument(
+ "--list-hwp-types",
+ action=ListHWP,
+ nargs=0,
+ help="List available hardware prefetcher types",
+ )
+ parser.add_argument(
+ "--l1i-hwp-type",
+ default=None,
+ choices=ObjectList.hwp_list.get_names(),
+ help="""
type of hardware prefetcher to use with the L1
instruction cache.
(if not set, use the default prefetcher of
- the selected cache)""")
- parser.add_argument("--l1d-hwp-type", default=None,
- choices=ObjectList.hwp_list.get_names(),
- help="""
+ the selected cache)""",
+ )
+ parser.add_argument(
+ "--l1d-hwp-type",
+ default=None,
+ choices=ObjectList.hwp_list.get_names(),
+ help="""
type of hardware prefetcher to use with the L1
data cache.
(if not set, use the default prefetcher of
- the selected cache)""")
- parser.add_argument("--l2-hwp-type", default=None,
- choices=ObjectList.hwp_list.get_names(),
- help="""
+ the selected cache)""",
+ )
+ parser.add_argument(
+ "--l2-hwp-type",
+ default=None,
+ choices=ObjectList.hwp_list.get_names(),
+ help="""
type of hardware prefetcher to use with the L2 cache.
(if not set, use the default prefetcher of
- the selected cache)""")
+ the selected cache)""",
+ )
parser.add_argument("--checker", action="store_true")
- parser.add_argument("--cpu-clock", action="store", type=str,
- default='2GHz',
- help="Clock for blocks running at CPU speed")
- parser.add_argument("--smt", action="store_true", default=False,
- help="""
+ parser.add_argument(
+ "--cpu-clock",
+ action="store",
+ type=str,
+ default="2GHz",
+ help="Clock for blocks running at CPU speed",
+ )
+ parser.add_argument(
+ "--smt",
+ action="store_true",
+ default=False,
+ help="""
Only used if multiple programs are specified. If true,
                then the number of threads per cpu is the same as the
- number of programs.""")
+ number of programs.""",
+ )
parser.add_argument(
- "--elastic-trace-en", action="store_true",
+ "--elastic-trace-en",
+ action="store_true",
help="""Enable capture of data dependency and instruction
- fetch traces using elastic trace probe.""")
+ fetch traces using elastic trace probe.""",
+ )
# Trace file paths input to trace probe in a capture simulation and input
# to Trace CPU in a replay simulation
- parser.add_argument("--inst-trace-file", action="store", type=str,
- help="""Instruction fetch trace file input to
+ parser.add_argument(
+ "--inst-trace-file",
+ action="store",
+ type=str,
+ help="""Instruction fetch trace file input to
Elastic Trace probe in a capture simulation and
- Trace CPU in a replay simulation""", default="")
- parser.add_argument("--data-trace-file", action="store", type=str,
- help="""Data dependency trace file input to
+ Trace CPU in a replay simulation""",
+ default="",
+ )
+ parser.add_argument(
+ "--data-trace-file",
+ action="store",
+ type=str,
+ help="""Data dependency trace file input to
Elastic Trace probe in a capture simulation and
- Trace CPU in a replay simulation""", default="")
+ Trace CPU in a replay simulation""",
+ default="",
+ )
# dist-gem5 options
- parser.add_argument("--dist", action="store_true",
- help="Parallel distributed gem5 simulation.")
parser.add_argument(
- "--dist-sync-on-pseudo-op", action="store_true",
- help="Use a pseudo-op to start dist-gem5 synchronization.")
+ "--dist",
+ action="store_true",
+ help="Parallel distributed gem5 simulation.",
+ )
parser.add_argument(
- "--is-switch", action="store_true",
+ "--dist-sync-on-pseudo-op",
+ action="store_true",
+ help="Use a pseudo-op to start dist-gem5 synchronization.",
+ )
+ parser.add_argument(
+ "--is-switch",
+ action="store_true",
help="Select the network switch simulator process for a"
- "distributed gem5 run")
- parser.add_argument("--dist-rank", default=0, action="store", type=int,
- help="Rank of this system within the dist gem5 run.")
+ "distributed gem5 run",
+ )
parser.add_argument(
- "--dist-size", default=0, action="store", type=int,
- help="Number of gem5 processes within the dist gem5 run.")
+ "--dist-rank",
+ default=0,
+ action="store",
+ type=int,
+ help="Rank of this system within the dist gem5 run.",
+ )
parser.add_argument(
- "--dist-server-name", default="127.0.0.1", action="store", type=str,
- help="Name of the message server host\nDEFAULT: localhost")
- parser.add_argument("--dist-server-port",
- default=2200,
- action="store", type=int,
- help="Message server listen port\nDEFAULT: 2200")
+ "--dist-size",
+ default=0,
+ action="store",
+ type=int,
+ help="Number of gem5 processes within the dist gem5 run.",
+ )
parser.add_argument(
- "--dist-sync-repeat", default="0us", action="store", type=str,
+ "--dist-server-name",
+ default="127.0.0.1",
+ action="store",
+ type=str,
+ help="Name of the message server host\nDEFAULT: localhost",
+ )
+ parser.add_argument(
+ "--dist-server-port",
+ default=2200,
+ action="store",
+ type=int,
+ help="Message server listen port\nDEFAULT: 2200",
+ )
+ parser.add_argument(
+ "--dist-sync-repeat",
+ default="0us",
+ action="store",
+ type=str,
help="Repeat interval for synchronisation barriers among "
- "dist-gem5 processes\nDEFAULT: --ethernet-linkdelay")
+ "dist-gem5 processes\nDEFAULT: --ethernet-linkdelay",
+ )
parser.add_argument(
- "--dist-sync-start", default="5200000000000t", action="store",
+ "--dist-sync-start",
+ default="5200000000000t",
+ action="store",
type=str,
help="Time to schedule the first dist synchronisation barrier\n"
- "DEFAULT:5200000000000t")
- parser.add_argument("--ethernet-linkspeed", default="10Gbps",
- action="store", type=str,
- help="Link speed in bps\nDEFAULT: 10Gbps")
- parser.add_argument("--ethernet-linkdelay", default="10us",
- action="store", type=str,
- help="Link delay in seconds\nDEFAULT: 10us")
+ "DEFAULT:5200000000000t",
+ )
+ parser.add_argument(
+ "--ethernet-linkspeed",
+ default="10Gbps",
+ action="store",
+ type=str,
+ help="Link speed in bps\nDEFAULT: 10Gbps",
+ )
+ parser.add_argument(
+ "--ethernet-linkdelay",
+ default="10us",
+ action="store",
+ type=str,
+ help="Link delay in seconds\nDEFAULT: 10us",
+ )
# Run duration options
- parser.add_argument("-I", "--maxinsts", action="store", type=int,
- default=None, help="""Total number of instructions to
- simulate (default: run forever)""")
- parser.add_argument("--work-item-id", action="store", type=int,
- help="the specific work id for exit & checkpointing")
- parser.add_argument("--num-work-ids", action="store", type=int,
- help="Number of distinct work item types")
- parser.add_argument("--work-begin-cpu-id-exit", action="store", type=int,
- help="exit when work starts on the specified cpu")
- parser.add_argument("--work-end-exit-count", action="store", type=int,
- help="exit at specified work end count")
- parser.add_argument("--work-begin-exit-count", action="store", type=int,
- help="exit at specified work begin count")
- parser.add_argument("--init-param", action="store", type=int, default=0,
- help="""Parameter available in simulation with m5
- initparam""")
parser.add_argument(
- "--initialize-only", action="store_true", default=False,
+ "-I",
+ "--maxinsts",
+ action="store",
+ type=int,
+ default=None,
+ help="""Total number of instructions to
+ simulate (default: run forever)""",
+ )
+ parser.add_argument(
+ "--work-item-id",
+ action="store",
+ type=int,
+ help="the specific work id for exit & checkpointing",
+ )
+ parser.add_argument(
+ "--num-work-ids",
+ action="store",
+ type=int,
+ help="Number of distinct work item types",
+ )
+ parser.add_argument(
+ "--work-begin-cpu-id-exit",
+ action="store",
+ type=int,
+ help="exit when work starts on the specified cpu",
+ )
+ parser.add_argument(
+ "--work-end-exit-count",
+ action="store",
+ type=int,
+ help="exit at specified work end count",
+ )
+ parser.add_argument(
+ "--work-begin-exit-count",
+ action="store",
+ type=int,
+ help="exit at specified work begin count",
+ )
+ parser.add_argument(
+ "--init-param",
+ action="store",
+ type=int,
+ default=0,
+ help="""Parameter available in simulation with m5
+ initparam""",
+ )
+ parser.add_argument(
+ "--initialize-only",
+ action="store_true",
+ default=False,
help="""Exit after initialization. Do not simulate time.
- Useful when gem5 is run as a library.""")
+ Useful when gem5 is run as a library.""",
+ )
# Simpoint options
- parser.add_argument("--simpoint-profile", action="store_true",
- help="Enable basic block profiling for SimPoints")
- parser.add_argument("--simpoint-interval", type=int, default=10000000,
- help="SimPoint interval in num of instructions")
parser.add_argument(
- "--take-simpoint-checkpoints", action="store", type=str,
- help="<simpoint file,weight file,interval-length,warmup-length>")
- parser.add_argument("--restore-simpoint-checkpoint", action="store_true",
- default=False,
- help="restore from a simpoint checkpoint taken with " +
- "--take-simpoint-checkpoints")
+ "--simpoint-profile",
+ action="store_true",
+ help="Enable basic block profiling for SimPoints",
+ )
+ parser.add_argument(
+ "--simpoint-interval",
+ type=int,
+ default=10000000,
+ help="SimPoint interval in num of instructions",
+ )
+ parser.add_argument(
+ "--take-simpoint-checkpoints",
+ action="store",
+ type=str,
+ help="<simpoint file,weight file,interval-length,warmup-length>",
+ )
+ parser.add_argument(
+ "--restore-simpoint-checkpoint",
+ action="store_true",
+ default=False,
+ help="restore from a simpoint checkpoint taken with "
+ + "--take-simpoint-checkpoints",
+ )
# Checkpointing options
# Note that performing checkpointing via python script files will override
# checkpoint instructions built into binaries.
parser.add_argument(
- "--take-checkpoints", action="store", type=str,
- help="<M,N> take checkpoints at tick M and every N ticks thereafter")
+ "--take-checkpoints",
+ action="store",
+ type=str,
+ help="<M,N> take checkpoints at tick M and every N ticks thereafter",
+ )
parser.add_argument(
- "--max-checkpoints", action="store", type=int,
- help="the maximum number of checkpoints to drop", default=5)
+ "--max-checkpoints",
+ action="store",
+ type=int,
+ help="the maximum number of checkpoints to drop",
+ default=5,
+ )
parser.add_argument(
- "--checkpoint-dir", action="store", type=str,
- help="Place all checkpoints in this absolute directory")
- parser.add_argument("-r", "--checkpoint-restore", action="store", type=int,
- help="restore from checkpoint <N>")
- parser.add_argument("--checkpoint-at-end", action="store_true",
- help="take a checkpoint at end of run")
+ "--checkpoint-dir",
+ action="store",
+ type=str,
+ help="Place all checkpoints in this absolute directory",
+ )
parser.add_argument(
- "--work-begin-checkpoint-count", action="store", type=int,
- help="checkpoint at specified work begin count")
+ "-r",
+ "--checkpoint-restore",
+ action="store",
+ type=int,
+ help="restore from checkpoint <N>",
+ )
parser.add_argument(
- "--work-end-checkpoint-count", action="store", type=int,
- help="checkpoint at specified work end count")
+ "--checkpoint-at-end",
+ action="store_true",
+ help="take a checkpoint at end of run",
+ )
parser.add_argument(
- "--work-cpus-checkpoint-count", action="store", type=int,
- help="checkpoint and exit when active cpu count is reached")
- parser.add_argument("--restore-with-cpu", action="store",
- default="AtomicSimpleCPU",
- choices=ObjectList.cpu_list.get_names(),
- help="cpu type for restoring from a checkpoint")
+ "--work-begin-checkpoint-count",
+ action="store",
+ type=int,
+ help="checkpoint at specified work begin count",
+ )
+ parser.add_argument(
+ "--work-end-checkpoint-count",
+ action="store",
+ type=int,
+ help="checkpoint at specified work end count",
+ )
+ parser.add_argument(
+ "--work-cpus-checkpoint-count",
+ action="store",
+ type=int,
+ help="checkpoint and exit when active cpu count is reached",
+ )
+ parser.add_argument(
+ "--restore-with-cpu",
+ action="store",
+ default="AtomicSimpleCPU",
+ choices=ObjectList.cpu_list.get_names(),
+ help="cpu type for restoring from a checkpoint",
+ )
# CPU Switching - default switch model goes from a checkpoint
# to a timing simple CPU with caches to warm up, then to detailed CPU for
# data measurement
parser.add_argument(
- "--repeat-switch", action="store", type=int, default=None,
- help="switch back and forth between CPUs with period <N>")
+ "--repeat-switch",
+ action="store",
+ type=int,
+ default=None,
+ help="switch back and forth between CPUs with period <N>",
+ )
parser.add_argument(
- "-s", "--standard-switch", action="store", type=int, default=None,
- help="switch from timing to Detailed CPU after warmup period of <N>")
- parser.add_argument("-p", "--prog-interval", type=str,
- help="CPU Progress Interval")
+ "-s",
+ "--standard-switch",
+ action="store",
+ type=int,
+ default=None,
+ help="switch from timing to Detailed CPU after warmup period of <N>",
+ )
+ parser.add_argument(
+ "-p", "--prog-interval", type=str, help="CPU Progress Interval"
+ )
# Fastforwarding and simpoint related materials
parser.add_argument(
- "-W", "--warmup-insts", action="store", type=int, default=None,
- help="Warmup period in total instructions (requires --standard-switch)")
+ "-W",
+ "--warmup-insts",
+ action="store",
+ type=int,
+ default=None,
+ help="Warmup period in total instructions (requires --standard-switch)",
+ )
parser.add_argument(
- "--bench", action="store", type=str, default=None,
- help="base names for --take-checkpoint and --checkpoint-restore")
+ "--bench",
+ action="store",
+ type=str,
+ default=None,
+ help="base names for --take-checkpoint and --checkpoint-restore",
+ )
parser.add_argument(
- "-F", "--fast-forward", action="store", type=str, default=None,
- help="Number of instructions to fast forward before switching")
+ "-F",
+ "--fast-forward",
+ action="store",
+ type=str,
+ default=None,
+ help="Number of instructions to fast forward before switching",
+ )
parser.add_argument(
- "-S", "--simpoint", action="store_true", default=False,
+ "-S",
+ "--simpoint",
+ action="store_true",
+ default=False,
help="""Use workload simpoints as an instruction offset for
- --checkpoint-restore or --take-checkpoint.""")
+ --checkpoint-restore or --take-checkpoint.""",
+ )
parser.add_argument(
- "--at-instruction", action="store_true", default=False,
+ "--at-instruction",
+ action="store_true",
+ default=False,
help="""Treat value of --checkpoint-restore or --take-checkpoint as a
- number of instructions.""")
- parser.add_argument("--spec-input", default="ref",
- choices=["ref", "test", "train", "smred", "mdred",
- "lgred"],
- help="Input set size for SPEC CPU2000 benchmarks.")
- parser.add_argument("--arm-iset", default="arm",
- choices=["arm", "thumb", "aarch64"],
- help="ARM instruction set.")
+ number of instructions.""",
+ )
parser.add_argument(
- "--stats-root", action="append", default=[],
+ "--spec-input",
+ default="ref",
+ choices=["ref", "test", "train", "smred", "mdred", "lgred"],
+ help="Input set size for SPEC CPU2000 benchmarks.",
+ )
+ parser.add_argument(
+ "--arm-iset",
+ default="arm",
+ choices=["arm", "thumb", "aarch64"],
+ help="ARM instruction set.",
+ )
+ parser.add_argument(
+ "--stats-root",
+ action="append",
+ default=[],
help="If given, dump only stats of objects under the given SimObject. "
"SimObjects are identified with Python notation as in: "
"system.cpu[0].mmu. All elements of an array can be selected at "
"once with: system.cpu[:].mmu. If given multiple times, dump stats "
"that are present under any of the roots. If not given, dump all "
- "stats. ")
+ "stats. ",
+ )
+ parser.add_argument(
+ "--override-vendor-string",
+ action="store",
+ type=str,
+ default=None,
+ help="Override vendor string returned by CPUID instruction in X86.",
+ )
def addSEOptions(parser):
# Benchmark options
- parser.add_argument("-c", "--cmd", default="",
- help="The binary to run in syscall emulation mode.")
- parser.add_argument("-o", "--options", default="",
- help="""The options to pass to the binary, use " "
- around the entire string""")
- parser.add_argument("-e", "--env", default="",
- help="Initialize workload environment from text file.")
- parser.add_argument("-i", "--input", default="",
- help="Read stdin from a file.")
- parser.add_argument("--output", default="",
- help="Redirect stdout to a file.")
- parser.add_argument("--errout", default="",
- help="Redirect stderr to a file.")
- parser.add_argument("--chroot", action="store", type=str, default=None,
- help="The chroot option allows a user to alter the "
- "search path for processes running in SE mode. "
- "Normally, the search path would begin at the "
- "root of the filesystem (i.e. /). With chroot, "
- "a user can force the process to begin looking at"
- "some other location (i.e. /home/user/rand_dir)."
- "The intended use is to trick sophisticated "
- "software which queries the __HOST__ filesystem "
- "for information or functionality. Instead of "
- "finding files on the __HOST__ filesystem, the "
- "process will find the user's replacment files.")
- parser.add_argument("--interp-dir", action="store", type=str,
- default=None,
- help="The interp-dir option is used for "
- "setting the interpreter's path. This will "
- "allow to load the guest dynamic linker/loader "
- "itself from the elf binary. The option points to "
- "the parent folder of the guest /lib in the "
- "host fs")
+ parser.add_argument(
+ "-c",
+ "--cmd",
+ default="",
+ help="The binary to run in syscall emulation mode.",
+ )
+ parser.add_argument(
+ "-o",
+ "--options",
+ default="",
+ help="""The options to pass to the binary, use " "
+ around the entire string""",
+ )
+ parser.add_argument(
+ "-e",
+ "--env",
+ default="",
+ help="Initialize workload environment from text file.",
+ )
+ parser.add_argument(
+ "-i", "--input", default="", help="Read stdin from a file."
+ )
+ parser.add_argument(
+ "--output", default="", help="Redirect stdout to a file."
+ )
+ parser.add_argument(
+ "--errout", default="", help="Redirect stderr to a file."
+ )
+ parser.add_argument(
+ "--chroot",
+ action="store",
+ type=str,
+ default=None,
+ help="The chroot option allows a user to alter the "
+ "search path for processes running in SE mode. "
+ "Normally, the search path would begin at the "
+ "root of the filesystem (i.e. /). With chroot, "
+ "a user can force the process to begin looking at"
+ "some other location (i.e. /home/user/rand_dir)."
+ "The intended use is to trick sophisticated "
+ "software which queries the __HOST__ filesystem "
+ "for information or functionality. Instead of "
+ "finding files on the __HOST__ filesystem, the "
+ "process will find the user's replacment files.",
+ )
+ parser.add_argument(
+ "--interp-dir",
+ action="store",
+ type=str,
+ default=None,
+ help="The interp-dir option is used for "
+ "setting the interpreter's path. This will "
+ "allow to load the guest dynamic linker/loader "
+ "itself from the elf binary. The option points to "
+ "the parent folder of the guest /lib in the "
+ "host fs",
+ )
- parser.add_argument("--redirects", action="append", type=str,
- default=[],
- help="A collection of one or more redirect paths "
- "to be used in syscall emulation."
- "Usage: gem5.opt [...] --redirects /dir1=/path/"
- "to/host/dir1 --redirects /dir2=/path/to/host/dir2")
- parser.add_argument("--wait-gdb", default=False, action='store_true',
- help="Wait for remote GDB to connect.")
+ parser.add_argument(
+ "--redirects",
+ action="append",
+ type=str,
+ default=[],
+ help="A collection of one or more redirect paths "
+ "to be used in syscall emulation."
+ "Usage: gem5.opt [...] --redirects /dir1=/path/"
+ "to/host/dir1 --redirects /dir2=/path/to/host/dir2",
+ )
+ parser.add_argument(
+ "--wait-gdb",
+ default=False,
+ action="store_true",
+ help="Wait for remote GDB to connect.",
+ )
def addFSOptions(parser):
@@ -461,73 +762,128 @@
# Simulation options
parser.add_argument(
- "--timesync", action="store_true",
- help="Prevent simulated time from getting ahead of real time")
+ "--timesync",
+ action="store_true",
+ help="Prevent simulated time from getting ahead of real time",
+ )
# System options
parser.add_argument("--kernel", action="store", type=str)
- parser.add_argument("--os-type", action="store",
- choices=os_types[str(buildEnv['TARGET_ISA'])],
- default="linux",
- help="Specifies type of OS to boot")
+ parser.add_argument(
+ "--os-type",
+ action="store",
+ choices=os_types,
+ default="linux",
+ help="Specifies type of OS to boot",
+ )
parser.add_argument("--script", action="store", type=str)
parser.add_argument(
- "--frame-capture", action="store_true",
+ "--frame-capture",
+ action="store_true",
help="Stores changed frame buffers from the VNC server to compressed "
- "files in the gem5 output directory")
+ "files in the gem5 output directory",
+ )
- if buildEnv['TARGET_ISA'] == "arm":
+ if buildEnv["USE_ARM_ISA"]:
parser.add_argument(
- "--bare-metal", action="store_true",
- help="Provide the raw system without the linux specific bits")
- parser.add_argument("--list-machine-types",
- action=ListPlatform, nargs=0,
- help="List available platform types")
- parser.add_argument("--machine-type", action="store",
- choices=ObjectList.platform_list.get_names(),
- default="VExpress_GEM5_V1")
+ "--bare-metal",
+ action="store_true",
+ help="Provide the raw system without the linux specific bits",
+ )
parser.add_argument(
- "--dtb-filename", action="store", type=str,
+ "--list-machine-types",
+ action=ListPlatform,
+ nargs=0,
+ help="List available platform types",
+ )
+ parser.add_argument(
+ "--machine-type",
+ action="store",
+ choices=ObjectList.platform_list.get_names(),
+ default="VExpress_GEM5_V1",
+ )
+ parser.add_argument(
+ "--dtb-filename",
+ action="store",
+ type=str,
help="Specifies device tree blob file to use with device-tree-"
- "enabled kernels")
+ "enabled kernels",
+ )
parser.add_argument(
- "--enable-context-switch-stats-dump", action="store_true",
+ "--enable-context-switch-stats-dump",
+ action="store_true",
help="Enable stats dump at context "
- "switches and dump tasks file (required for Streamline)")
+ "switches and dump tasks file (required for Streamline)",
+ )
parser.add_argument("--vio-9p", action="store_true", help=vio_9p_help)
parser.add_argument(
- "--bootloader", action='append',
- help="executable file that runs before the --kernel")
+ "--bootloader",
+ action="append",
+ help="executable file that runs before the --kernel",
+ )
# Benchmark options
parser.add_argument(
- "--dual", action="store_true",
- help="Simulate two systems attached with an ethernet link")
+ "--dual",
+ action="store_true",
+ help="Simulate two systems attached with an ethernet link",
+ )
parser.add_argument(
- "-b", "--benchmark", action="store", type=str, dest="benchmark",
- help="Specify the benchmark to run. Available benchmarks: %s" %
- DefinedBenchmarks)
+ "-b",
+ "--benchmark",
+ action="store",
+ type=str,
+ dest="benchmark",
+ help="Specify the benchmark to run. Available benchmarks: %s"
+ % DefinedBenchmarks,
+ )
# Metafile options
parser.add_argument(
- "--etherdump", action="store", type=str, dest="etherdump",
+ "--etherdump",
+ action="store",
+ type=str,
+ dest="etherdump",
help="Specify the filename to dump a pcap capture of the"
- "ethernet traffic")
+ "ethernet traffic",
+ )
# Disk Image Options
- parser.add_argument("--disk-image", action="append", type=str,
- default=[], help="Path to the disk images to use.")
- parser.add_argument("--root-device", action="store", type=str,
- default=None, help="OS device name for root partition")
+ parser.add_argument(
+ "--disk-image",
+ action="append",
+ type=str,
+ default=[],
+ help="Path to the disk images to use.",
+ )
+ parser.add_argument(
+ "--root-device",
+ action="store",
+ type=str,
+ default=None,
+ help="OS device name for root partition",
+ )
# Command line options
- parser.add_argument("--command-line", action="store", type=str,
- default=None,
- help="Template for the kernel command line.")
parser.add_argument(
- "--command-line-file", action="store", default=None, type=str,
- help="File with a template for the kernel command line")
+ "--command-line",
+ action="store",
+ type=str,
+ default=None,
+ help="Template for the kernel command line.",
+ )
+ parser.add_argument(
+ "--command-line-file",
+ action="store",
+ default=None,
+ type=str,
+ help="File with a template for the kernel command line",
+ )
# Debug option
- parser.add_argument("--wait-gdb", default=False, action='store_true',
- help="Wait for remote GDB to connect.")
+ parser.add_argument(
+ "--wait-gdb",
+ default=False,
+ action="store_true",
+ help="Wait for remote GDB to connect.",
+ )
diff --git a/configs/common/SimpleOpts.py b/configs/common/SimpleOpts.py
index fabc8e0..96c73f5 100644
--- a/configs/common/SimpleOpts.py
+++ b/configs/common/SimpleOpts.py
@@ -44,21 +44,22 @@
# add the args we want to be able to control from the command line
parser = ArgumentParser()
+
def add_option(*args, **kwargs):
- """Call "add_option" to the global options parser
- """
+ """Call "add_option" to the global options parser"""
if called_parse_args:
m5.fatal("Can't add an option after calling SimpleOpts.parse_args")
parser.add_argument(*args, **kwargs)
+
def parse_args():
global called_parse_args
called_parse_args = True
return parser.parse_args()
+
def print_help(*args, **kwargs):
parser.print_help(*args, **kwargs)
-
diff --git a/configs/common/Simulation.py b/configs/common/Simulation.py
index 2416773..731b3fc 100644
--- a/configs/common/Simulation.py
+++ b/configs/common/Simulation.py
@@ -49,27 +49,28 @@
from m5.objects import *
from m5.util import *
-addToPath('../common')
+addToPath("../common")
+
def getCPUClass(cpu_type):
"""Returns the required cpu class and the mode of operation."""
cls = ObjectList.cpu_list.get(cpu_type)
return cls, cls.memory_mode()
+
def setCPUClass(options):
"""Returns two cpu classes and the initial mode of operation.
- Restoring from a checkpoint or fast forwarding through a benchmark
- can be done using one type of cpu, and then the actual
- simulation can be carried out using another type. This function
- returns these two types of cpus and the initial mode of operation
- depending on the options provided.
+ Restoring from a checkpoint or fast forwarding through a benchmark
+ can be done using one type of cpu, and then the actual
+ simulation can be carried out using another type. This function
+ returns these two types of cpus and the initial mode of operation
+ depending on the options provided.
"""
TmpClass, test_mem_mode = getCPUClass(options.cpu_type)
CPUClass = None
- if TmpClass.require_caches() and \
- not options.caches and not options.ruby:
+ if TmpClass.require_caches() and not options.caches and not options.ruby:
fatal("%s must be used with caches" % options.cpu_type)
if options.checkpoint_restore != None:
@@ -79,20 +80,22 @@
elif options.fast_forward:
CPUClass = TmpClass
TmpClass = AtomicSimpleCPU
- test_mem_mode = 'atomic'
+ test_mem_mode = "atomic"
# Ruby only supports atomic accesses in noncaching mode
- if test_mem_mode == 'atomic' and options.ruby:
+ if test_mem_mode == "atomic" and options.ruby:
warn("Memory mode will be changed to atomic_noncaching")
- test_mem_mode = 'atomic_noncaching'
+ test_mem_mode = "atomic_noncaching"
return (TmpClass, test_mem_mode, CPUClass)
+
def setMemClass(options):
"""Returns a memory controller class."""
return ObjectList.mem_list.get(options.mem_type)
+
def setWorkCountOptions(system, options):
if options.work_item_id != None:
system.work_item_id = options.work_item_id
@@ -111,6 +114,7 @@
if options.work_cpus_checkpoint_count != None:
system.work_cpus_ckpt_count = options.work_cpus_checkpoint_count
+
def findCptDir(options, cptdir, testsys):
"""Figures out the directory from which the checkpointed state is read.
@@ -137,7 +141,7 @@
if options.simpoint:
# assume workload 0 has the simpoint
if testsys.cpu[0].workload[0].simpoint == 0:
- fatal('Unable to find simpoint')
+ fatal("Unable to find simpoint")
inst += int(testsys.cpu[0].workload[0].simpoint)
checkpoint_dir = joinpath(cptdir, "cpt.%s.%s" % (options.bench, inst))
@@ -148,8 +152,10 @@
# Restore from SimPoint checkpoints
# Assumes that the checkpoint dir names are formatted as follows:
dirs = listdir(cptdir)
- expr = re.compile('cpt\.simpoint_(\d+)_inst_(\d+)' +
- '_weight_([\d\.e\-]+)_interval_(\d+)_warmup_(\d+)')
+ expr = re.compile(
+ "cpt\.simpoint_(\d+)_inst_(\d+)"
+ + "_weight_([\d\.e\-]+)_interval_(\d+)_warmup_(\d+)"
+ )
cpts = []
for dir in dirs:
match = expr.match(dir)
@@ -159,7 +165,7 @@
cpt_num = options.checkpoint_restore
if cpt_num > len(cpts):
- fatal('Checkpoint %d not found', cpt_num)
+ fatal("Checkpoint %d not found", cpt_num)
checkpoint_dir = joinpath(cptdir, cpts[cpt_num - 1])
match = expr.match(cpts[cpt_num - 1])
if match:
@@ -176,30 +182,33 @@
if testsys.switch_cpus != None:
testsys.switch_cpus[0].simpoint_start_insts = simpoint_start_insts
- print("Resuming from SimPoint", end=' ')
- print("#%d, start_inst:%d, weight:%f, interval:%d, warmup:%d" %
- (index, start_inst, weight_inst, interval_length, warmup_length))
+ print("Resuming from SimPoint", end=" ")
+ print(
+ "#%d, start_inst:%d, weight:%f, interval:%d, warmup:%d"
+ % (index, start_inst, weight_inst, interval_length, warmup_length)
+ )
else:
dirs = listdir(cptdir)
- expr = re.compile('cpt\.([0-9]+)')
+ expr = re.compile("cpt\.([0-9]+)")
cpts = []
for dir in dirs:
match = expr.match(dir)
if match:
cpts.append(match.group(1))
- cpts.sort(key = lambda a: int(a))
+ cpts.sort(key=lambda a: int(a))
cpt_num = options.checkpoint_restore
if cpt_num > len(cpts):
- fatal('Checkpoint %d not found', cpt_num)
+ fatal("Checkpoint %d not found", cpt_num)
cpt_starttick = int(cpts[cpt_num - 1])
checkpoint_dir = joinpath(cptdir, "cpt.%s" % cpts[cpt_num - 1])
return cpt_starttick, checkpoint_dir
+
def scriptCheckpoints(options, maxtick, cptdir):
if options.at_instruction or options.simpoint:
checkpoint_inst = int(options.take_checkpoints)
@@ -219,8 +228,11 @@
exit_cause = exit_event.getCause()
if exit_cause == "a thread reached the max instruction count":
- m5.checkpoint(joinpath(cptdir, "cpt.%s.%d" % \
- (options.bench, checkpoint_inst)))
+ m5.checkpoint(
+ joinpath(
+ cptdir, "cpt.%s.%d" % (options.bench, checkpoint_inst)
+ )
+ )
print("Checkpoint written.")
else:
@@ -242,8 +254,10 @@
sim_ticks = when
max_checkpoints = options.max_checkpoints
- while num_checkpoints < max_checkpoints and \
- exit_cause == "simulate() limit reached":
+ while (
+ num_checkpoints < max_checkpoints
+ and exit_cause == "simulate() limit reached"
+ ):
if (sim_ticks + period) > maxtick:
exit_event = m5.simulate(maxtick - sim_ticks)
exit_cause = exit_event.getCause()
@@ -260,6 +274,7 @@
return exit_event
+
def benchCheckpoints(options, maxtick, cptdir):
exit_event = m5.simulate(maxtick - m5.curTick())
exit_cause = exit_event.getCause()
@@ -279,13 +294,18 @@
return exit_event
+
# Set up environment for taking SimPoint checkpoints
# Expecting SimPoint files generated by SimPoint 3.2
def parseSimpointAnalysisFile(options, testsys):
import re
- simpoint_filename, weight_filename, interval_length, warmup_length = \
- options.take_simpoint_checkpoints.split(",", 3)
+ (
+ simpoint_filename,
+ weight_filename,
+ interval_length,
+ warmup_length,
+ ) = options.take_simpoint_checkpoints.split(",", 3)
print("simpoint analysis file:", simpoint_filename)
print("simpoint weight file:", weight_filename)
print("interval length:", interval_length)
@@ -309,20 +329,19 @@
if m:
interval = int(m.group(1))
else:
- fatal('unrecognized line in simpoint file!')
+ fatal("unrecognized line in simpoint file!")
line = weight_file.readline()
if not line:
- fatal('not enough lines in simpoint weight file!')
+ fatal("not enough lines in simpoint weight file!")
m = re.match("([0-9\.e\-]+)\s+(\d+)", line)
if m:
weight = float(m.group(1))
else:
- fatal('unrecognized line in simpoint weight file!')
+ fatal("unrecognized line in simpoint weight file!")
- if (interval * interval_length - warmup_length > 0):
- starting_inst_count = \
- interval * interval_length - warmup_length
+ if interval * interval_length - warmup_length > 0:
+ starting_inst_count = interval * interval_length - warmup_length
actual_warmup_length = warmup_length
else:
# Not enough room for proper warmup
@@ -330,15 +349,20 @@
starting_inst_count = 0
actual_warmup_length = interval * interval_length
- simpoints.append((interval, weight, starting_inst_count,
- actual_warmup_length))
+ simpoints.append(
+ (interval, weight, starting_inst_count, actual_warmup_length)
+ )
# Sort SimPoints by starting inst count
simpoints.sort(key=lambda obj: obj[2])
for s in simpoints:
interval, weight, starting_inst_count, actual_warmup_length = s
- print(str(interval), str(weight), starting_inst_count,
- actual_warmup_length)
+ print(
+ str(interval),
+ str(weight),
+ starting_inst_count,
+ actual_warmup_length,
+ )
simpoint_start_insts.append(starting_inst_count)
print("Total # of simpoints:", len(simpoints))
@@ -346,6 +370,7 @@
return (simpoints, interval_length)
+
def takeSimpointCheckpoints(simpoints, interval_length, cptdir):
num_checkpoints = 0
index = 0
@@ -369,22 +394,34 @@
code = exit_event.getCode()
if exit_cause == "simpoint starting point found":
- m5.checkpoint(joinpath(cptdir,
- "cpt.simpoint_%02d_inst_%d_weight_%f_interval_%d_warmup_%d"
- % (index, starting_inst_count, weight, interval_length,
- actual_warmup_length)))
- print("Checkpoint #%d written. start inst:%d weight:%f" %
- (num_checkpoints, starting_inst_count, weight))
+ m5.checkpoint(
+ joinpath(
+ cptdir,
+ "cpt.simpoint_%02d_inst_%d_weight_%f_interval_%d_warmup_%d"
+ % (
+ index,
+ starting_inst_count,
+ weight,
+ interval_length,
+ actual_warmup_length,
+ ),
+ )
+ )
+ print(
+ "Checkpoint #%d written. start inst:%d weight:%f"
+ % (num_checkpoints, starting_inst_count, weight)
+ )
num_checkpoints += 1
last_chkpnt_inst_count = starting_inst_count
else:
break
index += 1
- print('Exiting @ tick %i because %s' % (m5.curTick(), exit_cause))
+ print("Exiting @ tick %i because %s" % (m5.curTick(), exit_cause))
print("%d checkpoints taken" % num_checkpoints)
sys.exit(code)
+
def restoreSimpointCheckpoint():
exit_event = m5.simulate()
exit_cause = exit_event.getCause()
@@ -401,9 +438,10 @@
print("Done running SimPoint!")
sys.exit(exit_event.getCode())
- print('Exiting @ tick %i because %s' % (m5.curTick(), exit_cause))
+ print("Exiting @ tick %i because %s" % (m5.curTick(), exit_cause))
sys.exit(exit_event.getCode())
+
def repeatSwitch(testsys, repeat_switch_cpu_list, maxtick, switch_freq):
print("starting switch loop")
while True:
@@ -424,6 +462,7 @@
exit_event = m5.simulate(maxtick - m5.curTick())
return exit_event
+
def run(options, root, testsys, cpu_class):
if options.checkpoint_dir:
cptdir = options.checkpoint_dir
@@ -461,9 +500,17 @@
for i in range(np):
testsys.cpu[i].max_insts_any_thread = options.maxinsts
+ if options.override_vendor_string is not None:
+ for i in range(len(testsys.cpu)):
+ for j in range(len(testsys.cpu[i].isa)):
+ testsys.cpu[i].isa[
+ j
+ ].vendor_string = options.override_vendor_string
+
if cpu_class:
- switch_cpus = [cpu_class(switched_out=True, cpu_id=(i))
- for i in range(np)]
+ switch_cpus = [
+ cpu_class(switched_out=True, cpu_id=(i)) for i in range(np)
+ ]
for i in range(np):
if options.fast_forward:
@@ -471,8 +518,7 @@
switch_cpus[i].system = testsys
switch_cpus[i].workload = testsys.cpu[i].workload
switch_cpus[i].clk_domain = testsys.cpu[i].clk_domain
- switch_cpus[i].progress_interval = \
- testsys.cpu[i].progress_interval
+ switch_cpus[i].progress_interval = testsys.cpu[i].progress_interval
switch_cpus[i].isa = testsys.cpu[i].isa
# simulation period
if options.maxinsts:
@@ -485,9 +531,11 @@
switch_cpus[i].branchPred = bpClass()
if options.indirect_bp_type:
IndirectBPClass = ObjectList.indirect_bp_list.get(
- options.indirect_bp_type)
- switch_cpus[i].branchPred.indirectBranchPred = \
- IndirectBPClass()
+ options.indirect_bp_type
+ )
+ switch_cpus[
+ i
+ ].branchPred.indirectBranchPred = IndirectBPClass()
switch_cpus[i].createThreads()
# If elastic tracing is enabled attach the elastic trace probe
@@ -500,16 +548,16 @@
if options.repeat_switch:
switch_class = getCPUClass(options.cpu_type)[0]
- if switch_class.require_caches() and \
- not options.caches:
+ if switch_class.require_caches() and not options.caches:
print("%s: Must be used with caches" % str(switch_class))
sys.exit(1)
if not switch_class.support_take_over():
print("%s: CPU switching not supported" % str(switch_class))
sys.exit(1)
- repeat_switch_cpus = [switch_class(switched_out=True, \
- cpu_id=(i)) for i in range(np)]
+ repeat_switch_cpus = [
+ switch_class(switched_out=True, cpu_id=(i)) for i in range(np)
+ ]
for i in range(np):
repeat_switch_cpus[i].system = testsys
@@ -523,24 +571,30 @@
if options.checker:
repeat_switch_cpus[i].addCheckerCpu()
+ repeat_switch_cpus[i].createThreads()
+
testsys.repeat_switch_cpus = repeat_switch_cpus
if cpu_class:
- repeat_switch_cpu_list = [(switch_cpus[i], repeat_switch_cpus[i])
- for i in range(np)]
+ repeat_switch_cpu_list = [
+ (switch_cpus[i], repeat_switch_cpus[i]) for i in range(np)
+ ]
else:
- repeat_switch_cpu_list = [(testsys.cpu[i], repeat_switch_cpus[i])
- for i in range(np)]
+ repeat_switch_cpu_list = [
+ (testsys.cpu[i], repeat_switch_cpus[i]) for i in range(np)
+ ]
if options.standard_switch:
- switch_cpus = [TimingSimpleCPU(switched_out=True, cpu_id=(i))
- for i in range(np)]
- switch_cpus_1 = [DerivO3CPU(switched_out=True, cpu_id=(i))
- for i in range(np)]
+ switch_cpus = [
+ TimingSimpleCPU(switched_out=True, cpu_id=(i)) for i in range(np)
+ ]
+ switch_cpus_1 = [
+ DerivO3CPU(switched_out=True, cpu_id=(i)) for i in range(np)
+ ]
for i in range(np):
- switch_cpus[i].system = testsys
- switch_cpus_1[i].system = testsys
+ switch_cpus[i].system = testsys
+ switch_cpus_1[i].system = testsys
switch_cpus[i].workload = testsys.cpu[i].workload
switch_cpus_1[i].workload = testsys.cpu[i].workload
switch_cpus[i].clk_domain = testsys.cpu[i].clk_domain
@@ -557,16 +611,17 @@
# Fast forward to a simpoint (warning: time consuming)
elif options.simpoint:
if testsys.cpu[i].workload[0].simpoint == 0:
- fatal('simpoint not found')
- testsys.cpu[i].max_insts_any_thread = \
+ fatal("simpoint not found")
+ testsys.cpu[i].max_insts_any_thread = (
testsys.cpu[i].workload[0].simpoint
+ )
# No distance specified, just switch
else:
testsys.cpu[i].max_insts_any_thread = 1
# warmup period
if options.warmup_insts:
- switch_cpus[i].max_insts_any_thread = options.warmup_insts
+ switch_cpus[i].max_insts_any_thread = options.warmup_insts
# simulation period
if options.maxinsts:
@@ -577,25 +632,29 @@
switch_cpus[i].addCheckerCpu()
switch_cpus_1[i].addCheckerCpu()
+ switch_cpus[i].createThreads()
+ switch_cpus_1[i].createThreads()
+
testsys.switch_cpus = switch_cpus
testsys.switch_cpus_1 = switch_cpus_1
- switch_cpu_list = [
- (testsys.cpu[i], switch_cpus[i]) for i in range(np)
- ]
+ switch_cpu_list = [(testsys.cpu[i], switch_cpus[i]) for i in range(np)]
switch_cpu_list1 = [
(switch_cpus[i], switch_cpus_1[i]) for i in range(np)
]
# set the checkpoint in the cpu before m5.instantiate is called
- if options.take_checkpoints != None and \
- (options.simpoint or options.at_instruction):
+ if options.take_checkpoints != None and (
+ options.simpoint or options.at_instruction
+ ):
offset = int(options.take_checkpoints)
# Set an instruction break point
if options.simpoint:
for i in range(np):
if testsys.cpu[i].workload[0].simpoint == 0:
- fatal('no simpoint for testsys.cpu[%d].workload[0]', i)
- checkpoint_inst = int(testsys.cpu[i].workload[0].simpoint) + offset
+ fatal("no simpoint for testsys.cpu[%d].workload[0]", i)
+ checkpoint_inst = (
+ int(testsys.cpu[i].workload[0].simpoint) + offset
+ )
testsys.cpu[i].max_insts_any_thread = checkpoint_inst
# used for output below
options.take_checkpoints = checkpoint_inst
@@ -607,7 +666,9 @@
testsys.cpu[i].max_insts_any_thread = offset
if options.take_simpoint_checkpoints != None:
- simpoints, interval_length = parseSimpointAnalysisFile(options, testsys)
+ simpoints, interval_length = parseSimpointAnalysisFile(
+ options, testsys
+ )
checkpoint_dir = None
if options.checkpoint_restore:
@@ -640,31 +701,43 @@
# the ticks per simulated second
maxtick_from_rel += cpt_starttick
if options.at_instruction or options.simpoint:
- warn("Relative max tick specified with --at-instruction or" \
- " --simpoint\n These options don't specify the " \
- "checkpoint start tick, so assuming\n you mean " \
- "absolute max tick")
+ warn(
+ "Relative max tick specified with --at-instruction or"
+ " --simpoint\n These options don't specify the "
+ "checkpoint start tick, so assuming\n you mean "
+ "absolute max tick"
+ )
explicit_maxticks += 1
if options.maxtime:
maxtick_from_maxtime = m5.ticks.fromSeconds(options.maxtime)
explicit_maxticks += 1
if explicit_maxticks > 1:
- warn("Specified multiple of --abs-max-tick, --rel-max-tick, --maxtime."\
- " Using least")
+ warn(
+ "Specified multiple of --abs-max-tick, --rel-max-tick, --maxtime."
+ " Using least"
+ )
maxtick = min([maxtick_from_abs, maxtick_from_rel, maxtick_from_maxtime])
if options.checkpoint_restore != None and maxtick < cpt_starttick:
- fatal("Bad maxtick (%d) specified: " \
- "Checkpoint starts starts from tick: %d", maxtick, cpt_starttick)
+ fatal(
+ "Bad maxtick (%d) specified: "
+                "Checkpoint starts from tick: %d",
+ maxtick,
+ cpt_starttick,
+ )
if options.standard_switch or cpu_class:
if options.standard_switch:
- print("Switch at instruction count:%s" %
- str(testsys.cpu[0].max_insts_any_thread))
+ print(
+ "Switch at instruction count:%s"
+ % str(testsys.cpu[0].max_insts_any_thread)
+ )
exit_event = m5.simulate()
elif cpu_class and options.fast_forward:
- print("Switch at instruction count:%s" %
- str(testsys.cpu[0].max_insts_any_thread))
+ print(
+ "Switch at instruction count:%s"
+ % str(testsys.cpu[0].max_insts_any_thread)
+ )
exit_event = m5.simulate()
else:
print("Switch at curTick count:%s" % str(10000))
@@ -674,32 +747,37 @@
m5.switchCpus(testsys, switch_cpu_list)
if options.standard_switch:
- print("Switch at instruction count:%d" %
- (testsys.switch_cpus[0].max_insts_any_thread))
+ print(
+ "Switch at instruction count:%d"
+ % (testsys.switch_cpus[0].max_insts_any_thread)
+ )
- #warmup instruction count may have already been set
+ # warmup instruction count may have already been set
if options.warmup_insts:
exit_event = m5.simulate()
else:
exit_event = m5.simulate(options.standard_switch)
print("Switching CPUS @ tick %s" % (m5.curTick()))
- print("Simulation ends instruction count:%d" %
- (testsys.switch_cpus_1[0].max_insts_any_thread))
+ print(
+ "Simulation ends instruction count:%d"
+ % (testsys.switch_cpus_1[0].max_insts_any_thread)
+ )
m5.switchCpus(testsys, switch_cpu_list1)
# If we're taking and restoring checkpoints, use checkpoint_dir
# option only for finding the checkpoints to restore from. This
# lets us test checkpointing by restoring from one set of
# checkpoints, generating a second set, and then comparing them.
- if (options.take_checkpoints or options.take_simpoint_checkpoints) \
- and options.checkpoint_restore:
+ if (
+ options.take_checkpoints or options.take_simpoint_checkpoints
+ ) and options.checkpoint_restore:
if m5.options.outdir:
cptdir = m5.options.outdir
else:
cptdir = getcwd()
- if options.take_checkpoints != None :
+ if options.take_checkpoints != None:
# Checkpoints being taken via the command line at <when> and at
# subsequent periods of <period>. Checkpoint instructions
# received from the benchmark running are ignored and skipped in
@@ -722,13 +800,15 @@
# If checkpoints are being taken, then the checkpoint instruction
# will occur in the benchmark code it self.
if options.repeat_switch and maxtick > options.repeat_switch:
- exit_event = repeatSwitch(testsys, repeat_switch_cpu_list,
- maxtick, options.repeat_switch)
+ exit_event = repeatSwitch(
+ testsys, repeat_switch_cpu_list, maxtick, options.repeat_switch
+ )
else:
exit_event = benchCheckpoints(options, maxtick, cptdir)
- print('Exiting @ tick %i because %s' %
- (m5.curTick(), exit_event.getCause()))
+ print(
+ "Exiting @ tick %i because %s" % (m5.curTick(), exit_event.getCause())
+ )
if options.checkpoint_at_end:
m5.checkpoint(joinpath(cptdir, "cpt.%d"))
diff --git a/configs/common/SysPaths.py b/configs/common/SysPaths.py
index 762efaf..7c0f5bf 100644
--- a/configs/common/SysPaths.py
+++ b/configs/common/SysPaths.py
@@ -29,9 +29,10 @@
config_path = os.path.dirname(os.path.abspath(__file__))
config_root = os.path.dirname(config_path)
+
class PathSearchFunc(object):
_sys_paths = None
- environment_variable = 'M5_PATH'
+ environment_variable = "M5_PATH"
def __init__(self, subdirs, sys_paths=None):
if isinstance(subdirs, str):
@@ -46,9 +47,9 @@
else:
if self._sys_paths is None:
try:
- paths = os.environ[self.environment_variable].split(':')
+ paths = os.environ[self.environment_variable].split(":")
except KeyError:
- paths = [ '/dist/m5/system', '/n/poolfs/z/dist/m5/system' ]
+ paths = ["/dist/m5/system", "/n/poolfs/z/dist/m5/system"]
# expand '~' and '~user' in paths
paths = list(map(os.path.expanduser, paths))
@@ -59,8 +60,10 @@
if not paths:
raise IOError(
"Can't find system files directory, "
- "check your {} environment variable"
- .format(self.environment_variable))
+ "check your {} environment variable".format(
+ self.environment_variable
+ )
+ )
self._sys_paths = list(paths)
@@ -69,9 +72,13 @@
try:
return next(p for p in paths if os.path.exists(p))
except StopIteration:
- raise IOError("Can't find file '{}' on {}."
- .format(filepath, self.environment_variable))
+ raise IOError(
+ "Can't find file '{}' on {}.".format(
+ filepath, self.environment_variable
+ )
+ )
-disk = PathSearchFunc('disks')
-binary = PathSearchFunc('binaries')
-script = PathSearchFunc('boot', sys_paths=[config_root])
+
+disk = PathSearchFunc("disks")
+binary = PathSearchFunc("binaries")
+script = PathSearchFunc("boot", sys_paths=[config_root])
diff --git a/configs/common/__init__.py b/configs/common/__init__.py
index 9b43643..4fe0002 100644
--- a/configs/common/__init__.py
+++ b/configs/common/__init__.py
@@ -32,4 +32,3 @@
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
diff --git a/configs/common/cores/arm/HPI.py b/configs/common/cores/arm/HPI.py
index 3a11133..c7a8127 100644
--- a/configs/common/cores/arm/HPI.py
+++ b/configs/common/cores/arm/HPI.py
@@ -58,41 +58,47 @@
ret_match <<= 1
shift = True
- if char == '_':
+ if char == "_":
shift = False
- elif char == '0':
+ elif char == "0":
ret_mask |= 1
- elif char == '1':
+ elif char == "1":
ret_mask |= 1
ret_match |= 1
- elif char == 'x':
+ elif char == "x":
pass
else:
print("Can't parse implicant character", char)
return (ret_mask, ret_match)
+
# ,----- 36 thumb
# | ,--- 35 bigThumb
# | |,-- 34 aarch64
-a64_inst = make_implicant('0_01xx__xxxx_xxxx_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx')
-a32_inst = make_implicant('0_00xx__xxxx_xxxx_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx')
-t32_inst = make_implicant('1_10xx__xxxx_xxxx_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx')
-t16_inst = make_implicant('1_00xx__xxxx_xxxx_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx')
-any_inst = make_implicant('x_xxxx__xxxx_xxxx_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx')
+a64_inst = make_implicant("0_01xx__xxxx_xxxx_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx")
+a32_inst = make_implicant("0_00xx__xxxx_xxxx_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx")
+t32_inst = make_implicant("1_10xx__xxxx_xxxx_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx")
+t16_inst = make_implicant("1_00xx__xxxx_xxxx_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx")
+any_inst = make_implicant("x_xxxx__xxxx_xxxx_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx")
# | ||
-any_a64_inst = \
- make_implicant('x_x1xx__xxxx_xxxx_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx')
-any_non_a64_inst = \
- make_implicant('x_x0xx__xxxx_xxxx_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx')
+any_a64_inst = make_implicant(
+ "x_x1xx__xxxx_xxxx_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx"
+)
+any_non_a64_inst = make_implicant(
+ "x_x0xx__xxxx_xxxx_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx"
+)
+
def encode_opcode(pattern):
def encode(opcode_string):
a64_mask, a64_match = pattern
mask, match = make_implicant(opcode_string)
return (a64_mask | mask), (a64_match | match)
+
return encode
+
a64_opcode = encode_opcode(a64_inst)
a32_opcode = encode_opcode(a32_inst)
t32_opcode = encode_opcode(t32_inst)
@@ -100,30 +106,37 @@
# These definitions (in some form) should probably be part of TimingExpr
+
def literal(value):
def body(env):
ret = TimingExprLiteral()
ret.value = value
return ret
+
return body
+
def bin(op, left, right):
def body(env):
ret = TimingExprBin()
- ret.op = 'timingExpr' + op
+ ret.op = "timingExpr" + op
ret.left = left(env)
ret.right = right(env)
return ret
+
return body
+
def un(op, arg):
def body(env):
ret = TimingExprUn()
- ret.op = 'timingExpr' + op
+ ret.op = "timingExpr" + op
ret.arg = arg(env)
return ret
+
return body
+
def ref(name):
def body(env):
if name in env:
@@ -133,8 +146,10 @@
print("Invalid expression name", name)
ret = TimingExprNull()
return ret
+
return body
+
def if_expr(cond, true_expr, false_expr):
def body(env):
ret = TimingExprIf()
@@ -142,21 +157,18 @@
ret.trueExpr = true_expr(env)
ret.falseExpr = false_expr(env)
return ret
+
return body
-def src(index):
+
+def src_reg(index):
def body(env):
ret = TimingExprSrcReg()
ret.index = index
return ret
+
return body
-def int_reg(reg):
- def body(env):
- ret = TimingExprReadIntReg()
- ret.reg = reg(env)
- return ret
- return body
def let(bindings, expr):
def body(env):
@@ -180,972 +192,1296 @@
ret.expr = expr(new_env)
return ret
+
return body
+
def expr_top(expr):
return expr([])
+
class HPI_DefaultInt(MinorFUTiming):
- description = 'HPI_DefaultInt'
+ description = "HPI_DefaultInt"
mask, match = any_non_a64_inst
srcRegsRelativeLats = [3, 3, 2, 2, 2, 1, 0]
+
class HPI_DefaultA64Int(MinorFUTiming):
- description = 'HPI_DefaultA64Int'
+ description = "HPI_DefaultA64Int"
mask, match = any_a64_inst
# r, l, (c)
srcRegsRelativeLats = [2, 2, 2, 0]
+
class HPI_DefaultMul(MinorFUTiming):
- description = 'HPI_DefaultMul'
+ description = "HPI_DefaultMul"
mask, match = any_non_a64_inst
# f, f, f, r, l, a?
srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 0]
+
class HPI_DefaultA64Mul(MinorFUTiming):
- description = 'HPI_DefaultA64Mul'
+ description = "HPI_DefaultA64Mul"
mask, match = any_a64_inst
# a (zr for mul), l, r
srcRegsRelativeLats = [0, 0, 0, 0]
# extraCommitLat = 1
+
class HPI_DefaultVfp(MinorFUTiming):
- description = 'HPI_DefaultVfp'
+ description = "HPI_DefaultVfp"
mask, match = any_non_a64_inst
# cpsr, z, z, z, cpacr, fpexc, l_lo, r_lo, l_hi, r_hi (from vadd2h)
- srcRegsRelativeLats = [5, 5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 2, 2, 2, 0]
+ srcRegsRelativeLats = [5, 5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 2, 2, 2, 0]
+
class HPI_DefaultA64Vfp(MinorFUTiming):
- description = 'HPI_DefaultA64Vfp'
+ description = "HPI_DefaultA64Vfp"
mask, match = any_a64_inst
# cpsr, cpacr_el1, fpscr_exc, ...
srcRegsRelativeLats = [5, 5, 5, 2]
+
class HPI_FMADD_A64(MinorFUTiming):
- description = 'HPI_FMADD_A64'
- mask, match = a64_opcode('0001_1111_0x0x_xxxx__0xxx_xxxx_xxxx_xxxx')
+ description = "HPI_FMADD_A64"
+ mask, match = a64_opcode("0001_1111_0x0x_xxxx__0xxx_xxxx_xxxx_xxxx")
# t
# cpsr, cpacr_el1, fpscr_exc, 1, 1, 2, 2, 3, 3, fpscr_exc, d, d, d, d
- srcRegsRelativeLats = [5, 5, 5, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0]
+ srcRegsRelativeLats = [5, 5, 5, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0]
+
class HPI_FMSUB_D_A64(MinorFUTiming):
- description = 'HPI_FMSUB_D_A64'
- mask, match = a64_opcode('0001_1111_0x0x_xxxx__1xxx_xxxx_xxxx_xxxx')
+ description = "HPI_FMSUB_D_A64"
+ mask, match = a64_opcode("0001_1111_0x0x_xxxx__1xxx_xxxx_xxxx_xxxx")
# t
# cpsr, cpacr_el1, fpscr_exc, 1, 1, 2, 2, 3, 3, fpscr_exc, d, d, d, d
- srcRegsRelativeLats = [5, 5, 5, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0]
+ srcRegsRelativeLats = [5, 5, 5, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0]
+
class HPI_FMOV_A64(MinorFUTiming):
- description = 'HPI_FMOV_A64'
- mask, match = a64_opcode('0001_1110_0x10_0000__0100_00xx_xxxx_xxxx')
+ description = "HPI_FMOV_A64"
+ mask, match = a64_opcode("0001_1110_0x10_0000__0100_00xx_xxxx_xxxx")
# cpsr, cpacr_el1, fpscr_exc, 1, 1, 2, 2, 3, 3, fpscr_exc, d, d, d, d
srcRegsRelativeLats = [5, 5, 5, 0]
+
class HPI_ADD_SUB_vector_scalar_A64(MinorFUTiming):
- description = 'HPI_ADD_SUB_vector_scalar_A64'
- mask, match = a64_opcode('01x1_1110_xx1x_xxxx__1000_01xx_xxxx_xxxx')
+ description = "HPI_ADD_SUB_vector_scalar_A64"
+ mask, match = a64_opcode("01x1_1110_xx1x_xxxx__1000_01xx_xxxx_xxxx")
# cpsr, z, z, z, cpacr, fpexc, l0, r0, l1, r1, l2, r2, l3, r3 (for vadd2h)
srcRegsRelativeLats = [5, 5, 5, 4]
class HPI_ADD_SUB_vector_vector_A64(MinorFUTiming):
- description = 'HPI_ADD_SUB_vector_vector_A64'
- mask, match = a64_opcode('0xx0_1110_xx1x_xxxx__1000_01xx_xxxx_xxxx')
+ description = "HPI_ADD_SUB_vector_vector_A64"
+ mask, match = a64_opcode("0xx0_1110_xx1x_xxxx__1000_01xx_xxxx_xxxx")
# cpsr, z, z, z, cpacr, fpexc, l0, r0, l1, r1, l2, r2, l3, r3 (for vadd2h)
srcRegsRelativeLats = [5, 5, 5, 4]
+
class HPI_FDIV_scalar_32_A64(MinorFUTiming):
- description = 'HPI_FDIV_scalar_32_A64'
- mask, match = a64_opcode('0001_1110_001x_xxxx__0001_10xx_xxxx_xxxx')
+ description = "HPI_FDIV_scalar_32_A64"
+ mask, match = a64_opcode("0001_1110_001x_xxxx__0001_10xx_xxxx_xxxx")
extraCommitLat = 6
- srcRegsRelativeLats = [0, 0, 0, 20, 4]
+ srcRegsRelativeLats = [0, 0, 0, 20, 4]
+
class HPI_FDIV_scalar_64_A64(MinorFUTiming):
- description = 'HPI_FDIV_scalar_64_A64'
- mask, match = a64_opcode('0001_1110_011x_xxxx__0001_10xx_xxxx_xxxx')
+ description = "HPI_FDIV_scalar_64_A64"
+ mask, match = a64_opcode("0001_1110_011x_xxxx__0001_10xx_xxxx_xxxx")
extraCommitLat = 15
- srcRegsRelativeLats = [0, 0, 0, 20, 4]
+ srcRegsRelativeLats = [0, 0, 0, 20, 4]
+
# CINC CINV CSEL CSET CSETM CSINC CSINC CSINV CSINV CSNEG
class HPI_Cxxx_A64(MinorFUTiming):
- description = 'HPI_Cxxx_A64'
- mask, match = a64_opcode('xx01_1010_100x_xxxx_xxxx__0xxx_xxxx_xxxx')
+ description = "HPI_Cxxx_A64"
+ mask, match = a64_opcode("xx01_1010_100x_xxxx_xxxx__0xxx_xxxx_xxxx")
srcRegsRelativeLats = [3, 3, 3, 2, 2]
+
class HPI_DefaultMem(MinorFUTiming):
- description = 'HPI_DefaultMem'
+ description = "HPI_DefaultMem"
mask, match = any_non_a64_inst
srcRegsRelativeLats = [1, 1, 1, 1, 1, 2]
# Assume that LDR/STR take 2 cycles for resolving dependencies
# (1 + 1 of the FU)
extraAssumedLat = 2
+
class HPI_DefaultMem64(MinorFUTiming):
- description = 'HPI_DefaultMem64'
+ description = "HPI_DefaultMem64"
mask, match = any_a64_inst
srcRegsRelativeLats = [2]
# Assume that LDR/STR take 2 cycles for resolving dependencies
# (1 + 1 of the FU)
extraAssumedLat = 3
+
class HPI_DataProcessingMovShiftr(MinorFUTiming):
- description = 'HPI_DataProcessingMovShiftr'
- mask, match = a32_opcode('xxxx_0001_101x_xxxx__xxxx_xxxx_xxx1_xxxx')
+ description = "HPI_DataProcessingMovShiftr"
+ mask, match = a32_opcode("xxxx_0001_101x_xxxx__xxxx_xxxx_xxx1_xxxx")
srcRegsRelativeLats = [3, 3, 2, 2, 2, 1, 0]
+
class HPI_DataProcessingMayShift(MinorFUTiming):
- description = 'HPI_DataProcessingMayShift'
- mask, match = a32_opcode('xxxx_000x_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx')
+ description = "HPI_DataProcessingMayShift"
+ mask, match = a32_opcode("xxxx_000x_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx")
srcRegsRelativeLats = [3, 3, 2, 2, 1, 1, 0]
+
class HPI_DataProcessingNoShift(MinorFUTiming):
- description = 'HPI_DataProcessingNoShift'
- mask, match = a32_opcode('xxxx_000x_xxxx_xxxx__xxxx_0000_0xx0_xxxx')
+ description = "HPI_DataProcessingNoShift"
+ mask, match = a32_opcode("xxxx_000x_xxxx_xxxx__xxxx_0000_0xx0_xxxx")
srcRegsRelativeLats = [3, 3, 2, 2, 2, 1, 0]
+
class HPI_DataProcessingAllowShifti(MinorFUTiming):
- description = 'HPI_DataProcessingAllowShifti'
- mask, match = a32_opcode('xxxx_000x_xxxx_xxxx__xxxx_xxxx_xxx0_xxxx')
+ description = "HPI_DataProcessingAllowShifti"
+ mask, match = a32_opcode("xxxx_000x_xxxx_xxxx__xxxx_xxxx_xxx0_xxxx")
srcRegsRelativeLats = [3, 3, 2, 2, 1, 1, 0]
+
class HPI_DataProcessingSuppressShift(MinorFUTiming):
- description = 'HPI_DataProcessingSuppressShift'
- mask, match = a32_opcode('xxxx_000x_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx')
+ description = "HPI_DataProcessingSuppressShift"
+ mask, match = a32_opcode("xxxx_000x_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx")
srcRegsRelativeLats = []
suppress = True
+
class HPI_DataProcessingSuppressBranch(MinorFUTiming):
- description = 'HPI_DataProcessingSuppressBranch'
- mask, match = a32_opcode('xxxx_1010_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx')
+ description = "HPI_DataProcessingSuppressBranch"
+ mask, match = a32_opcode("xxxx_1010_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx")
srcRegsRelativeLats = []
suppress = True
+
class HPI_BFI_T1(MinorFUTiming):
- description = 'HPI_BFI_T1'
- mask, match = t32_opcode('1111_0x11_0110_xxxx__0xxx_xxxx_xxxx_xxxx')
+ description = "HPI_BFI_T1"
+ mask, match = t32_opcode("1111_0x11_0110_xxxx__0xxx_xxxx_xxxx_xxxx")
srcRegsRelativeLats = [0, 0, 0, 1, 1, 0]
+
class HPI_BFI_A1(MinorFUTiming):
- description = 'HPI_BFI_A1'
- mask, match = a32_opcode('xxxx_0111_110x_xxxx__xxxx_xxxx_x001_xxxx')
+ description = "HPI_BFI_A1"
+ mask, match = a32_opcode("xxxx_0111_110x_xxxx__xxxx_xxxx_x001_xxxx")
# f, f, f, dest, src
srcRegsRelativeLats = [0, 0, 0, 1, 1, 0]
+
class HPI_CLZ_T1(MinorFUTiming):
- description = 'HPI_CLZ_T1'
- mask, match = t32_opcode('1111_1010_1011_xxxx__1111_xxxx_1000_xxxx')
+ description = "HPI_CLZ_T1"
+ mask, match = t32_opcode("1111_1010_1011_xxxx__1111_xxxx_1000_xxxx")
srcRegsRelativeLats = [3, 3, 2, 2, 2, 1, 0]
+
class HPI_CLZ_A1(MinorFUTiming):
- description = 'HPI_CLZ_A1'
- mask, match = a32_opcode('xxxx_0001_0110_xxxx__xxxx_xxxx_0001_xxxx')
+ description = "HPI_CLZ_A1"
+ mask, match = a32_opcode("xxxx_0001_0110_xxxx__xxxx_xxxx_0001_xxxx")
srcRegsRelativeLats = [3, 3, 2, 2, 2, 1, 0]
+
class HPI_CMN_immediate_A1(MinorFUTiming):
- description = 'HPI_CMN_immediate_A1'
- mask, match = a32_opcode('xxxx_0011_0111_xxxx__xxxx_xxxx_xxxx_xxxx')
+ description = "HPI_CMN_immediate_A1"
+ mask, match = a32_opcode("xxxx_0011_0111_xxxx__xxxx_xxxx_xxxx_xxxx")
srcRegsRelativeLats = [3, 3, 3, 2, 2, 3, 3, 3, 0]
+
class HPI_CMN_register_A1(MinorFUTiming):
- description = 'HPI_CMN_register_A1'
- mask, match = a32_opcode('xxxx_0001_0111_xxxx__xxxx_xxxx_xxx0_xxxx')
+ description = "HPI_CMN_register_A1"
+ mask, match = a32_opcode("xxxx_0001_0111_xxxx__xxxx_xxxx_xxx0_xxxx")
srcRegsRelativeLats = [3, 3, 3, 2, 2, 3, 3, 3, 0]
+
class HPI_CMP_immediate_A1(MinorFUTiming):
- description = 'HPI_CMP_immediate_A1'
- mask, match = a32_opcode('xxxx_0011_0101_xxxx__xxxx_xxxx_xxxx_xxxx')
+ description = "HPI_CMP_immediate_A1"
+ mask, match = a32_opcode("xxxx_0011_0101_xxxx__xxxx_xxxx_xxxx_xxxx")
srcRegsRelativeLats = [3, 3, 3, 2, 2, 3, 3, 3, 0]
+
class HPI_CMP_register_A1(MinorFUTiming):
- description = 'HPI_CMP_register_A1'
- mask, match = a32_opcode('xxxx_0001_0101_xxxx__xxxx_xxxx_xxx0_xxxx')
+ description = "HPI_CMP_register_A1"
+ mask, match = a32_opcode("xxxx_0001_0101_xxxx__xxxx_xxxx_xxx0_xxxx")
srcRegsRelativeLats = [3, 3, 3, 2, 2, 3, 3, 3, 0]
+
class HPI_MLA_T1(MinorFUTiming):
- description = 'HPI_MLA_T1'
- mask, match = t32_opcode('1111_1011_0000_xxxx__xxxx_xxxx_0000_xxxx')
+ description = "HPI_MLA_T1"
+ mask, match = t32_opcode("1111_1011_0000_xxxx__xxxx_xxxx_0000_xxxx")
# z, z, z, a, l?, r?
srcRegsRelativeLats = [0, 0, 0, 0, 0, 2, 0]
+
class HPI_MLA_A1(MinorFUTiming):
- description = 'HPI_MLA_A1'
- mask, match = a32_opcode('xxxx_0000_001x_xxxx__xxxx_xxxx_1001_xxxx')
+ description = "HPI_MLA_A1"
+ mask, match = a32_opcode("xxxx_0000_001x_xxxx__xxxx_xxxx_1001_xxxx")
# z, z, z, a, l?, r?
srcRegsRelativeLats = [0, 0, 0, 0, 0, 2, 0]
+
class HPI_MADD_A64(MinorFUTiming):
- description = 'HPI_MADD_A64'
- mask, match = a64_opcode('x001_1011_000x_xxxx__0xxx_xxxx_xxxx_xxxx')
+ description = "HPI_MADD_A64"
+ mask, match = a64_opcode("x001_1011_000x_xxxx__0xxx_xxxx_xxxx_xxxx")
# a, l?, r?
srcRegsRelativeLats = [1, 1, 1, 0]
extraCommitLat = 1
+
class HPI_MLS_T1(MinorFUTiming):
- description = 'HPI_MLS_T1'
- mask, match = t32_opcode('1111_1011_0000_xxxx__xxxx_xxxx_0001_xxxx')
+ description = "HPI_MLS_T1"
+ mask, match = t32_opcode("1111_1011_0000_xxxx__xxxx_xxxx_0001_xxxx")
# z, z, z, l?, a, r?
srcRegsRelativeLats = [0, 0, 0, 2, 0, 0, 0]
+
class HPI_MLS_A1(MinorFUTiming):
- description = 'HPI_MLS_A1'
- mask, match = a32_opcode('xxxx_0000_0110_xxxx__xxxx_xxxx_1001_xxxx')
+ description = "HPI_MLS_A1"
+ mask, match = a32_opcode("xxxx_0000_0110_xxxx__xxxx_xxxx_1001_xxxx")
# z, z, z, l?, a, r?
srcRegsRelativeLats = [0, 0, 0, 2, 0, 0, 0]
+
class HPI_MOVT_A1(MinorFUTiming):
- description = 'HPI_MOVT_A1'
- mask, match = t32_opcode('xxxx_0010_0100_xxxx__xxxx_xxxx_xxxx_xxxx')
+ description = "HPI_MOVT_A1"
+ mask, match = t32_opcode("xxxx_0010_0100_xxxx__xxxx_xxxx_xxxx_xxxx")
+
class HPI_MUL_T1(MinorFUTiming):
- description = 'HPI_MUL_T1'
- mask, match = t16_opcode('0100_0011_01xx_xxxx')
+ description = "HPI_MUL_T1"
+ mask, match = t16_opcode("0100_0011_01xx_xxxx")
+
+
class HPI_MUL_T2(MinorFUTiming):
- description = 'HPI_MUL_T2'
- mask, match = t32_opcode('1111_1011_0000_xxxx_1111_xxxx_0000_xxxx')
+ description = "HPI_MUL_T2"
+ mask, match = t32_opcode("1111_1011_0000_xxxx_1111_xxxx_0000_xxxx")
+
class HPI_PKH_T1(MinorFUTiming):
- description = 'HPI_PKH_T1'
- mask, match = t32_opcode('1110_1010_110x_xxxx__xxxx_xxxx_xxxx_xxxx')
+ description = "HPI_PKH_T1"
+ mask, match = t32_opcode("1110_1010_110x_xxxx__xxxx_xxxx_xxxx_xxxx")
srcRegsRelativeLats = [0, 0, 0, 2, 1, 0]
+
class HPI_PKH_A1(MinorFUTiming):
- description = 'HPI_PKH_A1'
- mask, match = a32_opcode('xxxx_0110_1000_xxxx__xxxx_xxxx_xx01_xxxx')
+ description = "HPI_PKH_A1"
+ mask, match = a32_opcode("xxxx_0110_1000_xxxx__xxxx_xxxx_xx01_xxxx")
srcRegsRelativeLats = [0, 0, 0, 2, 1, 0]
+
class HPI_QADD_QSUB_T1(MinorFUTiming):
- description = 'HPI_QADD_QSUB_T1'
- mask, match = t32_opcode('1111_1010_1000_xxxx__1111_xxxx_10x0_xxxx')
+ description = "HPI_QADD_QSUB_T1"
+ mask, match = t32_opcode("1111_1010_1000_xxxx__1111_xxxx_10x0_xxxx")
srcRegsRelativeLats = [0, 0, 0, 1, 1, 0]
+
class HPI_QADD_QSUB_A1(MinorFUTiming):
- description = 'HPI_QADD_QSUB_A1'
- mask, match = a32_opcode('xxxx_0001_00x0_xxxx__xxxx_xxxx_0101_xxxx')
+ description = "HPI_QADD_QSUB_A1"
+ mask, match = a32_opcode("xxxx_0001_00x0_xxxx__xxxx_xxxx_0101_xxxx")
srcRegsRelativeLats = [0, 0, 0, 1, 1, 0]
+
# T1 QADD16 QADD8 QSUB16 QSUB8 UQADD16 UQADD8 UQSUB16 UQSUB8
class HPI_QADD_ETC_T1(MinorFUTiming):
- description = 'HPI_QADD_ETC_T1'
- mask, match = t32_opcode('1111_1010_1x0x_xxxx__1111_xxxx_0x01_xxxx')
+ description = "HPI_QADD_ETC_T1"
+ mask, match = t32_opcode("1111_1010_1x0x_xxxx__1111_xxxx_0x01_xxxx")
srcRegsRelativeLats = [0, 0, 0, 1, 1, 0]
+
# A1 QADD16 QADD8 QSAX QSUB16 QSUB8 UQADD16 UQADD8 UQASX UQSAX UQSUB16 UQSUB8
class HPI_QADD_ETC_A1(MinorFUTiming):
- description = 'HPI_QADD_ETC_A1'
- mask, match = a32_opcode('xxxx_0110_0x10_xxxx__xxxx_xxxx_xxx1_xxxx')
+ description = "HPI_QADD_ETC_A1"
+ mask, match = a32_opcode("xxxx_0110_0x10_xxxx__xxxx_xxxx_xxx1_xxxx")
srcRegsRelativeLats = [0, 0, 0, 1, 1, 0]
+
class HPI_QASX_QSAX_UQASX_UQSAX_T1(MinorFUTiming):
- description = 'HPI_QASX_QSAX_UQASX_UQSAX_T1'
- mask, match = t32_opcode('1111_1010_1x10_xxxx__1111_xxxx_0x01_xxxx')
+ description = "HPI_QASX_QSAX_UQASX_UQSAX_T1"
+ mask, match = t32_opcode("1111_1010_1x10_xxxx__1111_xxxx_0x01_xxxx")
srcRegsRelativeLats = [0, 0, 0, 1, 1, 0]
+
class HPI_QDADD_QDSUB_T1(MinorFUTiming):
- description = 'HPI_QDADD_QDSUB_T1'
- mask, match = t32_opcode('1111_1010_1000_xxxx__1111_xxxx_10x1_xxxx')
+ description = "HPI_QDADD_QDSUB_T1"
+ mask, match = t32_opcode("1111_1010_1000_xxxx__1111_xxxx_10x1_xxxx")
srcRegsRelativeLats = [0, 0, 0, 0, 1, 0]
+
class HPI_QDADD_QDSUB_A1(MinorFUTiming):
- description = 'HPI_QDADD_QSUB_A1'
- mask, match = a32_opcode('xxxx_0001_01x0_xxxx__xxxx_xxxx_0101_xxxx')
+    description = "HPI_QDADD_QDSUB_A1"
+ mask, match = a32_opcode("xxxx_0001_01x0_xxxx__xxxx_xxxx_0101_xxxx")
srcRegsRelativeLats = [0, 0, 0, 0, 1, 0]
+
class HPI_RBIT_A1(MinorFUTiming):
- description = 'HPI_RBIT_A1'
- mask, match = a32_opcode('xxxx_0110_1111_xxxx__xxxx_xxxx_0011_xxxx')
+ description = "HPI_RBIT_A1"
+ mask, match = a32_opcode("xxxx_0110_1111_xxxx__xxxx_xxxx_0011_xxxx")
srcRegsRelativeLats = [0, 0, 0, 1, 0]
+
class HPI_REV_REV16_A1(MinorFUTiming):
- description = 'HPI_REV_REV16_A1'
- mask, match = a32_opcode('xxxx_0110_1011_xxxx__xxxx_xxxx_x011_xxxx')
+ description = "HPI_REV_REV16_A1"
+ mask, match = a32_opcode("xxxx_0110_1011_xxxx__xxxx_xxxx_x011_xxxx")
srcRegsRelativeLats = [0, 0, 0, 1, 0]
+
class HPI_REVSH_A1(MinorFUTiming):
- description = 'HPI_REVSH_A1'
- mask, match = a32_opcode('xxxx_0110_1111_xxxx__xxxx_xxxx_1011_xxxx')
+ description = "HPI_REVSH_A1"
+ mask, match = a32_opcode("xxxx_0110_1111_xxxx__xxxx_xxxx_1011_xxxx")
srcRegsRelativeLats = [0, 0, 0, 1, 0]
+
class HPI_ADD_ETC_A1(MinorFUTiming):
- description = 'HPI_ADD_ETC_A1'
- mask, match = a32_opcode('xxxx_0110_0xx1_xxxx__xxxx_xxxx_x001_xxxx')
+ description = "HPI_ADD_ETC_A1"
+ mask, match = a32_opcode("xxxx_0110_0xx1_xxxx__xxxx_xxxx_x001_xxxx")
srcRegsRelativeLats = [0, 0, 0, 2, 2, 0]
+
class HPI_ADD_ETC_T1(MinorFUTiming):
- description = 'HPI_ADD_ETC_A1'
- mask, match = t32_opcode('1111_1010_100x_xxxx__1111_xxxx_0xx0_xxxx')
+    description = "HPI_ADD_ETC_T1"
+ mask, match = t32_opcode("1111_1010_100x_xxxx__1111_xxxx_0xx0_xxxx")
srcRegsRelativeLats = [0, 0, 0, 2, 2, 0]
+
class HPI_SASX_SHASX_UASX_UHASX_A1(MinorFUTiming):
- description = 'HPI_SASX_SHASX_UASX_UHASX_A1'
- mask, match = a32_opcode('xxxx_0110_0xx1_xxxx__xxxx_xxxx_0011_xxxx')
+ description = "HPI_SASX_SHASX_UASX_UHASX_A1"
+ mask, match = a32_opcode("xxxx_0110_0xx1_xxxx__xxxx_xxxx_0011_xxxx")
srcRegsRelativeLats = [3, 3, 2, 2, 2, 1, 0]
+
class HPI_SBFX_UBFX_A1(MinorFUTiming):
- description = 'HPI_SBFX_UBFX_A1'
- mask, match = a32_opcode('xxxx_0111_1x1x_xxxx__xxxx_xxxx_x101_xxxx')
+ description = "HPI_SBFX_UBFX_A1"
+ mask, match = a32_opcode("xxxx_0111_1x1x_xxxx__xxxx_xxxx_x101_xxxx")
srcRegsRelativeLats = [0, 0, 0, 1, 0]
+
### SDIV
-sdiv_lat_expr = expr_top(let([
- ('left', un('SignExtend32To64', int_reg(src(4)))),
- ('right', un('SignExtend32To64', int_reg(src(3)))),
- ('either_signed', bin('Or',
- bin('SLessThan', ref('left'), literal(0)),
- bin('SLessThan', ref('right'), literal(0)))),
- ('left_size', un('SizeInBits', un('Abs', ref('left')))),
- ('signed_adjust', if_expr(ref('either_signed'), literal(1), literal(0))),
- ('right_size', un('SizeInBits',
- bin('UDiv', un('Abs', ref('right')),
- if_expr(ref('either_signed'), literal(4), literal(2))))),
- ('left_minus_right', if_expr(
- bin('SLessThan', ref('left_size'), ref('right_size')),
- literal(0),
- bin('Sub', ref('left_size'), ref('right_size'))))
- ],
- bin('Add',
- ref('signed_adjust'),
- if_expr(bin('Equal', ref('right'), literal(0)),
- literal(0),
- bin('UDiv', ref('left_minus_right'), literal(4))))
- ))
+sdiv_lat_expr = expr_top(
+ let(
+ [
+ ("left", un("SignExtend32To64", src_reg(4))),
+ ("right", un("SignExtend32To64", src_reg(3))),
+ (
+ "either_signed",
+ bin(
+ "Or",
+ bin("SLessThan", ref("left"), literal(0)),
+ bin("SLessThan", ref("right"), literal(0)),
+ ),
+ ),
+ ("left_size", un("SizeInBits", un("Abs", ref("left")))),
+ (
+ "signed_adjust",
+ if_expr(ref("either_signed"), literal(1), literal(0)),
+ ),
+ (
+ "right_size",
+ un(
+ "SizeInBits",
+ bin(
+ "UDiv",
+ un("Abs", ref("right")),
+ if_expr(ref("either_signed"), literal(4), literal(2)),
+ ),
+ ),
+ ),
+ (
+ "left_minus_right",
+ if_expr(
+ bin("SLessThan", ref("left_size"), ref("right_size")),
+ literal(0),
+ bin("Sub", ref("left_size"), ref("right_size")),
+ ),
+ ),
+ ],
+ bin(
+ "Add",
+ ref("signed_adjust"),
+ if_expr(
+ bin("Equal", ref("right"), literal(0)),
+ literal(0),
+ bin("UDiv", ref("left_minus_right"), literal(4)),
+ ),
+ ),
+ )
+)
-sdiv_lat_expr64 = expr_top(let([
- ('left', un('SignExtend32To64', int_reg(src(0)))),
- ('right', un('SignExtend32To64', int_reg(src(1)))),
- ('either_signed', bin('Or',
- bin('SLessThan', ref('left'), literal(0)),
- bin('SLessThan', ref('right'), literal(0)))),
- ('left_size', un('SizeInBits', un('Abs', ref('left')))),
- ('signed_adjust', if_expr(ref('either_signed'), literal(1), literal(0))),
- ('right_size', un('SizeInBits',
- bin('UDiv', un('Abs', ref('right')),
- if_expr(ref('either_signed'), literal(4), literal(2))))),
- ('left_minus_right', if_expr(
- bin('SLessThan', ref('left_size'), ref('right_size')),
- literal(0),
- bin('Sub', ref('left_size'), ref('right_size'))))
- ],
- bin('Add',
- ref('signed_adjust'),
- if_expr(bin('Equal', ref('right'), literal(0)),
- literal(0),
- bin('UDiv', ref('left_minus_right'), literal(4))))
- ))
+sdiv_lat_expr64 = expr_top(
+ let(
+ [
+ ("left", un("SignExtend32To64", src_reg(0))),
+ ("right", un("SignExtend32To64", src_reg(1))),
+ (
+ "either_signed",
+ bin(
+ "Or",
+ bin("SLessThan", ref("left"), literal(0)),
+ bin("SLessThan", ref("right"), literal(0)),
+ ),
+ ),
+ ("left_size", un("SizeInBits", un("Abs", ref("left")))),
+ (
+ "signed_adjust",
+ if_expr(ref("either_signed"), literal(1), literal(0)),
+ ),
+ (
+ "right_size",
+ un(
+ "SizeInBits",
+ bin(
+ "UDiv",
+ un("Abs", ref("right")),
+ if_expr(ref("either_signed"), literal(4), literal(2)),
+ ),
+ ),
+ ),
+ (
+ "left_minus_right",
+ if_expr(
+ bin("SLessThan", ref("left_size"), ref("right_size")),
+ literal(0),
+ bin("Sub", ref("left_size"), ref("right_size")),
+ ),
+ ),
+ ],
+ bin(
+ "Add",
+ ref("signed_adjust"),
+ if_expr(
+ bin("Equal", ref("right"), literal(0)),
+ literal(0),
+ bin("UDiv", ref("left_minus_right"), literal(4)),
+ ),
+ ),
+ )
+)
+
class HPI_SDIV_A1(MinorFUTiming):
- description = 'HPI_SDIV_A1'
- mask, match = a32_opcode('xxxx_0111_0001_xxxx__xxxx_xxxx_0001_xxxx')
+ description = "HPI_SDIV_A1"
+ mask, match = a32_opcode("xxxx_0111_0001_xxxx__xxxx_xxxx_0001_xxxx")
extraCommitLat = 0
srcRegsRelativeLats = []
extraCommitLatExpr = sdiv_lat_expr
+
class HPI_SDIV_A64(MinorFUTiming):
- description = 'HPI_SDIV_A64'
- mask, match = a64_opcode('x001_1010_110x_xxxx__0000_11xx_xxxx_xxxx')
+ description = "HPI_SDIV_A64"
+ mask, match = a64_opcode("x001_1010_110x_xxxx__0000_11xx_xxxx_xxxx")
extraCommitLat = 0
srcRegsRelativeLats = []
extraCommitLatExpr = sdiv_lat_expr64
+
### SEL
+
class HPI_SEL_A1(MinorFUTiming):
- description = 'HPI_SEL_A1'
- mask, match = a32_opcode('xxxx_0110_1000_xxxx__xxxx_xxxx_1011_xxxx')
+ description = "HPI_SEL_A1"
+ mask, match = a32_opcode("xxxx_0110_1000_xxxx__xxxx_xxxx_1011_xxxx")
srcRegsRelativeLats = [0, 0, 0, 0, 2, 2, 0]
+
class HPI_SEL_A1_Suppress(MinorFUTiming):
- description = 'HPI_SEL_A1_Suppress'
- mask, match = a32_opcode('xxxx_0110_1000_xxxx__xxxx_xxxx_1011_xxxx')
+ description = "HPI_SEL_A1_Suppress"
+ mask, match = a32_opcode("xxxx_0110_1000_xxxx__xxxx_xxxx_1011_xxxx")
srcRegsRelativeLats = []
suppress = True
+
class HPI_SHSAX_SSAX_UHSAX_USAX_A1(MinorFUTiming):
- description = 'HPI_SHSAX_SSAX_UHSAX_USAX_A1'
- mask, match = a32_opcode('xxxx_0110_0xx1_xxxx__xxxx_xxxx_0101_xxxx')
+ description = "HPI_SHSAX_SSAX_UHSAX_USAX_A1"
+ mask, match = a32_opcode("xxxx_0110_0xx1_xxxx__xxxx_xxxx_0101_xxxx")
# As Default
srcRegsRelativeLats = [3, 3, 2, 2, 2, 1, 0]
+
class HPI_USUB_ETC_A1(MinorFUTiming):
- description = 'HPI_USUB_ETC_A1'
- mask, match = a32_opcode('xxxx_0110_0xx1_xxxx__xxxx_xxxx_x111_xxxx')
+ description = "HPI_USUB_ETC_A1"
+ mask, match = a32_opcode("xxxx_0110_0xx1_xxxx__xxxx_xxxx_x111_xxxx")
srcRegsRelativeLats = [0, 0, 0, 2, 2, 0]
+
class HPI_SMLABB_T1(MinorFUTiming):
- description = 'HPI_SMLABB_T1'
- mask, match = t32_opcode('1111_1011_0001_xxxx__xxxx_xxxx_00xx_xxxx')
+ description = "HPI_SMLABB_T1"
+ mask, match = t32_opcode("1111_1011_0001_xxxx__xxxx_xxxx_00xx_xxxx")
srcRegsRelativeLats = [0, 0, 0, 0, 0, 2, 0]
+
class HPI_SMLABB_A1(MinorFUTiming):
- description = 'HPI_SMLABB_A1'
- mask, match = a32_opcode('xxxx_0001_0000_xxxx__xxxx_xxxx_1xx0_xxxx')
+ description = "HPI_SMLABB_A1"
+ mask, match = a32_opcode("xxxx_0001_0000_xxxx__xxxx_xxxx_1xx0_xxxx")
srcRegsRelativeLats = [0, 0, 0, 0, 0, 2, 0]
+
class HPI_SMLAD_T1(MinorFUTiming):
- description = 'HPI_SMLAD_T1'
- mask, match = t32_opcode('1111_1011_0010_xxxx__xxxx_xxxx_000x_xxxx')
+ description = "HPI_SMLAD_T1"
+ mask, match = t32_opcode("1111_1011_0010_xxxx__xxxx_xxxx_000x_xxxx")
srcRegsRelativeLats = [0, 0, 0, 0, 0, 2, 0]
+
class HPI_SMLAD_A1(MinorFUTiming):
- description = 'HPI_SMLAD_A1'
- mask, match = a32_opcode('xxxx_0111_0000_xxxx__xxxx_xxxx_00x1_xxxx')
+ description = "HPI_SMLAD_A1"
+ mask, match = a32_opcode("xxxx_0111_0000_xxxx__xxxx_xxxx_00x1_xxxx")
# z, z, z, l, r, a
srcRegsRelativeLats = [0, 0, 0, 0, 0, 2, 0]
+
class HPI_SMLAL_T1(MinorFUTiming):
- description = 'HPI_SMLAL_T1'
- mask, match = t32_opcode('1111_1011_1100_xxxx__xxxx_xxxx_0000_xxxx')
+ description = "HPI_SMLAL_T1"
+ mask, match = t32_opcode("1111_1011_1100_xxxx__xxxx_xxxx_0000_xxxx")
+
+
class HPI_SMLAL_A1(MinorFUTiming):
- description = 'HPI_SMLAL_A1'
- mask, match = a32_opcode('xxxx_0000_111x_xxxx__xxxx_xxxx_1001_xxxx')
+ description = "HPI_SMLAL_A1"
+ mask, match = a32_opcode("xxxx_0000_111x_xxxx__xxxx_xxxx_1001_xxxx")
+
class HPI_SMLALBB_T1(MinorFUTiming):
- description = 'HPI_SMLALBB_T1'
- mask, match = t32_opcode('1111_1011_1100_xxxx__xxxx_xxxx_10xx_xxxx')
+ description = "HPI_SMLALBB_T1"
+ mask, match = t32_opcode("1111_1011_1100_xxxx__xxxx_xxxx_10xx_xxxx")
+
+
class HPI_SMLALBB_A1(MinorFUTiming):
- description = 'HPI_SMLALBB_A1'
- mask, match = a32_opcode('xxxx_0001_0100_xxxx__xxxx_xxxx_1xx0_xxxx')
+ description = "HPI_SMLALBB_A1"
+ mask, match = a32_opcode("xxxx_0001_0100_xxxx__xxxx_xxxx_1xx0_xxxx")
+
class HPI_SMLALD_T1(MinorFUTiming):
- description = 'HPI_SMLALD_T1'
- mask, match = t32_opcode('1111_1011_1100_xxxx__xxxx_xxxx_110x_xxxx')
+ description = "HPI_SMLALD_T1"
+ mask, match = t32_opcode("1111_1011_1100_xxxx__xxxx_xxxx_110x_xxxx")
+
+
class HPI_SMLALD_A1(MinorFUTiming):
- description = 'HPI_SMLALD_A1'
- mask, match = a32_opcode('xxxx_0111_0100_xxxx__xxxx_xxxx_00x1_xxxx')
+ description = "HPI_SMLALD_A1"
+ mask, match = a32_opcode("xxxx_0111_0100_xxxx__xxxx_xxxx_00x1_xxxx")
+
class HPI_SMLAWB_T1(MinorFUTiming):
- description = 'HPI_SMLAWB_T1'
- mask, match = t32_opcode('1111_1011_0011_xxxx__xxxx_xxxx_000x_xxxx')
+ description = "HPI_SMLAWB_T1"
+ mask, match = t32_opcode("1111_1011_0011_xxxx__xxxx_xxxx_000x_xxxx")
srcRegsRelativeLats = [0, 0, 0, 0, 0, 2, 0]
+
class HPI_SMLAWB_A1(MinorFUTiming):
- description = 'HPI_SMLAWB_A1'
- mask, match = a32_opcode('xxxx_0001_0010_xxxx__xxxx_xxxx_1x00_xxxx')
+ description = "HPI_SMLAWB_A1"
+ mask, match = a32_opcode("xxxx_0001_0010_xxxx__xxxx_xxxx_1x00_xxxx")
srcRegsRelativeLats = [0, 0, 0, 0, 0, 2, 0]
+
class HPI_SMLSD_A1(MinorFUTiming):
- description = 'HPI_SMLSD_A1'
- mask, match = a32_opcode('xxxx_0111_0000_xxxx__xxxx_xxxx_01x1_xxxx')
+ description = "HPI_SMLSD_A1"
+ mask, match = a32_opcode("xxxx_0111_0000_xxxx__xxxx_xxxx_01x1_xxxx")
+
class HPI_SMLSLD_T1(MinorFUTiming):
- description = 'HPI_SMLSLD_T1'
- mask, match = t32_opcode('1111_1011_1101_xxxx__xxxx_xxxx_110x_xxxx')
+ description = "HPI_SMLSLD_T1"
+ mask, match = t32_opcode("1111_1011_1101_xxxx__xxxx_xxxx_110x_xxxx")
+
+
class HPI_SMLSLD_A1(MinorFUTiming):
- description = 'HPI_SMLSLD_A1'
- mask, match = a32_opcode('xxxx_0111_0100_xxxx__xxxx_xxxx_01x1_xxxx')
+ description = "HPI_SMLSLD_A1"
+ mask, match = a32_opcode("xxxx_0111_0100_xxxx__xxxx_xxxx_01x1_xxxx")
+
class HPI_SMMLA_T1(MinorFUTiming):
- description = 'HPI_SMMLA_T1'
- mask, match = t32_opcode('1111_1011_0101_xxxx__xxxx_xxxx_000x_xxxx')
+ description = "HPI_SMMLA_T1"
+ mask, match = t32_opcode("1111_1011_0101_xxxx__xxxx_xxxx_000x_xxxx")
# ^^^^ != 1111
srcRegsRelativeLats = [0, 0, 0, 2, 0, 0, 0]
+
class HPI_SMMLA_A1(MinorFUTiming):
- description = 'HPI_SMMLA_A1'
+ description = "HPI_SMMLA_A1"
# Note that this must be after the encoding for SMMUL
- mask, match = a32_opcode('xxxx_0111_0101_xxxx__xxxx_xxxx_00x1_xxxx')
+ mask, match = a32_opcode("xxxx_0111_0101_xxxx__xxxx_xxxx_00x1_xxxx")
# ^^^^ != 1111
srcRegsRelativeLats = [0, 0, 0, 2, 0, 0, 0]
+
class HPI_SMMLS_T1(MinorFUTiming):
- description = 'HPI_SMMLS_T1'
- mask, match = t32_opcode('1111_1011_0110_xxxx__xxxx_xxxx_000x_xxxx')
+ description = "HPI_SMMLS_T1"
+ mask, match = t32_opcode("1111_1011_0110_xxxx__xxxx_xxxx_000x_xxxx")
srcRegsRelativeLats = [0, 0, 0, 2, 0, 0, 0]
+
class HPI_SMMLS_A1(MinorFUTiming):
- description = 'HPI_SMMLS_A1'
- mask, match = a32_opcode('xxxx_0111_0101_xxxx__xxxx_xxxx_11x1_xxxx')
+ description = "HPI_SMMLS_A1"
+ mask, match = a32_opcode("xxxx_0111_0101_xxxx__xxxx_xxxx_11x1_xxxx")
srcRegsRelativeLats = [0, 0, 0, 2, 0, 0, 0]
+
class HPI_SMMUL_T1(MinorFUTiming):
- description = 'HPI_SMMUL_T1'
- mask, match = t32_opcode('1111_1011_0101_xxxx__1111_xxxx_000x_xxxx')
+ description = "HPI_SMMUL_T1"
+ mask, match = t32_opcode("1111_1011_0101_xxxx__1111_xxxx_000x_xxxx")
srcRegsRelativeLats = [0, 0, 0, 0, 0, 0]
+
class HPI_SMMUL_A1(MinorFUTiming):
- description = 'HPI_SMMUL_A1'
- mask, match = a32_opcode('xxxx_0111_0101_xxxx__1111_xxxx_00x1_xxxx')
+ description = "HPI_SMMUL_A1"
+ mask, match = a32_opcode("xxxx_0111_0101_xxxx__1111_xxxx_00x1_xxxx")
srcRegsRelativeLats = [0, 0, 0, 0, 0, 0]
+
class HPI_SMUAD_T1(MinorFUTiming):
- description = 'HPI_SMUAD_T1'
- mask, match = t32_opcode('1111_1011_0010_xxxx__1111_xxxx_000x_xxxx')
+ description = "HPI_SMUAD_T1"
+ mask, match = t32_opcode("1111_1011_0010_xxxx__1111_xxxx_000x_xxxx")
+
+
class HPI_SMUAD_A1(MinorFUTiming):
- description = 'HPI_SMUAD_A1'
- mask, match = a32_opcode('xxxx_0111_0000_xxxx__1111_xxxx_00x1_xxxx')
+ description = "HPI_SMUAD_A1"
+ mask, match = a32_opcode("xxxx_0111_0000_xxxx__1111_xxxx_00x1_xxxx")
+
class HPI_SMULBB_T1(MinorFUTiming):
- description = 'HPI_SMULBB_T1'
- mask, match = t32_opcode('1111_1011_0001_xxxx__1111_xxxx_00xx_xxxx')
+ description = "HPI_SMULBB_T1"
+ mask, match = t32_opcode("1111_1011_0001_xxxx__1111_xxxx_00xx_xxxx")
+
+
class HPI_SMULBB_A1(MinorFUTiming):
- description = 'HPI_SMULBB_A1'
- mask, match = a32_opcode('xxxx_0001_0110_xxxx__xxxx_xxxx_1xx0_xxxx')
+ description = "HPI_SMULBB_A1"
+ mask, match = a32_opcode("xxxx_0001_0110_xxxx__xxxx_xxxx_1xx0_xxxx")
+
class HPI_SMULL_T1(MinorFUTiming):
- description = 'HPI_SMULL_T1'
- mask, match = t32_opcode('1111_1011_1000_xxxx__xxxx_xxxx_0000_xxxx')
+ description = "HPI_SMULL_T1"
+ mask, match = t32_opcode("1111_1011_1000_xxxx__xxxx_xxxx_0000_xxxx")
+
+
class HPI_SMULL_A1(MinorFUTiming):
- description = 'HPI_SMULL_A1'
- mask, match = a32_opcode('xxxx_0000_110x_xxxx__xxxx_xxxx_1001_xxxx')
+ description = "HPI_SMULL_A1"
+ mask, match = a32_opcode("xxxx_0000_110x_xxxx__xxxx_xxxx_1001_xxxx")
+
class HPI_SMULWB_T1(MinorFUTiming):
- description = 'HPI_SMULWB_T1'
- mask, match = t32_opcode('1111_1011_0011_xxxx__1111_xxxx_000x_xxxx')
+ description = "HPI_SMULWB_T1"
+ mask, match = t32_opcode("1111_1011_0011_xxxx__1111_xxxx_000x_xxxx")
+
+
class HPI_SMULWB_A1(MinorFUTiming):
- description = 'HPI_SMULWB_A1'
- mask, match = a32_opcode('xxxx_0001_0010_xxxx__xxxx_xxxx_1x10_xxxx')
+ description = "HPI_SMULWB_A1"
+ mask, match = a32_opcode("xxxx_0001_0010_xxxx__xxxx_xxxx_1x10_xxxx")
+
class HPI_SMUSD_T1(MinorFUTiming):
- description = 'HPI_SMUSD_T1'
- mask, match = t32_opcode('1111_1011_0100_xxxx__1111_xxxx_000x_xxxx')
+ description = "HPI_SMUSD_T1"
+ mask, match = t32_opcode("1111_1011_0100_xxxx__1111_xxxx_000x_xxxx")
+
+
class HPI_SMUSD_A1(MinorFUTiming):
- description = 'HPI_SMUSD_A1'
- mask, match = a32_opcode('xxxx_0111_0000_xxxx__1111_xxxx_01x1_xxxx')
+ description = "HPI_SMUSD_A1"
+ mask, match = a32_opcode("xxxx_0111_0000_xxxx__1111_xxxx_01x1_xxxx")
+
class HPI_SSAT_USAT_no_shift_A1(MinorFUTiming):
- description = 'HPI_SSAT_USAT_no_shift_A1'
+ description = "HPI_SSAT_USAT_no_shift_A1"
# Order *before* shift
- mask, match = a32_opcode('xxxx_0110_1x1x_xxxx__xxxx_0000_0001_xxxx')
+ mask, match = a32_opcode("xxxx_0110_1x1x_xxxx__xxxx_0000_0001_xxxx")
srcRegsRelativeLats = [0, 0, 0, 2, 0]
+
class HPI_SSAT_USAT_shift_A1(MinorFUTiming):
- description = 'HPI_SSAT_USAT_shift_A1'
+ description = "HPI_SSAT_USAT_shift_A1"
# Order after shift
- mask, match = a32_opcode('xxxx_0110_1x1x_xxxx__xxxx_xxxx_xx01_xxxx')
+ mask, match = a32_opcode("xxxx_0110_1x1x_xxxx__xxxx_xxxx_xx01_xxxx")
srcRegsRelativeLats = [0, 0, 0, 1, 0]
+
class HPI_SSAT16_USAT16_A1(MinorFUTiming):
- description = 'HPI_SSAT16_USAT16_A1'
- mask, match = a32_opcode('xxxx_0110_1x10_xxxx__xxxx_xxxx_0011_xxxx')
+ description = "HPI_SSAT16_USAT16_A1"
+ mask, match = a32_opcode("xxxx_0110_1x10_xxxx__xxxx_xxxx_0011_xxxx")
srcRegsRelativeLats = [0, 0, 0, 2, 0]
+
class HPI_SXTAB_T1(MinorFUTiming):
- description = 'HPI_SXTAB_T1'
- mask, match = t32_opcode('1111_1010_0100_xxxx__1111_xxxx_1xxx_xxxx')
+ description = "HPI_SXTAB_T1"
+ mask, match = t32_opcode("1111_1010_0100_xxxx__1111_xxxx_1xxx_xxxx")
srcRegsRelativeLats = [0, 0, 0, 1, 2, 0]
+
class HPI_SXTAB_SXTAB16_SXTAH_UXTAB_UXTAB16_UXTAH_A1(MinorFUTiming):
- description = 'HPI_SXTAB_SXTAB16_SXTAH_UXTAB_UXTAB16_UXTAH_A1'
+ description = "HPI_SXTAB_SXTAB16_SXTAH_UXTAB_UXTAB16_UXTAH_A1"
# Place AFTER HPI_SXTB_SXTB16_SXTH_UXTB_UXTB16_UXTH_A1
# e6[9d][^f]0070 are undefined
- mask, match = a32_opcode('xxxx_0110_1xxx_xxxx__xxxx_xxxx_0111_xxxx')
+ mask, match = a32_opcode("xxxx_0110_1xxx_xxxx__xxxx_xxxx_0111_xxxx")
srcRegsRelativeLats = [0, 0, 0, 1, 2, 0]
+
class HPI_SXTAB16_T1(MinorFUTiming):
- description = 'HPI_SXTAB16_T1'
- mask, match = t32_opcode('1111_1010_0010_xxxx__1111_xxxx_1xxx_xxxx')
+ description = "HPI_SXTAB16_T1"
+ mask, match = t32_opcode("1111_1010_0010_xxxx__1111_xxxx_1xxx_xxxx")
srcRegsRelativeLats = [0, 0, 0, 1, 2, 0]
+
class HPI_SXTAH_T1(MinorFUTiming):
- description = 'HPI_SXTAH_T1'
- mask, match = t32_opcode('1111_1010_0000_xxxx__1111_xxxx_1xxx_xxxx')
+ description = "HPI_SXTAH_T1"
+ mask, match = t32_opcode("1111_1010_0000_xxxx__1111_xxxx_1xxx_xxxx")
srcRegsRelativeLats = [0, 0, 0, 1, 2, 0]
+
class HPI_SXTB_T1(MinorFUTiming):
- description = 'HPI_SXTB_T1'
- mask, match = t16_opcode('1011_0010_01xx_xxxx')
+ description = "HPI_SXTB_T1"
+ mask, match = t16_opcode("1011_0010_01xx_xxxx")
+
+
class HPI_SXTB_T2(MinorFUTiming):
- description = 'HPI_SXTB_T2'
- mask, match = t32_opcode('1111_1010_0100_1111__1111_xxxx_1xxx_xxxx')
+ description = "HPI_SXTB_T2"
+ mask, match = t32_opcode("1111_1010_0100_1111__1111_xxxx_1xxx_xxxx")
srcRegsRelativeLats = [0, 0, 0, 1, 2, 0]
+
class HPI_SXTB_SXTB16_SXTH_UXTB_UXTB16_UXTH_A1(MinorFUTiming):
- description = 'HPI_SXTB_SXTB16_SXTH_UXTB_UXTB16_UXTH_A1'
+ description = "HPI_SXTB_SXTB16_SXTH_UXTB_UXTB16_UXTH_A1"
# e6[9d]f0070 are undefined
- mask, match = a32_opcode('xxxx_0110_1xxx_1111__xxxx_xxxx_0111_xxxx')
+ mask, match = a32_opcode("xxxx_0110_1xxx_1111__xxxx_xxxx_0111_xxxx")
srcRegsRelativeLats = [0, 0, 0, 2, 0]
+
class HPI_SXTB16_T1(MinorFUTiming):
- description = 'HPI_SXTB16_T1'
- mask, match = t32_opcode('1111_1010_0010_1111__1111_xxxx_1xxx_xxxx')
+ description = "HPI_SXTB16_T1"
+ mask, match = t32_opcode("1111_1010_0010_1111__1111_xxxx_1xxx_xxxx")
srcRegsRelativeLats = [0, 0, 0, 1, 2, 0]
+
class HPI_SXTH_T1(MinorFUTiming):
- description = 'HPI_SXTH_T1'
- mask, match = t16_opcode('1011_0010_00xx_xxxx')
+ description = "HPI_SXTH_T1"
+ mask, match = t16_opcode("1011_0010_00xx_xxxx")
+
+
class HPI_SXTH_T2(MinorFUTiming):
- description = 'HPI_SXTH_T2'
- mask, match = t32_opcode('1111_1010_0000_1111__1111_xxxx_1xxx_xxxx')
+ description = "HPI_SXTH_T2"
+ mask, match = t32_opcode("1111_1010_0000_1111__1111_xxxx_1xxx_xxxx")
srcRegsRelativeLats = [0, 0, 0, 1, 2, 0]
-class HPI_UDIV_T1(MinorFUTiming):
- description = 'HPI_UDIV_T1'
- mask, match = t32_opcode('1111_1011_1011_xxxx__xxxx_xxxx_1111_xxxx')
-udiv_lat_expr = expr_top(let([
- ('left', int_reg(src(4))),
- ('right', int_reg(src(3))),
- ('left_size', un('SizeInBits', ref('left'))),
- ('right_size', un('SizeInBits',
- bin('UDiv', ref('right'), literal(2)))),
- ('left_minus_right', if_expr(
- bin('SLessThan', ref('left_size'), ref('right_size')),
- literal(0),
- bin('Sub', ref('left_size'), ref('right_size'))))
- ],
- if_expr(bin('Equal', ref('right'), literal(0)),
- literal(0),
- bin('UDiv', ref('left_minus_right'), literal(4)))
- ))
+class HPI_UDIV_T1(MinorFUTiming):
+ description = "HPI_UDIV_T1"
+ mask, match = t32_opcode("1111_1011_1011_xxxx__xxxx_xxxx_1111_xxxx")
+
+
+udiv_lat_expr = expr_top(
+ let(
+ [
+ ("left", src_reg(4)),
+ ("right", src_reg(3)),
+ ("left_size", un("SizeInBits", ref("left"))),
+ (
+ "right_size",
+ un("SizeInBits", bin("UDiv", ref("right"), literal(2))),
+ ),
+ (
+ "left_minus_right",
+ if_expr(
+ bin("SLessThan", ref("left_size"), ref("right_size")),
+ literal(0),
+ bin("Sub", ref("left_size"), ref("right_size")),
+ ),
+ ),
+ ],
+ if_expr(
+ bin("Equal", ref("right"), literal(0)),
+ literal(0),
+ bin("UDiv", ref("left_minus_right"), literal(4)),
+ ),
+ )
+)
+
class HPI_UDIV_A1(MinorFUTiming):
- description = 'HPI_UDIV_A1'
- mask, match = a32_opcode('xxxx_0111_0011_xxxx__xxxx_xxxx_0001_xxxx')
+ description = "HPI_UDIV_A1"
+ mask, match = a32_opcode("xxxx_0111_0011_xxxx__xxxx_xxxx_0001_xxxx")
extraCommitLat = 0
srcRegsRelativeLats = []
extraCommitLatExpr = udiv_lat_expr
+
class HPI_UMAAL_T1(MinorFUTiming):
- description = 'HPI_UMAAL_T1'
- mask, match = t32_opcode('1111_1011_1110_xxxx__xxxx_xxxx_0110_xxxx')
+ description = "HPI_UMAAL_T1"
+ mask, match = t32_opcode("1111_1011_1110_xxxx__xxxx_xxxx_0110_xxxx")
# z, z, z, dlo, dhi, l, r
extraCommitLat = 1
srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 0, 0]
+
class HPI_UMAAL_A1(MinorFUTiming):
- description = 'HPI_UMAAL_A1'
- mask, match = a32_opcode('xxxx_0000_0100_xxxx__xxxx_xxxx_1001_xxxx')
+ description = "HPI_UMAAL_A1"
+ mask, match = a32_opcode("xxxx_0000_0100_xxxx__xxxx_xxxx_1001_xxxx")
# z, z, z, dlo, dhi, l, r
extraCommitLat = 1
srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 0, 0]
+
class HPI_UMLAL_T1(MinorFUTiming):
- description = 'HPI_UMLAL_T1'
- mask, match = t32_opcode('1111_1011_1110_xxxx__xxxx_xxxx_0000_xxxx')
+ description = "HPI_UMLAL_T1"
+ mask, match = t32_opcode("1111_1011_1110_xxxx__xxxx_xxxx_0000_xxxx")
+
class HPI_UMLAL_A1(MinorFUTiming):
- description = 'HPI_UMLAL_A1'
- mask, match = t32_opcode('xxxx_0000_101x_xxxx__xxxx_xxxx_1001_xxxx')
+ description = "HPI_UMLAL_A1"
+ mask, match = t32_opcode("xxxx_0000_101x_xxxx__xxxx_xxxx_1001_xxxx")
+
class HPI_UMULL_T1(MinorFUTiming):
- description = 'HPI_UMULL_T1'
- mask, match = t32_opcode('1111_1011_1010_xxxx__xxxx_xxxx_0000_xxxx')
+ description = "HPI_UMULL_T1"
+ mask, match = t32_opcode("1111_1011_1010_xxxx__xxxx_xxxx_0000_xxxx")
+
class HPI_UMULL_A1(MinorFUTiming):
- description = 'HPI_UMULL_A1'
- mask, match = a32_opcode('xxxx_0000_100x_xxxx__xxxx_xxxx_1001_xxxx')
+ description = "HPI_UMULL_A1"
+ mask, match = a32_opcode("xxxx_0000_100x_xxxx__xxxx_xxxx_1001_xxxx")
+
class HPI_USAD8_USADA8_A1(MinorFUTiming):
- description = 'HPI_USAD8_USADA8_A1'
- mask, match = a32_opcode('xxxx_0111_1000_xxxx__xxxx_xxxx_0001_xxxx')
+ description = "HPI_USAD8_USADA8_A1"
+ mask, match = a32_opcode("xxxx_0111_1000_xxxx__xxxx_xxxx_0001_xxxx")
srcRegsRelativeLats = [0, 0, 0, 0, 0, 2, 0]
+
class HPI_USAD8_USADA8_A1_Suppress(MinorFUTiming):
- description = 'HPI_USAD8_USADA8_A1_Suppress'
- mask, match = a32_opcode('xxxx_0111_1000_xxxx__xxxx_xxxx_0001_xxxx')
+ description = "HPI_USAD8_USADA8_A1_Suppress"
+ mask, match = a32_opcode("xxxx_0111_1000_xxxx__xxxx_xxxx_0001_xxxx")
srcRegsRelativeLats = []
suppress = True
+
class HPI_VMOV_immediate_A1(MinorFUTiming):
- description = 'HPI_VMOV_register_A1'
- mask, match = a32_opcode('1111_0010_0x10_xxxx_xxxx_0001_xxx1_xxxx')
+ description = "HPI_VMOV_register_A1"
+ mask, match = a32_opcode("1111_0010_0x10_xxxx_xxxx_0001_xxx1_xxxx")
# cpsr, z, z, z, hcptr, nsacr, cpacr, fpexc, scr
srcRegsRelativeLats = [5, 5, 5, 5, 5, 5, 5, 5, 5, 0]
+
class HPI_VMRS_A1(MinorFUTiming):
- description = 'HPI_VMRS_A1'
- mask, match = a32_opcode('xxxx_1110_1111_0001_xxxx_1010_xxx1_xxxx')
+ description = "HPI_VMRS_A1"
+ mask, match = a32_opcode("xxxx_1110_1111_0001_xxxx_1010_xxx1_xxxx")
    # cpsr, z, z, z, hcptr, nsacr, cpacr, scr, r42
srcRegsRelativeLats = [5, 5, 5, 5, 5, 5, 5, 5, 5, 0]
+
class HPI_VMOV_register_A2(MinorFUTiming):
- description = 'HPI_VMOV_register_A2'
- mask, match = a32_opcode('xxxx_1110_1x11_0000_xxxx_101x_01x0_xxxx')
+ description = "HPI_VMOV_register_A2"
+ mask, match = a32_opcode("xxxx_1110_1x11_0000_xxxx_101x_01x0_xxxx")
# cpsr, z, r39, z, hcptr, nsacr, cpacr, fpexc, scr, f4, f5, f0, f1
- srcRegsRelativeLats = \
- [5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+ srcRegsRelativeLats = [
+ 5,
+ 5,
+ 5,
+ 5,
+ 5,
+ 5,
+ 5,
+ 5,
+ 5,
+ 4,
+ 4,
+ 4,
+ 4,
+ 4,
+ 4,
+ 4,
+ 4,
+ 0,
+ ]
+
# VADD.I16 D/VADD.F32 D/VADD.I8 D/VADD.I32 D
class HPI_VADD2H_A32(MinorFUTiming):
- description = 'Vadd2hALU'
- mask, match = a32_opcode('1111_0010_0xxx_xxxx__xxxx_1000_xxx0_xxxx')
+ description = "Vadd2hALU"
+ mask, match = a32_opcode("1111_0010_0xxx_xxxx__xxxx_1000_xxx0_xxxx")
# cpsr, z, z, z, cpacr, fpexc, l0, r0, l1, r1, l2, r2, l3, r3 (for vadd2h)
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
# VADDHN.I16 Q/VADDHN.I32 Q/VADDHN.I64 Q
class HPI_VADDHN_A32(MinorFUTiming):
- description = 'VaddhnALU'
- mask, match = a32_opcode('1111_0010_1xxx_xxxx__xxxx_0100_x0x0_xxxx')
+ description = "VaddhnALU"
+ mask, match = a32_opcode("1111_0010_1xxx_xxxx__xxxx_0100_x0x0_xxxx")
# cpsr, z, z, z, cpacr, fpexc, l0, l1, l2, l3, r0, r1, r2, r3
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0]
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0]
+
class HPI_VADDL_A32(MinorFUTiming):
- description = 'VaddlALU'
- mask, match = a32_opcode('1111_001x_1xxx_xxxx__xxxx_0000_x0x0_xxxx')
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0]
+ description = "VaddlALU"
+ mask, match = a32_opcode("1111_001x_1xxx_xxxx__xxxx_0000_x0x0_xxxx")
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0]
+
class HPI_VADDW_A32(MinorFUTiming):
- description = 'HPI_VADDW_A32'
- mask, match = a32_opcode('1111_001x_1xxx_xxxx__xxxx_0001_x0x0_xxxx')
+ description = "HPI_VADDW_A32"
+ mask, match = a32_opcode("1111_001x_1xxx_xxxx__xxxx_0001_x0x0_xxxx")
# cpsr, z, z, z, cpacr, fpexc, l0, l1, l2, l3, r0, r1
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 3, 3, 0]
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 3, 3, 0]
+
# VHADD/VHSUB S8,S16,S32,U8,U16,U32 Q and D
class HPI_VHADD_A32(MinorFUTiming):
- description = 'HPI_VHADD_A32'
- mask, match = a32_opcode('1111_001x_0xxx_xxxx__xxxx_00x0_xxx0_xxxx')
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+ description = "HPI_VHADD_A32"
+ mask, match = a32_opcode("1111_001x_0xxx_xxxx__xxxx_00x0_xxx0_xxxx")
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
class HPI_VPADAL_A32(MinorFUTiming):
- description = 'VpadalALU'
- mask, match = a32_opcode('1111_0011_1x11_xx00__xxxx_0110_xxx0_xxxx')
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0]
+ description = "VpadalALU"
+ mask, match = a32_opcode("1111_0011_1x11_xx00__xxxx_0110_xxx0_xxxx")
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0]
+
# VPADDH.I16
class HPI_VPADDH_A32(MinorFUTiming):
- description = 'VpaddhALU'
- mask, match = a32_opcode('1111_0010_0xxx_xxxx__xxxx_1011_xxx1_xxxx')
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0]
+ description = "VpaddhALU"
+ mask, match = a32_opcode("1111_0010_0xxx_xxxx__xxxx_1011_xxx1_xxxx")
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0]
+
# VPADDH.F32
class HPI_VPADDS_A32(MinorFUTiming):
- description = 'VpaddsALU'
- mask, match = a32_opcode('1111_0011_0x0x_xxxx__xxxx_1101_xxx0_xxxx')
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0]
+ description = "VpaddsALU"
+ mask, match = a32_opcode("1111_0011_0x0x_xxxx__xxxx_1101_xxx0_xxxx")
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0]
+
# VPADDL.S16
class HPI_VPADDL_A32(MinorFUTiming):
- description = 'VpaddlALU'
- mask, match = a32_opcode('1111_0011_1x11_xx00__xxxx_0010_xxx0_xxxx')
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0]
+ description = "VpaddlALU"
+ mask, match = a32_opcode("1111_0011_1x11_xx00__xxxx_0010_xxx0_xxxx")
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0]
+
# VRADDHN.I16
class HPI_VRADDHN_A32(MinorFUTiming):
- description = 'HPI_VRADDHN_A32'
- mask, match = a32_opcode('1111_0011_1xxx_xxxx__xxxx_0100_x0x0_xxxx')
+ description = "HPI_VRADDHN_A32"
+ mask, match = a32_opcode("1111_0011_1xxx_xxxx__xxxx_0100_x0x0_xxxx")
# cpsr, z, z, z, cpacr, fpexc, l0, l1, l2, l3, r0, r1, r2, r3
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
class HPI_VRHADD_A32(MinorFUTiming):
- description = 'VrhaddALU'
- mask, match = a32_opcode('1111_001x_0xxx_xxxx__xxxx_0001_xxx0_xxxx')
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+ description = "VrhaddALU"
+ mask, match = a32_opcode("1111_001x_0xxx_xxxx__xxxx_0001_xxx0_xxxx")
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
class HPI_VQADD_A32(MinorFUTiming):
- description = 'VqaddALU'
- mask, match = a32_opcode('1111_001x_0xxx_xxxx__xxxx_0000_xxx1_xxxx')
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0]
+ description = "VqaddALU"
+ mask, match = a32_opcode("1111_001x_0xxx_xxxx__xxxx_0000_xxx1_xxxx")
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0]
+
class HPI_VANDQ_A32(MinorFUTiming):
- description = 'VandqALU'
- mask, match = a32_opcode('1111_0010_0x00_xxxx__xxxx_0001_xxx1_xxxx')
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 0]
+ description = "VandqALU"
+ mask, match = a32_opcode("1111_0010_0x00_xxxx__xxxx_0001_xxx1_xxxx")
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 0]
+
# VMUL (integer)
class HPI_VMULI_A32(MinorFUTiming):
- description = 'VmuliALU'
- mask, match = a32_opcode('1111_001x_0xxx_xxxx__xxxx_1001_xxx1_xxxx')
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0]
+ description = "VmuliALU"
+ mask, match = a32_opcode("1111_001x_0xxx_xxxx__xxxx_1001_xxx1_xxxx")
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0]
+
# VBIC (reg)
class HPI_VBIC_A32(MinorFUTiming):
- description = 'VbicALU'
- mask, match = a32_opcode('1111_0010_0x01_xxxx__xxxx_0001_xxx1_xxxx')
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 0]
+ description = "VbicALU"
+ mask, match = a32_opcode("1111_0010_0x01_xxxx__xxxx_0001_xxx1_xxxx")
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 0]
+
# VBIF VBIT VBSL
class HPI_VBIF_ETC_A32(MinorFUTiming):
- description = 'VbifALU'
- mask, match = a32_opcode('1111_0011_0xxx_xxxx__xxxx_0001_xxx1_xxxx')
- srcRegsRelativeLats = \
- [0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0]
+ description = "VbifALU"
+ mask, match = a32_opcode("1111_0011_0xxx_xxxx__xxxx_0001_xxx1_xxxx")
+ srcRegsRelativeLats = [
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 5,
+ 5,
+ 5,
+ 5,
+ 5,
+ 5,
+ 5,
+ 5,
+ 5,
+ 5,
+ 5,
+ 5,
+ 0,
+ ]
+
class HPI_VACGE_A32(MinorFUTiming):
- description = 'VacgeALU'
- mask, match = a32_opcode('1111_0011_0xxx_xxxx__xxxx_1110_xxx1_xxxx')
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+ description = "VacgeALU"
+ mask, match = a32_opcode("1111_0011_0xxx_xxxx__xxxx_1110_xxx1_xxxx")
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
# VCEQ.F32
class HPI_VCEQ_A32(MinorFUTiming):
- description = 'VceqALU'
- mask, match = a32_opcode('1111_0010_0x0x_xxxx__xxxx_1110_xxx0_xxxx')
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+ description = "VceqALU"
+ mask, match = a32_opcode("1111_0010_0x0x_xxxx__xxxx_1110_xxx0_xxxx")
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
# VCEQ.[IS]... register
class HPI_VCEQI_A32(MinorFUTiming):
- description = 'VceqiALU'
- mask, match = a32_opcode('1111_0011_0xxx_xxxx__xxxx_1000_xxx1_xxxx')
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+ description = "VceqiALU"
+ mask, match = a32_opcode("1111_0011_0xxx_xxxx__xxxx_1000_xxx1_xxxx")
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
# VCEQ.[IS]... immediate
class HPI_VCEQII_A32(MinorFUTiming):
- description = 'HPI_VCEQII_A32'
- mask, match = a32_opcode('1111_0011_1x11_xx01__xxxx_0x01_0xx0_xxxx')
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+ description = "HPI_VCEQII_A32"
+ mask, match = a32_opcode("1111_0011_1x11_xx01__xxxx_0x01_0xx0_xxxx")
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
class HPI_VTST_A32(MinorFUTiming):
- description = 'HPI_VTST_A32'
- mask, match = a32_opcode('1111_0010_0xxx_xxxx__xxxx_1000_xxx1_xxxx')
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0]
+ description = "HPI_VTST_A32"
+ mask, match = a32_opcode("1111_0010_0xxx_xxxx__xxxx_1000_xxx1_xxxx")
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0]
+
class HPI_VCLZ_A32(MinorFUTiming):
- description = 'HPI_VCLZ_A32'
- mask, match = a32_opcode('1111_0011_1x11_xx00__xxxx_0100_1xx0_xxxx')
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+ description = "HPI_VCLZ_A32"
+ mask, match = a32_opcode("1111_0011_1x11_xx00__xxxx_0100_1xx0_xxxx")
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
class HPI_VCNT_A32(MinorFUTiming):
- description = 'HPI_VCNT_A32'
- mask, match = a32_opcode('1111_0011_1x11_xx00__xxxx_0101_0xx0_xxxx')
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+ description = "HPI_VCNT_A32"
+ mask, match = a32_opcode("1111_0011_1x11_xx00__xxxx_0101_0xx0_xxxx")
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
class HPI_VEXT_A32(MinorFUTiming):
- description = 'HPI_VCNT_A32'
- mask, match = a32_opcode('1111_0010_1x11_xxxx__xxxx_xxxx_xxx0_xxxx')
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+ description = "HPI_VCNT_A32"
+ mask, match = a32_opcode("1111_0010_1x11_xxxx__xxxx_xxxx_xxx0_xxxx")
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
# VMAX VMIN integer
class HPI_VMAXI_A32(MinorFUTiming):
- description = 'HPI_VMAXI_A32'
- mask, match = a32_opcode('1111_001x_0xxx_xxxx__xxxx_0110_xxxx_xxxx')
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+ description = "HPI_VMAXI_A32"
+ mask, match = a32_opcode("1111_001x_0xxx_xxxx__xxxx_0110_xxxx_xxxx")
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
# VMAX VMIN float
class HPI_VMAXS_A32(MinorFUTiming):
- description = 'HPI_VMAXS_A32'
- mask, match = a32_opcode('1111_0010_0xxx_xxxx__xxxx_1111_xxx0_xxxx')
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0]
+ description = "HPI_VMAXS_A32"
+ mask, match = a32_opcode("1111_0010_0xxx_xxxx__xxxx_1111_xxx0_xxxx")
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0]
+
# VNEG integer
class HPI_VNEGI_A32(MinorFUTiming):
- description = 'HPI_VNEGI_A32'
- mask, match = a32_opcode('1111_0011_1x11_xx01__xxxx_0x11_1xx0_xxxx')
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+ description = "HPI_VNEGI_A32"
+ mask, match = a32_opcode("1111_0011_1x11_xx01__xxxx_0x11_1xx0_xxxx")
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
# VNEG float
class HPI_VNEGF_A32(MinorFUTiming):
- description = 'HPI_VNEGF_A32'
- mask, match = a32_opcode('xxxx_1110_1x11_0001__xxxx_101x_01x0_xxxx')
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0]
+ description = "HPI_VNEGF_A32"
+ mask, match = a32_opcode("xxxx_1110_1x11_0001__xxxx_101x_01x0_xxxx")
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0]
+
# VREV16 VREV32 VREV64
class HPI_VREVN_A32(MinorFUTiming):
- description = 'HPI_VREVN_A32'
- mask, match = a32_opcode('1111_0011_1x11_xx00__xxxx_000x_xxx0_xxxx')
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+ description = "HPI_VREVN_A32"
+ mask, match = a32_opcode("1111_0011_1x11_xx00__xxxx_000x_xxx0_xxxx")
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
class HPI_VQNEG_A32(MinorFUTiming):
- description = 'HPI_VQNEG_A32'
- mask, match = a32_opcode('1111_0011_1x11_xx00__xxxx_0111_1xx0_xxxx')
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0]
+ description = "HPI_VQNEG_A32"
+ mask, match = a32_opcode("1111_0011_1x11_xx00__xxxx_0111_1xx0_xxxx")
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0]
+
class HPI_VSWP_A32(MinorFUTiming):
- description = 'HPI_VSWP_A32'
- mask, match = a32_opcode('1111_0011_1x11_xx10__xxxx_0000_0xx0_xxxx')
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+ description = "HPI_VSWP_A32"
+ mask, match = a32_opcode("1111_0011_1x11_xx10__xxxx_0000_0xx0_xxxx")
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
class HPI_VTRN_A32(MinorFUTiming):
- description = 'HPI_VTRN_A32'
- mask, match = a32_opcode('1111_0011_1x11_xx10__xxxx_0000_1xx0_xxxx')
+ description = "HPI_VTRN_A32"
+ mask, match = a32_opcode("1111_0011_1x11_xx10__xxxx_0000_1xx0_xxxx")
# cpsr, z, z, z, cpact, fpexc, o0, d0, o1, d1, o2, d2, o3, d3
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0]
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0]
+
# VQMOVN VQMOVUN
class HPI_VQMOVN_A32(MinorFUTiming):
- description = 'HPI_VQMOVN_A32'
- mask, match = a32_opcode('1111_0011_1x11_xx10__xxxx_0010_xxx0_xxxx')
+ description = "HPI_VQMOVN_A32"
+ mask, match = a32_opcode("1111_0011_1x11_xx10__xxxx_0010_xxx0_xxxx")
# cpsr, z, z, z, cpact, fpexc, o[0], o[1], o[2], o[3], fpscr
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 0]
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 0]
+
# VUZP double word
class HPI_VUZP_A32(MinorFUTiming):
- description = 'HPI_VUZP_A32'
- mask, match = a32_opcode('1111_0011_1x11_xx10__xxxx_0001_00x0_xxxx')
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0]
+ description = "HPI_VUZP_A32"
+ mask, match = a32_opcode("1111_0011_1x11_xx10__xxxx_0001_00x0_xxxx")
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0]
+
# VDIV.F32
class HPI_VDIV32_A32(MinorFUTiming):
- description = 'HPI_VDIV32_A32'
- mask, match = a32_opcode('xxxx_1110_1x00_xxxx__xxxx_1010_x0x0_xxxx')
+ description = "HPI_VDIV32_A32"
+ mask, match = a32_opcode("xxxx_1110_1x00_xxxx__xxxx_1010_x0x0_xxxx")
# cpsr, z, z, z, cpact, fpexc, fpscr_exc, l, r
extraCommitLat = 9
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 20, 4, 4, 0]
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 20, 4, 4, 0]
+
# VDIV.F64
class HPI_VDIV64_A32(MinorFUTiming):
- description = 'HPI_VDIV64_A32'
- mask, match = a32_opcode('xxxx_1110_1x00_xxxx__xxxx_1011_x0x0_xxxx')
+ description = "HPI_VDIV64_A32"
+ mask, match = a32_opcode("xxxx_1110_1x00_xxxx__xxxx_1011_x0x0_xxxx")
# cpsr, z, z, z, cpact, fpexc, fpscr_exc, l, r
extraCommitLat = 18
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 20, 4, 4, 0]
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 20, 4, 4, 0]
+
class HPI_VZIP_A32(MinorFUTiming):
- description = 'HPI_VZIP_A32'
- mask, match = a32_opcode('1111_0011_1x11_xx10__xxxx_0001_1xx0_xxxx')
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+ description = "HPI_VZIP_A32"
+ mask, match = a32_opcode("1111_0011_1x11_xx10__xxxx_0001_1xx0_xxxx")
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
# VPMAX integer
class HPI_VPMAX_A32(MinorFUTiming):
- description = 'HPI_VPMAX_A32'
- mask, match = a32_opcode('1111_001x_0xxx_xxxx__xxxx_1010_xxxx_xxxx')
+ description = "HPI_VPMAX_A32"
+ mask, match = a32_opcode("1111_001x_0xxx_xxxx__xxxx_1010_xxxx_xxxx")
# cpsr, z, z, z, cpact, fpexc, l0, r0, l1, r1, fpscr
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 0]
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 0]
+
# VPMAX float
class HPI_VPMAXF_A32(MinorFUTiming):
- description = 'HPI_VPMAXF_A32'
- mask, match = a32_opcode('1111_0011_0xxx_xxxx__xxxx_1111_xxx0_xxxx')
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 0]
+ description = "HPI_VPMAXF_A32"
+ mask, match = a32_opcode("1111_0011_0xxx_xxxx__xxxx_1111_xxx0_xxxx")
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 0]
+
class HPI_VMOVN_A32(MinorFUTiming):
- description = 'HPI_VMOVN_A32'
- mask, match = a32_opcode('1111_0011_1x11_xx10__xxxx_0010_00x0_xxxx')
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0]
+ description = "HPI_VMOVN_A32"
+ mask, match = a32_opcode("1111_0011_1x11_xx10__xxxx_0010_00x0_xxxx")
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0]
+
class HPI_VMOVL_A32(MinorFUTiming):
- description = 'HPI_VMOVL_A32'
- mask, match = a32_opcode('1111_001x_1xxx_x000__xxxx_1010_00x1_xxxx')
- srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0]
+ description = "HPI_VMOVL_A32"
+ mask, match = a32_opcode("1111_001x_1xxx_x000__xxxx_1010_00x1_xxxx")
+ srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0]
+
# VSQRT.F64
class HPI_VSQRT64_A32(MinorFUTiming):
- description = 'HPI_VSQRT64_A32'
- mask, match = a32_opcode('xxxx_1110_1x11_0001__xxxx_1011_11x0_xxxx')
+ description = "HPI_VSQRT64_A32"
+ mask, match = a32_opcode("xxxx_1110_1x11_0001__xxxx_1011_11x0_xxxx")
extraCommitLat = 18
srcRegsRelativeLats = []
+
# VSQRT.F32
class HPI_VSQRT32_A32(MinorFUTiming):
- description = 'HPI_VSQRT32_A32'
- mask, match = a32_opcode('xxxx_1110_1x11_0001__xxxx_1010_11x0_xxxx')
+ description = "HPI_VSQRT32_A32"
+ mask, match = a32_opcode("xxxx_1110_1x11_0001__xxxx_1010_11x0_xxxx")
extraCommitLat = 9
srcRegsRelativeLats = []
+
class HPI_FloatSimdFU(MinorFU):
- opClasses = minorMakeOpClassSet([
- 'FloatAdd', 'FloatCmp', 'FloatCvt', 'FloatMult', 'FloatDiv',
- 'FloatSqrt', 'FloatMisc', 'FloatMultAcc',
- 'SimdAdd', 'SimdAddAcc', 'SimdAlu', 'SimdCmp', 'SimdCvt',
- 'SimdMisc', 'SimdMult', 'SimdMultAcc', 'SimdShift', 'SimdShiftAcc',
- 'SimdSqrt', 'SimdFloatAdd', 'SimdFloatAlu', 'SimdFloatCmp',
- 'SimdFloatCvt', 'SimdFloatDiv', 'SimdFloatMisc', 'SimdFloatMult',
- 'SimdFloatMultAcc', 'SimdFloatSqrt'])
+ opClasses = minorMakeOpClassSet(
+ [
+ "FloatAdd",
+ "FloatCmp",
+ "FloatCvt",
+ "FloatMult",
+ "FloatDiv",
+ "FloatSqrt",
+ "FloatMisc",
+ "FloatMultAcc",
+ "SimdAdd",
+ "SimdAddAcc",
+ "SimdAlu",
+ "SimdCmp",
+ "SimdCvt",
+ "SimdMisc",
+ "SimdMult",
+ "SimdMultAcc",
+ "SimdShift",
+ "SimdShiftAcc",
+ "SimdSqrt",
+ "SimdFloatAdd",
+ "SimdFloatAlu",
+ "SimdFloatCmp",
+ "SimdFloatCvt",
+ "SimdFloatDiv",
+ "SimdFloatMisc",
+ "SimdFloatMult",
+ "SimdFloatMultAcc",
+ "SimdFloatSqrt",
+ ]
+ )
timings = [
# VUZP and VZIP must be before VADDW/L
- HPI_VUZP_A32(), HPI_VZIP_A32(),
- HPI_VADD2H_A32(), HPI_VADDHN_A32(),
- HPI_VADDL_A32(), HPI_VADDW_A32(),
- HPI_VHADD_A32(), HPI_VPADAL_A32(),
- HPI_VPADDH_A32(), HPI_VPADDS_A32(),
- HPI_VPADDL_A32(), HPI_VRADDHN_A32(),
- HPI_VRHADD_A32(), HPI_VQADD_A32(),
- HPI_VANDQ_A32(), HPI_VBIC_A32(),
- HPI_VBIF_ETC_A32(), HPI_VACGE_A32(),
- HPI_VCEQ_A32(), HPI_VCEQI_A32(),
- HPI_VCEQII_A32(), HPI_VTST_A32(),
- HPI_VCLZ_A32(), HPI_VCNT_A32(),
- HPI_VEXT_A32(), HPI_VMAXI_A32(),
- HPI_VMAXS_A32(), HPI_VNEGI_A32(),
- HPI_VNEGF_A32(), HPI_VREVN_A32(),
- HPI_VQNEG_A32(), HPI_VSWP_A32(),
- HPI_VTRN_A32(), HPI_VPMAX_A32(),
- HPI_VPMAXF_A32(), HPI_VMOVN_A32(),
+ HPI_VUZP_A32(),
+ HPI_VZIP_A32(),
+ HPI_VADD2H_A32(),
+ HPI_VADDHN_A32(),
+ HPI_VADDL_A32(),
+ HPI_VADDW_A32(),
+ HPI_VHADD_A32(),
+ HPI_VPADAL_A32(),
+ HPI_VPADDH_A32(),
+ HPI_VPADDS_A32(),
+ HPI_VPADDL_A32(),
+ HPI_VRADDHN_A32(),
+ HPI_VRHADD_A32(),
+ HPI_VQADD_A32(),
+ HPI_VANDQ_A32(),
+ HPI_VBIC_A32(),
+ HPI_VBIF_ETC_A32(),
+ HPI_VACGE_A32(),
+ HPI_VCEQ_A32(),
+ HPI_VCEQI_A32(),
+ HPI_VCEQII_A32(),
+ HPI_VTST_A32(),
+ HPI_VCLZ_A32(),
+ HPI_VCNT_A32(),
+ HPI_VEXT_A32(),
+ HPI_VMAXI_A32(),
+ HPI_VMAXS_A32(),
+ HPI_VNEGI_A32(),
+ HPI_VNEGF_A32(),
+ HPI_VREVN_A32(),
+ HPI_VQNEG_A32(),
+ HPI_VSWP_A32(),
+ HPI_VTRN_A32(),
+ HPI_VPMAX_A32(),
+ HPI_VPMAXF_A32(),
+ HPI_VMOVN_A32(),
HPI_VMRS_A1(),
HPI_VMOV_immediate_A1(),
HPI_VMOV_register_A2(),
- HPI_VQMOVN_A32(), HPI_VMOVL_A32(),
- HPI_VDIV32_A32(), HPI_VDIV64_A32(),
- HPI_VSQRT32_A32(), HPI_VSQRT64_A32(),
+ HPI_VQMOVN_A32(),
+ HPI_VMOVL_A32(),
+ HPI_VDIV32_A32(),
+ HPI_VDIV64_A32(),
+ HPI_VSQRT32_A32(),
+ HPI_VSQRT64_A32(),
HPI_VMULI_A32(),
# Add before here
HPI_FMADD_A64(),
@@ -1153,14 +1489,17 @@
HPI_FMOV_A64(),
HPI_ADD_SUB_vector_scalar_A64(),
HPI_ADD_SUB_vector_vector_A64(),
- HPI_FDIV_scalar_32_A64(), HPI_FDIV_scalar_64_A64(),
+ HPI_FDIV_scalar_32_A64(),
+ HPI_FDIV_scalar_64_A64(),
HPI_DefaultA64Vfp(),
- HPI_DefaultVfp()]
+ HPI_DefaultVfp(),
+ ]
opLat = 6
+
class HPI_IntFU(MinorFU):
- opClasses = minorMakeOpClassSet(['IntAlu'])
+ opClasses = minorMakeOpClassSet(["IntAlu"])
# IMPORTANT! Keep the order below, add new entries *at the head*
timings = [
HPI_SSAT_USAT_no_shift_A1(),
@@ -1179,17 +1518,14 @@
HPI_SASX_SHASX_UASX_UHASX_A1(),
HPI_SHSAX_SSAX_UHSAX_USAX_A1(),
HPI_SXTB_SXTB16_SXTH_UXTB_UXTB16_UXTH_A1(),
-
# Must be after HPI_SXTB_SXTB16_SXTH_UXTB_UXTB16_UXTH_A1
HPI_SXTAB_SXTAB16_SXTAH_UXTAB_UXTAB16_UXTAH_A1(),
-
HPI_SXTAB_T1(),
HPI_SXTAB16_T1(),
HPI_SXTAH_T1(),
HPI_SXTB_T2(),
HPI_SXTB16_T1(),
HPI_SXTH_T2(),
-
HPI_PKH_A1(),
HPI_PKH_T1(),
HPI_SBFX_UBFX_A1(),
@@ -1200,24 +1536,22 @@
HPI_USAD8_USADA8_A1(),
HPI_BFI_A1(),
HPI_BFI_T1(),
-
HPI_CMN_register_A1(),
HPI_CMN_immediate_A1(),
HPI_CMP_register_A1(),
HPI_CMP_immediate_A1(),
-
HPI_DataProcessingNoShift(),
HPI_DataProcessingMovShiftr(),
HPI_DataProcessingMayShift(),
-
HPI_Cxxx_A64(),
-
HPI_DefaultA64Int(),
- HPI_DefaultInt()]
+ HPI_DefaultInt(),
+ ]
opLat = 3
+
class HPI_Int2FU(MinorFU):
- opClasses = minorMakeOpClassSet(['IntAlu'])
+ opClasses = minorMakeOpClassSet(["IntAlu"])
# IMPORTANT! Keep the order below, add new entries *at the head*
timings = [
HPI_SSAT_USAT_no_shift_A1(),
@@ -1236,17 +1570,14 @@
HPI_SASX_SHASX_UASX_UHASX_A1(),
HPI_SHSAX_SSAX_UHSAX_USAX_A1(),
HPI_SXTB_SXTB16_SXTH_UXTB_UXTB16_UXTH_A1(),
-
# Must be after HPI_SXTB_SXTB16_SXTH_UXTB_UXTB16_UXTH_A1
HPI_SXTAB_SXTAB16_SXTAH_UXTAB_UXTAB16_UXTAH_A1(),
-
HPI_SXTAB_T1(),
HPI_SXTAB16_T1(),
HPI_SXTAH_T1(),
HPI_SXTB_T2(),
HPI_SXTB16_T1(),
HPI_SXTH_T2(),
-
HPI_PKH_A1(),
HPI_PKH_T1(),
HPI_SBFX_UBFX_A1(),
@@ -1257,16 +1588,13 @@
HPI_USAD8_USADA8_A1_Suppress(),
HPI_BFI_A1(),
HPI_BFI_T1(),
-
- HPI_CMN_register_A1(), # Need to check for shift
+ HPI_CMN_register_A1(), # Need to check for shift
HPI_CMN_immediate_A1(),
- HPI_CMP_register_A1(), # Need to check for shift
+ HPI_CMP_register_A1(), # Need to check for shift
HPI_CMP_immediate_A1(),
-
HPI_DataProcessingNoShift(),
HPI_DataProcessingAllowShifti(),
# HPI_DataProcessingAllowMovShiftr(),
-
# Data processing ops that match SuppressShift but are *not*
# to be suppressed here
HPI_CLZ_A1(),
@@ -1275,63 +1603,80 @@
# Can you dual issue a branch?
# HPI_DataProcessingSuppressBranch(),
HPI_Cxxx_A64(),
-
HPI_DefaultA64Int(),
- HPI_DefaultInt()]
+ HPI_DefaultInt(),
+ ]
opLat = 3
+
class HPI_IntMulFU(MinorFU):
- opClasses = minorMakeOpClassSet(['IntMult'])
+ opClasses = minorMakeOpClassSet(["IntMult"])
timings = [
- HPI_MLA_A1(), HPI_MLA_T1(),
- HPI_MLS_A1(), HPI_MLS_T1(),
- HPI_SMLABB_A1(), HPI_SMLABB_T1(),
- HPI_SMLAWB_A1(), HPI_SMLAWB_T1(),
- HPI_SMLAD_A1(), HPI_SMLAD_T1(),
- HPI_SMMUL_A1(), HPI_SMMUL_T1(),
+ HPI_MLA_A1(),
+ HPI_MLA_T1(),
+ HPI_MLS_A1(),
+ HPI_MLS_T1(),
+ HPI_SMLABB_A1(),
+ HPI_SMLABB_T1(),
+ HPI_SMLAWB_A1(),
+ HPI_SMLAWB_T1(),
+ HPI_SMLAD_A1(),
+ HPI_SMLAD_T1(),
+ HPI_SMMUL_A1(),
+ HPI_SMMUL_T1(),
# SMMUL_A1 must be before SMMLA_A1
- HPI_SMMLA_A1(), HPI_SMMLA_T1(),
- HPI_SMMLS_A1(), HPI_SMMLS_T1(),
- HPI_UMAAL_A1(), HPI_UMAAL_T1(),
-
+ HPI_SMMLA_A1(),
+ HPI_SMMLA_T1(),
+ HPI_SMMLS_A1(),
+ HPI_SMMLS_T1(),
+ HPI_UMAAL_A1(),
+ HPI_UMAAL_T1(),
HPI_MADD_A64(),
HPI_DefaultA64Mul(),
- HPI_DefaultMul()]
+ HPI_DefaultMul(),
+ ]
opLat = 3
- cantForwardFromFUIndices = [0, 1, 5] # Int1, Int2, Mem
+ cantForwardFromFUIndices = [0, 1, 5] # Int1, Int2, Mem
+
class HPI_IntDivFU(MinorFU):
- opClasses = minorMakeOpClassSet(['IntDiv'])
- timings = [HPI_SDIV_A1(), HPI_UDIV_A1(),
- HPI_SDIV_A64()]
+ opClasses = minorMakeOpClassSet(["IntDiv"])
+ timings = [HPI_SDIV_A1(), HPI_UDIV_A1(), HPI_SDIV_A64()]
issueLat = 3
opLat = 3
+
class HPI_MemFU(MinorFU):
- opClasses = minorMakeOpClassSet(['MemRead', 'MemWrite', 'FloatMemRead',
- 'FloatMemWrite'])
+ opClasses = minorMakeOpClassSet(
+ ["MemRead", "MemWrite", "FloatMemRead", "FloatMemWrite"]
+ )
timings = [HPI_DefaultMem(), HPI_DefaultMem64()]
opLat = 1
- cantForwardFromFUIndices = [5] # Mem (this FU)
+ cantForwardFromFUIndices = [5] # Mem (this FU)
+
class HPI_MiscFU(MinorFU):
- opClasses = minorMakeOpClassSet(['IprAccess', 'InstPrefetch'])
+ opClasses = minorMakeOpClassSet(["IprAccess", "InstPrefetch"])
opLat = 1
+
class HPI_FUPool(MinorFUPool):
- funcUnits = [HPI_IntFU(), # 0
- HPI_Int2FU(), # 1
- HPI_IntMulFU(), # 2
- HPI_IntDivFU(), # 3
- HPI_FloatSimdFU(), # 4
- HPI_MemFU(), # 5
- HPI_MiscFU() # 6
- ]
+ funcUnits = [
+ HPI_IntFU(), # 0
+ HPI_Int2FU(), # 1
+ HPI_IntMulFU(), # 2
+ HPI_IntDivFU(), # 3
+ HPI_FloatSimdFU(), # 4
+ HPI_MemFU(), # 5
+ HPI_MiscFU(), # 6
+ ]
+
class HPI_MMU(ArmMMU):
itb = ArmTLB(entry_type="instruction", size=256)
dtb = ArmTLB(entry_type="data", size=256)
+
class HPI_BP(TournamentBP):
localPredictorSize = 64
localCtrBits = 2
@@ -1345,28 +1690,29 @@
RASSize = 8
instShiftAmt = 2
+
class HPI_ICache(Cache):
data_latency = 1
tag_latency = 1
response_latency = 1
mshrs = 2
tgts_per_mshr = 8
- size = '32kB'
+ size = "32kB"
assoc = 2
# No prefetcher, this is handled by the core
+
class HPI_DCache(Cache):
data_latency = 1
tag_latency = 1
response_latency = 1
mshrs = 4
tgts_per_mshr = 8
- size = '32kB'
+ size = "32kB"
assoc = 4
write_buffers = 4
- prefetcher = StridePrefetcher(
- queue_size=4,
- degree=4)
+ prefetcher = StridePrefetcher(queue_size=4, degree=4)
+
class HPI_L2(Cache):
data_latency = 13
@@ -1374,11 +1720,12 @@
response_latency = 5
mshrs = 4
tgts_per_mshr = 8
- size = '1024kB'
+ size = "1024kB"
assoc = 16
write_buffers = 16
# prefetcher FIXME
+
class HPI(ArmMinorCPU):
# Inherit the doc string from the module to avoid repeating it
# here.
@@ -1430,9 +1777,13 @@
mmu = HPI_MMU()
+
__all__ = [
"HPI_BP",
- "HPI_ITB", "HPI_DTB",
- "HPI_ICache", "HPI_DCache", "HPI_L2",
+ "HPI_ITB",
+ "HPI_DTB",
+ "HPI_ICache",
+ "HPI_DCache",
+ "HPI_L2",
"HPI",
]
diff --git a/configs/common/cores/arm/O3_ARM_v7a.py b/configs/common/cores/arm/O3_ARM_v7a.py
index d032a1a..77dc4e4 100644
--- a/configs/common/cores/arm/O3_ARM_v7a.py
+++ b/configs/common/cores/arm/O3_ARM_v7a.py
@@ -28,65 +28,82 @@
# Simple ALU Instructions have a latency of 1
class O3_ARM_v7a_Simple_Int(FUDesc):
- opList = [ OpDesc(opClass='IntAlu', opLat=1) ]
+ opList = [OpDesc(opClass="IntAlu", opLat=1)]
count = 2
+
# Complex ALU instructions have variable latencies
class O3_ARM_v7a_Complex_Int(FUDesc):
- opList = [ OpDesc(opClass='IntMult', opLat=3, pipelined=True),
- OpDesc(opClass='IntDiv', opLat=12, pipelined=False),
- OpDesc(opClass='IprAccess', opLat=3, pipelined=True) ]
+ opList = [
+ OpDesc(opClass="IntMult", opLat=3, pipelined=True),
+ OpDesc(opClass="IntDiv", opLat=12, pipelined=False),
+ OpDesc(opClass="IprAccess", opLat=3, pipelined=True),
+ ]
count = 1
# Floating point and SIMD instructions
class O3_ARM_v7a_FP(FUDesc):
- opList = [ OpDesc(opClass='SimdAdd', opLat=4),
- OpDesc(opClass='SimdAddAcc', opLat=4),
- OpDesc(opClass='SimdAlu', opLat=4),
- OpDesc(opClass='SimdCmp', opLat=4),
- OpDesc(opClass='SimdCvt', opLat=3),
- OpDesc(opClass='SimdMisc', opLat=3),
- OpDesc(opClass='SimdMult',opLat=5),
- OpDesc(opClass='SimdMultAcc',opLat=5),
- OpDesc(opClass='SimdShift',opLat=3),
- OpDesc(opClass='SimdShiftAcc', opLat=3),
- OpDesc(opClass='SimdSqrt', opLat=9),
- OpDesc(opClass='SimdFloatAdd',opLat=5),
- OpDesc(opClass='SimdFloatAlu',opLat=5),
- OpDesc(opClass='SimdFloatCmp', opLat=3),
- OpDesc(opClass='SimdFloatCvt', opLat=3),
- OpDesc(opClass='SimdFloatDiv', opLat=3),
- OpDesc(opClass='SimdFloatMisc', opLat=3),
- OpDesc(opClass='SimdFloatMult', opLat=3),
- OpDesc(opClass='SimdFloatMultAcc',opLat=5),
- OpDesc(opClass='SimdFloatSqrt', opLat=9),
- OpDesc(opClass='FloatAdd', opLat=5),
- OpDesc(opClass='FloatCmp', opLat=5),
- OpDesc(opClass='FloatCvt', opLat=5),
- OpDesc(opClass='FloatDiv', opLat=9, pipelined=False),
- OpDesc(opClass='FloatSqrt', opLat=33, pipelined=False),
- OpDesc(opClass='FloatMult', opLat=4),
- OpDesc(opClass='FloatMultAcc', opLat=5),
- OpDesc(opClass='FloatMisc', opLat=3) ]
+ opList = [
+ OpDesc(opClass="SimdAdd", opLat=4),
+ OpDesc(opClass="SimdAddAcc", opLat=4),
+ OpDesc(opClass="SimdAlu", opLat=4),
+ OpDesc(opClass="SimdCmp", opLat=4),
+ OpDesc(opClass="SimdCvt", opLat=3),
+ OpDesc(opClass="SimdMisc", opLat=3),
+ OpDesc(opClass="SimdMult", opLat=5),
+ OpDesc(opClass="SimdMultAcc", opLat=5),
+ OpDesc(opClass="SimdShift", opLat=3),
+ OpDesc(opClass="SimdShiftAcc", opLat=3),
+ OpDesc(opClass="SimdSqrt", opLat=9),
+ OpDesc(opClass="SimdFloatAdd", opLat=5),
+ OpDesc(opClass="SimdFloatAlu", opLat=5),
+ OpDesc(opClass="SimdFloatCmp", opLat=3),
+ OpDesc(opClass="SimdFloatCvt", opLat=3),
+ OpDesc(opClass="SimdFloatDiv", opLat=3),
+ OpDesc(opClass="SimdFloatMisc", opLat=3),
+ OpDesc(opClass="SimdFloatMult", opLat=3),
+ OpDesc(opClass="SimdFloatMultAcc", opLat=5),
+ OpDesc(opClass="SimdFloatSqrt", opLat=9),
+ OpDesc(opClass="FloatAdd", opLat=5),
+ OpDesc(opClass="FloatCmp", opLat=5),
+ OpDesc(opClass="FloatCvt", opLat=5),
+ OpDesc(opClass="FloatDiv", opLat=9, pipelined=False),
+ OpDesc(opClass="FloatSqrt", opLat=33, pipelined=False),
+ OpDesc(opClass="FloatMult", opLat=4),
+ OpDesc(opClass="FloatMultAcc", opLat=5),
+ OpDesc(opClass="FloatMisc", opLat=3),
+ ]
count = 2
# Load/Store Units
class O3_ARM_v7a_Load(FUDesc):
- opList = [ OpDesc(opClass='MemRead',opLat=2),
- OpDesc(opClass='FloatMemRead',opLat=2) ]
+ opList = [
+ OpDesc(opClass="MemRead", opLat=2),
+ OpDesc(opClass="FloatMemRead", opLat=2),
+ ]
count = 1
+
class O3_ARM_v7a_Store(FUDesc):
- opList = [ OpDesc(opClass='MemWrite',opLat=2),
- OpDesc(opClass='FloatMemWrite',opLat=2) ]
+ opList = [
+ OpDesc(opClass="MemWrite", opLat=2),
+ OpDesc(opClass="FloatMemWrite", opLat=2),
+ ]
count = 1
+
# Functional Units for this CPU
class O3_ARM_v7a_FUP(FUPool):
- FUList = [O3_ARM_v7a_Simple_Int(), O3_ARM_v7a_Complex_Int(),
- O3_ARM_v7a_Load(), O3_ARM_v7a_Store(), O3_ARM_v7a_FP()]
+ FUList = [
+ O3_ARM_v7a_Simple_Int(),
+ O3_ARM_v7a_Complex_Int(),
+ O3_ARM_v7a_Load(),
+ O3_ARM_v7a_Store(),
+ O3_ARM_v7a_FP(),
+ ]
+
# Bi-Mode Branch Predictor
class O3_ARM_v7a_BP(BiModeBP):
@@ -99,6 +116,7 @@
RASSize = 16
instShiftAmt = 2
+
class O3_ARM_v7a_3(ArmO3CPU):
LQEntries = 16
SQEntries = 16
@@ -143,6 +161,7 @@
switched_out = False
branchPred = O3_ARM_v7a_BP()
+
# Instruction Cache
class O3_ARM_v7a_ICache(Cache):
tag_latency = 1
@@ -150,12 +169,13 @@
response_latency = 1
mshrs = 2
tgts_per_mshr = 8
- size = '32kB'
+ size = "32kB"
assoc = 2
is_read_only = True
# Writeback clean lines as well
writeback_clean = True
+
# Data Cache
class O3_ARM_v7a_DCache(Cache):
tag_latency = 2
@@ -163,12 +183,13 @@
response_latency = 2
mshrs = 6
tgts_per_mshr = 8
- size = '32kB'
+ size = "32kB"
assoc = 2
write_buffers = 16
# Consider the L2 a victim cache also for clean lines
writeback_clean = True
+
# L2 Cache
class O3_ARM_v7aL2(Cache):
tag_latency = 12
@@ -176,12 +197,12 @@
response_latency = 12
mshrs = 16
tgts_per_mshr = 8
- size = '1MB'
+ size = "1MB"
assoc = 16
write_buffers = 8
prefetch_on_access = True
- clusivity = 'mostly_excl'
+ clusivity = "mostly_excl"
# Simple stride prefetcher
- prefetcher = StridePrefetcher(degree=8, latency = 1)
+ prefetcher = StridePrefetcher(degree=8, latency=1)
tags = BaseSetAssoc()
replacement_policy = RandomRP()
diff --git a/configs/common/cores/arm/__init__.py b/configs/common/cores/arm/__init__.py
index dbc3b3e..135b75f 100644
--- a/configs/common/cores/arm/__init__.py
+++ b/configs/common/cores/arm/__init__.py
@@ -36,9 +36,7 @@
from pkgutil import iter_modules
from importlib import import_module
-_cpu_modules = [
- name for _, name, ispkg in iter_modules(__path__) if not ispkg
-]
+_cpu_modules = [name for _, name, ispkg in iter_modules(__path__) if not ispkg]
for c in _cpu_modules:
try:
diff --git a/configs/common/cores/arm/ex5_LITTLE.py b/configs/common/cores/arm/ex5_LITTLE.py
index 57f6a6b..6974837 100644
--- a/configs/common/cores/arm/ex5_LITTLE.py
+++ b/configs/common/cores/arm/ex5_LITTLE.py
@@ -27,70 +27,89 @@
from m5.objects import *
-#-----------------------------------------------------------------------
+# -----------------------------------------------------------------------
# ex5 LITTLE core (based on the ARM Cortex-A7)
-#-----------------------------------------------------------------------
+# -----------------------------------------------------------------------
# Simple ALU Instructions have a latency of 3
class ex5_LITTLE_Simple_Int(MinorDefaultIntFU):
- opList = [ OpDesc(opClass='IntAlu', opLat=4) ]
+ opList = [OpDesc(opClass="IntAlu", opLat=4)]
+
# Complex ALU instructions have variable latencies
class ex5_LITTLE_Complex_IntMul(MinorDefaultIntMulFU):
- opList = [ OpDesc(opClass='IntMult', opLat=7) ]
+ opList = [OpDesc(opClass="IntMult", opLat=7)]
+
class ex5_LITTLE_Complex_IntDiv(MinorDefaultIntDivFU):
- opList = [ OpDesc(opClass='IntDiv', opLat=9) ]
+ opList = [OpDesc(opClass="IntDiv", opLat=9)]
+
# Floating point and SIMD instructions
class ex5_LITTLE_FP(MinorDefaultFloatSimdFU):
- opList = [ OpDesc(opClass='SimdAdd', opLat=6),
- OpDesc(opClass='SimdAddAcc', opLat=4),
- OpDesc(opClass='SimdAlu', opLat=4),
- OpDesc(opClass='SimdCmp', opLat=1),
- OpDesc(opClass='SimdCvt', opLat=3),
- OpDesc(opClass='SimdMisc', opLat=3),
- OpDesc(opClass='SimdMult',opLat=4),
- OpDesc(opClass='SimdMultAcc',opLat=5),
- OpDesc(opClass='SimdShift',opLat=3),
- OpDesc(opClass='SimdShiftAcc', opLat=3),
- OpDesc(opClass='SimdSqrt', opLat=9),
- OpDesc(opClass='SimdFloatAdd',opLat=8),
- OpDesc(opClass='SimdFloatAlu',opLat=6),
- OpDesc(opClass='SimdFloatCmp', opLat=6),
- OpDesc(opClass='SimdFloatCvt', opLat=6),
- OpDesc(opClass='SimdFloatDiv', opLat=20, pipelined=False),
- OpDesc(opClass='SimdFloatMisc', opLat=6),
- OpDesc(opClass='SimdFloatMult', opLat=15),
- OpDesc(opClass='SimdFloatMultAcc',opLat=6),
- OpDesc(opClass='SimdFloatSqrt', opLat=17),
- OpDesc(opClass='FloatAdd', opLat=8),
- OpDesc(opClass='FloatCmp', opLat=6),
- OpDesc(opClass='FloatCvt', opLat=6),
- OpDesc(opClass='FloatDiv', opLat=15, pipelined=False),
- OpDesc(opClass='FloatSqrt', opLat=33),
- OpDesc(opClass='FloatMult', opLat=6) ]
+ opList = [
+ OpDesc(opClass="SimdAdd", opLat=6),
+ OpDesc(opClass="SimdAddAcc", opLat=4),
+ OpDesc(opClass="SimdAlu", opLat=4),
+ OpDesc(opClass="SimdCmp", opLat=1),
+ OpDesc(opClass="SimdCvt", opLat=3),
+ OpDesc(opClass="SimdMisc", opLat=3),
+ OpDesc(opClass="SimdMult", opLat=4),
+ OpDesc(opClass="SimdMultAcc", opLat=5),
+ OpDesc(opClass="SimdShift", opLat=3),
+ OpDesc(opClass="SimdShiftAcc", opLat=3),
+ OpDesc(opClass="SimdSqrt", opLat=9),
+ OpDesc(opClass="SimdFloatAdd", opLat=8),
+ OpDesc(opClass="SimdFloatAlu", opLat=6),
+ OpDesc(opClass="SimdFloatCmp", opLat=6),
+ OpDesc(opClass="SimdFloatCvt", opLat=6),
+ OpDesc(opClass="SimdFloatDiv", opLat=20, pipelined=False),
+ OpDesc(opClass="SimdFloatMisc", opLat=6),
+ OpDesc(opClass="SimdFloatMult", opLat=15),
+ OpDesc(opClass="SimdFloatMultAcc", opLat=6),
+ OpDesc(opClass="SimdFloatSqrt", opLat=17),
+ OpDesc(opClass="FloatAdd", opLat=8),
+ OpDesc(opClass="FloatCmp", opLat=6),
+ OpDesc(opClass="FloatCvt", opLat=6),
+ OpDesc(opClass="FloatDiv", opLat=15, pipelined=False),
+ OpDesc(opClass="FloatSqrt", opLat=33),
+ OpDesc(opClass="FloatMult", opLat=6),
+ ]
+
# Load/Store Units
class ex5_LITTLE_MemFU(MinorDefaultMemFU):
- opList = [ OpDesc(opClass='MemRead',opLat=1),
- OpDesc(opClass='MemWrite',opLat=1) ]
+ opList = [
+ OpDesc(opClass="MemRead", opLat=1),
+ OpDesc(opClass="MemWrite", opLat=1),
+ ]
+
# Misc Unit
class ex5_LITTLE_MiscFU(MinorDefaultMiscFU):
- opList = [ OpDesc(opClass='IprAccess',opLat=1),
- OpDesc(opClass='InstPrefetch',opLat=1) ]
+ opList = [
+ OpDesc(opClass="IprAccess", opLat=1),
+ OpDesc(opClass="InstPrefetch", opLat=1),
+ ]
+
# Functional Units for this CPU
class ex5_LITTLE_FUP(MinorFUPool):
- funcUnits = [ex5_LITTLE_Simple_Int(), ex5_LITTLE_Simple_Int(),
- ex5_LITTLE_Complex_IntMul(), ex5_LITTLE_Complex_IntDiv(),
- ex5_LITTLE_FP(), ex5_LITTLE_MemFU(),
- ex5_LITTLE_MiscFU()]
+ funcUnits = [
+ ex5_LITTLE_Simple_Int(),
+ ex5_LITTLE_Simple_Int(),
+ ex5_LITTLE_Complex_IntMul(),
+ ex5_LITTLE_Complex_IntDiv(),
+ ex5_LITTLE_FP(),
+ ex5_LITTLE_MemFU(),
+ ex5_LITTLE_MiscFU(),
+ ]
+
class ex5_LITTLE(ArmMinorCPU):
executeFuncUnits = ex5_LITTLE_FUP()
+
class L1Cache(Cache):
tag_latency = 2
data_latency = 2
@@ -99,19 +118,22 @@
# Consider the L2 a victim cache also for clean lines
writeback_clean = True
+
class L1I(L1Cache):
mshrs = 2
- size = '32kB'
+ size = "32kB"
assoc = 2
is_read_only = True
tgts_per_mshr = 20
+
class L1D(L1Cache):
mshrs = 4
- size = '32kB'
+ size = "32kB"
assoc = 4
write_buffers = 4
+
# L2 Cache
class L2(Cache):
tag_latency = 9
@@ -119,12 +141,12 @@
response_latency = 9
mshrs = 8
tgts_per_mshr = 12
- size = '512kB'
+ size = "512kB"
assoc = 8
write_buffers = 16
prefetch_on_access = True
- clusivity = 'mostly_excl'
+ clusivity = "mostly_excl"
# Simple stride prefetcher
- prefetcher = StridePrefetcher(degree=1, latency = 1)
+ prefetcher = StridePrefetcher(degree=1, latency=1)
tags = BaseSetAssoc()
replacement_policy = RandomRP()
diff --git a/configs/common/cores/arm/ex5_big.py b/configs/common/cores/arm/ex5_big.py
index de7a450..70af6b8 100644
--- a/configs/common/cores/arm/ex5_big.py
+++ b/configs/common/cores/arm/ex5_big.py
@@ -27,66 +27,80 @@
from m5.objects import *
-#-----------------------------------------------------------------------
+# -----------------------------------------------------------------------
# ex5 big core (based on the ARM Cortex-A15)
-#-----------------------------------------------------------------------
+# -----------------------------------------------------------------------
# Simple ALU Instructions have a latency of 1
class ex5_big_Simple_Int(FUDesc):
- opList = [ OpDesc(opClass='IntAlu', opLat=1) ]
+ opList = [OpDesc(opClass="IntAlu", opLat=1)]
count = 2
+
# Complex ALU instructions have variable latencies
class ex5_big_Complex_Int(FUDesc):
- opList = [ OpDesc(opClass='IntMult', opLat=4, pipelined=True),
- OpDesc(opClass='IntDiv', opLat=11, pipelined=False),
- OpDesc(opClass='IprAccess', opLat=3, pipelined=True) ]
+ opList = [
+ OpDesc(opClass="IntMult", opLat=4, pipelined=True),
+ OpDesc(opClass="IntDiv", opLat=11, pipelined=False),
+ OpDesc(opClass="IprAccess", opLat=3, pipelined=True),
+ ]
count = 1
+
# Floating point and SIMD instructions
class ex5_big_FP(FUDesc):
- opList = [ OpDesc(opClass='SimdAdd', opLat=3),
- OpDesc(opClass='SimdAddAcc', opLat=4),
- OpDesc(opClass='SimdAlu', opLat=4),
- OpDesc(opClass='SimdCmp', opLat=4),
- OpDesc(opClass='SimdCvt', opLat=3),
- OpDesc(opClass='SimdMisc', opLat=3),
- OpDesc(opClass='SimdMult',opLat=6),
- OpDesc(opClass='SimdMultAcc',opLat=5),
- OpDesc(opClass='SimdShift',opLat=3),
- OpDesc(opClass='SimdShiftAcc', opLat=3),
- OpDesc(opClass='SimdSqrt', opLat=9),
- OpDesc(opClass='SimdFloatAdd',opLat=6),
- OpDesc(opClass='SimdFloatAlu',opLat=5),
- OpDesc(opClass='SimdFloatCmp', opLat=3),
- OpDesc(opClass='SimdFloatCvt', opLat=3),
- OpDesc(opClass='SimdFloatDiv', opLat=21),
- OpDesc(opClass='SimdFloatMisc', opLat=3),
- OpDesc(opClass='SimdFloatMult', opLat=6),
- OpDesc(opClass='SimdFloatMultAcc',opLat=1),
- OpDesc(opClass='SimdFloatSqrt', opLat=9),
- OpDesc(opClass='FloatAdd', opLat=6),
- OpDesc(opClass='FloatCmp', opLat=5),
- OpDesc(opClass='FloatCvt', opLat=5),
- OpDesc(opClass='FloatDiv', opLat=12, pipelined=False),
- OpDesc(opClass='FloatSqrt', opLat=33, pipelined=False),
- OpDesc(opClass='FloatMult', opLat=8) ]
+ opList = [
+ OpDesc(opClass="SimdAdd", opLat=3),
+ OpDesc(opClass="SimdAddAcc", opLat=4),
+ OpDesc(opClass="SimdAlu", opLat=4),
+ OpDesc(opClass="SimdCmp", opLat=4),
+ OpDesc(opClass="SimdCvt", opLat=3),
+ OpDesc(opClass="SimdMisc", opLat=3),
+ OpDesc(opClass="SimdMult", opLat=6),
+ OpDesc(opClass="SimdMultAcc", opLat=5),
+ OpDesc(opClass="SimdShift", opLat=3),
+ OpDesc(opClass="SimdShiftAcc", opLat=3),
+ OpDesc(opClass="SimdSqrt", opLat=9),
+ OpDesc(opClass="SimdFloatAdd", opLat=6),
+ OpDesc(opClass="SimdFloatAlu", opLat=5),
+ OpDesc(opClass="SimdFloatCmp", opLat=3),
+ OpDesc(opClass="SimdFloatCvt", opLat=3),
+ OpDesc(opClass="SimdFloatDiv", opLat=21),
+ OpDesc(opClass="SimdFloatMisc", opLat=3),
+ OpDesc(opClass="SimdFloatMult", opLat=6),
+ OpDesc(opClass="SimdFloatMultAcc", opLat=1),
+ OpDesc(opClass="SimdFloatSqrt", opLat=9),
+ OpDesc(opClass="FloatAdd", opLat=6),
+ OpDesc(opClass="FloatCmp", opLat=5),
+ OpDesc(opClass="FloatCvt", opLat=5),
+ OpDesc(opClass="FloatDiv", opLat=12, pipelined=False),
+ OpDesc(opClass="FloatSqrt", opLat=33, pipelined=False),
+ OpDesc(opClass="FloatMult", opLat=8),
+ ]
count = 2
# Load/Store Units
class ex5_big_Load(FUDesc):
- opList = [ OpDesc(opClass='MemRead',opLat=2) ]
+ opList = [OpDesc(opClass="MemRead", opLat=2)]
count = 1
+
class ex5_big_Store(FUDesc):
- opList = [OpDesc(opClass='MemWrite',opLat=2) ]
+ opList = [OpDesc(opClass="MemWrite", opLat=2)]
count = 1
+
# Functional Units for this CPU
class ex5_big_FUP(FUPool):
- FUList = [ex5_big_Simple_Int(), ex5_big_Complex_Int(),
- ex5_big_Load(), ex5_big_Store(), ex5_big_FP()]
+ FUList = [
+ ex5_big_Simple_Int(),
+ ex5_big_Complex_Int(),
+ ex5_big_Load(),
+ ex5_big_Store(),
+ ex5_big_FP(),
+ ]
+
# Bi-Mode Branch Predictor
class ex5_big_BP(BiModeBP):
@@ -99,6 +113,7 @@
RASSize = 48
instShiftAmt = 2
+
class ex5_big(ArmO3CPU):
LQEntries = 16
SQEntries = 16
@@ -142,6 +157,7 @@
switched_out = False
branchPred = ex5_big_BP()
+
class L1Cache(Cache):
tag_latency = 2
data_latency = 2
@@ -150,20 +166,23 @@
# Consider the L2 a victim cache also for clean lines
writeback_clean = True
+
# Instruction Cache
class L1I(L1Cache):
mshrs = 2
- size = '32kB'
+ size = "32kB"
assoc = 2
is_read_only = True
+
# Data Cache
class L1D(L1Cache):
mshrs = 6
- size = '32kB'
+ size = "32kB"
assoc = 2
write_buffers = 16
+
# L2 Cache
class L2(Cache):
tag_latency = 15
@@ -171,12 +190,12 @@
response_latency = 15
mshrs = 16
tgts_per_mshr = 8
- size = '2MB'
+ size = "2MB"
assoc = 16
write_buffers = 8
prefetch_on_access = True
- clusivity = 'mostly_excl'
+ clusivity = "mostly_excl"
# Simple stride prefetcher
- prefetcher = StridePrefetcher(degree=8, latency = 1)
+ prefetcher = StridePrefetcher(degree=8, latency=1)
tags = BaseSetAssoc()
replacement_policy = RandomRP()
diff --git a/configs/common/cpu2000.py b/configs/common/cpu2000.py
index 266bba0..3b1b390 100644
--- a/configs/common/cpu2000.py
+++ b/configs/common/cpu2000.py
@@ -29,7 +29,8 @@
from os.path import basename, exists, join as joinpath, normpath
from os.path import isdir, isfile, islink
-spec_dist = os.environ.get('M5_CPU2000', '/dist/m5/cpu2000')
+spec_dist = os.environ.get("M5_CPU2000", "/dist/m5/cpu2000")
+
def copyfiles(srcdir, dstdir):
from filecmp import cmp as filecmp
@@ -45,8 +46,8 @@
root = normpath(root)
prefix = os.path.commonprefix([root, srcdir])
- root = root[len(prefix):]
- if root.startswith('/'):
+ root = root[len(prefix) :]
+ if root.startswith("/"):
root = root[1:]
for entry in dirs:
@@ -62,68 +63,70 @@
# some of the spec benchmarks expect to be run from one directory up.
# just create some symlinks that solve the problem
- inlink = joinpath(dstdir, 'input')
- outlink = joinpath(dstdir, 'output')
+ inlink = joinpath(dstdir, "input")
+ outlink = joinpath(dstdir, "output")
if not exists(inlink):
- os.symlink('.', inlink)
+ os.symlink(".", inlink)
if not exists(outlink):
- os.symlink('.', outlink)
+ os.symlink(".", outlink)
+
class Benchmark(object):
def __init__(self, isa, os, input_set):
- if not hasattr(self.__class__, 'name'):
+ if not hasattr(self.__class__, "name"):
self.name = self.__class__.__name__
- if not hasattr(self.__class__, 'binary'):
+ if not hasattr(self.__class__, "binary"):
self.binary = self.name
- if not hasattr(self.__class__, 'args'):
+ if not hasattr(self.__class__, "args"):
self.args = []
- if not hasattr(self.__class__, 'output'):
- self.output = '%s.out' % self.name
+ if not hasattr(self.__class__, "output"):
+ self.output = "%s.out" % self.name
- if not hasattr(self.__class__, 'simpoint'):
+ if not hasattr(self.__class__, "simpoint"):
self.simpoint = None
try:
func = getattr(self.__class__, input_set)
except AttributeError:
raise AttributeError(
- 'The benchmark %s does not have the %s input set' % \
- (self.name, input_set))
+ "The benchmark %s does not have the %s input set"
+ % (self.name, input_set)
+ )
- executable = joinpath(spec_dist, 'binaries', isa, os, self.binary)
+ executable = joinpath(spec_dist, "binaries", isa, os, self.binary)
if not isfile(executable):
- raise AttributeError('%s not found' % executable)
+ raise AttributeError("%s not found" % executable)
self.executable = executable
# root of tree for input & output data files
- data_dir = joinpath(spec_dist, 'data', self.name)
+ data_dir = joinpath(spec_dist, "data", self.name)
# optional subtree with files shared across input sets
- all_dir = joinpath(data_dir, 'all')
+ all_dir = joinpath(data_dir, "all")
# dirs for input & output files for this input set
- inputs_dir = joinpath(data_dir, input_set, 'input')
- outputs_dir = joinpath(data_dir, input_set, 'output')
+ inputs_dir = joinpath(data_dir, input_set, "input")
+ outputs_dir = joinpath(data_dir, input_set, "output")
# keep around which input set was specified
self.input_set = input_set
if not isdir(inputs_dir):
- raise AttributeError('%s not found' % inputs_dir)
+ raise AttributeError("%s not found" % inputs_dir)
- self.inputs_dir = [ inputs_dir ]
+ self.inputs_dir = [inputs_dir]
if isdir(all_dir):
- self.inputs_dir += [ joinpath(all_dir, 'input') ]
+ self.inputs_dir += [joinpath(all_dir, "input")]
if isdir(outputs_dir):
self.outputs_dir = outputs_dir
- if not hasattr(self.__class__, 'stdin'):
- self.stdin = joinpath(inputs_dir, '%s.in' % self.name)
+ if not hasattr(self.__class__, "stdin"):
+ self.stdin = joinpath(inputs_dir, "%s.in" % self.name)
if not isfile(self.stdin):
self.stdin = None
- if not hasattr(self.__class__, 'stdout'):
- self.stdout = joinpath(outputs_dir, '%s.out' % self.name)
+ if not hasattr(self.__class__, "stdout"):
+ self.stdout = joinpath(outputs_dir, "%s.out" % self.name)
if not isfile(self.stdout):
self.stdout = None
@@ -132,14 +135,14 @@
def makeProcessArgs(self, **kwargs):
# set up default args for Process object
process_args = {}
- process_args['cmd'] = [ self.name ] + self.args
- process_args['executable'] = self.executable
+ process_args["cmd"] = [self.name] + self.args
+ process_args["executable"] = self.executable
if self.stdin:
- process_args['input'] = self.stdin
+ process_args["input"] = self.stdin
if self.stdout:
- process_args['output'] = self.stdout
+ process_args["output"] = self.stdout
if self.simpoint:
- process_args['simpoint'] = self.simpoint
+ process_args["simpoint"] = self.simpoint
# explicit keywords override defaults
process_args.update(kwargs)
@@ -150,12 +153,13 @@
# figure out working directory: use m5's outdir unless
# overridden by Process's cwd param
- cwd = process_args.get('cwd')
+ cwd = process_args.get("cwd")
if not cwd:
from m5 import options
+
cwd = options.outdir
- process_args['cwd'] = cwd
+ process_args["cwd"] = cwd
if not isdir(cwd):
os.makedirs(cwd)
# copy input files to working directory
@@ -163,590 +167,872 @@
copyfiles(d, cwd)
# generate Process object
from m5.objects import Process
+
return Process(**process_args)
def __str__(self):
return self.name
+
class DefaultBenchmark(Benchmark):
- def ref(self, isa, os): pass
- def test(self, isa, os): pass
- def train(self, isa, os): pass
-
-class MinneDefaultBenchmark(DefaultBenchmark):
- def smred(self, isa, os): pass
- def mdred(self, isa, os): pass
- def lgred(self, isa, os): pass
-
-class ammp(MinneDefaultBenchmark):
- name = 'ammp'
- number = 188
- lang = 'C'
- simpoint = 108*100E6
-
-class applu(MinneDefaultBenchmark):
- name = 'applu'
- number = 173
- lang = 'F77'
- simpoint = 2179*100E6
-
-class apsi(MinneDefaultBenchmark):
- name = 'apsi'
- number = 301
- lang = 'F77'
- simpoint = 3408*100E6
-
-class art(DefaultBenchmark):
- name = 'art'
- number = 179
- lang = 'C'
+ def ref(self, isa, os):
+ pass
def test(self, isa, os):
- self.args = [ '-scanfile', 'c756hel.in',
- '-trainfile1', 'a10.img',
- '-stride', '2',
- '-startx', '134',
- '-starty', '220',
- '-endx', '139',
- '-endy', '225',
- '-objects', '1' ]
- self.output = 'test.out'
+ pass
def train(self, isa, os):
- self.args = [ '-scanfile', 'c756hel.in',
- '-trainfile1', 'a10.img',
- '-stride', '2',
- '-startx', '134',
- '-starty', '220',
- '-endx', '184',
- '-endy', '240',
- '-objects', '3' ]
- self.output = 'train.out'
+ pass
+
+
+class MinneDefaultBenchmark(DefaultBenchmark):
+ def smred(self, isa, os):
+ pass
+
+ def mdred(self, isa, os):
+ pass
def lgred(self, isa, os):
- self.args = ['-scanfile', 'c756hel.in',
- '-trainfile1', 'a10.img',
- '-stride', '5',
- '-startx', '134',
- '-starty', '220',
- '-endx', '184',
- '-endy', '240',
- '-objects', '1' ]
- self.output = 'lgred.out'
+ pass
+
+
+class ammp(MinneDefaultBenchmark):
+ name = "ammp"
+ number = 188
+ lang = "C"
+ simpoint = 108 * 100e6
+
+
+class applu(MinneDefaultBenchmark):
+ name = "applu"
+ number = 173
+ lang = "F77"
+ simpoint = 2179 * 100e6
+
+
+class apsi(MinneDefaultBenchmark):
+ name = "apsi"
+ number = 301
+ lang = "F77"
+ simpoint = 3408 * 100e6
+
+
+class art(DefaultBenchmark):
+ name = "art"
+ number = 179
+ lang = "C"
+
+ def test(self, isa, os):
+ self.args = [
+ "-scanfile",
+ "c756hel.in",
+ "-trainfile1",
+ "a10.img",
+ "-stride",
+ "2",
+ "-startx",
+ "134",
+ "-starty",
+ "220",
+ "-endx",
+ "139",
+ "-endy",
+ "225",
+ "-objects",
+ "1",
+ ]
+ self.output = "test.out"
+
+ def train(self, isa, os):
+ self.args = [
+ "-scanfile",
+ "c756hel.in",
+ "-trainfile1",
+ "a10.img",
+ "-stride",
+ "2",
+ "-startx",
+ "134",
+ "-starty",
+ "220",
+ "-endx",
+ "184",
+ "-endy",
+ "240",
+ "-objects",
+ "3",
+ ]
+ self.output = "train.out"
+
+ def lgred(self, isa, os):
+ self.args = [
+ "-scanfile",
+ "c756hel.in",
+ "-trainfile1",
+ "a10.img",
+ "-stride",
+ "5",
+ "-startx",
+ "134",
+ "-starty",
+ "220",
+ "-endx",
+ "184",
+ "-endy",
+ "240",
+ "-objects",
+ "1",
+ ]
+ self.output = "lgred.out"
class art110(art):
def ref(self, isa, os):
- self.args = [ '-scanfile', 'c756hel.in',
- '-trainfile1', 'a10.img',
- '-trainfile2', 'hc.img',
- '-stride', '2',
- '-startx', '110',
- '-starty', '200',
- '-endx', '160',
- '-endy', '240',
- '-objects', '10' ]
- self.output = 'ref.1.out'
- self.simpoint = 340*100E6
+ self.args = [
+ "-scanfile",
+ "c756hel.in",
+ "-trainfile1",
+ "a10.img",
+ "-trainfile2",
+ "hc.img",
+ "-stride",
+ "2",
+ "-startx",
+ "110",
+ "-starty",
+ "200",
+ "-endx",
+ "160",
+ "-endy",
+ "240",
+ "-objects",
+ "10",
+ ]
+ self.output = "ref.1.out"
+ self.simpoint = 340 * 100e6
+
class art470(art):
def ref(self, isa, os):
- self.args = [ '-scanfile', 'c756hel.in',
- '-trainfile1', 'a10.img',
- '-trainfile2', 'hc.img',
- '-stride', '2',
- '-startx', '470',
- '-starty', '140',
- '-endx', '520',
- '-endy', '180',
- '-objects', '10' ]
- self.output = 'ref.2.out'
- self.simpoint = 365*100E6
+ self.args = [
+ "-scanfile",
+ "c756hel.in",
+ "-trainfile1",
+ "a10.img",
+ "-trainfile2",
+ "hc.img",
+ "-stride",
+ "2",
+ "-startx",
+ "470",
+ "-starty",
+ "140",
+ "-endx",
+ "520",
+ "-endy",
+ "180",
+ "-objects",
+ "10",
+ ]
+ self.output = "ref.2.out"
+ self.simpoint = 365 * 100e6
+
class equake(DefaultBenchmark):
- name = 'equake'
+ name = "equake"
number = 183
- lang = 'C'
- simpoint = 812*100E6
+ lang = "C"
+ simpoint = 812 * 100e6
- def lgred(self, isa, os): pass
+ def lgred(self, isa, os):
+ pass
+
class facerec(MinneDefaultBenchmark):
- name = 'facerec'
+ name = "facerec"
number = 187
- lang = 'F'
- simpoint = 375*100E6
+ lang = "F"
+ simpoint = 375 * 100e6
+
class fma3d(MinneDefaultBenchmark):
- name = 'fma3d'
+ name = "fma3d"
number = 191
- lang = 'F'
- simpoint = 2541*100E6
+ lang = "F"
+ simpoint = 2541 * 100e6
+
class galgel(MinneDefaultBenchmark):
- name = 'galgel'
+ name = "galgel"
number = 178
- lang = 'F'
- simpoint = 2491*100E6
+ lang = "F"
+ simpoint = 2491 * 100e6
+
class lucas(MinneDefaultBenchmark):
- name = 'lucas'
+ name = "lucas"
number = 189
- lang = 'F'
- simpoint = 545*100E6
+ lang = "F"
+ simpoint = 545 * 100e6
+
class mesa(Benchmark):
- name = 'mesa'
+ name = "mesa"
number = 177
- lang = 'C'
+ lang = "C"
stdin = None
def __set_args(self, frames):
- self.args = [ '-frames', frames, '-meshfile', '%s.in' % self.name,
- '-ppmfile', '%s.ppm' % self.name ]
+ self.args = [
+ "-frames",
+ frames,
+ "-meshfile",
+ "%s.in" % self.name,
+ "-ppmfile",
+ "%s.ppm" % self.name,
+ ]
def test(self, isa, os):
- self.__set_args('10')
+ self.__set_args("10")
def train(self, isa, os):
- self.__set_args('500')
+ self.__set_args("500")
def ref(self, isa, os):
- self.__set_args('1000')
- self.simpoint = 1135*100E6
+ self.__set_args("1000")
+ self.simpoint = 1135 * 100e6
def lgred(self, isa, os):
- self.__set_args('1')
+ self.__set_args("1")
+
class mgrid(MinneDefaultBenchmark):
- name = 'mgrid'
+ name = "mgrid"
number = 172
- lang = 'F77'
- simpoint = 3292*100E6
+ lang = "F77"
+ simpoint = 3292 * 100e6
+
class sixtrack(DefaultBenchmark):
- name = 'sixtrack'
+ name = "sixtrack"
number = 200
- lang = 'F77'
- simpoint = 3043*100E6
+ lang = "F77"
+ simpoint = 3043 * 100e6
- def lgred(self, isa, os): pass
+ def lgred(self, isa, os):
+ pass
+
class swim(MinneDefaultBenchmark):
- name = 'swim'
+ name = "swim"
number = 171
- lang = 'F77'
- simpoint = 2079*100E6
+ lang = "F77"
+ simpoint = 2079 * 100e6
+
class wupwise(DefaultBenchmark):
- name = 'wupwise'
+ name = "wupwise"
number = 168
- lang = 'F77'
- simpoint = 3237*100E6
+ lang = "F77"
+ simpoint = 3237 * 100e6
- def lgred(self, isa, os): pass
+ def lgred(self, isa, os):
+ pass
+
class bzip2(DefaultBenchmark):
- name = 'bzip2'
+ name = "bzip2"
number = 256
- lang = 'C'
+ lang = "C"
def test(self, isa, os):
- self.args = [ 'input.random' ]
+ self.args = ["input.random"]
def train(self, isa, os):
- self.args = [ 'input.compressed' ]
+ self.args = ["input.compressed"]
+
class bzip2_source(bzip2):
def ref(self, isa, os):
- self.simpoint = 977*100E6
- self.args = [ 'input.source', '58' ]
+ self.simpoint = 977 * 100e6
+ self.args = ["input.source", "58"]
def lgred(self, isa, os):
- self.args = [ 'input.source', '1' ]
+ self.args = ["input.source", "1"]
+
class bzip2_graphic(bzip2):
def ref(self, isa, os):
- self.simpoint = 718*100E6
- self.args = [ 'input.graphic', '58' ]
+ self.simpoint = 718 * 100e6
+ self.args = ["input.graphic", "58"]
def lgred(self, isa, os):
- self.args = [ 'input.graphic', '1' ]
+ self.args = ["input.graphic", "1"]
+
class bzip2_program(bzip2):
def ref(self, isa, os):
- self.simpoint = 458*100E6
- self.args = [ 'input.program', '58' ]
+ self.simpoint = 458 * 100e6
+ self.args = ["input.program", "58"]
def lgred(self, isa, os):
- self.args = [ 'input.program', '1' ]
+ self.args = ["input.program", "1"]
+
class crafty(MinneDefaultBenchmark):
- name = 'crafty'
+ name = "crafty"
number = 186
- lang = 'C'
- simpoint = 774*100E6
+ lang = "C"
+ simpoint = 774 * 100e6
+
class eon(MinneDefaultBenchmark):
- name = 'eon'
+ name = "eon"
number = 252
- lang = 'CXX'
+ lang = "CXX"
stdin = None
+
class eon_kajiya(eon):
- args = [ 'chair.control.kajiya', 'chair.camera', 'chair.surfaces',
- 'chair.kajiya.ppm', 'ppm', 'pixels_out.kajiya']
- output = 'kajiya_log.out'
+ args = [
+ "chair.control.kajiya",
+ "chair.camera",
+ "chair.surfaces",
+ "chair.kajiya.ppm",
+ "ppm",
+ "pixels_out.kajiya",
+ ]
+ output = "kajiya_log.out"
class eon_cook(eon):
- args = [ 'chair.control.cook', 'chair.camera', 'chair.surfaces',
- 'chair.cook.ppm', 'ppm', 'pixels_out.cook' ]
- output = 'cook_log.out'
+ args = [
+ "chair.control.cook",
+ "chair.camera",
+ "chair.surfaces",
+ "chair.cook.ppm",
+ "ppm",
+ "pixels_out.cook",
+ ]
+ output = "cook_log.out"
+
class eon_rushmeier(eon):
- args = [ 'chair.control.rushmeier', 'chair.camera', 'chair.surfaces',
- 'chair.rushmeier.ppm', 'ppm', 'pixels_out.rushmeier' ]
- output = 'rushmeier_log.out'
- simpoint = 403*100E6
+ args = [
+ "chair.control.rushmeier",
+ "chair.camera",
+ "chair.surfaces",
+ "chair.rushmeier.ppm",
+ "ppm",
+ "pixels_out.rushmeier",
+ ]
+ output = "rushmeier_log.out"
+ simpoint = 403 * 100e6
+
class gap(DefaultBenchmark):
- name = 'gap'
+ name = "gap"
number = 254
- lang = 'C'
+ lang = "C"
def __set_args(self, size):
- self.args = [ '-l', './', '-q', '-m', size ]
+ self.args = ["-l", "./", "-q", "-m", size]
def test(self, isa, os):
- self.__set_args('64M')
+ self.__set_args("64M")
def train(self, isa, os):
- self.__set_args('128M')
+ self.__set_args("128M")
def ref(self, isa, os):
- self.__set_args('192M')
- self.simpoint = 674*100E6
+ self.__set_args("192M")
+ self.simpoint = 674 * 100e6
def lgred(self, isa, os):
- self.__set_args('64M')
+ self.__set_args("64M")
def mdred(self, isa, os):
- self.__set_args('64M')
+ self.__set_args("64M")
def smred(self, isa, os):
- self.__set_args('64M')
+ self.__set_args("64M")
+
class gcc(DefaultBenchmark):
- name = 'gcc'
+ name = "gcc"
number = 176
- lang = 'C'
+ lang = "C"
def test(self, isa, os):
- self.args = [ 'cccp.i', '-o', 'cccp.s' ]
+ self.args = ["cccp.i", "-o", "cccp.s"]
def train(self, isa, os):
- self.args = [ 'cp-decl.i', '-o', 'cp-decl.s' ]
+ self.args = ["cp-decl.i", "-o", "cp-decl.s"]
def smred(self, isa, os):
- self.args = [ 'c-iterate.i', '-o', 'c-iterate.s' ]
+ self.args = ["c-iterate.i", "-o", "c-iterate.s"]
def mdred(self, isa, os):
- self.args = [ 'rdlanal.i', '-o', 'rdlanal.s' ]
+ self.args = ["rdlanal.i", "-o", "rdlanal.s"]
def lgred(self, isa, os):
- self.args = [ 'cp-decl.i', '-o', 'cp-decl.s' ]
+ self.args = ["cp-decl.i", "-o", "cp-decl.s"]
+
class gcc_166(gcc):
def ref(self, isa, os):
- self.simpoint = 389*100E6
- self.args = [ '166.i', '-o', '166.s' ]
+ self.simpoint = 389 * 100e6
+ self.args = ["166.i", "-o", "166.s"]
+
class gcc_200(gcc):
def ref(self, isa, os):
- self.simpoint = 736*100E6
- self.args = [ '200.i', '-o', '200.s' ]
+ self.simpoint = 736 * 100e6
+ self.args = ["200.i", "-o", "200.s"]
+
class gcc_expr(gcc):
def ref(self, isa, os):
- self.simpoint = 36*100E6
- self.args = [ 'expr.i', '-o', 'expr.s' ]
+ self.simpoint = 36 * 100e6
+ self.args = ["expr.i", "-o", "expr.s"]
+
class gcc_integrate(gcc):
def ref(self, isa, os):
- self.simpoint = 4*100E6
- self.args = [ 'integrate.i', '-o', 'integrate.s' ]
+ self.simpoint = 4 * 100e6
+ self.args = ["integrate.i", "-o", "integrate.s"]
+
class gcc_scilab(gcc):
def ref(self, isa, os):
- self.simpoint = 207*100E6
- self.args = [ 'scilab.i', '-o', 'scilab.s' ]
+ self.simpoint = 207 * 100e6
+ self.args = ["scilab.i", "-o", "scilab.s"]
+
class gzip(DefaultBenchmark):
- name = 'gzip'
+ name = "gzip"
number = 164
- lang = 'C'
+ lang = "C"
def test(self, isa, os):
- self.args = [ 'input.compressed', '2' ]
+ self.args = ["input.compressed", "2"]
def train(self, isa, os):
- self.args = [ 'input.combined', '32' ]
+ self.args = ["input.combined", "32"]
+
class gzip_source(gzip):
def ref(self, isa, os):
- self.simpoint = 334*100E6
- self.args = [ 'input.source', '1' ]
+ self.simpoint = 334 * 100e6
+ self.args = ["input.source", "1"]
+
def smred(self, isa, os):
- self.args = [ 'input.source', '1' ]
+ self.args = ["input.source", "1"]
+
def mdred(self, isa, os):
- self.args = [ 'input.source', '1' ]
+ self.args = ["input.source", "1"]
+
def lgred(self, isa, os):
- self.args = [ 'input.source', '1' ]
+ self.args = ["input.source", "1"]
+
class gzip_log(gzip):
def ref(self, isa, os):
- self.simpoint = 265*100E6
- self.args = [ 'input.log', '60' ]
+ self.simpoint = 265 * 100e6
+ self.args = ["input.log", "60"]
+
def smred(self, isa, os):
- self.args = [ 'input.log', '1' ]
+ self.args = ["input.log", "1"]
+
def mdred(self, isa, os):
- self.args = [ 'input.log', '1' ]
+ self.args = ["input.log", "1"]
+
def lgred(self, isa, os):
- self.args = [ 'input.log', '1' ]
+ self.args = ["input.log", "1"]
+
class gzip_graphic(gzip):
def ref(self, isa, os):
- self.simpoint = 653*100E6
- self.args = [ 'input.graphic', '60' ]
+ self.simpoint = 653 * 100e6
+ self.args = ["input.graphic", "60"]
+
def smred(self, isa, os):
- self.args = [ 'input.graphic', '1' ]
+ self.args = ["input.graphic", "1"]
+
def mdred(self, isa, os):
- self.args = [ 'input.graphic', '1' ]
+ self.args = ["input.graphic", "1"]
+
def lgred(self, isa, os):
- self.args = [ 'input.graphic', '1' ]
+ self.args = ["input.graphic", "1"]
+
class gzip_random(gzip):
def ref(self, isa, os):
- self.simpoint = 623*100E6
- self.args = [ 'input.random', '60' ]
+ self.simpoint = 623 * 100e6
+ self.args = ["input.random", "60"]
+
def smred(self, isa, os):
- self.args = [ 'input.random', '1' ]
+ self.args = ["input.random", "1"]
+
def mdred(self, isa, os):
- self.args = [ 'input.random', '1' ]
+ self.args = ["input.random", "1"]
+
def lgred(self, isa, os):
- self.args = [ 'input.random', '1' ]
+ self.args = ["input.random", "1"]
+
class gzip_program(gzip):
def ref(self, isa, os):
- self.simpoint = 1189*100E6
- self.args = [ 'input.program', '60' ]
+ self.simpoint = 1189 * 100e6
+ self.args = ["input.program", "60"]
+
def smred(self, isa, os):
- self.args = [ 'input.program', '1' ]
+ self.args = ["input.program", "1"]
+
def mdred(self, isa, os):
- self.args = [ 'input.program', '1' ]
+ self.args = ["input.program", "1"]
+
def lgred(self, isa, os):
- self.args = [ 'input.program', '1' ]
+ self.args = ["input.program", "1"]
+
class mcf(MinneDefaultBenchmark):
- name = 'mcf'
+ name = "mcf"
number = 181
- lang = 'C'
- args = [ 'mcf.in' ]
- simpoint = 553*100E6
+ lang = "C"
+ args = ["mcf.in"]
+ simpoint = 553 * 100e6
+
class parser(MinneDefaultBenchmark):
- name = 'parser'
+ name = "parser"
number = 197
- lang = 'C'
- args = [ '2.1.dict', '-batch' ]
- simpoint = 1146*100E6
+ lang = "C"
+ args = ["2.1.dict", "-batch"]
+ simpoint = 1146 * 100e6
+
class perlbmk(DefaultBenchmark):
- name = 'perlbmk'
+ name = "perlbmk"
number = 253
- lang = 'C'
+ lang = "C"
def test(self, isa, os):
- self.args = [ '-I.', '-I', 'lib', 'test.pl' ]
- self.stdin = 'test.in'
+ self.args = ["-I.", "-I", "lib", "test.pl"]
+ self.stdin = "test.in"
+
class perlbmk_diffmail(perlbmk):
def ref(self, isa, os):
- self.simpoint = 141*100E6
- self.args = [ '-I', 'lib', 'diffmail.pl', '2', '550', '15', '24',
- '23', '100' ]
+ self.simpoint = 141 * 100e6
+ self.args = [
+ "-I",
+ "lib",
+ "diffmail.pl",
+ "2",
+ "550",
+ "15",
+ "24",
+ "23",
+ "100",
+ ]
def train(self, isa, os):
- self.args = [ '-I', 'lib', 'diffmail.pl', '2', '350', '15', '24',
- '23', '150' ]
+ self.args = [
+ "-I",
+ "lib",
+ "diffmail.pl",
+ "2",
+ "350",
+ "15",
+ "24",
+ "23",
+ "150",
+ ]
+
class perlbmk_scrabbl(perlbmk):
def train(self, isa, os):
- self.args = [ '-I.', '-I', 'lib', 'scrabbl.pl' ]
- self.stdin = 'scrabbl.in'
+ self.args = ["-I.", "-I", "lib", "scrabbl.pl"]
+ self.stdin = "scrabbl.in"
+
class perlbmk_makerand(perlbmk):
def ref(self, isa, os):
- self.simpoint = 11*100E6
- self.args = [ '-I', 'lib', 'makerand.pl' ]
+ self.simpoint = 11 * 100e6
+ self.args = ["-I", "lib", "makerand.pl"]
def lgred(self, isa, os):
- self.args = [ '-I.', '-I', 'lib', 'lgred.makerand.pl' ]
+ self.args = ["-I.", "-I", "lib", "lgred.makerand.pl"]
def mdred(self, isa, os):
- self.args = [ '-I.', '-I', 'lib', 'mdred.makerand.pl' ]
+ self.args = ["-I.", "-I", "lib", "mdred.makerand.pl"]
def smred(self, isa, os):
- self.args = [ '-I.', '-I', 'lib', 'smred.makerand.pl' ]
+ self.args = ["-I.", "-I", "lib", "smred.makerand.pl"]
+
class perlbmk_perfect(perlbmk):
def ref(self, isa, os):
- self.simpoint = 5*100E6
- self.args = [ '-I', 'lib', 'perfect.pl', 'b', '3', 'm', '4' ]
+ self.simpoint = 5 * 100e6
+ self.args = ["-I", "lib", "perfect.pl", "b", "3", "m", "4"]
def train(self, isa, os):
- self.args = [ '-I', 'lib', 'perfect.pl', 'b', '3' ]
+ self.args = ["-I", "lib", "perfect.pl", "b", "3"]
+
class perlbmk_splitmail1(perlbmk):
def ref(self, isa, os):
- self.simpoint = 405*100E6
- self.args = [ '-I', 'lib', 'splitmail.pl', '850', '5', '19',
- '18', '1500' ]
+ self.simpoint = 405 * 100e6
+ self.args = [
+ "-I",
+ "lib",
+ "splitmail.pl",
+ "850",
+ "5",
+ "19",
+ "18",
+ "1500",
+ ]
+
class perlbmk_splitmail2(perlbmk):
def ref(self, isa, os):
- self.args = [ '-I', 'lib', 'splitmail.pl', '704', '12', '26',
- '16', '836' ]
+ self.args = [
+ "-I",
+ "lib",
+ "splitmail.pl",
+ "704",
+ "12",
+ "26",
+ "16",
+ "836",
+ ]
+
class perlbmk_splitmail3(perlbmk):
def ref(self, isa, os):
- self.args = [ '-I', 'lib', 'splitmail.pl', '535', '13', '25',
- '24', '1091' ]
+ self.args = [
+ "-I",
+ "lib",
+ "splitmail.pl",
+ "535",
+ "13",
+ "25",
+ "24",
+ "1091",
+ ]
+
class perlbmk_splitmail4(perlbmk):
def ref(self, isa, os):
- self.args = [ '-I', 'lib', 'splitmail.pl', '957', '12', '23',
- '26', '1014' ]
+ self.args = [
+ "-I",
+ "lib",
+ "splitmail.pl",
+ "957",
+ "12",
+ "23",
+ "26",
+ "1014",
+ ]
+
class twolf(Benchmark):
- name = 'twolf'
+ name = "twolf"
number = 300
- lang = 'C'
+ lang = "C"
stdin = None
def test(self, isa, os):
- self.args = [ 'test' ]
+ self.args = ["test"]
def train(self, isa, os):
- self.args = [ 'train' ]
+ self.args = ["train"]
def ref(self, isa, os):
- self.simpoint = 1066*100E6
- self.args = [ 'ref' ]
+ self.simpoint = 1066 * 100e6
+ self.args = ["ref"]
def smred(self, isa, os):
- self.args = [ 'smred' ]
+ self.args = ["smred"]
def mdred(self, isa, os):
- self.args = [ 'mdred' ]
+ self.args = ["mdred"]
def lgred(self, isa, os):
- self.args = [ 'lgred' ]
+ self.args = ["lgred"]
+
class vortex(Benchmark):
- name = 'vortex'
+ name = "vortex"
number = 255
- lang = 'C'
+ lang = "C"
stdin = None
def __init__(self, isa, os, input_set):
- if (isa in ('arm', 'thumb', 'aarch64')):
- self.endian = 'lendian'
- elif (isa == 'sparc' or isa == 'sparc32'):
- self.endian = 'bendian'
+ if isa in ("arm", "thumb", "aarch64"):
+ self.endian = "lendian"
+ elif isa == "sparc" or isa == "sparc32":
+ self.endian = "bendian"
else:
raise AttributeError("unknown ISA %s" % isa)
super(vortex, self).__init__(isa, os, input_set)
def test(self, isa, os):
- self.args = [ '%s.raw' % self.endian ]
- self.output = 'vortex.out'
+ self.args = ["%s.raw" % self.endian]
+ self.output = "vortex.out"
def train(self, isa, os):
- self.args = [ '%s.raw' % self.endian ]
- self.output = 'vortex.out'
+ self.args = ["%s.raw" % self.endian]
+ self.output = "vortex.out"
def smred(self, isa, os):
- self.args = [ '%s.raw' % self.endian ]
- self.output = 'vortex.out'
+ self.args = ["%s.raw" % self.endian]
+ self.output = "vortex.out"
def mdred(self, isa, os):
- self.args = [ '%s.raw' % self.endian ]
- self.output = 'vortex.out'
+ self.args = ["%s.raw" % self.endian]
+ self.output = "vortex.out"
def lgred(self, isa, os):
- self.args = [ '%s.raw' % self.endian ]
- self.output = 'vortex.out'
+ self.args = ["%s.raw" % self.endian]
+ self.output = "vortex.out"
+
class vortex1(vortex):
def ref(self, isa, os):
- self.args = [ '%s1.raw' % self.endian ]
- self.output = 'vortex1.out'
- self.simpoint = 271*100E6
+ self.args = ["%s1.raw" % self.endian]
+ self.output = "vortex1.out"
+ self.simpoint = 271 * 100e6
class vortex2(vortex):
def ref(self, isa, os):
- self.simpoint = 1024*100E6
- self.args = [ '%s2.raw' % self.endian ]
- self.output = 'vortex2.out'
+ self.simpoint = 1024 * 100e6
+ self.args = ["%s2.raw" % self.endian]
+ self.output = "vortex2.out"
+
class vortex3(vortex):
def ref(self, isa, os):
- self.simpoint = 564*100E6
- self.args = [ '%s3.raw' % self.endian ]
- self.output = 'vortex3.out'
+ self.simpoint = 564 * 100e6
+ self.args = ["%s3.raw" % self.endian]
+ self.output = "vortex3.out"
+
class vpr(MinneDefaultBenchmark):
- name = 'vpr'
+ name = "vpr"
number = 175
- lang = 'C'
+ lang = "C"
+
# not sure about vpr minnespec place.in
class vpr_place(vpr):
- args = [ 'net.in', 'arch.in', 'place.out', 'dum.out', '-nodisp',
- '-place_only', '-init_t', '5', '-exit_t', '0.005',
- '-alpha_t', '0.9412', '-inner_num', '2' ]
- output = 'place_log.out'
+ args = [
+ "net.in",
+ "arch.in",
+ "place.out",
+ "dum.out",
+ "-nodisp",
+ "-place_only",
+ "-init_t",
+ "5",
+ "-exit_t",
+ "0.005",
+ "-alpha_t",
+ "0.9412",
+ "-inner_num",
+ "2",
+ ]
+ output = "place_log.out"
+
class vpr_route(vpr):
- simpoint = 476*100E6
- args = [ 'net.in', 'arch.in', 'place.in', 'route.out', '-nodisp',
- '-route_only', '-route_chan_width', '15',
- '-pres_fac_mult', '2', '-acc_fac', '1',
- '-first_iter_pres_fac', '4', '-initial_pres_fac', '8' ]
- output = 'route_log.out'
+ simpoint = 476 * 100e6
+ args = [
+ "net.in",
+ "arch.in",
+ "place.in",
+ "route.out",
+ "-nodisp",
+ "-route_only",
+ "-route_chan_width",
+ "15",
+ "-pres_fac_mult",
+ "2",
+ "-acc_fac",
+ "1",
+ "-first_iter_pres_fac",
+ "4",
+ "-initial_pres_fac",
+ "8",
+ ]
+ output = "route_log.out"
-all = [ ammp, applu, apsi, art, art110, art470, equake, facerec, fma3d, galgel,
- lucas, mesa, mgrid, sixtrack, swim, wupwise, bzip2_source,
- bzip2_graphic, bzip2_program, crafty, eon_kajiya, eon_cook,
- eon_rushmeier, gap, gcc_166, gcc_200, gcc_expr, gcc_integrate,
- gcc_scilab, gzip_source, gzip_log, gzip_graphic, gzip_random,
- gzip_program, mcf, parser, perlbmk_diffmail, perlbmk_makerand,
- perlbmk_perfect, perlbmk_splitmail1, perlbmk_splitmail2,
- perlbmk_splitmail3, perlbmk_splitmail4, twolf, vortex1, vortex2,
- vortex3, vpr_place, vpr_route ]
-__all__ = [ x.__name__ for x in all ]
+all = [
+ ammp,
+ applu,
+ apsi,
+ art,
+ art110,
+ art470,
+ equake,
+ facerec,
+ fma3d,
+ galgel,
+ lucas,
+ mesa,
+ mgrid,
+ sixtrack,
+ swim,
+ wupwise,
+ bzip2_source,
+ bzip2_graphic,
+ bzip2_program,
+ crafty,
+ eon_kajiya,
+ eon_cook,
+ eon_rushmeier,
+ gap,
+ gcc_166,
+ gcc_200,
+ gcc_expr,
+ gcc_integrate,
+ gcc_scilab,
+ gzip_source,
+ gzip_log,
+ gzip_graphic,
+ gzip_random,
+ gzip_program,
+ mcf,
+ parser,
+ perlbmk_diffmail,
+ perlbmk_makerand,
+ perlbmk_perfect,
+ perlbmk_splitmail1,
+ perlbmk_splitmail2,
+ perlbmk_splitmail3,
+ perlbmk_splitmail4,
+ twolf,
+ vortex1,
+ vortex2,
+ vortex3,
+ vpr_place,
+ vpr_route,
+]
-if __name__ == '__main__':
+__all__ = [x.__name__ for x in all]
+
+if __name__ == "__main__":
from pprint import pprint
+
for bench in all:
- for input_set in 'ref', 'test', 'train':
- print('class: %s' % bench.__name__)
- x = bench('x86', 'linux', input_set)
- print('%s: %s' % (x, input_set))
+ for input_set in "ref", "test", "train":
+ print("class: %s" % bench.__name__)
+ x = bench("x86", "linux", input_set)
+ print("%s: %s" % (x, input_set))
pprint(x.makeProcessArgs())
print()
diff --git a/configs/dist/sw.py b/configs/dist/sw.py
index 41edf9e..7267357 100644
--- a/configs/dist/sw.py
+++ b/configs/dist/sw.py
@@ -35,33 +35,39 @@
from m5.objects import *
from m5.util import addToPath, fatal
-addToPath('../')
+addToPath("../")
from common import Simulation
from common import Options
+
def build_switch(args):
# instantiate an EtherSwitch
switch = EtherSwitch()
# instantiate distEtherLinks to connect switch ports
# to other gem5 instances
- switch.portlink = [DistEtherLink(speed = args.ethernet_linkspeed,
- delay = args.ethernet_linkdelay,
- dist_rank = args.dist_rank,
- dist_size = args.dist_size,
- server_name = args.dist_server_name,
- server_port = args.dist_server_port,
- sync_start = args.dist_sync_start,
- sync_repeat = args.dist_sync_repeat,
- is_switch = True,
- num_nodes = args.dist_size)
- for i in range(args.dist_size)]
+ switch.portlink = [
+ DistEtherLink(
+ speed=args.ethernet_linkspeed,
+ delay=args.ethernet_linkdelay,
+ dist_rank=args.dist_rank,
+ dist_size=args.dist_size,
+ server_name=args.dist_server_name,
+ server_port=args.dist_server_port,
+ sync_start=args.dist_sync_start,
+ sync_repeat=args.dist_sync_repeat,
+ is_switch=True,
+ num_nodes=args.dist_size,
+ )
+ for i in range(args.dist_size)
+ ]
for (i, link) in enumerate(switch.portlink):
link.int0 = switch.interface[i]
return switch
+
def main():
# Add options
parser = argparse.ArgumentParser()
@@ -70,8 +76,9 @@
args = parser.parse_args()
system = build_switch(args)
- root = Root(full_system = True, system = system)
+ root = Root(full_system=True, system=system)
Simulation.run(args, root, None, None)
+
if __name__ == "__m5_main__":
main()
diff --git a/configs/dram/lat_mem_rd.py b/configs/dram/lat_mem_rd.py
index d69457d..74a9499 100644
--- a/configs/dram/lat_mem_rd.py
+++ b/configs/dram/lat_mem_rd.py
@@ -42,11 +42,11 @@
from m5.util import addToPath
from m5.stats import periodicStatDump
-addToPath('../')
+addToPath("../")
from common import ObjectList
from common import MemConfig
-addToPath('../../util')
+addToPath("../../util")
import protolib
# this script is helpful to observe the memory latency for various
@@ -61,8 +61,15 @@
except:
print("Did not find packet proto definitions, attempting to generate")
from subprocess import call
- error = call(['protoc', '--python_out=configs/dram',
- '--proto_path=src/proto', 'src/proto/packet.proto'])
+
+ error = call(
+ [
+ "protoc",
+ "--python_out=configs/dram",
+ "--proto_path=src/proto",
+ "src/proto/packet.proto",
+ ]
+ )
if not error:
print("Generated packet proto definitions")
@@ -79,24 +86,34 @@
parser = argparse.ArgumentParser()
-parser.add_argument("--mem-type", default="DDR3_1600_8x8",
- choices=ObjectList.mem_list.get_names(),
- help = "type of memory to use")
-parser.add_argument("--mem-size", action="store", type=str,
- default="16MB",
- help="Specify the memory size")
-parser.add_argument("--reuse-trace", action="store_true",
- help="Prevent generation of traces and reuse existing")
+parser.add_argument(
+ "--mem-type",
+ default="DDR3_1600_8x8",
+ choices=ObjectList.mem_list.get_names(),
+ help="type of memory to use",
+)
+parser.add_argument(
+ "--mem-size",
+ action="store",
+ type=str,
+ default="16MB",
+ help="Specify the memory size",
+)
+parser.add_argument(
+ "--reuse-trace",
+ action="store_true",
+ help="Prevent generation of traces and reuse existing",
+)
args = parser.parse_args()
# start by creating the system itself, using a multi-layer 2.0 GHz
# crossbar, delivering 64 bytes / 3 cycles (one header cycle) which
# amounts to 42.7 GByte/s per layer and thus per port
-system = System(membus = SystemXBar(width = 32))
-system.clk_domain = SrcClockDomain(clock = '2.0GHz',
- voltage_domain =
- VoltageDomain(voltage = '1V'))
+system = System(membus=SystemXBar(width=32))
+system.clk_domain = SrcClockDomain(
+ clock="2.0GHz", voltage_domain=VoltageDomain(voltage="1V")
+)
mem_range = AddrRange(args.mem_size)
system.mem_ranges = [mem_range]
@@ -122,12 +139,12 @@
if isinstance(ctrl, m5.objects.MemCtrl):
# make the DRAM refresh interval sufficiently infinite to avoid
# latency spikes
- ctrl.tREFI = '100s'
+ ctrl.tREFI = "100s"
# use the same concept as the utilisation sweep, and print the config
# so that we can later read it in
cfg_file_name = os.path.join(m5.options.outdir, "lat_mem_rd.cfg")
-cfg_file = open(cfg_file_name, 'w')
+cfg_file = open(cfg_file_name, "w")
# set an appropriate burst length in bytes
burst_size = 64
@@ -137,6 +154,7 @@
def is_pow2(num):
return num != 0 and ((num & (num - 1)) == 0)
+
# assume we start every range at 0
max_range = int(mem_range.end)
@@ -164,7 +182,7 @@
# the actual measurement
def create_trace(filename, max_addr, burst_size, itt):
try:
- proto_out = gzip.open(filename, 'wb')
+ proto_out = gzip.open(filename, "wb")
except IOError:
print("Failed to open ", filename, " for writing")
exit(-1)
@@ -184,6 +202,7 @@
addrs = list(range(0, max_addr, burst_size))
import random
+
random.shuffle(addrs)
tick = 0
@@ -202,6 +221,7 @@
proto_out.close()
+
# this will take a while, so keep the user informed
print("Generating traces, please wait...")
@@ -211,22 +231,23 @@
# now we create the states for each range
for r in ranges:
- filename = os.path.join(m5.options.outdir,
- 'lat_mem_rd%d.trc.gz' % nxt_range)
+ filename = os.path.join(
+ m5.options.outdir, "lat_mem_rd%d.trc.gz" % nxt_range
+ )
if not args.reuse_trace:
# create the actual random trace for this range
create_trace(filename, r, burst_size, itt)
# the warming state
- cfg_file.write("STATE %d %d TRACE %s 0\n" %
- (nxt_state, period, filename))
+ cfg_file.write("STATE %d %d TRACE %s 0\n" % (nxt_state, period, filename))
nxt_state = nxt_state + 1
# the measuring states
for i in range(iterations):
- cfg_file.write("STATE %d %d TRACE %s 0\n" %
- (nxt_state, period, filename))
+ cfg_file.write(
+ "STATE %d %d TRACE %s 0\n" % (nxt_state, period, filename)
+ )
nxt_state = nxt_state + 1
nxt_range = nxt_range + 1
@@ -242,8 +263,7 @@
cfg_file.close()
# create a traffic generator, and point it to the file we just created
-system.tgen = TrafficGen(config_file = cfg_file_name,
- progress_check = '10s')
+system.tgen = TrafficGen(config_file=cfg_file_name, progress_check="10s")
# add a communication monitor
system.monitor = CommMonitor()
@@ -267,19 +287,20 @@
tgts_per_mshr = 12
write_buffers = 16
+
# note that everything is in the same clock domain, 2.0 GHz as
# specified above
-system.l1cache = L1_DCache(size = '64kB')
+system.l1cache = L1_DCache(size="64kB")
system.monitor.mem_side_port = system.l1cache.cpu_side
-system.l2cache = L2Cache(size = '512kB', writeback_clean = True)
+system.l2cache = L2Cache(size="512kB", writeback_clean=True)
system.l2cache.xbar = L2XBar()
system.l1cache.mem_side = system.l2cache.xbar.cpu_side_ports
system.l2cache.cpu_side = system.l2cache.xbar.mem_side_ports
# make the L3 mostly exclusive, and correspondingly ensure that the L2
# writes back also clean lines to the L3
-system.l3cache = L3Cache(size = '4MB', clusivity = 'mostly_excl')
+system.l3cache = L3Cache(size="4MB", clusivity="mostly_excl")
system.l3cache.xbar = L2XBar()
system.l2cache.mem_side = system.l3cache.xbar.cpu_side_ports
system.l3cache.cpu_side = system.l3cache.xbar.mem_side_ports
@@ -292,8 +313,8 @@
periodicStatDump(period)
# run Forrest, run!
-root = Root(full_system = False, system = system)
-root.system.mem_mode = 'timing'
+root = Root(full_system=False, system=system)
+root.system.mem_mode = "timing"
m5.instantiate()
m5.simulate(nxt_state * period)
diff --git a/configs/dram/low_power_sweep.py b/configs/dram/low_power_sweep.py
index 5147007..7f8591b 100644
--- a/configs/dram/low_power_sweep.py
+++ b/configs/dram/low_power_sweep.py
@@ -40,7 +40,7 @@
from m5.util import addToPath
from m5.stats import periodicStatDump
-addToPath('../')
+addToPath("../")
from common import ObjectList
from common import MemConfig
@@ -52,46 +52,70 @@
# through an idle state with no requests to enforce self-refresh.
parser = argparse.ArgumentParser(
- formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter
+)
# Use a single-channel DDR4-2400 in 16x4 configuration by default
-parser.add_argument("--mem-type", default="DDR4_2400_16x4",
- choices=ObjectList.mem_list.get_names(),
- help = "type of memory to use")
+parser.add_argument(
+ "--mem-type",
+ default="DDR4_2400_16x4",
+ choices=ObjectList.mem_list.get_names(),
+ help="type of memory to use",
+)
-parser.add_argument("--mem-ranks", "-r", type=int, default=1,
- help = "Number of ranks to iterate across")
+parser.add_argument(
+ "--mem-ranks",
+ "-r",
+ type=int,
+ default=1,
+ help="Number of ranks to iterate across",
+)
-parser.add_argument("--page-policy", "-p",
- choices=["close_adaptive", "open_adaptive"],
- default="close_adaptive", help="controller page policy")
+parser.add_argument(
+ "--page-policy",
+ "-p",
+ choices=["close_adaptive", "open_adaptive"],
+ default="close_adaptive",
+ help="controller page policy",
+)
-parser.add_argument("--itt-list", "-t", default="1 20 100",
- help="a list of multipliers for the max value of itt, " \
- "e.g. \"1 20 100\"")
+parser.add_argument(
+ "--itt-list",
+ "-t",
+ default="1 20 100",
+ help="a list of multipliers for the max value of itt, " 'e.g. "1 20 100"',
+)
-parser.add_argument("--rd-perc", type=int, default=100,
- help = "Percentage of read commands")
+parser.add_argument(
+ "--rd-perc", type=int, default=100, help="Percentage of read commands"
+)
-parser.add_argument("--addr-map",
- choices=m5.objects.AddrMap.vals,
- default="RoRaBaCoCh", help = "DRAM address map policy")
+parser.add_argument(
+ "--addr-map",
+ choices=m5.objects.AddrMap.vals,
+ default="RoRaBaCoCh",
+ help="DRAM address map policy",
+)
-parser.add_argument("--idle-end", type=int, default=50000000,
- help = "time in ps of an idle period at the end ")
+parser.add_argument(
+ "--idle-end",
+ type=int,
+ default=50000000,
+ help="time in ps of an idle period at the end ",
+)
args = parser.parse_args()
# Start with the system itself, using a multi-layer 2.0 GHz
# crossbar, delivering 64 bytes / 3 cycles (one header cycle)
# which amounts to 42.7 GByte/s per layer and thus per port.
-system = System(membus = IOXBar(width = 32))
-system.clk_domain = SrcClockDomain(clock = '2.0GHz',
- voltage_domain =
- VoltageDomain(voltage = '1V'))
+system = System(membus=IOXBar(width=32))
+system.clk_domain = SrcClockDomain(
+ clock="2.0GHz", voltage_domain=VoltageDomain(voltage="1V")
+)
# We are fine with 256 MB memory for now.
-mem_range = AddrRange('256MB')
+mem_range = AddrRange("256MB")
# Start address is 0
system.mem_ranges = [mem_range]
@@ -130,20 +154,27 @@
# We specify the states in a config file input to the traffic generator.
cfg_file_name = "lowp_sweep.cfg"
-cfg_file_path = os.path.dirname(__file__) + "/" +cfg_file_name
-cfg_file = open(cfg_file_path, 'w')
+cfg_file_path = os.path.dirname(__file__) + "/" + cfg_file_name
+cfg_file = open(cfg_file_path, "w")
# Get the number of banks
nbr_banks = int(system.mem_ctrls[0].dram.banks_per_rank.value)
# determine the burst size in bytes
-burst_size = int((system.mem_ctrls[0].dram.devices_per_rank.value *
- system.mem_ctrls[0].dram.device_bus_width.value *
- system.mem_ctrls[0].dram.burst_length.value) / 8)
+burst_size = int(
+ (
+ system.mem_ctrls[0].dram.devices_per_rank.value
+ * system.mem_ctrls[0].dram.device_bus_width.value
+ * system.mem_ctrls[0].dram.burst_length.value
+ )
+ / 8
+)
# next, get the page size in bytes (the rowbuffer size is already in bytes)
-page_size = system.mem_ctrls[0].dram.devices_per_rank.value * \
- system.mem_ctrls[0].dram.device_rowbuffer_size.value
+page_size = (
+ system.mem_ctrls[0].dram.devices_per_rank.value
+ * system.mem_ctrls[0].dram.device_rowbuffer_size.value
+)
# Inter-request delay should be such that we can hit as many transitions
# to/from low power states as possible to. We provide a min and max itt to the
@@ -151,23 +182,25 @@
# seconds and we need it in ticks (ps).
itt_min = system.mem_ctrls[0].dram.tBURST.value * 1000000000000
-#The itt value when set to (tRAS + tRP + tCK) covers the case where
+# The itt value when set to (tRAS + tRP + tCK) covers the case where
# a read command is delayed beyond the delay from ACT to PRE_PDN entry of the
# previous command. For write command followed by precharge, this delay
# between a write and power down entry will be tRCD + tCL + tWR + tRP + tCK.
# As we use this delay as a unit and create multiples of it as bigger delays
# for the sweep, this parameter works for reads, writes and mix of them.
-pd_entry_time = (system.mem_ctrls[0].dram.tRAS.value +
- system.mem_ctrls[0].dram.tRP.value +
- system.mem_ctrls[0].dram.tCK.value) * 1000000000000
+pd_entry_time = (
+ system.mem_ctrls[0].dram.tRAS.value
+ + system.mem_ctrls[0].dram.tRP.value
+ + system.mem_ctrls[0].dram.tCK.value
+) * 1000000000000
# We sweep itt max using the multipliers specified by the user.
itt_max_str = args.itt_list.strip().split()
-itt_max_multiples = [ int(x) for x in itt_max_str ]
+itt_max_multiples = [int(x) for x in itt_max_str]
if len(itt_max_multiples) == 0:
fatal("String for itt-max-list detected empty\n")
-itt_max_values = [ pd_entry_time * m for m in itt_max_multiples ]
+itt_max_values = [pd_entry_time * m for m in itt_max_multiples]
# Generate request addresses in the entire range, assume we start at 0
max_addr = mem_range.end
@@ -180,12 +213,14 @@
# be selective about bank utilization instead of going from 1 to the number of
# banks
-bank_util_values = [1, int(nbr_banks/2), nbr_banks]
+bank_util_values = [1, int(nbr_banks / 2), nbr_banks]
# Next we create the config file, but first a comment
-cfg_file.write("""# STATE state# period mode=DRAM
+cfg_file.write(
+ """# STATE state# period mode=DRAM
# read_percent start_addr end_addr req_size min_itt max_itt data_limit
-# stride_size page_size #banks #banks_util addr_map #ranks\n""")
+# stride_size page_size #banks #banks_util addr_map #ranks\n"""
+)
addr_map = m5.objects.AddrMap.map[args.addr_map]
@@ -193,12 +228,27 @@
for itt_max in itt_max_values:
for bank in bank_util_values:
for stride_size in stride_values:
- cfg_file.write("STATE %d %d %s %d 0 %d %d "
- "%d %d %d %d %d %d %d %d %d\n" %
- (nxt_state, period, "DRAM", args.rd_perc, max_addr,
- burst_size, itt_min, itt_max, 0, stride_size,
- page_size, nbr_banks, bank, addr_map,
- args.mem_ranks))
+ cfg_file.write(
+ "STATE %d %d %s %d 0 %d %d "
+ "%d %d %d %d %d %d %d %d %d\n"
+ % (
+ nxt_state,
+ period,
+ "DRAM",
+ args.rd_perc,
+ max_addr,
+ burst_size,
+ itt_min,
+ itt_max,
+ 0,
+ stride_size,
+ page_size,
+ nbr_banks,
+ bank,
+ addr_map,
+ args.mem_ranks,
+ )
+ )
nxt_state = nxt_state + 1
# State for idle period
@@ -217,7 +267,7 @@
cfg_file.close()
# create a traffic generator, and point it to the file we just created
-system.tgen = TrafficGen(config_file = cfg_file_path)
+system.tgen = TrafficGen(config_file=cfg_file_path)
# add a communication monitor
system.monitor = CommMonitor()
@@ -232,8 +282,8 @@
# every period, dump and reset all stats
periodicStatDump(period)
-root = Root(full_system = False, system = system)
-root.system.mem_mode = 'timing'
+root = Root(full_system=False, system=system)
+root.system.mem_mode = "timing"
m5.instantiate()
@@ -242,8 +292,10 @@
m5.simulate(nxt_state * period + idle_period)
print("--- Done DRAM low power sweep ---")
print("Fixed params - ")
-print("\tburst: %d, banks: %d, max stride: %d, itt min: %s ns" % \
- (burst_size, nbr_banks, max_stride, itt_min))
+print(
+ "\tburst: %d, banks: %d, max stride: %d, itt min: %s ns"
+ % (burst_size, nbr_banks, max_stride, itt_min)
+)
print("Swept params - ")
print("\titt max multiples input:", itt_max_multiples)
print("\titt max values", itt_max_values)
diff --git a/configs/dram/sweep.py b/configs/dram/sweep.py
index 0205f0d..ca7b70d 100644
--- a/configs/dram/sweep.py
+++ b/configs/dram/sweep.py
@@ -41,7 +41,7 @@
from m5.util import addToPath
from m5.stats import periodicStatDump
-addToPath('../')
+addToPath("../")
from common import ObjectList
from common import MemConfig
@@ -54,29 +54,44 @@
parser = argparse.ArgumentParser()
dram_generators = {
- "DRAM" : lambda x: x.createDram,
- "DRAM_ROTATE" : lambda x: x.createDramRot,
+ "DRAM": lambda x: x.createDram,
+ "DRAM_ROTATE": lambda x: x.createDramRot,
}
# Use a single-channel DDR3-1600 x64 (8x8 topology) by default
-parser.add_argument("--mem-type", default="DDR3_1600_8x8",
- choices=ObjectList.mem_list.get_names(),
- help = "type of memory to use")
+parser.add_argument(
+ "--mem-type",
+ default="DDR3_1600_8x8",
+ choices=ObjectList.mem_list.get_names(),
+ help="type of memory to use",
+)
-parser.add_argument("--mem-ranks", "-r", type=int, default=1,
- help = "Number of ranks to iterate across")
+parser.add_argument(
+ "--mem-ranks",
+ "-r",
+ type=int,
+ default=1,
+ help="Number of ranks to iterate across",
+)
-parser.add_argument("--rd_perc", type=int, default=100,
- help = "Percentage of read commands")
+parser.add_argument(
+ "--rd_perc", type=int, default=100, help="Percentage of read commands"
+)
-parser.add_argument("--mode", default="DRAM",
- choices=list(dram_generators.keys()),
- help = "DRAM: Random traffic; \
- DRAM_ROTATE: Traffic rotating across banks and ranks")
+parser.add_argument(
+ "--mode",
+ default="DRAM",
+ choices=list(dram_generators.keys()),
+ help="DRAM: Random traffic; \
+ DRAM_ROTATE: Traffic rotating across banks and ranks",
+)
-parser.add_argument("--addr-map",
- choices=ObjectList.dram_addr_map_list.get_names(),
- default="RoRaBaCoCh", help = "DRAM address map policy")
+parser.add_argument(
+ "--addr-map",
+ choices=ObjectList.dram_addr_map_list.get_names(),
+ default="RoRaBaCoCh",
+ help="DRAM address map policy",
+)
args = parser.parse_args()
@@ -86,13 +101,13 @@
# start with the system itself, using a multi-layer 2.0 GHz
# crossbar, delivering 64 bytes / 3 cycles (one header cycle)
# which amounts to 42.7 GByte/s per layer and thus per port
-system = System(membus = IOXBar(width = 32))
-system.clk_domain = SrcClockDomain(clock = '2.0GHz',
- voltage_domain =
- VoltageDomain(voltage = '1V'))
+system = System(membus=IOXBar(width=32))
+system.clk_domain = SrcClockDomain(
+ clock="2.0GHz", voltage_domain=VoltageDomain(voltage="1V")
+)
# we are fine with 256 MB memory for now
-mem_range = AddrRange('256MB')
+mem_range = AddrRange("256MB")
system.mem_ranges = [mem_range]
# do not worry about reserving space for the backing store
@@ -131,18 +146,31 @@
nbr_banks = system.mem_ctrls[0].dram.banks_per_rank.value
# determine the burst length in bytes
-burst_size = int((system.mem_ctrls[0].dram.devices_per_rank.value *
- system.mem_ctrls[0].dram.device_bus_width.value *
- system.mem_ctrls[0].dram.burst_length.value) / 8)
+burst_size = int(
+ (
+ system.mem_ctrls[0].dram.devices_per_rank.value
+ * system.mem_ctrls[0].dram.device_bus_width.value
+ * system.mem_ctrls[0].dram.burst_length.value
+ )
+ / 8
+)
# next, get the page size in bytes
-page_size = system.mem_ctrls[0].dram.devices_per_rank.value * \
- system.mem_ctrls[0].dram.device_rowbuffer_size.value
+page_size = (
+ system.mem_ctrls[0].dram.devices_per_rank.value
+ * system.mem_ctrls[0].dram.device_rowbuffer_size.value
+)
# match the maximum bandwidth of the memory, the parameter is in seconds
# and we need it in ticks (ps)
-itt = getattr(system.mem_ctrls[0].dram.tBURST_MIN, 'value',
- system.mem_ctrls[0].dram.tBURST.value) * 1000000000000
+itt = (
+ getattr(
+ system.mem_ctrls[0].dram.tBURST_MIN,
+ "value",
+ system.mem_ctrls[0].dram.tBURST.value,
+ )
+ * 1000000000000
+)
# assume we start at 0
max_addr = mem_range.end
@@ -168,27 +196,43 @@
periodicStatDump(period)
# run Forrest, run!
-root = Root(full_system = False, system = system)
-root.system.mem_mode = 'timing'
+root = Root(full_system=False, system=system)
+root.system.mem_mode = "timing"
m5.instantiate()
+
def trace():
addr_map = ObjectList.dram_addr_map_list.get(args.addr_map)
generator = dram_generators[args.mode](system.tgen)
for stride_size in range(burst_size, max_stride + 1, burst_size):
for bank in range(1, nbr_banks + 1):
num_seq_pkts = int(math.ceil(float(stride_size) / burst_size))
- yield generator(period,
- 0, max_addr, burst_size, int(itt), int(itt),
- args.rd_perc, 0,
- num_seq_pkts, page_size, nbr_banks, bank,
- addr_map, args.mem_ranks)
+ yield generator(
+ period,
+ 0,
+ max_addr,
+ burst_size,
+ int(itt),
+ int(itt),
+ args.rd_perc,
+ 0,
+ num_seq_pkts,
+ page_size,
+ nbr_banks,
+ bank,
+ addr_map,
+ args.mem_ranks,
+ )
yield system.tgen.createExit(0)
+
system.tgen.start(trace())
m5.simulate()
-print("DRAM sweep with burst: %d, banks: %d, max stride: %d, request \
- generation period: %d" % (burst_size, nbr_banks, max_stride, itt))
+print(
+ "DRAM sweep with burst: %d, banks: %d, max stride: %d, request \
+ generation period: %d"
+ % (burst_size, nbr_banks, max_stride, itt)
+)
diff --git a/configs/example/apu_se.py b/configs/example/apu_se.py
index b5fb9ff..acf527b 100644
--- a/configs/example/apu_se.py
+++ b/configs/example/apu_se.py
@@ -35,8 +35,10 @@
import m5
from m5.objects import *
from m5.util import addToPath
+from gem5.isas import ISA
+from gem5.runtime import get_runtime_isa
-addToPath('../')
+addToPath("../")
from ruby import Ruby
@@ -53,143 +55,307 @@
Options.addCommonOptions(parser)
Options.addSEOptions(parser)
-parser.add_argument("--cpu-only-mode", action="store_true", default=False,
- help="APU mode. Used to take care of problems in "
- "Ruby.py while running APU protocols")
-parser.add_argument("-u", "--num-compute-units", type=int, default=4,
- help="number of GPU compute units"),
-parser.add_argument("--num-cp", type=int, default=0,
- help="Number of GPU Command Processors (CP)")
-parser.add_argument("--benchmark-root",
- help="Root of benchmark directory tree")
+parser.add_argument(
+ "--cpu-only-mode",
+ action="store_true",
+ default=False,
+ help="APU mode. Used to take care of problems in "
+ "Ruby.py while running APU protocols",
+)
+parser.add_argument(
+ "-u",
+ "--num-compute-units",
+ type=int,
+ default=4,
+ help="number of GPU compute units",
+),
+parser.add_argument(
+ "--num-cp",
+ type=int,
+ default=0,
+ help="Number of GPU Command Processors (CP)",
+)
+parser.add_argument(
+ "--benchmark-root", help="Root of benchmark directory tree"
+)
# not super important now, but to avoid putting the number 4 everywhere, make
# it an option/knob
-parser.add_argument("--cu-per-sqc", type=int, default=4, help="number of CUs"
- "sharing an SQC (icache, and thus icache TLB)")
-parser.add_argument('--cu-per-scalar-cache', type=int, default=4,
- help='Number of CUs sharing a scalar cache')
-parser.add_argument("--simds-per-cu", type=int, default=4, help="SIMD units"
- "per CU")
-parser.add_argument('--cu-per-sa', type=int, default=4,
- help='Number of CUs per shader array. This must be a '
- 'multiple of options.cu-per-sqc and options.cu-per-scalar')
-parser.add_argument('--sa-per-complex', type=int, default=1,
- help='Number of shader arrays per complex')
-parser.add_argument('--num-gpu-complexes', type=int, default=1,
- help='Number of GPU complexes')
-parser.add_argument("--wf-size", type=int, default=64,
- help="Wavefront size(in workitems)")
-parser.add_argument("--sp-bypass-path-length", type=int, default=4,
- help="Number of stages of bypass path in vector ALU for "
- "Single Precision ops")
-parser.add_argument("--dp-bypass-path-length", type=int, default=4,
- help="Number of stages of bypass path in vector ALU for "
- "Double Precision ops")
+parser.add_argument(
+ "--cu-per-sqc",
+ type=int,
+ default=4,
+ help="number of CUs" "sharing an SQC (icache, and thus icache TLB)",
+)
+parser.add_argument(
+ "--cu-per-scalar-cache",
+ type=int,
+ default=4,
+ help="Number of CUs sharing a scalar cache",
+)
+parser.add_argument(
+ "--simds-per-cu", type=int, default=4, help="SIMD units" "per CU"
+)
+parser.add_argument(
+ "--cu-per-sa",
+ type=int,
+ default=4,
+ help="Number of CUs per shader array. This must be a "
+ "multiple of options.cu-per-sqc and options.cu-per-scalar",
+)
+parser.add_argument(
+ "--sa-per-complex",
+ type=int,
+ default=1,
+ help="Number of shader arrays per complex",
+)
+parser.add_argument(
+ "--num-gpu-complexes", type=int, default=1, help="Number of GPU complexes"
+)
+parser.add_argument(
+ "--wf-size", type=int, default=64, help="Wavefront size(in workitems)"
+)
+parser.add_argument(
+ "--sp-bypass-path-length",
+ type=int,
+ default=4,
+ help="Number of stages of bypass path in vector ALU for "
+ "Single Precision ops",
+)
+parser.add_argument(
+ "--dp-bypass-path-length",
+ type=int,
+ default=4,
+ help="Number of stages of bypass path in vector ALU for "
+ "Double Precision ops",
+)
# issue period per SIMD unit: number of cycles before issuing another vector
parser.add_argument(
- "--issue-period", type=int, default=4,
- help="Number of cycles per vector instruction issue period")
-parser.add_argument("--glbmem-wr-bus-width", type=int, default=32,
- help="VGPR to Coalescer (Global Memory) data bus width "
- "in bytes")
-parser.add_argument("--glbmem-rd-bus-width", type=int, default=32,
- help="Coalescer to VGPR (Global Memory) data bus width in "
- "bytes")
-# Currently we only support 1 local memory pipe
-parser.add_argument("--shr-mem-pipes-per-cu", type=int, default=1,
- help="Number of Shared Memory pipelines per CU")
-# Currently we only support 1 global memory pipe
-parser.add_argument("--glb-mem-pipes-per-cu", type=int, default=1,
- help="Number of Global Memory pipelines per CU")
-parser.add_argument("--wfs-per-simd", type=int, default=10, help="Number of "
- "WF slots per SIMD")
-
-parser.add_argument("--registerManagerPolicy", type=str, default="static",
- help="Register manager policy")
-parser.add_argument("--vreg-file-size", type=int, default=2048,
- help="number of physical vector registers per SIMD")
-parser.add_argument("--vreg-min-alloc", type=int, default=4,
- help="Minimum number of registers that can be allocated "
- "from the VRF. The total number of registers will be "
- "aligned to this value.")
-
-parser.add_argument("--sreg-file-size", type=int, default=2048,
- help="number of physical vector registers per SIMD")
-parser.add_argument("--sreg-min-alloc", type=int, default=4,
- help="Minimum number of registers that can be allocated "
- "from the SRF. The total number of registers will be "
- "aligned to this value.")
-
-parser.add_argument("--bw-scalor", type=int, default=0,
- help="bandwidth scalor for scalability analysis")
-parser.add_argument("--CPUClock", type=str, default="2GHz",
- help="CPU clock")
-parser.add_argument("--gpu-clock", type=str, default="1GHz",
- help="GPU clock")
-parser.add_argument("--cpu-voltage", action="store", type=str,
- default='1.0V',
- help="""CPU voltage domain""")
-parser.add_argument("--gpu-voltage", action="store", type=str,
- default='1.0V',
- help="""CPU voltage domain""")
-parser.add_argument("--CUExecPolicy", type=str, default="OLDEST-FIRST",
- help="WF exec policy (OLDEST-FIRST, ROUND-ROBIN)")
-parser.add_argument("--SegFaultDebug", action="store_true",
- help="checks for GPU seg fault before TLB access")
-parser.add_argument("--FunctionalTLB", action="store_true",
- help="Assumes TLB has no latency")
-parser.add_argument("--LocalMemBarrier", action="store_true",
- help="Barrier does not wait for writethroughs to complete")
+ "--issue-period",
+ type=int,
+ default=4,
+ help="Number of cycles per vector instruction issue period",
+)
parser.add_argument(
- "--countPages", action="store_true",
- help="Count Page Accesses and output in per-CU output files")
-parser.add_argument("--TLB-prefetch", type=int, help="prefetch depth for"
- "TLBs")
-parser.add_argument("--pf-type", type=str, help="type of prefetch: "
- "PF_CU, PF_WF, PF_PHASE, PF_STRIDE")
-parser.add_argument("--pf-stride", type=int, help="set prefetch stride")
-parser.add_argument("--numLdsBanks", type=int, default=32,
- help="number of physical banks per LDS module")
-parser.add_argument("--ldsBankConflictPenalty", type=int, default=1,
- help="number of cycles per LDS bank conflict")
-parser.add_argument("--lds-size", type=int, default=65536,
- help="Size of the LDS in bytes")
-parser.add_argument('--fast-forward-pseudo-op', action='store_true',
- help='fast forward using kvm until the m5_switchcpu'
- ' pseudo-op is encountered, then switch cpus. subsequent'
- ' m5_switchcpu pseudo-ops will toggle back and forth')
-parser.add_argument("--num-hw-queues", type=int, default=10,
- help="number of hw queues in packet processor")
-parser.add_argument("--reg-alloc-policy", type=str, default="dynamic",
- help="register allocation policy (simple/dynamic)")
+ "--glbmem-wr-bus-width",
+ type=int,
+ default=32,
+ help="VGPR to Coalescer (Global Memory) data bus width " "in bytes",
+)
+parser.add_argument(
+ "--glbmem-rd-bus-width",
+ type=int,
+ default=32,
+ help="Coalescer to VGPR (Global Memory) data bus width in " "bytes",
+)
+# Currently we only support 1 local memory pipe
+parser.add_argument(
+ "--shr-mem-pipes-per-cu",
+ type=int,
+ default=1,
+ help="Number of Shared Memory pipelines per CU",
+)
+# Currently we only support 1 global memory pipe
+parser.add_argument(
+ "--glb-mem-pipes-per-cu",
+ type=int,
+ default=1,
+ help="Number of Global Memory pipelines per CU",
+)
+parser.add_argument(
+ "--wfs-per-simd",
+ type=int,
+ default=10,
+ help="Number of " "WF slots per SIMD",
+)
-parser.add_argument("--dgpu", action="store_true", default=False,
- help="Configure the system as a dGPU instead of an APU. "
- "The dGPU config has its own local memory pool and is not "
- "coherent with the host through hardware. Data is "
- "transfered from host to device memory using runtime calls "
- "that copy data over a PCIe-like IO bus.")
+parser.add_argument(
+ "--registerManagerPolicy",
+ type=str,
+ default="static",
+ help="Register manager policy",
+)
+parser.add_argument(
+ "--vreg-file-size",
+ type=int,
+ default=2048,
+ help="number of physical vector registers per SIMD",
+)
+parser.add_argument(
+ "--vreg-min-alloc",
+ type=int,
+ default=4,
+ help="Minimum number of registers that can be allocated "
+ "from the VRF. The total number of registers will be "
+ "aligned to this value.",
+)
+
+parser.add_argument(
+ "--sreg-file-size",
+ type=int,
+ default=2048,
+ help="number of physical vector registers per SIMD",
+)
+parser.add_argument(
+ "--sreg-min-alloc",
+ type=int,
+ default=4,
+ help="Minimum number of registers that can be allocated "
+ "from the SRF. The total number of registers will be "
+ "aligned to this value.",
+)
+
+parser.add_argument(
+ "--bw-scalor",
+ type=int,
+ default=0,
+ help="bandwidth scalor for scalability analysis",
+)
+parser.add_argument("--CPUClock", type=str, default="2GHz", help="CPU clock")
+parser.add_argument("--gpu-clock", type=str, default="1GHz", help="GPU clock")
+parser.add_argument(
+ "--cpu-voltage",
+ action="store",
+ type=str,
+ default="1.0V",
+ help="""CPU voltage domain""",
+)
+parser.add_argument(
+ "--gpu-voltage",
+ action="store",
+ type=str,
+ default="1.0V",
+ help="""CPU voltage domain""",
+)
+parser.add_argument(
+ "--CUExecPolicy",
+ type=str,
+ default="OLDEST-FIRST",
+ help="WF exec policy (OLDEST-FIRST, ROUND-ROBIN)",
+)
+parser.add_argument(
+ "--SegFaultDebug",
+ action="store_true",
+ help="checks for GPU seg fault before TLB access",
+)
+parser.add_argument(
+ "--FunctionalTLB", action="store_true", help="Assumes TLB has no latency"
+)
+parser.add_argument(
+ "--LocalMemBarrier",
+ action="store_true",
+ help="Barrier does not wait for writethroughs to complete",
+)
+parser.add_argument(
+ "--countPages",
+ action="store_true",
+ help="Count Page Accesses and output in per-CU output files",
+)
+parser.add_argument(
+ "--max-cu-tokens",
+ type=int,
+ default=4,
+ help="Number of coalescer tokens per CU",
+)
+parser.add_argument(
+ "--vrf_lm_bus_latency",
+ type=int,
+ default=1,
+ help="Latency while accessing shared memory",
+)
+parser.add_argument(
+ "--mem-req-latency",
+ type=int,
+ default=50,
+ help="Latency for requests from the cu to ruby.",
+)
+parser.add_argument(
+ "--mem-resp-latency",
+ type=int,
+ default=50,
+ help="Latency for responses from ruby to the cu.",
+)
+parser.add_argument(
+ "--TLB-prefetch", type=int, help="prefetch depth for" "TLBs"
+)
+parser.add_argument(
+ "--pf-type",
+ type=str,
+ help="type of prefetch: " "PF_CU, PF_WF, PF_PHASE, PF_STRIDE",
+)
+parser.add_argument("--pf-stride", type=int, help="set prefetch stride")
+parser.add_argument(
+ "--numLdsBanks",
+ type=int,
+ default=32,
+ help="number of physical banks per LDS module",
+)
+parser.add_argument(
+ "--ldsBankConflictPenalty",
+ type=int,
+ default=1,
+ help="number of cycles per LDS bank conflict",
+)
+parser.add_argument(
+ "--lds-size", type=int, default=65536, help="Size of the LDS in bytes"
+)
+parser.add_argument(
+ "--fast-forward-pseudo-op",
+ action="store_true",
+ help="fast forward using kvm until the m5_switchcpu"
+ " pseudo-op is encountered, then switch cpus. subsequent"
+ " m5_switchcpu pseudo-ops will toggle back and forth",
+)
+parser.add_argument(
+ "--num-hw-queues",
+ type=int,
+ default=10,
+ help="number of hw queues in packet processor",
+)
+parser.add_argument(
+ "--reg-alloc-policy",
+ type=str,
+ default="dynamic",
+ help="register allocation policy (simple/dynamic)",
+)
+
+parser.add_argument(
+ "--dgpu",
+ action="store_true",
+ default=False,
+ help="Configure the system as a dGPU instead of an APU. "
+ "The dGPU config has its own local memory pool and is not "
+ "coherent with the host through hardware. Data is "
+ "transfered from host to device memory using runtime calls "
+ "that copy data over a PCIe-like IO bus.",
+)
# Mtype option
-#-- 1 1 1 C_RW_S (Cached-ReadWrite-Shared)
-#-- 1 1 0 C_RW_US (Cached-ReadWrite-Unshared)
-#-- 1 0 1 C_RO_S (Cached-ReadOnly-Shared)
-#-- 1 0 0 C_RO_US (Cached-ReadOnly-Unshared)
-#-- 0 1 x UC_L2 (Uncached_GL2)
-#-- 0 0 x UC_All (Uncached_All_Load)
+# -- 1 1 1 C_RW_S (Cached-ReadWrite-Shared)
+# -- 1 1 0 C_RW_US (Cached-ReadWrite-Unshared)
+# -- 1 0 1 C_RO_S (Cached-ReadOnly-Shared)
+# -- 1 0 0 C_RO_US (Cached-ReadOnly-Unshared)
+# -- 0 1 x UC_L2 (Uncached_GL2)
+# -- 0 0 x UC_All (Uncached_All_Load)
# default value: 5/C_RO_S (only allow caching in GL2 for read. Shared)
-parser.add_argument("--m-type", type=int, default=5,
- help="Default Mtype for GPU memory accesses. This is the "
- "value used for all memory accesses on an APU and is the "
- "default mode for dGPU unless explicitly overwritten by "
- "the driver on a per-page basis. Valid values are "
- "between 0-7")
+parser.add_argument(
+ "--m-type",
+ type=int,
+ default=5,
+ help="Default Mtype for GPU memory accesses. This is the "
+ "value used for all memory accesses on an APU and is the "
+ "default mode for dGPU unless explicitly overwritten by "
+ "the driver on a per-page basis. Valid values are "
+ "between 0-7",
+)
-parser.add_argument("--gfx-version", type=str, default='gfx801',
- choices=GfxVersion.vals,
- help="Gfx version for gpu"
- "Note: gfx902 is not fully supported by ROCm")
+parser.add_argument(
+ "--gfx-version",
+ type=str,
+ default="gfx801",
+ choices=GfxVersion.vals,
+ help="Gfx version for gpu" "Note: gfx902 is not fully supported by ROCm",
+)
Ruby.define_options(parser)
@@ -206,21 +372,20 @@
benchmark_path = [args.benchmark_root]
else:
# Set default benchmark search path to current dir
- benchmark_path = ['.']
+ benchmark_path = ["."]
########################## Sanity Check ########################
# Currently the gpu model requires ruby
-if buildEnv['PROTOCOL'] == 'None':
+if buildEnv["PROTOCOL"] == "None":
fatal("GPU model requires ruby")
# Currently the gpu model requires only timing or detailed CPU
-if not (args.cpu_type == "TimingSimpleCPU" or
- args.cpu_type == "DerivO3CPU"):
+if not (args.cpu_type == "TimingSimpleCPU" or args.cpu_type == "DerivO3CPU"):
fatal("GPU model requires TimingSimpleCPU or DerivO3CPU")
# This file can support multiple compute units
-assert(args.num_compute_units >= 1)
+assert args.num_compute_units >= 1
# Currently, the sqc (I-Cache of GPU) is shared by
# multiple compute units(CUs). The protocol works just fine
@@ -229,20 +394,28 @@
# sharing sqc is the common usage)
n_cu = args.num_compute_units
num_sqc = int(math.ceil(float(n_cu) / args.cu_per_sqc))
-args.num_sqc = num_sqc # pass this to Ruby
+args.num_sqc = num_sqc # pass this to Ruby
num_scalar_cache = int(math.ceil(float(n_cu) / args.cu_per_scalar_cache))
args.num_scalar_cache = num_scalar_cache
-print('Num SQC = ', num_sqc, 'Num scalar caches = ', num_scalar_cache,
- 'Num CU = ', n_cu)
+print(
+ "Num SQC = ",
+ num_sqc,
+ "Num scalar caches = ",
+ num_scalar_cache,
+ "Num CU = ",
+ n_cu,
+)
########################## Creating the GPU system ########################
# shader is the GPU
-shader = Shader(n_wf = args.wfs_per_simd,
- clk_domain = SrcClockDomain(
- clock = args.gpu_clock,
- voltage_domain = VoltageDomain(
- voltage = args.gpu_voltage)))
+shader = Shader(
+ n_wf=args.wfs_per_simd,
+ clk_domain=SrcClockDomain(
+ clock=args.gpu_clock,
+ voltage_domain=VoltageDomain(voltage=args.gpu_voltage),
+ ),
+)
# VIPER GPU protocol implements release consistency at GPU side. So,
# we make their writes visible to the global memory and should read
@@ -252,7 +425,7 @@
# means pipeline initiates a acquire/release operation at kernel launch/end.
# VIPER protocol is write-through based, and thus only impl_kern_launch_acq
# needs to set.
-if (buildEnv['PROTOCOL'] == 'GPU_VIPER'):
+if buildEnv["PROTOCOL"] == "GPU_VIPER":
shader.impl_kern_launch_acq = True
shader.impl_kern_end_rel = False
else:
@@ -267,33 +440,36 @@
# List of compute units; one GPU can have multiple compute units
compute_units = []
for i in range(n_cu):
- compute_units.append(ComputeUnit(cu_id = i, perLaneTLB = per_lane,
- num_SIMDs = args.simds_per_cu,
- wf_size = args.wf_size,
- spbypass_pipe_length = \
- args.sp_bypass_path_length,
- dpbypass_pipe_length = \
- args.dp_bypass_path_length,
- issue_period = args.issue_period,
- coalescer_to_vrf_bus_width = \
- args.glbmem_rd_bus_width,
- vrf_to_coalescer_bus_width = \
- args.glbmem_wr_bus_width,
- num_global_mem_pipes = \
- args.glb_mem_pipes_per_cu,
- num_shared_mem_pipes = \
- args.shr_mem_pipes_per_cu,
- n_wf = args.wfs_per_simd,
- execPolicy = args.CUExecPolicy,
- debugSegFault = args.SegFaultDebug,
- functionalTLB = args.FunctionalTLB,
- localMemBarrier = args.LocalMemBarrier,
- countPages = args.countPages,
- localDataStore = \
- LdsState(banks = args.numLdsBanks,
- bankConflictPenalty = \
- args.ldsBankConflictPenalty,
- size = args.lds_size)))
+ compute_units.append(
+ ComputeUnit(
+ cu_id=i,
+ perLaneTLB=per_lane,
+ num_SIMDs=args.simds_per_cu,
+ wf_size=args.wf_size,
+ spbypass_pipe_length=args.sp_bypass_path_length,
+ dpbypass_pipe_length=args.dp_bypass_path_length,
+ issue_period=args.issue_period,
+ coalescer_to_vrf_bus_width=args.glbmem_rd_bus_width,
+ vrf_to_coalescer_bus_width=args.glbmem_wr_bus_width,
+ num_global_mem_pipes=args.glb_mem_pipes_per_cu,
+ num_shared_mem_pipes=args.shr_mem_pipes_per_cu,
+ n_wf=args.wfs_per_simd,
+ execPolicy=args.CUExecPolicy,
+ debugSegFault=args.SegFaultDebug,
+ functionalTLB=args.FunctionalTLB,
+ localMemBarrier=args.LocalMemBarrier,
+ countPages=args.countPages,
+ max_cu_tokens=args.max_cu_tokens,
+ vrf_lm_bus_latency=args.vrf_lm_bus_latency,
+ mem_req_latency=args.mem_req_latency,
+ mem_resp_latency=args.mem_resp_latency,
+ localDataStore=LdsState(
+ banks=args.numLdsBanks,
+ bankConflictPenalty=args.ldsBankConflictPenalty,
+ size=args.lds_size,
+ ),
+ )
+ )
wavefronts = []
vrfs = []
vrf_pool_mgrs = []
@@ -301,48 +477,65 @@
srf_pool_mgrs = []
for j in range(args.simds_per_cu):
for k in range(shader.n_wf):
- wavefronts.append(Wavefront(simdId = j, wf_slot_id = k,
- wf_size = args.wf_size))
+ wavefronts.append(
+ Wavefront(simdId=j, wf_slot_id=k, wf_size=args.wf_size)
+ )
if args.reg_alloc_policy == "simple":
- vrf_pool_mgrs.append(SimplePoolManager(pool_size = \
- args.vreg_file_size,
- min_alloc = \
- args.vreg_min_alloc))
- srf_pool_mgrs.append(SimplePoolManager(pool_size = \
- args.sreg_file_size,
- min_alloc = \
- args.vreg_min_alloc))
+ vrf_pool_mgrs.append(
+ SimplePoolManager(
+ pool_size=args.vreg_file_size,
+ min_alloc=args.vreg_min_alloc,
+ )
+ )
+ srf_pool_mgrs.append(
+ SimplePoolManager(
+ pool_size=args.sreg_file_size,
+ min_alloc=args.vreg_min_alloc,
+ )
+ )
elif args.reg_alloc_policy == "dynamic":
- vrf_pool_mgrs.append(DynPoolManager(pool_size = \
- args.vreg_file_size,
- min_alloc = \
- args.vreg_min_alloc))
- srf_pool_mgrs.append(DynPoolManager(pool_size = \
- args.sreg_file_size,
- min_alloc = \
- args.vreg_min_alloc))
+ vrf_pool_mgrs.append(
+ DynPoolManager(
+ pool_size=args.vreg_file_size,
+ min_alloc=args.vreg_min_alloc,
+ )
+ )
+ srf_pool_mgrs.append(
+ DynPoolManager(
+ pool_size=args.sreg_file_size,
+ min_alloc=args.vreg_min_alloc,
+ )
+ )
- vrfs.append(VectorRegisterFile(simd_id=j, wf_size=args.wf_size,
- num_regs=args.vreg_file_size))
- srfs.append(ScalarRegisterFile(simd_id=j, wf_size=args.wf_size,
- num_regs=args.sreg_file_size))
+ vrfs.append(
+ VectorRegisterFile(
+ simd_id=j, wf_size=args.wf_size, num_regs=args.vreg_file_size
+ )
+ )
+ srfs.append(
+ ScalarRegisterFile(
+ simd_id=j, wf_size=args.wf_size, num_regs=args.sreg_file_size
+ )
+ )
compute_units[-1].wavefronts = wavefronts
compute_units[-1].vector_register_file = vrfs
compute_units[-1].scalar_register_file = srfs
- compute_units[-1].register_manager = \
- RegisterManager(policy=args.registerManagerPolicy,
- vrf_pool_managers=vrf_pool_mgrs,
- srf_pool_managers=srf_pool_mgrs)
+ compute_units[-1].register_manager = RegisterManager(
+ policy=args.registerManagerPolicy,
+ vrf_pool_managers=vrf_pool_mgrs,
+ srf_pool_managers=srf_pool_mgrs,
+ )
if args.TLB_prefetch:
compute_units[-1].prefetch_depth = args.TLB_prefetch
compute_units[-1].prefetch_prev_type = args.pf_type
# attach the LDS and the CU to the bus (actually a Bridge)
compute_units[-1].ldsPort = compute_units[-1].ldsBus.cpu_side_port
- compute_units[-1].ldsBus.mem_side_port = \
- compute_units[-1].localDataStore.cuPort
+ compute_units[-1].ldsBus.mem_side_port = compute_units[
+ -1
+ ].localDataStore.cuPort
# Attach compute units to GPU
shader.CUs = compute_units
@@ -361,20 +554,22 @@
CpuClass, mem_mode = Simulation.getCPUClass(args.cpu_type)
if CpuClass == AtomicSimpleCPU:
fatal("AtomicSimpleCPU is not supported")
-if mem_mode != 'timing':
+if mem_mode != "timing":
fatal("Only the timing memory mode is supported")
shader.timing = True
if args.fast_forward and args.fast_forward_pseudo_op:
- fatal("Cannot fast-forward based both on the number of instructions and"
- " on pseudo-ops")
+ fatal(
+ "Cannot fast-forward based both on the number of instructions and"
+ " on pseudo-ops"
+ )
fast_forward = args.fast_forward or args.fast_forward_pseudo_op
if fast_forward:
FutureCpuClass, future_mem_mode = CpuClass, mem_mode
CpuClass = X86KvmCPU
- mem_mode = 'atomic_noncaching'
+ mem_mode = "atomic_noncaching"
# Leave shader.timing untouched, because its value only matters at the
# start of the simulation and because we require switching cpus
# *before* the first kernel launch.
@@ -383,11 +578,13 @@
# Initial CPUs to be used during fast-forwarding.
for i in range(args.num_cpus):
- cpu = CpuClass(cpu_id = i,
- clk_domain = SrcClockDomain(
- clock = args.CPUClock,
- voltage_domain = VoltageDomain(
- voltage = args.cpu_voltage)))
+ cpu = CpuClass(
+ cpu_id=i,
+ clk_domain=SrcClockDomain(
+ clock=args.CPUClock,
+ voltage_domain=VoltageDomain(voltage=args.cpu_voltage),
+ ),
+ )
cpu_list.append(cpu)
if args.fast_forward:
@@ -400,20 +597,24 @@
# CPs to be used throughout the simulation.
for i in range(args.num_cp):
- cp = MainCpuClass(cpu_id = args.num_cpus + i,
- clk_domain = SrcClockDomain(
- clock = args.CPUClock,
- voltage_domain = VoltageDomain(
- voltage = args.cpu_voltage)))
+ cp = MainCpuClass(
+ cpu_id=args.num_cpus + i,
+ clk_domain=SrcClockDomain(
+ clock=args.CPUClock,
+ voltage_domain=VoltageDomain(voltage=args.cpu_voltage),
+ ),
+ )
cp_list.append(cp)
# Main CPUs (to be used after fast-forwarding if fast-forwarding is specified).
for i in range(args.num_cpus):
- cpu = MainCpuClass(cpu_id = i,
- clk_domain = SrcClockDomain(
- clock = args.CPUClock,
- voltage_domain = VoltageDomain(
- voltage = args.cpu_voltage)))
+ cpu = MainCpuClass(
+ cpu_id=i,
+ clk_domain=SrcClockDomain(
+ clock=args.CPUClock,
+ voltage_domain=VoltageDomain(voltage=args.cpu_voltage),
+ ),
+ )
if fast_forward:
cpu.switched_out = True
future_cpu_list.append(cpu)
@@ -434,21 +635,25 @@
# HSA kernel mode driver
# dGPUPoolID is 0 because we only have one memory pool
-gpu_driver = GPUComputeDriver(filename = "kfd", isdGPU = args.dgpu,
- gfxVersion = args.gfx_version,
- dGPUPoolID = 0, m_type = args.m_type)
+gpu_driver = GPUComputeDriver(
+ filename="kfd",
+ isdGPU=args.dgpu,
+ gfxVersion=args.gfx_version,
+ dGPUPoolID=0,
+ m_type=args.m_type,
+)
renderDriNum = 128
-render_driver = GPURenderDriver(filename = f'dri/renderD{renderDriNum}')
+render_driver = GPURenderDriver(filename=f"dri/renderD{renderDriNum}")
# Creating the GPU kernel launching components: that is the HSA
# packet processor (HSAPP), GPU command processor (CP), and the
# dispatcher.
-gpu_hsapp = HSAPacketProcessor(pioAddr=hsapp_gpu_map_paddr,
- numHWQueues=args.num_hw_queues)
+gpu_hsapp = HSAPacketProcessor(
+ pioAddr=hsapp_gpu_map_paddr, numHWQueues=args.num_hw_queues
+)
dispatcher = GPUDispatcher()
-gpu_cmd_proc = GPUCommandProcessor(hsapp=gpu_hsapp,
- dispatcher=dispatcher)
+gpu_cmd_proc = GPUCommandProcessor(hsapp=gpu_hsapp, dispatcher=dispatcher)
gpu_driver.device = gpu_cmd_proc
shader.dispatcher = dispatcher
shader.gpu_cmd_proc = gpu_cmd_proc
@@ -465,9 +670,11 @@
return full_path
fatal("%s not found in %s" % (rel_path, base_list))
+
def find_file(base_list, rel_path):
return find_path(base_list, rel_path, os.path.isfile)
+
executable = find_path(benchmark_path, args.cmd, os.path.exists)
# It's common for a benchmark to be in a directory with the same
# name as the executable, so we handle that automatically
@@ -476,35 +683,43 @@
executable = find_file(benchmark_path, args.cmd)
if args.env:
- with open(args.env, 'r') as f:
+ with open(args.env, "r") as f:
env = [line.rstrip() for line in f]
else:
- env = ['LD_LIBRARY_PATH=%s' % ':'.join([
- os.getenv('ROCM_PATH','/opt/rocm')+'/lib',
- os.getenv('HCC_HOME','/opt/rocm/hcc')+'/lib',
- os.getenv('HSA_PATH','/opt/rocm/hsa')+'/lib',
- os.getenv('HIP_PATH','/opt/rocm/hip')+'/lib',
- os.getenv('ROCM_PATH','/opt/rocm')+'/libhsakmt/lib',
- os.getenv('ROCM_PATH','/opt/rocm')+'/miopen/lib',
- os.getenv('ROCM_PATH','/opt/rocm')+'/miopengemm/lib',
- os.getenv('ROCM_PATH','/opt/rocm')+'/hipblas/lib',
- os.getenv('ROCM_PATH','/opt/rocm')+'/rocblas/lib',
- "/usr/lib/x86_64-linux-gnu"
- ]),
- 'HOME=%s' % os.getenv('HOME','/'),
- # Disable the VM fault handler signal creation for dGPUs also
- # forces the use of DefaultSignals instead of driver-controlled
- # InteruptSignals throughout the runtime. DefaultSignals poll
- # on memory in the runtime, while InteruptSignals call into the
- # driver.
- "HSA_ENABLE_INTERRUPT=1",
- # We don't have an SDMA hardware model, so need to fallback to
- # vector copy kernels for dGPU memcopies to/from host and device.
- "HSA_ENABLE_SDMA=0"]
+ env = [
+ "LD_LIBRARY_PATH=%s"
+ % ":".join(
+ [
+ os.getenv("ROCM_PATH", "/opt/rocm") + "/lib",
+ os.getenv("HCC_HOME", "/opt/rocm/hcc") + "/lib",
+ os.getenv("HSA_PATH", "/opt/rocm/hsa") + "/lib",
+ os.getenv("HIP_PATH", "/opt/rocm/hip") + "/lib",
+ os.getenv("ROCM_PATH", "/opt/rocm") + "/libhsakmt/lib",
+ os.getenv("ROCM_PATH", "/opt/rocm") + "/miopen/lib",
+ os.getenv("ROCM_PATH", "/opt/rocm") + "/miopengemm/lib",
+ os.getenv("ROCM_PATH", "/opt/rocm") + "/hipblas/lib",
+ os.getenv("ROCM_PATH", "/opt/rocm") + "/rocblas/lib",
+ "/usr/lib/x86_64-linux-gnu",
+ ]
+ ),
+ "HOME=%s" % os.getenv("HOME", "/"),
+ # Disabling the VM fault handler signal creation for dGPUs also
+ # forces the use of DefaultSignals instead of driver-controlled
+ # InterruptSignals throughout the runtime. DefaultSignals poll
+ # on memory in the runtime, while InterruptSignals call into the
+ # driver.
+ "HSA_ENABLE_INTERRUPT=1",
+ # We don't have an SDMA hardware model, so we need to fall back to
+ # vector copy kernels for dGPU memcopies to/from host and device.
+ "HSA_ENABLE_SDMA=0",
+ ]
-process = Process(executable = executable, cmd = [args.cmd]
- + args.options.split(),
- drivers = [gpu_driver, render_driver], env = env)
+process = Process(
+ executable=executable,
+ cmd=[args.cmd] + args.options.split(),
+ drivers=[gpu_driver, render_driver],
+ env=env,
+)
for cpu in cpu_list:
cpu.createThreads()
@@ -521,30 +736,39 @@
########################## Create the overall system ########################
# List of CPUs that must be switched when moving between KVM and simulation
if fast_forward:
- switch_cpu_list = \
- [(cpu_list[i], future_cpu_list[i]) for i in range(args.num_cpus)]
+ switch_cpu_list = [
+ (cpu_list[i], future_cpu_list[i]) for i in range(args.num_cpus)
+ ]
+
+# Other CPU strings cause bad addresses in ROCm. Revert to M5 Simulator.
+for (i, cpu) in enumerate(cpu_list):
+ for j in range(len(cpu)):
+ cpu.isa[j].vendor_string = "M5 Simulator"
# Full list of processing cores in the system.
cpu_list = cpu_list + [shader] + cp_list
# creating the overall system
# notice the cpu list is explicitly added as a parameter to System
-system = System(cpu = cpu_list,
- mem_ranges = [AddrRange(args.mem_size)],
- cache_line_size = args.cacheline_size,
- mem_mode = mem_mode,
- workload = SEWorkload.init_compatible(executable))
+system = System(
+ cpu=cpu_list,
+ mem_ranges=[AddrRange(args.mem_size)],
+ cache_line_size=args.cacheline_size,
+ mem_mode=mem_mode,
+ workload=SEWorkload.init_compatible(executable),
+)
if fast_forward:
system.future_cpu = future_cpu_list
-system.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
-system.clk_domain = SrcClockDomain(clock = args.sys_clock,
- voltage_domain = system.voltage_domain)
+system.voltage_domain = VoltageDomain(voltage=args.sys_voltage)
+system.clk_domain = SrcClockDomain(
+ clock=args.sys_clock, voltage_domain=system.voltage_domain
+)
if fast_forward:
- have_kvm_support = 'BaseKvmCPU' in globals()
- if have_kvm_support and buildEnv['TARGET_ISA'] == "x86":
+ have_kvm_support = "BaseKvmCPU" in globals()
+ if have_kvm_support and get_runtime_isa() == ISA.X86:
system.vm = KvmVM()
- system.m5ops_base = 0xffff0000
+ system.m5ops_base = 0xFFFF0000
for i in range(len(host_cpu.workload)):
host_cpu.workload[i].useArchPT = True
host_cpu.workload[i].kvmInSE = True
@@ -555,17 +779,19 @@
GPUTLBConfig.config_tlb_hierarchy(args, system, shader_idx)
# create Ruby system
-system.piobus = IOXBar(width=32, response_latency=0,
- frontend_latency=0, forward_latency=0)
+system.piobus = IOXBar(
+ width=32, response_latency=0, frontend_latency=0, forward_latency=0
+)
dma_list = [gpu_hsapp, gpu_cmd_proc]
Ruby.create_system(args, None, system, None, dma_list, None)
-system.ruby.clk_domain = SrcClockDomain(clock = args.ruby_clock,
- voltage_domain = system.voltage_domain)
+system.ruby.clk_domain = SrcClockDomain(
+ clock=args.ruby_clock, voltage_domain=system.voltage_domain
+)
gpu_cmd_proc.pio = system.piobus.mem_side_ports
gpu_hsapp.pio = system.piobus.mem_side_ports
for i, dma_device in enumerate(dma_list):
- exec('system.dma_cntrl%d.clk_domain = system.ruby.clk_domain' % i)
+ exec("system.dma_cntrl%d.clk_domain = system.ruby.clk_domain" % i)
# attach the CPU ports to Ruby
for i in range(args.num_cpus):
@@ -579,15 +805,18 @@
system.cpu[i].dcache_port = ruby_port.in_ports
ruby_port.mem_request_port = system.piobus.cpu_side_ports
- if buildEnv['TARGET_ISA'] == "x86":
+ if get_runtime_isa() == ISA.X86:
system.cpu[i].interrupts[0].pio = system.piobus.mem_side_ports
- system.cpu[i].interrupts[0].int_requestor = \
- system.piobus.cpu_side_ports
- system.cpu[i].interrupts[0].int_responder = \
- system.piobus.mem_side_ports
+ system.cpu[i].interrupts[
+ 0
+ ].int_requestor = system.piobus.cpu_side_ports
+ system.cpu[i].interrupts[
+ 0
+ ].int_responder = system.piobus.mem_side_ports
if fast_forward:
system.cpu[i].mmu.connectWalkerPorts(
- ruby_port.in_ports, ruby_port.in_ports)
+ ruby_port.in_ports, ruby_port.in_ports
+ )
# attach CU ports to Ruby
# Because of the peculiarities of the CP core, you may have 1 CPU but 2
@@ -596,9 +825,12 @@
# the index as below, but note that this assumes there is one sequencer
# per compute unit and one sequencer per SQC for the math to work out
# correctly.
-gpu_port_idx = len(system.ruby._cpu_ports) \
- - args.num_compute_units - args.num_sqc \
- - args.num_scalar_cache
+gpu_port_idx = (
+ len(system.ruby._cpu_ports)
+ - args.num_compute_units
+ - args.num_sqc
+ - args.num_scalar_cache
+)
gpu_port_idx = gpu_port_idx - args.num_cp * 2
# Connect token ports. For this we need to search through the list of all
@@ -607,8 +839,9 @@
token_port_idx = 0
for i in range(len(system.ruby._cpu_ports)):
if isinstance(system.ruby._cpu_ports[i], VIPERCoalescer):
- system.cpu[shader_idx].CUs[token_port_idx].gmTokenPort = \
- system.ruby._cpu_ports[i].gmTokenPort
+ system.cpu[shader_idx].CUs[
+ token_port_idx
+ ].gmTokenPort = system.ruby._cpu_ports[i].gmTokenPort
token_port_idx += 1
wavefront_size = args.wf_size
@@ -616,38 +849,45 @@
# The pipeline issues wavefront_size number of uncoalesced requests
# in one GPU issue cycle. Hence wavefront_size mem ports.
for j in range(wavefront_size):
- system.cpu[shader_idx].CUs[i].memory_port[j] = \
- system.ruby._cpu_ports[gpu_port_idx].in_ports[j]
+ system.cpu[shader_idx].CUs[i].memory_port[j] = system.ruby._cpu_ports[
+ gpu_port_idx
+ ].in_ports[j]
gpu_port_idx += 1
for i in range(n_cu):
if i > 0 and not i % args.cu_per_sqc:
print("incrementing idx on ", i)
gpu_port_idx += 1
- system.cpu[shader_idx].CUs[i].sqc_port = \
- system.ruby._cpu_ports[gpu_port_idx].in_ports
+ system.cpu[shader_idx].CUs[i].sqc_port = system.ruby._cpu_ports[
+ gpu_port_idx
+ ].in_ports
gpu_port_idx = gpu_port_idx + 1
for i in range(n_cu):
if i > 0 and not i % args.cu_per_scalar_cache:
print("incrementing idx on ", i)
gpu_port_idx += 1
- system.cpu[shader_idx].CUs[i].scalar_port = \
- system.ruby._cpu_ports[gpu_port_idx].in_ports
+ system.cpu[shader_idx].CUs[i].scalar_port = system.ruby._cpu_ports[
+ gpu_port_idx
+ ].in_ports
gpu_port_idx = gpu_port_idx + 1
# attach CP ports to Ruby
for i in range(args.num_cp):
system.cpu[cp_idx].createInterruptController()
- system.cpu[cp_idx].dcache_port = \
- system.ruby._cpu_ports[gpu_port_idx + i * 2].in_ports
- system.cpu[cp_idx].icache_port = \
- system.ruby._cpu_ports[gpu_port_idx + i * 2 + 1].in_ports
+ system.cpu[cp_idx].dcache_port = system.ruby._cpu_ports[
+ gpu_port_idx + i * 2
+ ].in_ports
+ system.cpu[cp_idx].icache_port = system.ruby._cpu_ports[
+ gpu_port_idx + i * 2 + 1
+ ].in_ports
system.cpu[cp_idx].interrupts[0].pio = system.piobus.mem_side_ports
- system.cpu[cp_idx].interrupts[0].int_requestor = \
- system.piobus.cpu_side_ports
- system.cpu[cp_idx].interrupts[0].int_responder = \
- system.piobus.mem_side_ports
+ system.cpu[cp_idx].interrupts[
+ 0
+ ].int_requestor = system.piobus.cpu_side_ports
+ system.cpu[cp_idx].interrupts[
+ 0
+ ].int_responder = system.piobus.mem_side_ports
cp_idx = cp_idx + 1
################# Connect the CPU and GPU via GPU Dispatcher ##################
@@ -665,15 +905,17 @@
########################## Start simulation ########################
-redirect_paths = [RedirectPath(app_path = "/proc",
- host_paths =
- ["%s/fs/proc" % m5.options.outdir]),
- RedirectPath(app_path = "/sys",
- host_paths =
- ["%s/fs/sys" % m5.options.outdir]),
- RedirectPath(app_path = "/tmp",
- host_paths =
- ["%s/fs/tmp" % m5.options.outdir])]
+redirect_paths = [
+ RedirectPath(
+ app_path="/proc", host_paths=["%s/fs/proc" % m5.options.outdir]
+ ),
+ RedirectPath(
+ app_path="/sys", host_paths=["%s/fs/sys" % m5.options.outdir]
+ ),
+ RedirectPath(
+ app_path="/tmp", host_paths=["%s/fs/tmp" % m5.options.outdir]
+ ),
+]
system.redirect_paths = redirect_paths
@@ -682,18 +924,22 @@
# Create the /sys/devices filesystem for the simulator so that the HSA Runtime
# knows what type of GPU hardware we are simulating
if args.dgpu:
- assert (args.gfx_version in ['gfx803', 'gfx900']),\
- "Incorrect gfx version for dGPU"
- if args.gfx_version == 'gfx803':
+ assert args.gfx_version in [
+ "gfx803",
+ "gfx900",
+ ], "Incorrect gfx version for dGPU"
+ if args.gfx_version == "gfx803":
hsaTopology.createFijiTopology(args)
- elif args.gfx_version == 'gfx900':
+ elif args.gfx_version == "gfx900":
hsaTopology.createVegaTopology(args)
else:
- assert (args.gfx_version in ['gfx801', 'gfx902']),\
- "Incorrect gfx version for APU"
+ assert args.gfx_version in [
+ "gfx801",
+ "gfx902",
+ ], "Incorrect gfx version for APU"
hsaTopology.createCarrizoTopology(args)
-m5.ticks.setGlobalFrequency('1THz')
+m5.ticks.setGlobalFrequency("1THz")
if args.abs_max_tick:
maxtick = args.abs_max_tick
else:
@@ -703,8 +949,7 @@
Simulation.setWorkCountOptions(system, args)
# Checkpointing is not supported by the APU model
-if (args.checkpoint_dir != None or
- args.checkpoint_restore != None):
+if args.checkpoint_dir != None or args.checkpoint_restore != None:
fatal("Checkpointing not supported by apu model")
checkpoint_dir = None
@@ -739,6 +984,6 @@
exit_event = m5.simulate(maxtick - m5.curTick())
print("Ticks:", m5.curTick())
-print('Exiting because ', exit_event.getCause())
+print("Exiting because ", exit_event.getCause())
sys.exit(exit_event.getCode())
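For reference, the apu_se.py hunks above preserve two pieces of arithmetic that are easy to lose in the re-wrapping: the number of SQCs and scalar caches comes from a ceiling division over the CU count, and the first GPU sequencer index is taken from the tail of Ruby's port list after subtracting those counts. Below is a minimal sketch of that arithmetic with assumed example values (8 CUs, 4 CUs per SQC and per scalar cache, one CP, 20 Ruby ports); the names mirror the script, but the numbers are purely illustrative.

    import math

    # Assumed example values, not taken from the patch.
    n_cu, cu_per_sqc, cu_per_scalar_cache = 8, 4, 4
    num_cp, total_ruby_ports = 1, 20

    # Same ceiling division as args.num_sqc / args.num_scalar_cache above.
    num_sqc = int(math.ceil(float(n_cu) / cu_per_sqc))                    # -> 2
    num_scalar_cache = int(math.ceil(float(n_cu) / cu_per_scalar_cache))  # -> 2

    # First GPU sequencer port: everything after the CPU ports, assuming one
    # sequencer per CU, one per SQC, one per scalar cache and two per CP.
    gpu_port_idx = (
        total_ruby_ports - n_cu - num_sqc - num_scalar_cache - num_cp * 2
    )
    print(num_sqc, num_scalar_cache, gpu_port_idx)  # -> 2 2 6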
diff --git a/configs/example/arm/baremetal.py b/configs/example/arm/baremetal.py
index 44e3fd1..9eeba37 100644
--- a/configs/example/arm/baremetal.py
+++ b/configs/example/arm/baremetal.py
@@ -46,7 +46,7 @@
from m5.options import *
import argparse
-m5.util.addToPath('../..')
+m5.util.addToPath("../..")
from common import SysPaths
from common import MemConfig
@@ -60,25 +60,21 @@
# l1_icache_class, l1_dcache_class, l2_Cache_class). Any of
# the cache classes may be 'None' if the particular cache is not present.
cpu_types = {
-
- "atomic" : ( AtomicSimpleCPU, None, None, None),
- "minor" : (MinorCPU,
- devices.L1I, devices.L1D,
- devices.L2),
- "hpi" : ( HPI.HPI,
- HPI.HPI_ICache, HPI.HPI_DCache,
- HPI.HPI_L2)
+ "atomic": (AtomicSimpleCPU, None, None, None),
+ "minor": (MinorCPU, devices.L1I, devices.L1D, devices.L2),
+ "hpi": (HPI.HPI, HPI.HPI_ICache, HPI.HPI_DCache, HPI.HPI_L2),
}
+
def create_cow_image(name):
"""Helper function to create a Copy-on-Write disk image"""
image = CowDiskImage()
image.child.image_file = name
- return image;
+ return image
def create(args):
- ''' Create and configure the system object. '''
+ """Create and configure the system object."""
if args.readfile and not os.path.isfile(args.readfile):
print("Error: Bootscript %s does not exist" % args.readfile)
@@ -93,11 +89,13 @@
platform = ObjectList.platform_list.get(args.machine_type)
- system = devices.SimpleSystem(want_caches,
- args.mem_size,
- platform=platform(),
- mem_mode=mem_mode,
- readfile=args.readfile)
+ system = devices.SimpleSystem(
+ want_caches,
+ args.mem_size,
+ platform=platform(),
+ mem_mode=mem_mode,
+ readfile=args.readfile,
+ )
MemConfig.config_mem(args, system)
@@ -107,7 +105,7 @@
stdout=args.semi_stdout,
stderr=args.semi_stderr,
files_root_dir=args.semi_path,
- cmd_line = " ".join([ object_file ] + args.args)
+ cmd_line=" ".join([object_file] + args.args),
)
if args.disk_image:
@@ -116,17 +114,17 @@
# functionality to avoid writing changes to the stored copy of
# the disk image.
system.realview.vio[0].vio = VirtIOBlock(
- image=create_cow_image(args.disk_image))
+ image=create_cow_image(args.disk_image)
+ )
# Wire up the system's memory system
system.connect()
# Add CPU clusters to the system
system.cpu_cluster = [
- devices.CpuCluster(system,
- args.num_cores,
- args.cpu_freq, "1.0V",
- *cpu_types[args.cpu]),
+ devices.CpuCluster(
+ system, args.num_cores, args.cpu_freq, "1.0V", *cpu_types[args.cpu]
+ )
]
# Create a cache hierarchy for the cluster. We are assuming that
@@ -143,11 +141,11 @@
system.highest_el_is_64 = True
workload_class = workloads.workload_list.get(args.workload)
- system.workload = workload_class(
- object_file, system)
+ system.workload = workload_class(object_file, system)
return system
+
def run(args):
cptdir = m5.options.outdir
if args.checkpoint:
@@ -171,67 +169,118 @@
def main():
parser = argparse.ArgumentParser(epilog=__doc__)
- parser.add_argument("--kernel", type=str,
- default=None,
- help="Binary to run")
- parser.add_argument("--workload", type=str,
- default="ArmBaremetal",
- choices=workloads.workload_list.get_names(),
- help="Workload type")
- parser.add_argument("--disk-image", type=str,
- default=None,
- help="Disk to instantiate")
- parser.add_argument("--readfile", type=str, default="",
- help = "File to return with the m5 readfile command")
- parser.add_argument("--cpu", type=str, choices=list(cpu_types.keys()),
- default="atomic",
- help="CPU model to use")
+ parser.add_argument(
+ "--kernel", type=str, default=None, help="Binary to run"
+ )
+ parser.add_argument(
+ "--workload",
+ type=str,
+ default="ArmBaremetal",
+ choices=workloads.workload_list.get_names(),
+ help="Workload type",
+ )
+ parser.add_argument(
+ "--disk-image", type=str, default=None, help="Disk to instantiate"
+ )
+ parser.add_argument(
+ "--readfile",
+ type=str,
+ default="",
+ help="File to return with the m5 readfile command",
+ )
+ parser.add_argument(
+ "--cpu",
+ type=str,
+ choices=list(cpu_types.keys()),
+ default="atomic",
+ help="CPU model to use",
+ )
parser.add_argument("--cpu-freq", type=str, default="4GHz")
- parser.add_argument("--num-cores", type=int, default=1,
- help="Number of CPU cores")
- parser.add_argument("--machine-type", type=str,
- choices=ObjectList.platform_list.get_names(),
- default="VExpress_GEM5_V2",
- help="Hardware platform class")
- parser.add_argument("--mem-type", default="DDR3_1600_8x8",
- choices=ObjectList.mem_list.get_names(),
- help = "type of memory to use")
- parser.add_argument("--mem-channels", type=int, default=1,
- help = "number of memory channels")
- parser.add_argument("--mem-ranks", type=int, default=None,
- help = "number of memory ranks per channel")
- parser.add_argument("--mem-size", action="store", type=str,
- default="2GB",
- help="Specify the physical memory size")
+ parser.add_argument(
+ "--num-cores", type=int, default=1, help="Number of CPU cores"
+ )
+ parser.add_argument(
+ "--machine-type",
+ type=str,
+ choices=ObjectList.platform_list.get_names(),
+ default="VExpress_GEM5_V2",
+ help="Hardware platform class",
+ )
+ parser.add_argument(
+ "--mem-type",
+ default="DDR3_1600_8x8",
+ choices=ObjectList.mem_list.get_names(),
+ help="type of memory to use",
+ )
+ parser.add_argument(
+ "--mem-channels", type=int, default=1, help="number of memory channels"
+ )
+ parser.add_argument(
+ "--mem-ranks",
+ type=int,
+ default=None,
+ help="number of memory ranks per channel",
+ )
+ parser.add_argument(
+ "--mem-size",
+ action="store",
+ type=str,
+ default="2GB",
+ help="Specify the physical memory size",
+ )
parser.add_argument("--checkpoint", action="store_true")
parser.add_argument("--restore", type=str, default=None)
- parser.add_argument("--dtb-gen", action="store_true",
- help="Doesn't run simulation, it generates a DTB only")
- parser.add_argument("--semi-enable", action="store_true",
- help="Enable semihosting support")
- parser.add_argument("--semi-stdin", type=str, default="stdin",
- help="Standard input for semihosting " \
- "(default: gem5's stdin)")
- parser.add_argument("--semi-stdout", type=str, default="stdout",
- help="Standard output for semihosting " \
- "(default: gem5's stdout)")
- parser.add_argument("--semi-stderr", type=str, default="stderr",
- help="Standard error for semihosting " \
- "(default: gem5's stderr)")
- parser.add_argument('--semi-path', type=str,
- default="",
- help=('Search path for files to be loaded through '
- 'Arm Semihosting'))
- parser.add_argument("args", default=[], nargs="*",
- help="Semihosting arguments to pass to benchmark")
- parser.add_argument("-P", "--param", action="append", default=[],
+ parser.add_argument(
+ "--dtb-gen",
+ action="store_true",
+ help="Doesn't run simulation, it generates a DTB only",
+ )
+ parser.add_argument(
+ "--semi-enable", action="store_true", help="Enable semihosting support"
+ )
+ parser.add_argument(
+ "--semi-stdin",
+ type=str,
+ default="stdin",
+ help="Standard input for semihosting " "(default: gem5's stdin)",
+ )
+ parser.add_argument(
+ "--semi-stdout",
+ type=str,
+ default="stdout",
+ help="Standard output for semihosting " "(default: gem5's stdout)",
+ )
+ parser.add_argument(
+ "--semi-stderr",
+ type=str,
+ default="stderr",
+ help="Standard error for semihosting " "(default: gem5's stderr)",
+ )
+ parser.add_argument(
+ "--semi-path",
+ type=str,
+ default="",
+ help=("Search path for files to be loaded through " "Arm Semihosting"),
+ )
+ parser.add_argument(
+ "args",
+ default=[],
+ nargs="*",
+ help="Semihosting arguments to pass to benchmark",
+ )
+ parser.add_argument(
+ "-P",
+ "--param",
+ action="append",
+ default=[],
help="Set a SimObject parameter relative to the root node. "
- "An extended Python multi range slicing syntax can be used "
- "for arrays. For example: "
- "'system.cpu[0,1,3:8:2].max_insts_all_threads = 42' "
- "sets max_insts_all_threads for cpus 0, 1, 3, 5 and 7 "
- "Direct parameters of the root object are not accessible, "
- "only parameters of its children.")
+ "An extended Python multi range slicing syntax can be used "
+ "for arrays. For example: "
+ "'system.cpu[0,1,3:8:2].max_insts_all_threads = 42' "
+ "sets max_insts_all_threads for cpus 0, 1, 3, 5 and 7 "
+ "Direct parameters of the root object are not accessible, "
+ "only parameters of its children.",
+ )
args = parser.parse_args()
@@ -247,9 +296,10 @@
if args.dtb_gen:
# No run, autogenerate DTB and exit
- root.system.generateDtb(os.path.join(m5.options.outdir, 'system.dtb'))
+ root.system.generateDtb(os.path.join(m5.options.outdir, "system.dtb"))
else:
run(args)
+
if __name__ == "__m5_main__":
main()
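The cpu_types table near the top of the baremetal.py hunks maps each --cpu choice to a four-element tuple: the CPU class followed by the L1 instruction cache, L1 data cache and L2 cache classes, any of which may be None. A small self-contained sketch of that layout, with the gem5 classes replaced by stand-in strings purely for illustration:

    # Stand-in strings instead of the real gem5 classes; layout only.
    cpu_types = {
        "atomic": ("AtomicSimpleCPU", None, None, None),
        "minor": ("MinorCPU", "L1I", "L1D", "L2"),
        "hpi": ("HPI", "HPI_ICache", "HPI_DCache", "HPI_L2"),
    }

    # The script unpacks the tuple straight into the CpuCluster constructor
    # via *cpu_types[args.cpu]; this is the equivalent by-hand unpacking.
    cpu_class, l1i, l1d, l2 = cpu_types["minor"]
    print(cpu_class, l1i, l1d, l2)  # prints: MinorCPU L1I L1D L2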
diff --git a/configs/example/arm/devices.py b/configs/example/arm/devices.py
index a488ab3..c6560d7 100644
--- a/configs/example/arm/devices.py
+++ b/configs/example/arm/devices.py
@@ -37,20 +37,22 @@
import m5
from m5.objects import *
-m5.util.addToPath('../../')
+
+m5.util.addToPath("../../")
from common.Caches import *
from common import ObjectList
have_kvm = "ArmV8KvmCPU" in ObjectList.cpu_list.get_names()
have_fastmodel = "FastModelCortexA76" in ObjectList.cpu_list.get_names()
+
class L1I(L1_ICache):
tag_latency = 1
data_latency = 1
response_latency = 1
mshrs = 4
tgts_per_mshr = 8
- size = '48kB'
+ size = "48kB"
assoc = 3
@@ -60,7 +62,7 @@
response_latency = 1
mshrs = 16
tgts_per_mshr = 16
- size = '32kB'
+ size = "32kB"
assoc = 2
write_buffers = 16
@@ -71,21 +73,21 @@
response_latency = 5
mshrs = 32
tgts_per_mshr = 8
- size = '1MB'
+ size = "1MB"
assoc = 16
write_buffers = 8
- clusivity='mostly_excl'
+ clusivity = "mostly_excl"
class L3(Cache):
- size = '16MB'
+ size = "16MB"
assoc = 16
tag_latency = 20
data_latency = 20
response_latency = 20
mshrs = 20
tgts_per_mshr = 12
- clusivity='mostly_excl'
+ clusivity = "mostly_excl"
class MemBus(SystemXBar):
@@ -94,8 +96,17 @@
class CpuCluster(SubSystem):
- def __init__(self, system, num_cpus, cpu_clock, cpu_voltage,
- cpu_type, l1i_type, l1d_type, l2_type):
+ def __init__(
+ self,
+ system,
+ num_cpus,
+ cpu_clock,
+ cpu_voltage,
+ cpu_type,
+ l1i_type,
+ l1d_type,
+ l2_type,
+ ):
super(CpuCluster, self).__init__()
self._cpu_type = cpu_type
self._l1i_type = l1i_type
@@ -105,12 +116,16 @@
assert num_cpus > 0
self.voltage_domain = VoltageDomain(voltage=cpu_voltage)
- self.clk_domain = SrcClockDomain(clock=cpu_clock,
- voltage_domain=self.voltage_domain)
+ self.clk_domain = SrcClockDomain(
+ clock=cpu_clock, voltage_domain=self.voltage_domain
+ )
- self.cpus = [ self._cpu_type(cpu_id=system.numCpus() + idx,
- clk_domain=self.clk_domain)
- for idx in range(num_cpus) ]
+ self.cpus = [
+ self._cpu_type(
+ cpu_id=system.numCpus() + idx, clk_domain=self.clk_domain
+ )
+ for idx in range(num_cpus)
+ ]
for cpu in self.cpus:
cpu.createThreads()
@@ -157,11 +172,14 @@
int_cls = ArmPPI if pint < 32 else ArmSPI
for isa in cpu.isa:
isa.pmu = ArmPMU(interrupt=int_cls(num=pint))
- isa.pmu.addArchEvents(cpu=cpu,
- itb=cpu.mmu.itb, dtb=cpu.mmu.dtb,
- icache=getattr(cpu, 'icache', None),
- dcache=getattr(cpu, 'dcache', None),
- l2cache=getattr(self, 'l2', None))
+ isa.pmu.addArchEvents(
+ cpu=cpu,
+ itb=cpu.mmu.itb,
+ dtb=cpu.mmu.dtb,
+ icache=getattr(cpu, "icache", None),
+ dcache=getattr(cpu, "dcache", None),
+ l2cache=getattr(self, "l2", None),
+ )
for ev in events:
isa.pmu.addEvent(ev)
@@ -175,42 +193,55 @@
class AtomicCluster(CpuCluster):
def __init__(self, system, num_cpus, cpu_clock, cpu_voltage="1.0V"):
- cpu_config = [ ObjectList.cpu_list.get("AtomicSimpleCPU"), None,
- None, None ]
- super(AtomicCluster, self).__init__(system, num_cpus, cpu_clock,
- cpu_voltage, *cpu_config)
+ cpu_config = [
+ ObjectList.cpu_list.get("AtomicSimpleCPU"),
+ None,
+ None,
+ None,
+ ]
+ super(AtomicCluster, self).__init__(
+ system, num_cpus, cpu_clock, cpu_voltage, *cpu_config
+ )
+
def addL1(self):
pass
+
class KvmCluster(CpuCluster):
def __init__(self, system, num_cpus, cpu_clock, cpu_voltage="1.0V"):
- cpu_config = [ ObjectList.cpu_list.get("ArmV8KvmCPU"), None, None,
- None ]
- super(KvmCluster, self).__init__(system, num_cpus, cpu_clock,
- cpu_voltage, *cpu_config)
+ cpu_config = [ObjectList.cpu_list.get("ArmV8KvmCPU"), None, None, None]
+ super(KvmCluster, self).__init__(
+ system, num_cpus, cpu_clock, cpu_voltage, *cpu_config
+ )
+
def addL1(self):
pass
+
class FastmodelCluster(SubSystem):
- def __init__(self, system, num_cpus, cpu_clock, cpu_voltage="1.0V"):
+ def __init__(self, system, num_cpus, cpu_clock, cpu_voltage="1.0V"):
super(FastmodelCluster, self).__init__()
# Setup GIC
gic = system.realview.gic
- gic.sc_gic.cpu_affinities = ','.join(
- [ '0.0.%d.0' % i for i in range(num_cpus) ])
+ gic.sc_gic.cpu_affinities = ",".join(
+ ["0.0.%d.0" % i for i in range(num_cpus)]
+ )
# Parse the base address of redistributor.
redist_base = gic.get_redist_bases()[0]
redist_frame_size = 0x40000 if gic.sc_gic.has_gicv4_1 else 0x20000
- gic.sc_gic.reg_base_per_redistributor = ','.join([
- '0.0.%d.0=%#x' % (i, redist_base + redist_frame_size * i)
- for i in range(num_cpus)
- ])
+ gic.sc_gic.reg_base_per_redistributor = ",".join(
+ [
+ "0.0.%d.0=%#x" % (i, redist_base + redist_frame_size * i)
+ for i in range(num_cpus)
+ ]
+ )
gic_a2t = AmbaToTlmBridge64(amba=gic.amba_m)
- gic_t2g = TlmToGem5Bridge64(tlm=gic_a2t.tlm,
- gem5=system.iobus.cpu_side_ports)
+ gic_t2g = TlmToGem5Bridge64(
+ tlm=gic_a2t.tlm, gem5=system.iobus.cpu_side_ports
+ )
gic_g2t = Gem5ToTlmBridge64(gem5=system.membus.mem_side_ports)
gic_g2t.addr_ranges = gic.get_addr_ranges()
gic_t2a = AmbaFromTlmBridge64(tlm=gic_g2t.tlm)
@@ -223,28 +254,36 @@
system.gic_hub.gic_t2a = gic_t2a
self.voltage_domain = VoltageDomain(voltage=cpu_voltage)
- self.clk_domain = SrcClockDomain(clock=cpu_clock,
- voltage_domain=self.voltage_domain)
+ self.clk_domain = SrcClockDomain(
+ clock=cpu_clock, voltage_domain=self.voltage_domain
+ )
# Setup CPU
assert num_cpus <= 4
- CpuClasses = [FastModelCortexA76x1, FastModelCortexA76x2,
- FastModelCortexA76x3, FastModelCortexA76x4]
+ CpuClasses = [
+ FastModelCortexA76x1,
+ FastModelCortexA76x2,
+ FastModelCortexA76x3,
+ FastModelCortexA76x4,
+ ]
CpuClass = CpuClasses[num_cpus - 1]
- cpu = CpuClass(GICDISABLE=False)
+ cpu = CpuClass(
+ GICDISABLE=False, BROADCASTATOMIC=False, BROADCASTOUTER=False
+ )
for core in cpu.cores:
core.semihosting_enable = False
core.RVBARADDR = 0x10
core.redistributor = gic.redistributor
core.createThreads()
core.createInterruptController()
- self.cpus = [ cpu ]
+ self.cpus = [cpu]
+ self.cpu_hub = SubSystem()
a2t = AmbaToTlmBridge64(amba=cpu.amba)
t2g = TlmToGem5Bridge64(tlm=a2t.tlm, gem5=system.membus.cpu_side_ports)
- system.gic_hub.a2t = a2t
- system.gic_hub.t2g = t2g
+ self.cpu_hub.a2t = a2t
+ self.cpu_hub.t2g = t2g
system.addCpuCluster(self, num_cpus)
@@ -252,7 +291,7 @@
return False
def memoryMode(self):
- return 'atomic_noncaching'
+ return "atomic_noncaching"
def addL1(self):
pass
@@ -263,6 +302,7 @@
def connectMemSide(self, bus):
pass
+
class BaseSimpleSystem(ArmSystem):
cache_line_size = 64
@@ -271,15 +311,15 @@
self.voltage_domain = VoltageDomain(voltage="1.0V")
self.clk_domain = SrcClockDomain(
- clock="1GHz",
- voltage_domain=Parent.voltage_domain)
+ clock="1GHz", voltage_domain=Parent.voltage_domain
+ )
if platform is None:
self.realview = VExpress_GEM5_V1()
else:
self.realview = platform
- if hasattr(self.realview.gic, 'cpu_addr'):
+ if hasattr(self.realview.gic, "cpu_addr"):
self.gic_cpu_addr = self.realview.gic.cpu_addr