misc: Merge the v22.1 release staging into stable
diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
index b94899f..ebbbb1a 100644
--- a/.git-blame-ignore-revs
+++ b/.git-blame-ignore-revs
@@ -22,3 +22,7 @@
 c3bd8eb1214cbebbc92c7958b80aa06913bce3ba
 488ded0c8d9e43deef531ad174937982b41f8e4b
 26e888965d08486aeed7ebb3ef934ceb1a38cd6f
+
+# A commit which ran Python Black on all Python files.
+# https://gem5-review.googlesource.com/c/public/gem5/+/47024
+787204c92d876dd81357b75aede52d8ef5e053d3
diff --git a/.gitignore b/.gitignore
index 90a6bb2..229a0d5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -31,3 +31,4 @@
 configs/example/memcheck.cfg
 configs/dram/lowp_sweep.cfg
 .pyenv
+.vscode
diff --git a/.mailmap b/.mailmap
index 3cc7825..49c438d 100644
--- a/.mailmap
+++ b/.mailmap
@@ -1,37 +1,43 @@
-ARM gem5 Developers <none@none>
 Abdul Mutaal Ahmad <abdul.mutaal@gmail.com>
+adarshpatil <adarshpatil123@gmail.com>
+Adrià Armejach <adria.armejach@bsc.es> Adrià Armejach <adria.armejach@gmail.com>
 Adrian Herrera <adrian.herrera@arm.com>
 Adrien Pesle <adrien.pesle@arm.com>
-Adrià Armejach <adria.armejach@bsc.es> Adrià Armejach <adria.armejach@gmail.com>
 Akash Bagdia <akash.bagdia@ARM.com> Akash Bagdia <akash.bagdia@arm.com>
 Alec Roelke <alec.roelke@gmail.com> Alec Roelke <ar4jc@virginia.edu>
+Alexander Klimov <Alexander.Klimov@arm.com>
 Alexandru Dutu <alexandru.dutu@amd.com> Alexandru <alexandru.dutu@amd.com>
+Alex Richardson <alexrichardson@google.com>
 Ali Jafri <ali.jafri@arm.com>
-Ali Saidi <Ali.Saidi@arm.com> Ali Saidi <Ali.Saidi@ARM.com>
 Ali Saidi <Ali.Saidi@arm.com> Ali Saidi <ali.saidi@arm.com>
+Ali Saidi <Ali.Saidi@arm.com> Ali Saidi <Ali.Saidi@ARM.com>
 Ali Saidi <Ali.Saidi@arm.com> Ali Saidi <saidi@eecs.umich.edu>
+Alistair Delva <adelva@google.com>
 Amin Farmahini <aminfar@gmail.com>
 Anders Handler <s052838@student.dtu.dk>
-Andrea Mondelli <andrea.mondelli@ucf.edu> Andrea Mondelli <Andrea.Mondelli@ucf.edu>
+Andrea Mondelli <andrea.mondelli@huawei.com> Andrea Mondelli <andrea.mondelli@ucf.edu>
+Andrea Mondelli <andrea.mondelli@huawei.com> Andrea Mondelli <Andrea.Mondelli@ucf.edu>
 Andrea Pellegrini <andrea.pellegrini@gmail.com>
-Andreas Hansson <andreas.hanson@arm.com> Andreas Hansson <Andreas.Hansson@ARM.com>
 Andreas Hansson <andreas.hanson@arm.com> Andreas Hansson <andreas.hansson>
 Andreas Hansson <andreas.hanson@arm.com> Andreas Hansson <andreas.hansson@arm.com>
+Andreas Hansson <andreas.hanson@arm.com> Andreas Hansson <Andreas.Hansson@ARM.com>
 Andreas Hansson <andreas.hanson@arm.com> Andreas Hansson <andreas.hansson@armm.com>
-Andreas Sandberg <Andreas.Sandberg@arm.com> Andreas Sandberg <Andreas.Sandberg@ARM.com>
 Andreas Sandberg <Andreas.Sandberg@arm.com> Andreas Sandberg <andreas.sandberg@arm.com>
+Andreas Sandberg <Andreas.Sandberg@arm.com> Andreas Sandberg <Andreas.Sandberg@ARM.com>
 Andreas Sandberg <Andreas.Sandberg@arm.com> Andreas Sandberg <andreas@sandberg.pp.se>
 Andrew Bardsley <Andrew.Bardsley@arm.com> Andrew Bardsley <Andreas.Bardsley@arm.com>
 Andrew Lukefahr <lukefahr@umich.edu>
 Andrew Schultz <alschult@umich.edu>
 Andriani Mappoura <andriani.mappoura@arm.com>
-Ani Udipi <ani.udipi@arm.com>
+Angie Lee <peiyinglee@google.com>
 Anis Peysieux <anis.peysieux@inria.fr>
+Ani Udipi <ani.udipi@arm.com>
 Anouk Van Laer <anouk.vanlaer@arm.com>
-Arthur Perais <arthur.perais@inria.fr>
+ARM gem5 Developers <none@none>
+Arthur Perais <Arthur.Perais@univ-grenoble-alpes.fr> Arthur Perais <arthur.perais@inria.fr>
+Arun Rodrigues <afrodri@gmail.com>
 Ashkan Tousi <ashkan.tousimojarad@arm.com>
-Austin Harris <austinharris@utexas.edu>
-Richard D. Strong <r.d.strong@gmail.com>
+Austin Harris <austinharris@utexas.edu> Austin Harris <mail@austin-harris.com>
 Avishai Tvila <avishai.tvila@gmail.com>
 Ayaz Akram <yazakram@ucdavis.edu>
 Bagus Hanindhito <hanindhito@bagus.my.id>
@@ -41,80 +47,108 @@
 Bjoern A. Zeeb <baz21@cam.ac.uk>
 Blake Hechtman <bah13@duke.edu> Blake Hechtman <blake.hechtman@amd.com>
 Blake Hechtman <bah13@duke.edu> Blake Hechtman ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <bah13@duke.edu>
-Bobby R. Bruce <bbruce@ucdavis.edu>
+Bobby R. Bruce <bbruce@ucdavis.edu> Bobby Bruce <bbruce@amarillo.cs.ucdavis.edu>
 Boris Shingarov <shingarov@gmail.com> Boris Shingarov <shingarov@labware.com>
 Brad Beckmann <brad.beckmann@amd.com> Brad Beckmann <Brad.Beckmann@amd.com>
 Brad Beckmann <brad.beckmann@amd.com> Brad Beckmann ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <Brad.Beckmann@amd.com>
 Brad Danofsky <bradley.danofsky@amd.com>
 Bradley Wang <radwang@ucdavis.edu> Bradley <animalvgamer@gmail.com>
+Brandon Potter <brandon.potter@amd.com> BKP <brandon.potter@amd.com>
 Brandon Potter <brandon.potter@amd.com> bpotter <brandon.potter@amd.com>
 Brandon Potter <brandon.potter@amd.com> Brandon Potter <Brandon.Potter@amd.com>
-Brandon Potter <brandon.potter@amd.com> BKP <brandon.potter@amd.com>
 Brian Grayson <b.grayson@samsung.com>
 Cagdas Dirik <cdirik@micron.com> cdirik <cdirik@micron.com>
+Carlos Falquez <c.falquez@fz-juelich.de>
 Chander Sudanthi <chander.sudanthi@arm.com> Chander Sudanthi <Chander.Sudanthi@arm.com>
 Chander Sudanthi <chander.sudanthi@arm.com> Chander Sudanthi <Chander.Sudanthi@ARM.com>
+Charles Jamieson <cjamieson2@wisc.edu>
+CHEN Meng <tundriolaxy@gmail.com>
 Chen Zou <chenzou@uchicago.edu>
+Chia-You Chen <hortune@google.com>
+Chow, Marcus <marcus.chow@amd.com>
 Chris Adeniyi-Jones <Chris.Adeniyi-Jones@arm.com>
-Chris Emmons <chris.emmons@arm.com> Chris Emmons <Chris.Emmons@ARM.com>
 Chris Emmons <chris.emmons@arm.com> Chris Emmons <Chris.Emmons@arm.com>
+Chris Emmons <chris.emmons@arm.com> Chris Emmons <Chris.Emmons@ARM.com>
+Chris January <chris.january@arm.com>
 Christian Menard <christian.menard@tu-dresden.de> Christian Menard <Christian.Menard@tu-dresden.de>
-Christoph Pfister <pfistchr@student.ethz.ch>
 Christopher Torng <clt67@cornell.edu>
+Christoph Pfister <pfistchr@student.ethz.ch>
 Chuan Zhu <chuan.zhu@arm.com>
 Chun-Chen Hsu <chunchenhsu@google.com> Chun-Chen TK Hsu <chunchenhsu@google.com>
 Ciro Santilli <ciro.santilli@arm.com>
 Clint Smullen <cws3k@cs.virginia.edu>
+Cui Jin <cuijinbird@gmail.com> Cui Jin <cuijin7@huawei.com>
 Curtis Dunham <Curtis.Dunham@arm.com>
+Daecheol You <daecheol.you@samsung.com>
 Dam Sunwoo <dam.sunwoo@arm.com>
 Dan Gibson <gibson@cs.wisc.edu>
 Daniel Carvalho <odanrc@yahoo.com.br> Daniel <odanrc@yahoo.com.br>
 Daniel Carvalho <odanrc@yahoo.com.br> Daniel R. Carvalho <odanrc@yahoo.com.br>
+Daniel Gerzhoy <daniel.gerzhoy@gmail.com>
 Daniel Johnson <daniel.johnson@arm.com>
 Daniel Sanchez <sanchezd@stanford.edu>
+Davide Basilio Bartolini <davide.basilio.bartolini@huawei.com>
 David Guillen-Fandos <david.guillen@arm.com> David Guillen <david.guillen@arm.com>
 David Guillen-Fandos <david.guillen@arm.com> David Guillen Fandos <david.guillen@arm.com>
 David Hashe <david.hashe@amd.com> David Hashe <david.j.hashe@gmail.com>
 David Oehmke <doehmke@umich.edu>
+David Schall <david.schall2@arm.com>
+Derek Christ <dchrist@rhrk.uni-kl.de>
 Derek Hower <drh5@cs.wisc.edu>
-Deyaun Guo <guodeyuan@tsinghua.org.cn> Deyuan Guo <guodeyuan@tsinghua.org.cn>
 Deyaun Guo <guodeyuan@tsinghua.org.cn> Deyuan Guo ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <guodeyuan@tsinghua.org.cn>
+Deyaun Guo <guodeyuan@tsinghua.org.cn> Deyuan Guo <guodeyuan@tsinghua.org.cn>
 Dibakar Gope <gope@wisc.edu> Dibakar Gope ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <gope@wisc.edu>
+Dimitrios Chasapis <k4s4s.heavener@gmail.com>
 Djordje Kovacevic <djordje.kovacevic@arm.com> Djordje Kovacevic <Djordje.Kovacevic@arm.com>
-Dongxue Zhang <elta.era@gmail.com>
 Doğukan Korkmaztürk <d.korkmazturk@gmail.com>
+Dongxue Zhang <elta.era@gmail.com>
 Dylan Johnson <Dylan.Johnson@ARM.com>
 Earl Ou <shunhsingou@google.com>
+eavivi <eavivi@ucdavis.edu>
+Éder F. Zulian <zulian@eit.uni-kl.de>
 Edmund Grimley Evans <Edmund.Grimley-Evans@arm.com>
+Eduardo José Gómez Hernández <eduardojose.gomez@um.es>
+Eliot Moss <moss@cs.umass.edu>
 Emilio Castillo <castilloe@unican.es> Emilio Castillo <ecastill@bsc.es>
 Emilio Castillo <castilloe@unican.es> Emilio Castillo ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <castilloe@unican.es>
+Emily Brickey <esbrickey@ucdavis.edu>
 Erfan Azarkhish <erfan.azarkhish@unibo.it>
+Erhu <fengerhu.ipads@gmail.com>
 Eric Van Hensbergen <eric.vanhensbergen@arm.com> Eric Van Hensbergen <Eric.VanHensbergen@ARM.com>
+Eric Ye <ericye@google.com>
 Erik Hallnor <ehallnor@umich.edu>
 Erik Tomusk <E.Tomusk@sms.ed.ac.uk>
 Faissal Sleiman <Faissal.Sleiman@arm.com> Faissal Sleiman <sleimanf@umich.edu>
 Fernando Endo <fernando.endo2@gmail.com>
+Franklin He <franklinh@google.com>
 Gabe Black <gabe.black@gmail.com> Gabe Black <gabeblack@google.com>
 Gabe Black <gabe.black@gmail.com> Gabe Black <gblack@eecs.umich.edu>
+Gabe Loh <gabriel.loh@amd.com> gloh <none@none>
 Gabor Dozsa <gabor.dozsa@arm.com>
+Gabriel Busnot <gabriel.busnot@arteris.com>
+gauravjain14 <gjain6@wisc.edu>
 Gedare Bloom <gedare@rtems.org> Gedare Bloom <gedare@gwmail.gwu.edu>
 Gene Wu <gene.wu@arm.com> Gene WU <gene.wu@arm.com>
 Gene WU <gene.wu@arm.com> Gene Wu <Gene.Wu@arm.com>
-Geoffrey Blake <geoffrey.blake@arm.com> Geoffrey Blake <Geoffrey.Blake@arm.com>
 Geoffrey Blake <geoffrey.blake@arm.com> Geoffrey Blake <blakeg@umich.edu>
+Geoffrey Blake <geoffrey.blake@arm.com> Geoffrey Blake <Geoffrey.Blake@arm.com>
 Georg Kotheimer <georg.kotheimer@mailbox.tu-dresden.de>
 Giacomo Gabrielli <giacomo.gabrielli@arm.com> Giacomo Gabrielli <Giacomo.Gabrielli@arm.com>
 Giacomo Travaglini <giacomo.travaglini@arm.com>
 Glenn Bergmans <glenn.bergmans@arm.com>
+GWDx <gwdx@mail.ustc.edu.cn>
 Hamid Reza Khaleghzadeh <khaleghzadeh@gmail.com> Hamid Reza Khaleghzadeh ext:(%2C%20Lluc%20Alvarez%20%3Clluc.alvarez%40bsc.es%3E%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <khaleghzadeh@gmail.com>
+handsomeliu <handsomeliu@google.com>
 Hanhwi Jang <jang.hanhwi@gmail.com>
 Hoa Nguyen <hoanguyen@ucdavis.edu>
 Hongil Yoon <ongal@cs.wisc.edu>
 Hsuan Hsu <hsuan.hsu@mediatek.com>
+huangjs <jiasen.hjs@alibaba-inc.com>
 Hussein Elnawawy <hussein.elnawawy@gmail.com>
 Ian Jiang <ianjiang.ict@gmail.com>
 IanJiangICT <ianjiang.ict@gmail.com>
 Ilias Vougioukas <Ilias.Vougioukas@ARM.com>
+Iru Cai <mytbk920423@gmail.com>
 Isaac Richter <isaac.richter@rochester.edu>
 Isaac Sánchez Barrera <isaac.sanchez@bsc.es>
 Ivan Pizarro <ivan.pizarro@metempsy.com>
@@ -123,104 +157,152 @@
 Jakub Jermar <jakub@jermar.eu>
 James Clarkson <james.clarkson@arm.com>
 Jan-Peter Larsson <jan-peter.larsson@arm.com>
-Jason Lowe-Power <jason@lowepower.com> Jason Lowe-Power <power.jg@gmail.com>
+Jan Vrany <jan.vrany@labware.com>
+Jarvis Jia <jia44@wisc.edu>
+Jasjeet Rangi <jasrangi@ucdavis.edu>
 Jason Lowe-Power <jason@lowepower.com> Jason Lowe-Power <powerjg@cs.wisc.edu>
-Jason Lowe-Power <jason@lowepower.com> Jason Power <power.jg@gmail.com>
-Jason Lowe-Power <jason@lowepower.com> Jason Power <powerjg@cs.wisc.edu>
+Jason Lowe-Power <jason@lowepower.com> Jason Lowe-Power <power.jg@gmail.com>
 Jason Lowe-Power <jason@lowepower.com> Jason Power ext:(%2C%20Joel%20Hestness%20%3Chestness%40cs.wisc.edu%3E) <power.jg@gmail.com>
+Jason Lowe-Power <jason@lowepower.com> Jason Power <powerjg@cs.wisc.edu>
+Jason Lowe-Power <jason@lowepower.com> Jason Power <power.jg@gmail.com>
+Jason Yu <yuzhijingcheng1996@hotmail.com>
 Javier Bueno Hedo <javier.bueno@metempsy.com> Javier Bueno <javier.bueno@metempsy.com>
 Javier Cano-Cano <javier.cano555@gmail.com>
+Javier Garcia Hernandez <avefenixavefenix@gmail.com>
 Javier Setoain <javier.setoain@arm.com>
 Jayneel Gandhi <jayneel@cs.wisc.edu>
 Jennifer Treichler <jtreichl@umich.edu>
-Jieming Yin <jieming.yin@amd.com>
+Jerin Joy <joy@rivosinc.com>
+Jiajie Chen <c@jia.je>
+Jiasen Huang <jiasen.hjs@alibaba-inc.com>
+Jiasen <jiasen.hjs@alibaba-inc.com>
+Jiayi Huang <jyhuang91@gmail.com>
+jiegec <noc@jiegec.ac.cn>
+Jieming Yin <jieming.yin@amd.com> jiemingyin <bjm419@gmail.com>
 Jing Qu <jqu32@wisc.edu> JingQuJQ <jqu32@wisc.edu>
 Jiuyue Ma <majiuyue@ncic.ac.cn>
 Joe Gross <joe.gross@amd.com> Joe Gross <joseph.gross@amd.com>
+Joel Hestness <jthestness@gmail.com> Joel Hestness ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <jthestness@gmail.com>
 Joel Hestness <jthestness@gmail.com> Joel Hestness <hestness@cs.utexas.edu>
 Joel Hestness <jthestness@gmail.com> Joel Hestness <hestness@cs.wisc.edu>
-Joel Hestness <jthestness@gmail.com> Joel Hestness ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <jthestness@gmail.com>
+Joël Porquet-Lupine <joel@porquet.org>
 John Alsop <johnathan.alsop@amd.com>
 John Kalamatianos <john.kalamatianos@amd.com> jkalamat <john.kalamatianos@amd.com>
 Jordi Vaquero <jordi.vaquero@metempsy.com>
 Jose Marinho <jose.marinho@arm.com>
+Juan M. Cebrian <jm.cebriangonzalez@gmail.com>
 Jui-min Lee <fcrh@google.com>
+kai.ren <kai.ren@streamcomputing.com> Kai Ren <binarystar2006@outlook.com>
 Kanishk Sugand <kanishk.sugand@arm.com>
 Karthik Sangaiah <karthik.sangaiah@arm.com>
+Kaustav Goswami <kggoswami@ucdavis.edu>
+Kelly Nguyen <klynguyen@ucdavis.edu>
 Ke Meng <mengke97@hotmail.com>
 Kevin Brodsky <kevin.brodsky@arm.com>
 Kevin Lim <ktlim@umich.edu>
+Kevin Loughlin <kevlough@umich.edu>
 Khalique <khalique913@gmail.com>
 Koan-Sin Tan <koansin.tan@gmail.com>
 Korey Sewell <ksewell@umich.edu>
 Krishnendra Nathella <Krishnendra.Nathella@arm.com> Krishnendra Nathella <krinat01@arm.com>
+ksco <numbksco@gmail.com>
+kunpai <kunpai@ucdavis.edu>
+Kyle Roarty <kyleroarty1716@gmail.com> Kyle Roarty <Kyle.Roarty@amd.com>
+Laura Hinman <llhinman@ucdavis.edu>
 Lena Olson <leolson@google.com> Lena Olson <lena@cs.wisc,edu>
 Lena Olson <leolson@google.com> Lena Olson <lena@cs.wisc.edu>
 Lisa Hsu <Lisa.Hsu@amd.com> Lisa Hsu <hsul@eecs.umich.edu>
 Lluc Alvarez <lluc.alvarez@bsc.es>
 Lluís Vilanova <vilanova@ac.upc.edu> Lluis Vilanova <vilanova@ac.upc.edu>
+Lukas Steiner <lsteiner@rhrk.uni-kl.de>
+Luming Wang <wlm199558@126.com>
+m5test <m5test@zizzer>
 Mahyar Samani <msamani@ucdavis.edu>
+Majid Jalili <majid0jalili@gmail.com>
 Malek Musleh <malek.musleh@gmail.com> Nilay Vaish ext:(%2C%20Malek%20Musleh%20%3Cmalek.musleh%40gmail.com%3E) <nilay@cs.wisc.edu>
 Marc Mari Barcelo <marc.maribarcelo@arm.com>
-Marc Orr <marc.orr@gmail.com> Marc Orr <morr@cs.wisc.edu>
 Marco Balboni <Marco.Balboni@ARM.com>
 Marco Elver <Marco.Elver@ARM.com> Marco Elver <marco.elver@ed.ac.uk>
+Marc Orr <marc.orr@gmail.com> Marc Orr <morr@cs.wisc.edu>
+Marjan Fariborz <mfariborz@ucdavis.edu> marjanfariborz <mfariborz@ucdavis.edu>
+Mark Hildebrand <mhildebrand@ucdavis.edu>
+Marton Erdos <marton.erdos@arm.com>
+Maryam Babaie <mbabaie@ucdavis.edu>
 Matt DeVuyst <mdevuyst@gmail.com>
-Matt Evans <matt.evans@arm.com> Matt Evans <Matt.Evans@arm.com>
-Matt Horsnell <matt.horsnell@arm.com>Matt Horsnell <Matt.Horsnell@ARM.com>
-Matt Horsnell <matt.horsnell@arm.com> Matt Horsnell <Matt.Horsnell@arm.com>
-Matt Horsnell <matt.horsnell@arm.com> Matt Horsnell <matt.horsnell@ARM.com>
-Matt Poremba <matthew.poremba@amd.com> Matt Poremba <Matthew.Poremba@amd.com>
 Matteo Andreozzi <matteo.andreozzi@arm.com> Matteo Andreozzi <Matteo.Andreozzi@arm.com>
 Matteo M. Fusi <matteo.fusi@bsc.es>
+Matt Evans <matt.evans@arm.com> Matt Evans <Matt.Evans@arm.com>
 Matthew Poremba <matthew.poremba@amd.com> Matthew Poremba <Matthew.Poremba@amd.com>
-Matt Sinclair <mattdsinclair@gmail.com> Matthew Sinclair <matthew.sinclair@amd.com>
 Matthias Hille <matthiashille8@gmail.com>
 Matthias Jung <jungma@eit.uni-kl.de>
+Matthias Jung <matthias.jung@iese.fraunhofer.de>
+Matt Horsnell <matt.horsnell@arm.com> Matt Horsnell <matt.horsnell@ARM.com>
+Matt Horsnell <matt.horsnell@arm.com> Matt Horsnell <Matt.Horsnell@arm.com>
+Matt Horsnell <matt.horsnell@arm.com>Matt Horsnell <Matt.Horsnell@ARM.com>
+Matt Poremba <matthew.poremba@amd.com> Matt Poremba <Matthew.Poremba@amd.com>
+Matt Sinclair <mattdsinclair@gmail.com> Matthew Sinclair <matthew.sinclair@amd.com>
+Matt Sinclair <mattdsinclair.wisc@gmail.com> Matt Sinclair <Matthew.Sinclair@amd.com>
 Maurice Becker <madnaurice@googlemail.com>
 Maxime Martinasso <maxime.cscs@gmail.com>
-Maximilian Stein <maximilian.stein@tu-dresden.de>
+Maximilian Stein <maximilian.stein@tu-dresden.de>Maximilian Stein <m@steiny.biz>
 Maximilien Breughe <maximilien.breughe@elis.ugent.be> Maximilien Breughe <Maximilien.Breughe@elis.ugent.be>
+Melissa Jost <melissakjost@gmail.com>
 Michael Adler <Michael.Adler@intel.com>
+Michael Boyer <Michael.Boyer@amd.com>
 Michael LeBeane <michael.lebeane@amd.com> Michael LeBeane <Michael.Lebeane@amd.com>
 Michael LeBeane <michael.lebeane@amd.com> mlebeane <michael.lebeane@amd.com>
 Michael Levenhagen <mjleven@sandia.gov>
-Michiel Van Tol <michiel.vantol@arm.com> Michiel W. van Tol <Michiel.VanTol@arm.com>
 Michiel Van Tol <michiel.vantol@arm.com> Michiel van Tol <Michiel.VanTol@arm.com>
+Michiel Van Tol <michiel.vantol@arm.com> Michiel W. van Tol <Michiel.VanTol@arm.com>
 Miguel Serrano <mserrano@umich.edu>
+Mike Upton <michaelupton@gmail.com>
 Miles Kaufmann <milesck@eecs.umich.edu>
-Min Kyu Jeong <minkyu.jeong@arm.com> Min Kyu Jeong <MinKyu.Jeong@arm.com>
 Mingyuan <xiang_my@outlook.com>
-Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga <Mitch.Hayenga@ARM.com>
-Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga <Mitch.Hayenga@arm.com>
-Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga <mitch.hayenga+gem5@gmail.com>
-Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga ext:(%2C%20Amin%20Farmahini%20%3Caminfar%40gmail.com%3E) <mitch.hayenga+gem5@gmail.com>
+Min Kyu Jeong <minkyu.jeong@arm.com> Min Kyu Jeong <MinKyu.Jeong@arm.com>
 Mitch Hayenga <mitch.hayenga@arm.com> Mitchell Hayenga <Mitchell.Hayenga@ARM.com>
+Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga ext:(%2C%20Amin%20Farmahini%20%3Caminfar%40gmail.com%3E) <mitch.hayenga+gem5@gmail.com>
+Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga <Mitch.Hayenga@arm.com>
+Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga <Mitch.Hayenga@ARM.com>
+Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga <mitch.hayenga+gem5@gmail.com>
 Mohammad Alian <m.alian1369@gmail.com>
 Monir Mozumder <monir.mozumder@amd.com>
 Moyang Wang <mw828@cornell.edu>
 Mrinmoy Ghosh <mrinmoy.ghosh@arm.com> Mrinmoy Ghosh <Mrinmoy.Ghosh@arm.com>
-Nathan Binkert <nate@binkert.org> Nathan Binkert <binkertn@umich.edu>
+Muhammad Sarmad Saeed <mssaeed@ucdavis.edu>
+Nadia Etemadi <netemadi@ucdavis.edu>
 Nathanael Premillieu <nathanael.premillieu@arm.com> Nathanael Premillieu <Nathanael.Premillieu@arm.com>
+Nathanael Premillieu <nathanael.premillieu@arm.com> Nathanael Premillieu <nathanael.premillieu@huawei.com>
 Nathanael Premillieu <nathanael.premillieu@arm.com> Nathanael Premillieu <nathanael.premillieu@irisa.fr>
 Nathanael Premillieu <nathanael.premillieu@arm.com> Nathanael Premillieu <nathananel.premillieu@arm.com>
 Nathanael Premillieu <nathanael.premillieu@arm.com> Nathanael Premillieu <npremill@irisa.fr>
+Nathan Binkert <nate@binkert.org> Nathan Binkert <binkertn@umich.edu>
 Nayan Deshmukh <nayan26deshmukh@gmail.com>
 Neha Agarwal <neha.agarwal@arm.com>
+Neil Natekar <nanatekar@ucdavis.edu>
 Nicholas Lindsay <nicholas.lindsay@arm.com>
+Nicolas Boichat <drinkcat@google.com>
 Nicolas Derumigny <nderumigny@gmail.com>
 Nicolas Zea <nicolas.zea@gmail.com>
 Nikos Nikoleris <nikos.nikoleris@arm.com> Nikos Nikoleris <nikos.nikoleris@gmail.com>
+Nilay Vaish ext:(%2C%20Timothy%20Jones%20%3Ctimothy.jones%40cl.cam.ac.uk%3E) <nilay@cs.wisc.edu>
 Nils Asmussen <nils.asmussen@barkhauseninstitut.org> Nils Asmussen <nilsasmussen7@gmail.com>
+Noah Katz <nkatz@rivosinc.com>
+ntampouratzis <ntampouratzis@isc.tuc.gr>
 Nuwan Jayasena <Nuwan.Jayasena@amd.com>
 Ola Jeppsson <ola.jeppsson@gmail.com>
 Omar Naji <Omar.Naji@arm.com>
+Onur Kayiran <onur.kayiran@amd.com>
 Pablo Prieto <pablo.prieto@unican.es>
+paikunal <kunpai@ucdavis.edu>
 Palle Lyckegaard <palle@lyckegaard.dk>
 Pau Cabre <pau.cabre@metempsy.com>
 Paul Rosenfeld <prosenfeld@micron.com> Paul Rosenfeld <dramninjas@gmail.com>
 Paul Rosenfeld <prosenfeld@micron.com> Paul Rosenfeld <prosenfeld@micon.com>
 Peter Enns <Peter.Enns@arm.com> Pierre-Yves Péneau <pierre-yves.peneau@lirmm.fr>
+Peter <petery.hin@huawei.com>
+Peter Yuen <ppeetteerrsx@gmail.com>
+Philip Metzler <cpmetz@google.com>
+Pierre Ayoub <pierre.ayoub.pro@tutanota.com>
 Pin-Yen Lin <treapking@google.com>
 Po-Hao Su <supohaosu@gmail.com>
 Polina Dudnik <pdudnik@cs.wisc.edu> Polina Dudnik <pdudnik@gmail.com>
@@ -229,23 +311,26 @@
 Prakash Ramrakhyani <prakash.ramrakhyani@arm.com> Prakash Ramrakhani <Prakash.Ramrakhani@arm.com>
 Prakash Ramrakhyani <prakash.ramrakhyani@arm.com> Prakash Ramrakhyani <Prakash.Ramrakhyani@arm.com>
 Pritha Ghoshal <pritha9987@tamu.edu>
+Quentin Forcioli <quentin.forcioli@telecom-paris.fr>
 Radhika Jagtap <radhika.jagtap@arm.com> Radhika Jagtap <radhika.jagtap@ARM.com>
 Rahul Thakur <rjthakur@google.com>
 Reiley Jeapaul <Reiley.Jeyapaul@arm.com>
-Rekai Gonzalez-Alberquilla <rekai.gonzalezalberquilla@arm.com> Rekai <Rekai.GonzalezAlberquilla@arm.com>
-Rekai Gonzalez-Alberquilla <rekai.gonzalezalberquilla@arm.com> Rekai Gonzalez Alberquilla <Rekai.GonzalezAlberquilla@arm.com>
 Rekai Gonzalez-Alberquilla <rekai.gonzalezalberquilla@arm.com> Rekai Gonzalez Alberquilla <rekai.gonzalezalberquilla@arm.com>
+Rekai Gonzalez-Alberquilla <rekai.gonzalezalberquilla@arm.com> Rekai Gonzalez Alberquilla <Rekai.GonzalezAlberquilla@arm.com>
 Rekai Gonzalez-Alberquilla <rekai.gonzalezalberquilla@arm.com> Rekai Gonzalez-Alberquilla <Rekai.GonzalezAlberquilla@arm.com>
+Rekai Gonzalez-Alberquilla <rekai.gonzalezalberquilla@arm.com> Rekai <Rekai.GonzalezAlberquilla@arm.com>
 Rene de Jong <rene.dejong@arm.com>
 Ricardo Alves <ricardo.alves@arm.com>
+Richard Cooper <richard.cooper@arm.com>
+Richard D. Strong <r.d.strong@gmail.com>
 Richard Strong <rstrong@hp.com> Richard Strong <r.d.strong@gmail.com>
 Richard Strong <rstrong@hp.com> Richard Strong <rstrong@cs.ucsd.edu>
 Richard Strong <rstrong@hp.com> Rick Strong <rstrong@cs.ucsd.edu>
 Rico Amslinger <rico.amslinger@informatik.uni-augsburg.de>
 Riken Gohil <Riken.Gohil@arm.com>
 Rizwana Begum <rb639@drexel.edu>
-Robert Scheffel <robert.scheffel1@tu-dresden.de> Robert <robert.scheffel1@tu-dresden.de>
 Robert Kovacsics <rmk35@cl.cam.ac.uk>
+Robert Scheffel <robert.scheffel1@tu-dresden.de> Robert <robert.scheffel1@tu-dresden.de>
 Rohit Kurup <rohit.kurup@arm.com>
 Ron Dreslinski <rdreslin@umich.edu> Ronald Dreslinski <rdreslin@umich.edu>
 Ruben Ayrapetyan <ruben.ayrapetyan@arm.com>
@@ -253,20 +338,27 @@
 Ruslan Bukin <br@bsdpad.com> Ruslan Bukin ext:(%2C%20Zhang%20Guoye) <br@bsdpad.com>
 Rutuja Oza <roza@ucdavis.edu>
 Ryan Gambord <gambordr@oregonstate.edu>
+sacak32 <byrakocalan99@gmail.com>
+Sampad Mohapatra <sampad.mohapatra@gmail.com>
 Samuel Grayson <sam@samgrayson.me>
-Sandipan Das <sandipan@linux.ibm.com>
+Samuel Stark <samuel.stark2@arm.com>
+Sandipan Das <31861871+sandip4n@users.noreply.github.com>
+Sandipan Das <sandipan@linux.ibm.com> Sandipan Das <31861871+sandip4n@users.noreply.github.com>
 Santi Galan <santi.galan@metempsy.com>
-Sascha Bischoff <sascha.bischoff@arm.com> Sascha Bischoff <Sascha.Bischoff@ARM.com>
 Sascha Bischoff <sascha.bischoff@arm.com> Sascha Bischoff <sascha.bischoff@ARM.com>
+Sascha Bischoff <sascha.bischoff@arm.com> Sascha Bischoff <Sascha.Bischoff@ARM.com>
 Sean McGoogan <Sean.McGoogan@arm.com>
 Sean Wilson <spwilson2@wisc.edu>
 Sergei Trofimov <sergei.trofimov@arm.com>
 Severin Wischmann <wiseveri@student.ethz.ch> Severin Wischmann ext:(%2C%20Ioannis%20Ilkos%20%3Cioannis.ilkos09%40imperial.ac.uk%3E) <wiseveri@student.ethz.ch>
 Shawn Rosti <shawn.rosti@gmail.com>
 Sherif Elhabbal <elhabbalsherif@gmail.com>
+Shivani Parekh <shparekh@ucdavis.edu>
+Shivani <shparekh@ucdavis.edu>
 Siddhesh Poyarekar <siddhesh.poyarekar@gmail.com>
 Somayeh Sardashti <somayeh@cs.wisc.edu>
 Sooraj Puthoor <puthoorsooraj@gmail.com>
+Sooraj Puthoor <Sooraj.Puthoor@amd.com>
 Sophiane Senni <sophiane.senni@gmail.com>
 Soumyaroop Roy <sroy@cse.usf.edu>
 Srikant Bharadwaj <srikant.bharadwaj@amd.com>
@@ -275,13 +367,14 @@
 Stephan Diestelhorst <stephan.diestelhorst@arm.com> Stephan Diestelhorst <stephan.diestelhorst@ARM.com>
 Stephen Hines <hines@cs.fsu.edu>
 Steve Raasch <sraasch@umich.edu>
-Steve Reinhardt <stever@gmail.com> Steve Reinhardt <Steve.Reinhardt@amd.com>
-Steve Reinhardt <stever@gmail.com> Steve Reinhardt <steve.reinhardt@amd.com>
-Steve Reinhardt <stever@gmail.com> Steve Reinhardt <stever@eecs.umich.edu>
 Steve Reinhardt <stever@gmail.com> Steve Reinhardt ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E%2C%20Ali%20Saidi%20%3CAli.Saidi%40ARM.com%3E) <stever@gmail.com>
+Steve Reinhardt <stever@gmail.com> Steve Reinhardt <stever@eecs.umich.edu>
+Steve Reinhardt <stever@gmail.com> Steve Reinhardt <steve.reinhardt@amd.com>
+Steve Reinhardt <stever@gmail.com> Steve Reinhardt <Steve.Reinhardt@amd.com>
 Stian Hvatum <stian@dream-web.no>
 Sudhanshu Jha <sudhanshu.jha@arm.com>
 Sujay Phadke <electronicsguy123@gmail.com>
+Sungkeun Kim <ksungkeun84@tamu.edu>
 Swapnil Haria <swapnilster@gmail.com> Swapnil Haria <swapnilh@cs.wisc.edu>
 Taeho Kgil <tkgil@umich.edu>
 Tao Zhang <tao.zhang.0924@gmail.com>
@@ -290,45 +383,50 @@
 Tim Harris <tharris@microsoft.com>
 Timothy Hayes <timothy.hayes@arm.com>
 Timothy M. Jones <timothy.jones@arm.com> Timothy Jones <timothy.jones@cl.cam.ac.uk>
-Timothy M. Jones <timothy.jones@arm.com> Nilay Vaish ext:(%2C%20Timothy%20Jones%20%3Ctimothy.jones%40cl.cam.ac.uk%3E) <nilay@cs.wisc.edu>
 Timothy M. Jones <timothy.jones@arm.com> Timothy M. Jones <timothy.jones@cl.cam.ac.uk>
 Timothy M. Jones <timothy.jones@arm.com> Timothy M. Jones <tjones1@inf.ed.ac.uk>
 Tom Jablin <tjablin@gmail.com>
 Tommaso Marinelli <tommarin@ucm.es>
+Tom Rollet <tom.rollet@huawei.com>
+Tong Shen <endlessroad@google.com>
 Tony Gutierrez <anthony.gutierrez@amd.com> Anthony Gutierrez <atgutier@umich.edu>
-Tuan Ta <qtt2@cornell.edu> Tuan Ta <taquangtuan1992@gmail.com>
-Tushar Krishna <tushar@ece.gatech.edu> Tushar Krishna <Tushar.Krishna@amd.com>
+Travis Boraten <travis.boraten@amd.com>
+Trivikram Reddy <tvreddy@ucdavis.edu> tv-reddy <tvreddy@ucdavis.edu>
+Tuan Ta <qtt2@cornell.edu> Tuan Ta <taquangtuan1992@gmail.com> Tuan Ta <tuan.ta@amd.com>
 Tushar Krishna <tushar@ece.gatech.edu> Tushar Krishna <tushar@csail.mit.edu>
+Tushar Krishna <tushar@ece.gatech.edu> Tushar Krishna <Tushar.Krishna@amd.com>
 Umesh Bhaskar <umesh.b2006@gmail.com>
 Uri Wiener <uri.wiener@arm.com>
 Victor Garcia <victor.garcia@arm.com>
 Vilas Sridharan <vilas.sridharan@gmail.com>
-Vince Weaver <vince@csl.cornell.edu>
 Vincentius Robby <acolyte@umich.edu>
+Vince Weaver <vince@csl.cornell.edu>
+vramadas95 <vramadas@wisc.edu>
+vsoria <victor.soria@bsc.es>
 Wade Walker <wade.walker@arm.com>
+Wei-Han Chen <weihanchen@google.com>
 Weiping Liao <weipingliao@google.com>
+Wende Tan <twd2@163.com>
 Wendy Elsasser <wendy.elsasser@arm.com>
-William Wang <william.wang@arm.com> William Wang <William.Wang@ARM.com>
 William Wang <william.wang@arm.com> William Wang <William.Wang@arm.com>
+William Wang <william.wang@arm.com> William Wang <William.Wang@ARM.com>
 Willy Wolff <willy.mh.wolff.ml@gmail.com>
+Wing Li <wingers@google.com>
 Xiangyu Dong <rioshering@gmail.com>
-Xianwei Zhang <xianwei.zhang@amd.com>
+Xianwei Zhang <xianwei.zhang.@amd.com> Xianwei Zhang <xianwei.zhang@amd.com>
 Xiaoyu Ma <xiaoyuma@google.com>
 Xin Ouyang <xin.ouyang@streamcomputing.com>
+Xiongfei <xiongfei.liao@gmail.com>
 Yasuko Eckert <yasuko.eckert@amd.com>
-Yi Xiang <yix@colostate.edu>
+Yen-lin Lai <yenlinlai@google.com>
 Yifei Liu <liu.ad2039@gmail.com>
-Yu-hsin Wang <yuhsingw@google.com>
+yiwkd2 <yiwkd2@gmail.com>
+Yi Xiang <yix@colostate.edu>
 Yuan Yao <yuanyao@seas.harvard.edu>
 Yuetsu Kodama <yuetsu.kodama@riken.jp> yuetsu.kodama <yuetsu.kodama@riken.jp>
+Yu-hsin Wang <yuhsingw@google.com>
 Zhang Zheng <perise@gmail.com>
+Zhantong Qiu <ztqiu@ucdavis.edu>
+Zhengrong Wang <seanzw@ucla.edu> seanzw <seanyukigeek@gmail.com>
+zhongchengyong <zhongcy93@gmail.com>
 Zicong Wang <wangzicong@nudt.edu.cn>
-Éder F. Zulian <zulian@eit.uni-kl.de>
-Gabe Loh <gabriel.loh@amd.com> gloh <none@none>
-jiegec <noc@jiegec.ac.cn>
-m5test <m5test@zizzer>
-Marjan Fariborz <mfariborz@ucdavis.edu> marjanfariborz <mfariborz@ucdavis.edu>
-Mike Upton <michaelupton@gmail.com>
-seanzw <seanyukigeek@gmail.com>
-Trivikram Reddy <tvreddy@ucdavis.edu> tv-reddy <tvreddy@ucdavis.edu>
-
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..8cbc6af
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,89 @@
+# Copyright (c) 2022 Arm Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+minimum_pre_commit_version: "2.18"
+
+default_language_version:
+  python: python3
+
+exclude: |
+  (?x)^(
+    ext/.*|
+    build/.*|
+    src/systemc/ext/.*|
+    src/systemc/tests/.*/.*|
+    src/python/m5/ext/pyfdt/.*|
+    tests/.*/ref/.*
+  )$
+
+default_stages: [commit]
+
+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v4.3.0
+  hooks:
+  - id: trailing-whitespace
+  - id: end-of-file-fixer
+  - id: check-json
+  - id: check-yaml
+  - id: check-added-large-files
+  - id: mixed-line-ending
+    args: [--fix=lf]
+  - id: check-case-conflict
+- repo: https://github.com/psf/black
+  rev: 22.6.0
+  hooks:
+    - id: black
+- repo: local
+  hooks:
+  - id: gem5-style-checker
+    name: gem5 style checker
+    entry: util/git-pre-commit.py
+    always_run: true
+    exclude: ".*"
+    language: system
+    description: 'The gem5 style checker hook.'
+  - id: gem5-commit-msg-checker
+    name: gem5 commit msg checker
+    entry: ext/git-commit-msg
+    language: system
+    stages: [commit-msg]
+    description: 'The gem5 commit message checker hook.'
+  - id: gerrit-commit-msg-job
+    name: gerrit commit message job
+    entry: util/gerrit-commit-msg-hook
+    language: system
+    stages: [commit-msg]
+    description: 'Adds Change-ID to the commit message. Needed by Gerrit.'
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 189b63f..ae771d3 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -136,6 +136,37 @@
 
 [git book]: https://git-scm.com/book/en/v2/Git-Branching-Rebasing
 
+
+Setting up pre-commit
+---------------------
+
+To help ensure the gem5 style guide is maintained, we use [pre-commit](
+https://pre-commit.com) to run checks on changes to be contributed.
+
+To setup pre-commit, run the following in your gem5 directory to install the
+pre-commit and commit message hooks.
+
+```sh
+pip install pre-commit
+pre-commit install -t pre-commit -t commit-msg
+```
+
+The hooks are also automatically installed when gem5 is compiled.
+
+When you run a `git commit` command the pre-commit hook will run checks on your
+committed code. The commit will be blocked if a check fails.
+
+The same checks are run as part of Gerrit's CI tests (those required to obtain
+a Verified label, necessary for a change to be accepted to the develop branch).
+Therefore setting up pre-commit in your local gem5 development environment is
+recommended.
+
+You can automatically format files to pass the pre-commit tests by running:
+
+```sh
+pre-commit run --files <files to format>
+```
+
 Requirements for change descriptions
 ------------------------------------
 To help reviewers and future contributors more easily understand and track
diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index 2353a96..931be69 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -1,3 +1,121 @@
+# Version 22.1.0.0
+
+This release has 500 contributions from 48 unique contributors and marks our second major release of 2022.
+This release incorporates several new features, improvements, and bug fixes for the computer architecture research community.
+
+See below for more details!
+
+## New features and improvements
+
+- The gem5 binary can now be compiled to include multiple ISA targets.
+A compilation of gem5 which includes all gem5 ISAs can be created using: `scons build/ALL/gem5.opt`.
+This will use the Ruby `MESI_Two_Level` cache coherence protocol by default, to use other protocols: `scons build/ALL/gem5.opt PROTOCOL=<other protocol>`.
+The classic cache system may continue to be used regardless as to which Ruby cache coherence protocol is compiled.
+- The `m5` Python module now includes functions to set exit events at particular simulation ticks:
+    - *setMaxTick(tick)* : Used to specify the maximum simulation tick.
+    - *getMaxTick()* : Used to obtain the maximum simulation tick value.
+    - *getTicksUntilMax()*: Used to get the number of ticks remaining until the maximum tick is reached.
+    - *scheduleTickExitFromCurrent(tick)* : Used to schedule an exit event a specified number of ticks in the future.
+    - *scheduleTickExitAbsolute(tick)* : Used to schedule an exit event at a specified tick.
+- We now include the `RiscvMatched` board as part of the gem5 stdlib.
+This board is modeled after the [HiFive Unmatched board](https://www.sifive.com/boards/hifive-unmatched) and may be used to emulate its behavior.
+See "configs/example/gem5_library/riscv-matched-fs.py" and "configs/example/gem5_library/riscv-matched-hello.py" for examples using this board.
+- An API for [SimPoints](https://doi.org/10.1145/885651.781076) has been added.
+SimPoints can substantially improve gem5 Simulation time by only simulating representative parts of a simulation then extrapolating statistical data accordingly.
+Examples of using SimPoints with gem5 can be found in "configs/example/gem5_library/checkpoints/simpoints-se-checkpoint.py" and "configs/example/gem5_library/checkpoints/simpoints-se-restore.py".
+- "Workloads" have been introduced to gem5.
+Workloads have been incorporated into the gem5 Standard library.
+They can be used to specify the software to be run on a simulated system, and come complete with input parameters and any other dependencies necessary to run a simulation on the target hardware.
+At the level of the gem5 configuration script a user may specify a workload via a board's `set_workload` function.
+For example, `set_workload(Workload("x86-ubuntu-18.04-boot"))` sets the board to use the "x86-ubuntu-18.04-boot" workload.
+This workload specifies a boot consisting of the Linux 5.4.49 kernel then booting an Ubuntu 18.04 disk image, to exit upon booting.
+Workloads are agnostic to underlying gem5 design and, via the gem5-resources infrastructure, will automatically retrieve all necessary kernels, disk-images, etc., necessary to execute.
+Examples of using gem5 Workloads can be found in "configs/example/gem5_library/x86-ubuntu-ruby.py" and "configs/example/gem5_library/riscv-ubuntu-run.py".
+- To aid gem5 developers, we have incorporated [pre-commit](https://pre-commit.com) checks into gem5.
+These checks automatically enforce the gem5 style guide on Python files and a subset of other requirements (such as line length) on altered code prior to a `git commit`.
+Users may install pre-commit by running `./util/pre-commit-install.sh`.
+Passing these checks is a requirement to submit code to gem5 so installation is strongly advised.
+- A multiprocessing module has been added.
+This allows for multiple simulations to be run from a single gem5 execution via a single gem5 configuration script.
+Example of usage found [in this commit message](https://gem5-review.googlesource.com/c/public/gem5/+/63432).
+**Note: This feature is still in development.
+While functional, it'll be subject to substantial changes in future releases of gem5**.
+- The stdlib's `ArmBoard` now supports Ruby caches.
+- Due to numerous fixes and improvements, Ubuntu 22.04 can be booted as a gem5 workload, both in FS and SE mode.
+- Substantial improvements have been made to gem5's GDB capabilities.
+- The `HBM2Stack` has been added to the gem5 stdlib as a memory component.
+- The `MinorCPU` has been fully incorporated into the gem5 Standard Library.
+- We now allow for full-system simulation of GPU applications.
+The introduction of GPU FS mode allows for the same use-cases as SE mode but reduces the requirement of specific host environments or usage of a Docker container.
+The GPU FS mode also has improved simulated speed by functionally simulating memory copies, and provides an easier update path for gem5 developers.
+An X86 host and KVM are required to run GPU FS mode.
+
+## API (user facing) changes
+
+- The default CPU Vendor String has been updated to `HygonGenuine`.
+This is due to newer versions of GLIBC being more strict about checking current system's supported features.
+The previous value, `M5 Simulator`, is not recognized as a valid vendor string and therefore GLIBC returns an error.
+- [The stdlib's `_connect_things` function call has been moved from the `AbstractBoard`'s constructor to be run as a board pre-instantiation process](https://gem5-review.googlesource.com/c/public/gem5/+/65051).
+This is to overcome instances where stdlib components (memory, processor, and cache hierarchy) require Board information known only after its construction.
+**This change breaks cases where a user utilizes the stdlib `AbstractBoard` but does not use the stdlib `Simulator` module. This can be fixed by adding the `_pre_instantiate` function before `m5.instantiate`**.
+An exception has been added which explains this fix, if this error occurs.
+- The setting of checkpoints has been moved from the stdlib's "set_workload" functions to the `Simulator` module.
+Setting of checkpoints via the stdlib's "set_workload" functions is now deprecated and will be removed in future releases of gem5.
+- The gem5 namespace `Trace` has been renamed `trace` to conform to the gem5 style guide.
+- Due to the allowing of multiple ISAs per gem5 build, the `TARGET_ISA`  variable has been replaced with `USE_$(ISA)` variables.
+For example, if a build contains both the X86 and ARM ISAs the `USE_X86` and `USE_ARM` variables will be set.
+
+## Bug Fixes
+
+- Several compounding bugs were causing errors with floating point operations within gem5 simulations.
+These have been fixed.
+- Certain emulated syscalls were behaving incorrectly when using RISC-V due to incorrect `open(2)` flag values.
+These values have been fixed.
+- The GICv3 List register mapping has been fixed.
+- Access permissions for GICv3 cpu registers have been fixed.
+- In previous releases of gem5 the `sim_quantum` value was set for all cores when using the Standard Library.
+This caused issues when setting exit events at a particular tick as it resulted in the exit being off by `sim_quantum`.
+As such, the `sim_quantum` value is only set when using KVM cores.
+- PCI ranges in `VExpress_GEM5_Foundation` fixed.
+- The `SwitchableProcessor` processor has been fixed to allow switching to a KVM core.
+Previously the `SwitchableProcessor` only allowed a user to switch from a KVM core to a non-KVM core.
+- The Standard Library has been fixed to permit multicore simulations in SE mode.
+- [A bug was fixed in the rcr X86 instruction](https://gem5.atlassian.net/browse/GEM5-1265).
+
+## Build related changes
+
+- gem5 can now be compiled with Scons 4 build system.
+- gem5 can now be compiled with Clang version 14 (minimum Clang version 6).
+- gem5 can now be compiled with GCC Version 12 (minimum GCC version 7).
+
+
+## Other minor updates
+
+- The gem5 stdlib examples in "configs/example/gem5_library" have been updated to, where appropriate, use the stdlib's Simulator module.
+These example configurations can be used for reference as to how `Simulator` module may be utilized in gem5.
+- Granulated SGPR computation has been added for gfx9 gpu-compute.
+- The stdlib statistics have been improved:
+    - A `get_simstats` function has been added to access statistics from the `Simulator` module.
+    - Statistics can be printed: `print(simstats.board.core.some_integer)`.
+- GDB ports are now specified for each workload, as opposed to per-simulation run.
+- The `m5` utility has been expanded to include "workbegin" and "workend" annotations.
+This can be added with `m5 workbegin` and `m5 workend`.
+- A `PrivateL1SharedL2CacheHierarchy` has been added to the Standard Library.
+- A `GEM5_USE_PROXY` environment variable has been added.
+This allows users to specify a socks5 proxy server to use when obtaining gem5 resources and the resources.json file.
+It uses the format `<host>:<port>`.
+- The fastmodel support has been improved to function with Linux Kernel 5.x.
+- The `set_se_binary_workload` function now allows for the passing of input parameters to a binary workload.
+- A functional CHI cache hierarchy has been added to the gem5 Standard Library: "src/python/gem5/components/cachehierarchies/chi/private_l1_cache_hierarchy.py".
+- The RISC-V K extension has been added.
+It includes the following instructions:
+  - Zbkx: xperm8, xperm4
+  - Zknd: aes64ds, aes64dsm, aes64im, aes64ks1i, aes64ks2
+  - Zkne: aes64es, aes64esm, aes64ks1i, aes64ks2
+  - Zknh: sha256sig0, sha256sig1, sha256sum0, sha256sum1, sha512sig0, sha512sig1, sha512sum0, sha512sum1
+  - Zksed: sm4ed, sm4ks
+  - Zksh: sm3p0, sm3p1
+
 # Version 22.0.0.2
 
 **[HOTFIX]** This hotfix contains a set of critical fixes to be applied to gem5 v22.0.
diff --git a/SConstruct b/SConstruct
index f1f1c64..e8107ea 100755
--- a/SConstruct
+++ b/SConstruct
@@ -319,7 +319,10 @@
         if conf.TryAction(f'@{python_config} --embed')[0]:
             cmd.append('--embed')
 
-    def flag_filter(env, cmd_output):
+    def flag_filter(env, cmd_output, unique=True):
+        # Since this function does not use the `unique` param, one should not
+        # pass any value to this param.
+        assert(unique==True)
         flags = cmd_output.split()
         prefixes = ('-l', '-L', '-I')
         is_useful = lambda x: any(x.startswith(prefix) for prefix in prefixes)
@@ -417,7 +420,6 @@
                     conf.CheckLinkFlag('-Wl,--threads')
                     conf.CheckLinkFlag(
                             '-Wl,--thread-count=%d' % GetOption('num_jobs'))
-
     else:
         error('\n'.join((
               "Don't know what compiler options to use for your compiler.",
diff --git a/TESTING.md b/TESTING.md
index 88d1f29..2273e31 100644
--- a/TESTING.md
+++ b/TESTING.md
@@ -15,7 +15,7 @@
 To build and run all the unit tests:
 
 ```shell
-scons build/NULL/unittests.opt
+scons build/ALL/unittests.opt
 ```
 
 All unit tests should be run prior to posting a patch to
@@ -25,20 +25,20 @@
 `src/base/bitunion.test.cc`):
 
 ```shell
-scons build/NULL/base/bitunion.test.opt
-./build/NULL/base/bitunion.test.opt
+scons build/ALL/base/bitunion.test.opt
+./build/ALL/base/bitunion.test.opt
 ```
 
 To list the available test functions from a test file:
 
 ```shell
-./build/NULL/base/bitunion.test.opt --gtest_list_tests
+./build/ALL/base/bitunion.test.opt --gtest_list_tests
 ```
 
 To run a specific test function (e.g., BitUnionData.NormalBitfield):
 
 ```shell
-./build/NULL/base/bitunion.test.opt --gtest_filter=BitUnionData.NormalBitfield
+./build/ALL/base/bitunion.test.opt --gtest_filter=BitUnionData.NormalBitfield
 ```
 
 # Running system-level tests
@@ -246,10 +246,9 @@
 ## Running Tests in Parallel
 
 Whimsy has support for parallel testing baked in. This system supports
-running multiple suites at the same time on the same computer. To run 
+running multiple suites at the same time on the same computer. To run
 suites in parallel, supply the `-t <number-tests>` flag to the run command.
 
 For example, to run up to three test suites at the same time::
 
     ./main.py run --skip-build -t 3
-
diff --git a/build_opts/ALL b/build_opts/ALL
new file mode 100644
index 0000000..6e5ede2
--- /dev/null
+++ b/build_opts/ALL
@@ -0,0 +1,7 @@
+USE_ARM_ISA = True
+USE_MIPS_ISA = True
+USE_POWER_ISA = True
+USE_RISCV_ISA = True
+USE_SPARC_ISA = True
+USE_X86_ISA = True
+PROTOCOL = 'MESI_Two_Level'
diff --git a/build_opts/ARM b/build_opts/ARM
index 5b7da10..8c30c21 100644
--- a/build_opts/ARM
+++ b/build_opts/ARM
@@ -1,2 +1,2 @@
-TARGET_ISA = 'arm'
+USE_ARM_ISA = True
 PROTOCOL = 'CHI'
diff --git a/build_opts/ARM_MESI_Three_Level b/build_opts/ARM_MESI_Three_Level
index 2ca31b6..3057bec 100644
--- a/build_opts/ARM_MESI_Three_Level
+++ b/build_opts/ARM_MESI_Three_Level
@@ -1,5 +1,5 @@
 # Copyright (c) 2019 ARM Limited
 # All rights reserved.
 
-TARGET_ISA = 'arm'
+USE_ARM_ISA = True
 PROTOCOL = 'MESI_Three_Level'
diff --git a/build_opts/ARM_MESI_Three_Level_HTM b/build_opts/ARM_MESI_Three_Level_HTM
index 703398d..7f80c4e 100644
--- a/build_opts/ARM_MESI_Three_Level_HTM
+++ b/build_opts/ARM_MESI_Three_Level_HTM
@@ -1,5 +1,5 @@
 # Copyright (c) 2019 ARM Limited
 # All rights reserved.
 
-TARGET_ISA = 'arm'
+USE_ARM_ISA = True
 PROTOCOL = 'MESI_Three_Level_HTM'
diff --git a/build_opts/ARM_MOESI_hammer b/build_opts/ARM_MOESI_hammer
index bd5c63f..5322fd9 100644
--- a/build_opts/ARM_MOESI_hammer
+++ b/build_opts/ARM_MOESI_hammer
@@ -1,5 +1,5 @@
 # Copyright (c) 2019 ARM Limited
 # All rights reserved.
 
-TARGET_ISA = 'arm'
+USE_ARM_ISA = True
 PROTOCOL = 'MOESI_hammer'
diff --git a/build_opts/GCN3_X86 b/build_opts/GCN3_X86
index b396908..aca2f62 100644
--- a/build_opts/GCN3_X86
+++ b/build_opts/GCN3_X86
@@ -1,4 +1,4 @@
 PROTOCOL = 'GPU_VIPER'
-TARGET_ISA = 'x86'
+USE_X86_ISA = True
 TARGET_GPU_ISA = 'gcn3'
 BUILD_GPU = True
diff --git a/build_opts/Garnet_standalone b/build_opts/Garnet_standalone
index fd730c3..2351c52 100644
--- a/build_opts/Garnet_standalone
+++ b/build_opts/Garnet_standalone
@@ -1,2 +1,2 @@
-TARGET_ISA = 'null'
+USE_NULL_ISA = True
 PROTOCOL = 'Garnet_standalone'
diff --git a/build_opts/MIPS b/build_opts/MIPS
index 26cb23c..382e101 100644
--- a/build_opts/MIPS
+++ b/build_opts/MIPS
@@ -1,2 +1,2 @@
-TARGET_ISA = 'mips'
+USE_MIPS_ISA = True
 PROTOCOL = 'MI_example'
diff --git a/build_opts/NULL b/build_opts/NULL
index b749729..51e287a 100644
--- a/build_opts/NULL
+++ b/build_opts/NULL
@@ -1,2 +1,2 @@
-TARGET_ISA = 'null'
+USE_NULL_ISA = True
 PROTOCOL='MI_example'
diff --git a/build_opts/NULL_MESI_Two_Level b/build_opts/NULL_MESI_Two_Level
index 09147b2..bafb199 100644
--- a/build_opts/NULL_MESI_Two_Level
+++ b/build_opts/NULL_MESI_Two_Level
@@ -1,2 +1,2 @@
-TARGET_ISA = 'null'
+USE_NULL_ISA = True
 PROTOCOL = 'MESI_Two_Level'
diff --git a/build_opts/NULL_MOESI_CMP_directory b/build_opts/NULL_MOESI_CMP_directory
index 466a268..3346964 100644
--- a/build_opts/NULL_MOESI_CMP_directory
+++ b/build_opts/NULL_MOESI_CMP_directory
@@ -1,2 +1,2 @@
-TARGET_ISA = 'null'
+USE_NULL_ISA = True
 PROTOCOL='MOESI_CMP_directory'
diff --git a/build_opts/NULL_MOESI_CMP_token b/build_opts/NULL_MOESI_CMP_token
index 0cd0305..4ea9e70 100644
--- a/build_opts/NULL_MOESI_CMP_token
+++ b/build_opts/NULL_MOESI_CMP_token
@@ -1,2 +1,2 @@
-TARGET_ISA = 'null'
+USE_NULL_ISA = True
 PROTOCOL='MOESI_CMP_token'
diff --git a/build_opts/NULL_MOESI_hammer b/build_opts/NULL_MOESI_hammer
index 39ebcae..e91b78d 100644
--- a/build_opts/NULL_MOESI_hammer
+++ b/build_opts/NULL_MOESI_hammer
@@ -1,2 +1,2 @@
-TARGET_ISA = 'null'
+USE_NULL_ISA = True
 PROTOCOL='MOESI_hammer'
diff --git a/build_opts/POWER b/build_opts/POWER
index 35772a4..207356c 100644
--- a/build_opts/POWER
+++ b/build_opts/POWER
@@ -1,2 +1,2 @@
-TARGET_ISA = 'power'
+USE_POWER_ISA = True
 PROTOCOL = 'MI_example'
diff --git a/build_opts/RISCV b/build_opts/RISCV
index 0bd069d..22097b0 100644
--- a/build_opts/RISCV
+++ b/build_opts/RISCV
@@ -1,2 +1,2 @@
-TARGET_ISA = 'riscv'
+USE_RISCV_ISA = True
 PROTOCOL = 'MI_example'
diff --git a/build_opts/SPARC b/build_opts/SPARC
index 98acfe2..22dec5f 100644
--- a/build_opts/SPARC
+++ b/build_opts/SPARC
@@ -1,2 +1,2 @@
-TARGET_ISA = 'sparc'
+USE_SPARC_ISA = True
 PROTOCOL = 'MI_example'
diff --git a/build_opts/VEGA_X86 b/build_opts/VEGA_X86
index 11e8232..437b048 100644
--- a/build_opts/VEGA_X86
+++ b/build_opts/VEGA_X86
@@ -1,4 +1,4 @@
 PROTOCOL = 'GPU_VIPER'
-TARGET_ISA = 'x86'
+USE_X86_ISA = True
 TARGET_GPU_ISA = 'vega'
 BUILD_GPU = True
diff --git a/build_opts/X86 b/build_opts/X86
index 72b200a..259325b 100644
--- a/build_opts/X86
+++ b/build_opts/X86
@@ -1,3 +1,3 @@
-TARGET_ISA = 'x86'
+USE_X86_ISA = True
 PROTOCOL = 'MESI_Two_Level'
 NUMBER_BITS_PER_SET = '128'
diff --git a/build_opts/X86_MESI_Two_Level b/build_opts/X86_MESI_Two_Level
index 72b200a..259325b 100644
--- a/build_opts/X86_MESI_Two_Level
+++ b/build_opts/X86_MESI_Two_Level
@@ -1,3 +1,3 @@
-TARGET_ISA = 'x86'
+USE_X86_ISA = True
 PROTOCOL = 'MESI_Two_Level'
 NUMBER_BITS_PER_SET = '128'
diff --git a/build_opts/X86_MI_example b/build_opts/X86_MI_example
index 483cf04..71bc9a5 100644
--- a/build_opts/X86_MI_example
+++ b/build_opts/X86_MI_example
@@ -1,2 +1,2 @@
-TARGET_ISA = 'x86'
+USE_X86_ISA = True
 PROTOCOL = 'MI_example'
diff --git a/build_opts/X86_MOESI_AMD_Base b/build_opts/X86_MOESI_AMD_Base
index 261bedb..f8f2ce7 100644
--- a/build_opts/X86_MOESI_AMD_Base
+++ b/build_opts/X86_MOESI_AMD_Base
@@ -1,2 +1,2 @@
 PROTOCOL = 'MOESI_AMD_Base'
-TARGET_ISA = 'x86'
+USE_X86_ISA = True
diff --git a/build_tools/blob.py b/build_tools/blob.py
index 3d93c45..b3d2d0f 100644
--- a/build_tools/blob.py
+++ b/build_tools/blob.py
@@ -26,16 +26,17 @@
 import array
 import functools
 
+
 def bytesToCppArray(code, symbol, data):
-    '''
+    """
     Output an array of bytes to a code formatter as a c++ array declaration.
-    '''
-    code('const std::uint8_t ${symbol}[] = {')
+    """
+    code("const std::uint8_t ${symbol}[] = {")
     code.indent()
     step = 16
     for i in range(0, len(data), step):
-        x = array.array('B', data[i:i+step])
-        strs = map(lambda i: f'{i},', x)
+        x = array.array("B", data[i : i + step])
+        strs = map(lambda i: f"{i},", x)
         code(functools.reduce(lambda x, y: x + y, strs))
     code.dedent()
-    code('};')
+    code("};")
diff --git a/build_tools/code_formatter.py b/build_tools/code_formatter.py
index 374e8cc..a2651c9 100644
--- a/build_tools/code_formatter.py
+++ b/build_tools/code_formatter.py
@@ -1,3 +1,15 @@
+# Copyright (c) 2022 Arm Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
 # Copyright (c) 2006-2009 Nathan Binkert <nate@binkert.org>
 # All rights reserved.
 #
@@ -33,6 +45,7 @@
 import os
 import re
 
+
 class lookup(object):
     def __init__(self, formatter, frame, *args, **kwargs):
         self.frame = frame
@@ -52,10 +65,10 @@
         if item in self.kwargs:
             return self.kwargs[item]
 
-        if item == '__file__':
+        if item == "__file__":
             return self.frame.f_code.co_filename
 
-        if item == '__line__':
+        if item == "__line__":
             return self.frame.f_lineno
 
         if self.formatter.locals and item in self.frame.f_locals:
@@ -77,6 +90,7 @@
             pass
         raise IndexError("Could not find '%s'" % item)
 
+
 class code_formatter_meta(type):
     pattern = r"""
     (?:
@@ -90,44 +104,48 @@
       %(delim)s(?P<invalid>)                       # ill-formed delimiter exprs
     )
     """
+
     def __init__(cls, name, bases, dct):
         super(code_formatter_meta, cls).__init__(name, bases, dct)
-        if 'pattern' in dct:
+        if "pattern" in dct:
             pat = cls.pattern
         else:
             # tuple expansion to ensure strings are proper length
-            lb,rb = cls.braced
-            lb1,lb2,rb2,rb1 = cls.double_braced
+            lb, rb = cls.braced
+            lb1, lb2, rb2, rb1 = cls.double_braced
             pat = code_formatter_meta.pattern % {
-                'delim' : re.escape(cls.delim),
-                'ident' : cls.ident,
-                'pos' : cls.pos,
-                'lb' : re.escape(lb),
-                'rb' : re.escape(rb),
-                'ldb' : re.escape(lb1+lb2),
-                'rdb' : re.escape(rb2+rb1),
-                }
+                "delim": re.escape(cls.delim),
+                "ident": cls.ident,
+                "pos": cls.pos,
+                "lb": re.escape(lb),
+                "rb": re.escape(rb),
+                "ldb": re.escape(lb1 + lb2),
+                "rdb": re.escape(rb2 + rb1),
+            }
         cls.pattern = re.compile(pat, re.VERBOSE | re.DOTALL | re.MULTILINE)
 
+
 class code_formatter(object, metaclass=code_formatter_meta):
-    delim = r'$'
-    ident = r'[_A-z]\w*'
-    pos = r'[0-9]+'
-    braced = r'{}'
-    double_braced = r'{{}}'
+    delim = r"$"
+    ident = r"[_A-z]\w*"
+    pos = r"[0-9]+"
+    braced = r"{}"
+    double_braced = r"{{}}"
 
     globals = True
     locals = True
     fix_newlines = True
+
     def __init__(self, *args, **kwargs):
         self._data = []
         self._dict = {}
         self._indent_level = 0
         self._indent_spaces = 4
-        self.globals = kwargs.pop('globals', type(self).globals)
-        self.locals = kwargs.pop('locals', type(self).locals)
-        self._fix_newlines = \
-                kwargs.pop('fix_newlines', type(self).fix_newlines)
+        self.globals = kwargs.pop("globals", type(self).globals)
+        self.locals = kwargs.pop("locals", type(self).locals)
+        self._fix_newlines = kwargs.pop(
+            "fix_newlines", type(self).fix_newlines
+        )
 
         if args:
             self.__call__(args)
@@ -159,38 +177,44 @@
         # Add a comment to inform which file generated the generated file
         # to make it easier to backtrack and modify generated code
         frame = inspect.currentframe().f_back
-        if re.match('\.(cc|hh|c|h)', extension) is not None:
-            f.write(f'''/**
+        if re.match(r"^\.(cc|hh|c|h)$", extension) is not None:
+            f.write(
+                f"""/**
  * DO NOT EDIT THIS FILE!
  * File automatically generated by
  *   {frame.f_code.co_filename}:{frame.f_lineno}
  */
 
-''')
-        elif re.match('\.py', extension) is not None:
-            f.write(f'''#
+"""
+            )
+        elif re.match(r"^\.py$", extension) is not None:
+            f.write(
+                f"""#
 # DO NOT EDIT THIS FILE!
 # File automatically generated by
 #   {frame.f_code.co_filename}:{frame.f_lineno}
 #
 
-''')
-        elif re.match('\.html', extension) is not None:
-            f.write(f'''<!--
+"""
+            )
+        elif re.match(r"^\.html$", extension) is not None:
+            f.write(
+                f"""<!--
  DO NOT EDIT THIS FILE!
  File automatically generated by
    {frame.f_code.co_filename}:{frame.f_lineno}
 -->
 
-''')
+"""
+            )
 
         for data in self._data:
             f.write(data)
         f.close()
 
     def __str__(self):
-        data = ''.join(self._data)
-        self._data = [ data ]
+        data = "".join(self._data)
+        self._data = [data]
         return data
 
     def __getitem__(self, item):
@@ -219,21 +243,21 @@
             self._data.append(data)
             return
 
-        initial_newline = not self._data or self._data[-1] == '\n'
+        initial_newline = not self._data or self._data[-1] == "\n"
         for line in data.splitlines():
             if line:
                 if self._indent_level:
-                    self._data.append(' ' * self._indent_level)
+                    self._data.append(" " * self._indent_level)
                 self._data.append(line)
 
             if line or not initial_newline:
-                self._data.append('\n')
+                self._data.append("\n")
 
             initial_newline = False
 
     def __call__(self, *args, **kwargs):
         if not args:
-            self._data.append('\n')
+            self._data.append("\n")
             return
 
         format = args[0]
@@ -242,51 +266,56 @@
         frame = inspect.currentframe().f_back
 
         l = lookup(self, frame, *args, **kwargs)
+
         def convert(match):
-            ident = match.group('lone')
+            ident = match.group("lone")
             # check for a lone identifier
             if ident:
-                indent = match.group('indent') # must be spaces
-                lone = '%s' % (l[ident], )
+                indent = match.group("indent")  # must be spaces
+                lone = "%s" % (l[ident],)
 
                 def indent_lines(gen):
                     for line in gen:
                         yield indent
                         yield line
-                return ''.join(indent_lines(lone.splitlines(True)))
+
+                return "".join(indent_lines(lone.splitlines(True)))
 
             # check for an identifier, braced or not
-            ident = match.group('ident') or match.group('b_ident')
+            ident = match.group("ident") or match.group("b_ident")
             if ident is not None:
-                return '%s' % (l[ident], )
+                return "%s" % (l[ident],)
 
             # check for a positional parameter, braced or not
-            pos = match.group('pos') or match.group('b_pos')
+            pos = match.group("pos") or match.group("b_pos")
             if pos is not None:
                 pos = int(pos)
                 if pos > len(args):
-                    raise ValueError \
-                        ('Positional parameter #%d not found in pattern' % pos,
-                         code_formatter.pattern)
-                return '%s' % (args[int(pos)], )
+                    raise ValueError(
+                        "Positional parameter #%d not found in pattern" % pos,
+                        code_formatter.pattern,
+                    )
+                return "%s" % (args[int(pos)],)
 
             # check for a double braced expression
-            eval_expr = match.group('eval')
+            eval_expr = match.group("eval")
             if eval_expr is not None:
                 result = eval(eval_expr, {}, l)
-                return '%s' % (result, )
+                return "%s" % (result,)
 
             # check for an escaped delimiter
-            if match.group('escaped') is not None:
-                return '$'
+            if match.group("escaped") is not None:
+                return "$"
 
             # At this point, we have to match invalid
-            if match.group('invalid') is None:
+            if match.group("invalid") is None:
                 # didn't match invalid!
-                raise ValueError('Unrecognized named group in pattern',
-                                 code_formatter.pattern)
+                raise ValueError(
+                    "Unrecognized named group in pattern",
+                    code_formatter.pattern,
+                )
 
-            i = match.start('invalid')
+            i = match.start("invalid")
             if i == 0:
                 colno = 1
                 lineno = 1
@@ -295,52 +324,64 @@
                 colno = i - sum(len(z) for z in lines)
                 lineno = len(lines)
 
-                raise ValueError('Invalid format string: line %d, col %d' %
-                                 (lineno, colno))
+                raise ValueError(
+                    "Invalid format string: line %d, col %d" % (lineno, colno)
+                )
 
         d = code_formatter.pattern.sub(convert, format)
         self._append(d)
 
-__all__ = [ "code_formatter" ]
 
-if __name__ == '__main__':
+__all__ = ["code_formatter"]
+
+if __name__ == "__main__":
     from .code_formatter import code_formatter
+
     f = code_formatter()
 
     class Foo(dict):
         def __init__(self, **kwargs):
             self.update(kwargs)
+
         def __getattr__(self, attr):
             return self[attr]
 
     x = "this is a test"
-    l = [ [Foo(x=[Foo(y=9)])] ]
+    l = [[Foo(x=[Foo(y=9)])]]
 
     y = code_formatter()
-    y('''
+    y(
+        """
 {
     this_is_a_test();
 }
-''')
-    f('    $y')
-    f('''$__file__:$__line__
-{''')
+"""
+    )
+    f("    $y")
+    f(
+        """$__file__:$__line__
+{"""
+    )
     f("${{', '.join(str(x) for x in range(4))}}")
-    f('${x}')
-    f('$x')
+    f("${x}")
+    f("$x")
     f.indent()
     for i in range(5):
-        f('$x')
-        f('$i')
-        f('$0', "zero")
-        f('$1 $0', "zero", "one")
-        f('${0}', "he went")
-        f('${0}asdf', "he went")
+        f("$x")
+        f("$i")
+        f("$0", "zero")
+        f("$1 $0", "zero", "one")
+        f("${0}", "he went")
+        f("${0}asdf", "he went")
     f.dedent()
 
-    f('''
+    f(
+        """
     ${{l[0][0]["x"][0].y}}
 }
-''', 1, 9)
+""",
+        1,
+        9,
+    )
 
-    print(f, end=' ')
+    print(f, end=" ")
diff --git a/build_tools/cxx_config_cc.py b/build_tools/cxx_config_cc.py
index c4a2d89..a908aa8 100644
--- a/build_tools/cxx_config_cc.py
+++ b/build_tools/cxx_config_cc.py
@@ -46,8 +46,8 @@
 from code_formatter import code_formatter
 
 parser = argparse.ArgumentParser()
-parser.add_argument('modpath', help='module the simobject belongs to')
-parser.add_argument('cxx_config_cc', help='cxx config cc file to generate')
+parser.add_argument("modpath", help="module the simobject belongs to")
+parser.add_argument("cxx_config_cc", help="cxx config cc file to generate")
 
 args = parser.parse_args()
 
@@ -63,22 +63,25 @@
 
 code = code_formatter()
 
-entry_class = 'CxxConfigDirectoryEntry_%s' % sim_object_name
-param_class = '%sCxxConfigParams' % sim_object_name
+entry_class = "CxxConfigDirectoryEntry_%s" % sim_object_name
+param_class = "%sCxxConfigParams" % sim_object_name
+
 
 def cxx_bool(b):
-    return 'true' if b else 'false'
+    return "true" if b else "false"
+
 
 code('#include "params/%s.hh"' % sim_object_name)
 
 for param in sim_object._params.values():
     if isSimObjectClass(param.ptype):
-        code('#include "%s"' % param.ptype._value_dict['cxx_header'])
+        code('#include "%s"' % param.ptype._value_dict["cxx_header"])
         code('#include "params/%s.hh"' % param.ptype.__name__)
     else:
         param.ptype.cxx_ini_predecls(code)
 
-code('''#include "${{sim_object._value_dict['cxx_header']}}"
+code(
+    """#include "${{sim_object._value_dict['cxx_header']}}"
 #include "base/str.hh"
 #include "cxx_config/${sim_object_name}.hh"
 
@@ -87,34 +90,39 @@
 
 ${param_class}::DirectoryEntry::DirectoryEntry()
 {
-''')
+"""
+)
 code.indent()
 for param in sim_object._params.values():
     is_vector = isinstance(param, m5.params.VectorParamDesc)
     is_simobj = issubclass(param.ptype, m5.SimObject.SimObject)
 
-    code('parameters["%s"] = new ParamDesc("%s", %s, %s);' %
-        (param.name, param.name, cxx_bool(is_vector),
-        cxx_bool(is_simobj)));
+    code(
+        'parameters["%s"] = new ParamDesc("%s", %s, %s);'
+        % (param.name, param.name, cxx_bool(is_vector), cxx_bool(is_simobj))
+    )
 
 for port in sim_object._ports.values():
     is_vector = isinstance(port, m5.params.VectorPort)
-    is_requestor = port.role == 'GEM5 REQUESTOR'
+    is_requestor = port.role == "GEM5 REQUESTOR"
 
-    code('ports["%s"] = new PortDesc("%s", %s, %s);' %
-        (port.name, port.name, cxx_bool(is_vector),
-        cxx_bool(is_requestor)))
+    code(
+        'ports["%s"] = new PortDesc("%s", %s, %s);'
+        % (port.name, port.name, cxx_bool(is_vector), cxx_bool(is_requestor))
+    )
 
 code.dedent()
 
-code('''}
+code(
+    """}
 
 bool
 ${param_class}::setSimObject(const std::string &name, SimObject *simObject)
 {
     bool ret = true;
     if (false) {
-''')
+"""
+)
 
 code.indent()
 for param in sim_object._params.values():
@@ -124,14 +132,17 @@
     if is_simobj and not is_vector:
         code('} else if (name == "${{param.name}}") {')
         code.indent()
-        code('this->${{param.name}} = '
-            'dynamic_cast<${{param.ptype.cxx_type}}>(simObject);')
-        code('if (simObject && !this->${{param.name}})')
-        code('   ret = false;')
+        code(
+            "this->${{param.name}} = "
+            "dynamic_cast<${{param.ptype.cxx_type}}>(simObject);"
+        )
+        code("if (simObject && !this->${{param.name}})")
+        code("   ret = false;")
         code.dedent()
 code.dedent()
 
-code('''
+code(
+    """
     } else {
         ret = false;
     }
@@ -146,7 +157,8 @@
     bool ret = true;
 
     if (false) {
-''')
+"""
+)
 
 code.indent()
 for param in sim_object._params.values():
@@ -156,23 +168,28 @@
     if is_simobj and is_vector:
         code('} else if (name == "${{param.name}}") {')
         code.indent()
-        code('this->${{param.name}}.clear();')
-        code('for (auto i = simObjects.begin(); '
-            'ret && i != simObjects.end(); i ++)')
-        code('{')
+        code("this->${{param.name}}.clear();")
+        code(
+            "for (auto i = simObjects.begin(); "
+            "ret && i != simObjects.end(); i ++)"
+        )
+        code("{")
         code.indent()
-        code('${{param.ptype.cxx_type}} object = '
-            'dynamic_cast<${{param.ptype.cxx_type}}>(*i);')
-        code('if (*i && !object)')
-        code('    ret = false;')
-        code('else')
-        code('    this->${{param.name}}.push_back(object);')
+        code(
+            "${{param.ptype.cxx_type}} object = "
+            "dynamic_cast<${{param.ptype.cxx_type}}>(*i);"
+        )
+        code("if (*i && !object)")
+        code("    ret = false;")
+        code("else")
+        code("    this->${{param.name}}.push_back(object);")
         code.dedent()
-        code('}')
+        code("}")
         code.dedent()
 code.dedent()
 
-code('''
+code(
+    """
     } else {
         ret = false;
     }
@@ -193,7 +210,8 @@
     bool ret = true;
 
     if (false) {
-''')
+"""
+)
 
 code.indent()
 for param in sim_object._params.values():
@@ -203,12 +221,14 @@
     if not is_simobj and not is_vector:
         code('} else if (name == "${{param.name}}") {')
         code.indent()
-        param.ptype.cxx_ini_parse(code,
-            'value', 'this->%s' % param.name, 'ret =')
+        param.ptype.cxx_ini_parse(
+            code, "value", "this->%s" % param.name, "ret ="
+        )
         code.dedent()
 code.dedent()
 
-code('''
+code(
+    """
     } else {
         ret = false;
     }
@@ -223,7 +243,8 @@
     bool ret = true;
 
     if (false) {
-''')
+"""
+)
 
 code.indent()
 for param in sim_object._params.values():
@@ -233,22 +254,23 @@
     if not is_simobj and is_vector:
         code('} else if (name == "${{param.name}}") {')
         code.indent()
-        code('${{param.name}}.clear();')
-        code('for (auto i = values.begin(); '
-            'ret && i != values.end(); i ++)')
-        code('{')
+        code("${{param.name}}.clear();")
+        code(
+            "for (auto i = values.begin(); " "ret && i != values.end(); i ++)"
+        )
+        code("{")
         code.indent()
-        code('${{param.ptype.cxx_type}} elem;')
-        param.ptype.cxx_ini_parse(code,
-            '*i', 'elem', 'ret =')
-        code('if (ret)')
-        code('    this->${{param.name}}.push_back(elem);')
+        code("${{param.ptype.cxx_type}} elem;")
+        param.ptype.cxx_ini_parse(code, "*i", "elem", "ret =")
+        code("if (ret)")
+        code("    this->${{param.name}}.push_back(elem);")
         code.dedent()
-        code('}')
+        code("}")
         code.dedent()
 code.dedent()
 
-code('''
+code(
+    """
     } else {
         ret = false;
     }
@@ -263,15 +285,17 @@
     bool ret = true;
 
     if (false) {
-''')
+"""
+)
 
 code.indent()
 for port in sim_object._ports.values():
     code('} else if (name == "${{port.name}}") {')
-    code('    this->port_${{port.name}}_connection_count = count;')
+    code("    this->port_${{port.name}}_connection_count = count;")
 code.dedent()
 
-code('''
+code(
+    """
     } else {
         ret = false;
     }
@@ -282,18 +306,21 @@
 SimObject *
 ${param_class}::simObjectCreate()
 {
-''')
+"""
+)
 
 code.indent()
-if hasattr(sim_object, 'abstract') and sim_object.abstract:
-    code('return nullptr;')
+if hasattr(sim_object, "abstract") and sim_object.abstract:
+    code("return nullptr;")
 else:
-    code('return this->create();')
+    code("return this->create();")
 code.dedent()
 
-code('''}
+code(
+    """}
 
 } // namespace gem5
-''')
+"""
+)
 
 code.write(args.cxx_config_cc)
diff --git a/build_tools/cxx_config_hh.py b/build_tools/cxx_config_hh.py
index 652c488..55828e3 100644
--- a/build_tools/cxx_config_hh.py
+++ b/build_tools/cxx_config_hh.py
@@ -46,8 +46,8 @@
 from code_formatter import code_formatter
 
 parser = argparse.ArgumentParser()
-parser.add_argument('modpath', help='module the simobject belongs to')
-parser.add_argument('cxx_config_hh', help='cxx config header file to generate')
+parser.add_argument("modpath", help="module the simobject belongs to")
+parser.add_argument("cxx_config_hh", help="cxx config header file to generate")
 
 args = parser.parse_args()
 
@@ -60,10 +60,11 @@
 
 code = code_formatter()
 
-entry_class = 'CxxConfigDirectoryEntry_%s' % sim_object_name
-param_class = '%sCxxConfigParams' % sim_object_name
+entry_class = "CxxConfigDirectoryEntry_%s" % sim_object_name
+param_class = "%sCxxConfigParams" % sim_object_name
 
-code('''#include "params/${sim_object_name}.hh"
+code(
+    """#include "params/${sim_object_name}.hh"
 
 #include "sim/cxx_config.hh"
 
@@ -110,6 +111,7 @@
 };
 
 } // namespace gem5
-''')
+"""
+)
 
 code.write(args.cxx_config_hh)
diff --git a/build_tools/debugflaghh.py b/build_tools/debugflaghh.py
index fc86cb0..2e861e2 100644
--- a/build_tools/debugflaghh.py
+++ b/build_tools/debugflaghh.py
@@ -44,35 +44,41 @@
 parser.add_argument("hh", help="the path of the debug flag header file")
 parser.add_argument("name", help="the name of the debug flag")
 parser.add_argument("desc", help="a description of the debug flag")
-parser.add_argument("fmt",
-        help="whether the flag is a format flag (True or False)")
-parser.add_argument("components",
-        help="components of a compound flag, if applicable, joined with :")
+parser.add_argument(
+    "fmt", help="whether the flag is a format flag (True or False)"
+)
+parser.add_argument(
+    "components",
+    help="components of a compound flag, if applicable, joined with :",
+)
 
 args = parser.parse_args()
 
 fmt = args.fmt.lower()
-if fmt == 'true':
+if fmt == "true":
     fmt = True
-elif fmt == 'false':
+elif fmt == "false":
     fmt = False
 else:
     print(f'Unrecognized "FMT" value {fmt}', file=sys.stderr)
     sys.exit(1)
-components = args.components.split(':') if args.components else []
+components = args.components.split(":") if args.components else []
 
 code = code_formatter()
 
-code('''
+code(
+    """
 #ifndef __DEBUG_${{args.name}}_HH__
 #define __DEBUG_${{args.name}}_HH__
 
 #include "base/compiler.hh" // For namespace deprecation
 #include "base/debug.hh"
-''')
+"""
+)
 for flag in components:
     code('#include "debug/${flag}.hh"')
-code('''
+code(
+    """
 namespace gem5
 {
 
@@ -82,14 +88,16 @@
 
 namespace unions
 {
-''')
+"""
+)
 
 # Use unions to prevent debug flags from being destructed. It's the
 # responsibility of the programmer to handle object destruction for members
 # of the union. We purposefully leave that destructor empty so that we can
 # use debug flags even in the destructors of other objects.
 if components:
-    code('''
+    code(
+        """
 inline union ${{args.name}}
 {
     ~${{args.name}}() {}
@@ -100,9 +108,11 @@
         }
     };
 } ${{args.name}};
-''')
+"""
+    )
 else:
-    code('''
+    code(
+        """
 inline union ${{args.name}}
 {
     ~${{args.name}}() {}
@@ -110,18 +120,21 @@
         "${{args.name}}", "${{args.desc}}", ${{"true" if fmt else "false"}}
     };
 } ${{args.name}};
-''')
+"""
+    )
 
-code('''
+code(
+    """
 } // namespace unions
 
-inline constexpr const auto& ${{args.name}} = 
+inline constexpr const auto& ${{args.name}} =
     ::gem5::debug::unions::${{args.name}}.${{args.name}};
 
 } // namespace debug
 } // namespace gem5
 
 #endif // __DEBUG_${{args.name}}_HH__
-''')
+"""
+)
 
 code.write(args.hh)
diff --git a/build_tools/enum_cc.py b/build_tools/enum_cc.py
index c706ffe..476e49d 100644
--- a/build_tools/enum_cc.py
+++ b/build_tools/enum_cc.py
@@ -46,17 +46,18 @@
 from code_formatter import code_formatter
 
 parser = argparse.ArgumentParser()
-parser.add_argument('modpath', help='module the enum belongs to')
-parser.add_argument('enum_cc', help='enum cc file to generate')
-parser.add_argument('use_python',
-        help='whether python is enabled in gem5 (True or False)')
+parser.add_argument("modpath", help="module the enum belongs to")
+parser.add_argument("enum_cc", help="enum cc file to generate")
+parser.add_argument(
+    "use_python", help="whether python is enabled in gem5 (True or False)"
+)
 
 args = parser.parse_args()
 
 use_python = args.use_python.lower()
-if use_python == 'true':
+if use_python == "true":
     use_python = True
-elif use_python == 'false':
+elif use_python == "false":
     use_python = False
 else:
     print(f'Unrecognized "use_python" value {use_python}', file=sys.stderr)
@@ -75,41 +76,46 @@
 file_name = enum.__name__
 name = enum.__name__ if enum.enum_name is None else enum.enum_name
 
-code('''#include "base/compiler.hh"
+code(
+    """#include "base/compiler.hh"
 #include "enums/$file_name.hh"
 
 namespace gem5
 {
 
-''')
+"""
+)
 
 if enum.wrapper_is_struct:
-    code('const char *${wrapper_name}::${name}Strings'
-        '[Num_${name}] =')
+    code("const char *${wrapper_name}::${name}Strings" "[Num_${name}] =")
 else:
     if enum.is_class:
-        code('''\
+        code(
+            """\
 const char *${name}Strings[static_cast<int>(${name}::Num_${name})] =
-''')
+"""
+        )
     else:
-        code('''GEM5_DEPRECATED_NAMESPACE(Enums, enums);
+        code(
+            """GEM5_DEPRECATED_NAMESPACE(Enums, enums);
 namespace enums
-{''')
+{"""
+        )
         code.indent(1)
-        code('const char *${name}Strings[Num_${name}] =')
+        code("const char *${name}Strings[Num_${name}] =")
 
-code('{')
+code("{")
 code.indent(1)
 for val in enum.vals:
     code('"$val",')
 code.dedent(1)
-code('};')
+code("};")
 
 if not enum.wrapper_is_struct and not enum.is_class:
     code.dedent(1)
-    code('} // namespace enums')
+    code("} // namespace enums")
 
-code('} // namespace gem5')
+code("} // namespace gem5")
 
 
 if use_python:
@@ -118,7 +124,8 @@
     enum_name = enum.__name__ if enum.enum_name is None else enum.enum_name
     wrapper_name = enum_name if enum.is_class else enum.wrapper_name
 
-    code('''#include "pybind11/pybind11.h"
+    code(
+        """#include "pybind11/pybind11.h"
 #include "pybind11/stl.h"
 
 #include <sim/init.hh>
@@ -133,7 +140,8 @@
 {
     py::module_ m = m_internal.def_submodule("enum_${name}");
 
-''')
+"""
+    )
     if enum.is_class:
         code('py::enum_<${enum_name}>(m, "enum_${name}")')
     else:
@@ -145,16 +153,18 @@
         code('.value("${val}", ${wrapper_name}::${val})')
     code('.value("Num_${name}", ${wrapper_name}::Num_${enum_name})')
     if not enum.is_class:
-        code('.export_values()')
-    code(';')
+        code(".export_values()")
+    code(";")
     code.dedent()
 
-    code('}')
+    code("}")
     code.dedent()
-    code('''
+    code(
+        """
 static EmbeddedPyBind embed_enum("enum_${name}", module_init);
 
 } // namespace gem5
-    ''')
+    """
+    )
 
 code.write(args.enum_cc)
diff --git a/build_tools/enum_hh.py b/build_tools/enum_hh.py
index 2c4a7bb..a5b9f42 100644
--- a/build_tools/enum_hh.py
+++ b/build_tools/enum_hh.py
@@ -46,8 +46,8 @@
 from code_formatter import code_formatter
 
 parser = argparse.ArgumentParser()
-parser.add_argument('modpath', help='module the enum belongs to')
-parser.add_argument('enum_hh', help='enum header file to generate')
+parser.add_argument("modpath", help="module the enum belongs to")
+parser.add_argument("enum_hh", help="enum header file to generate")
 
 args = parser.parse_args()
 
@@ -64,53 +64,61 @@
 # Note that we wrap the enum in a class/struct to act as a namespace,
 # so that the enum strings can be brief w/o worrying about collisions.
 wrapper_name = enum.wrapper_name
-wrapper = 'struct' if enum.wrapper_is_struct else 'namespace'
+wrapper = "struct" if enum.wrapper_is_struct else "namespace"
 name = enum.__name__ if enum.enum_name is None else enum.enum_name
-idem_macro = '__ENUM__%s__%s__' % (wrapper_name, name)
+idem_macro = "__ENUM__%s__%s__" % (wrapper_name, name)
 
-code('''\
+code(
+    """\
 #ifndef $idem_macro
 #define $idem_macro
 
 namespace gem5
 {
-''')
+"""
+)
 if enum.is_class:
-    code('''\
+    code(
+        """\
 enum class $name
 {
-''')
+"""
+    )
 else:
-    code('''\
+    code(
+        """\
 $wrapper $wrapper_name {
 enum $name
 {
-''')
+"""
+    )
     code.indent(1)
 code.indent(1)
 for val in enum.vals:
-    code('$val = ${{enum.map[val]}},')
-code('Num_$name = ${{len(enum.vals)}}')
+    code("$val = ${{enum.map[val]}},")
+code("Num_$name = ${{len(enum.vals)}}")
 code.dedent(1)
-code('};')
+code("};")
 
 if enum.is_class:
-    code('''\
+    code(
+        """\
 extern const char *${name}Strings[static_cast<int>(${name}::Num_${name})];
-''')
+"""
+    )
 elif enum.wrapper_is_struct:
-    code('static const char *${name}Strings[Num_${name}];')
+    code("static const char *${name}Strings[Num_${name}];")
 else:
-    code('extern const char *${name}Strings[Num_${name}];')
+    code("extern const char *${name}Strings[Num_${name}];")
 
 if not enum.is_class:
     code.dedent(1)
-    code('}; // $wrapper_name')
+    code("}; // $wrapper_name")
 
 code()
-code('} // namespace gem5')
+code("} // namespace gem5")
 
 code()
-code('#endif // $idem_macro')
+code("#endif // $idem_macro")
 
 code.write(args.enum_hh)
diff --git a/build_tools/grammar.py b/build_tools/grammar.py
index 9aba746..6ac638b 100644
--- a/build_tools/grammar.py
+++ b/build_tools/grammar.py
@@ -29,73 +29,77 @@
 import ply.lex
 import ply.yacc
 
+
 class ParseError(Exception):
     def __init__(self, message, token=None):
         Exception.__init__(self, message)
         self.token = token
 
+
 class Grammar(object):
     def setupLexerFactory(self, **kwargs):
-        if 'module' in kwargs:
+        if "module" in kwargs:
             raise AttributeError("module is an illegal attribute")
         self.lex_kwargs = kwargs
 
     def setupParserFactory(self, **kwargs):
-        if 'module' in kwargs:
+        if "module" in kwargs:
             raise AttributeError("module is an illegal attribute")
 
-        if 'output' in kwargs:
-            dir,tab = os.path.split(output)
-            if not tab.endswith('.py'):
-                raise AttributeError('The output file must end with .py')
-            kwargs['outputdir'] = dir
-            kwargs['tabmodule'] = tab[:-3]
+        if "output" in kwargs:
+            dir, tab = os.path.split(output)
+            if not tab.endswith(".py"):
+                raise AttributeError("The output file must end with .py")
+            kwargs["outputdir"] = dir
+            kwargs["tabmodule"] = tab[:-3]
 
         self.yacc_kwargs = kwargs
 
     def __getattr__(self, attr):
-        if attr == 'lexers':
+        if attr == "lexers":
             self.lexers = []
             return self.lexers
 
-        if attr == 'lex_kwargs':
+        if attr == "lex_kwargs":
             self.setupLexerFactory()
             return self.lex_kwargs
 
-        if attr == 'yacc_kwargs':
+        if attr == "yacc_kwargs":
             self.setupParserFactory()
             return self.yacc_kwargs
 
-        if attr == 'lex':
+        if attr == "lex":
             self.lex = ply.lex.lex(module=self, **self.lex_kwargs)
             return self.lex
 
-        if attr == 'yacc':
+        if attr == "yacc":
             self.yacc = ply.yacc.yacc(module=self, **self.yacc_kwargs)
             return self.yacc
 
-        if attr == 'current_lexer':
+        if attr == "current_lexer":
             if not self.lexers:
                 return None
             return self.lexers[-1][0]
 
-        if attr == 'current_source':
+        if attr == "current_source":
             if not self.lexers:
-                return '<none>'
+                return "<none>"
             return self.lexers[-1][1]
 
-        if attr == 'current_line':
+        if attr == "current_line":
             if not self.lexers:
                 return -1
             return self.current_lexer.lineno
 
         raise AttributeError(
-            "'%s' object has no attribute '%s'" % (type(self), attr))
+            "'%s' object has no attribute '%s'" % (type(self), attr)
+        )
 
-    def parse_string(self, data, source='<string>', debug=None, tracking=0):
+    def parse_string(self, data, source="<string>", debug=None, tracking=0):
         if not isinstance(data, str):
             raise AttributeError(
-                "argument must be a string, was '%s'" % type(f))
+                "argument must be a string, was '%s'" % type(f)
+            )
 
         lexer = self.lex.clone()
         lexer.input(data)
@@ -114,24 +118,32 @@
     def parse_file(self, f, **kwargs):
         if isinstance(f, str):
             source = f
-            f = open(f, 'r')
+            f = open(f, "r")
         elif isinstance(f, file):
             source = f.name
         else:
             raise AttributeError(
-                "argument must be either a string or file, was '%s'" % type(f))
+                "argument must be either a string or file, was '%s'" % type(f)
+            )
 
         return self.parse_string(f.read(), source, **kwargs)
 
     def p_error(self, t):
         if t:
-            msg = "Syntax error at %s:%d:%d\n>>%s<<" % \
-                  (self.current_source, t.lineno, t.lexpos + 1, t.value)
+            msg = "Syntax error at %s:%d:%d\n>>%s<<" % (
+                self.current_source,
+                t.lineno,
+                t.lexpos + 1,
+                t.value,
+            )
         else:
-            msg = "Syntax error at end of %s" % (self.current_source, )
+            msg = "Syntax error at end of %s" % (self.current_source,)
         raise ParseError(msg, t)
 
     def t_error(self, t):
-        msg = "Illegal character %s @ %d:%d" % \
-            (repr(t.value[0]), t.lineno, t.lexpos)
+        msg = "Illegal character %s @ %d:%d" % (
+            repr(t.value[0]),
+            t.lineno,
+            t.lexpos,
+        )
         raise ParseError(msg, t)
diff --git a/build_tools/infopy.py b/build_tools/infopy.py
index a58cf39..4f15f24 100644
--- a/build_tools/infopy.py
+++ b/build_tools/infopy.py
@@ -42,8 +42,8 @@
 from code_formatter import code_formatter
 
 parser = argparse.ArgumentParser()
-parser.add_argument('info_py', help='info.py file path')
-parser.add_argument('files', help='file to include in info.py', nargs='*')
+parser.add_argument("info_py", help="info.py file path")
+parser.add_argument("files", help="file to include in info.py", nargs="*")
 
 args = parser.parse_args()
 
@@ -52,8 +52,8 @@
 
 for source in args.files:
     src = os.path.basename(source)
-    with open(source, 'r') as f:
-        data = ''.join(f)
-    code('${src} = ${{repr(data)}}')
+    with open(source, "r") as f:
+        data = "".join(f)
+    code("${src} = ${{repr(data)}}")
 
 code.write(args.info_py)
diff --git a/build_tools/marshal.py b/build_tools/marshal.py
index 9c2964b..18afe2c 100644
--- a/build_tools/marshal.py
+++ b/build_tools/marshal.py
@@ -67,16 +67,17 @@
 
 _, cpp, python, modpath, abspath = sys.argv
 
-with open(python, 'r') as f:
+with open(python, "r") as f:
     src = f.read()
 
-compiled = compile(src, python, 'exec')
+compiled = compile(src, python, "exec")
 marshalled = marshal.dumps(compiled)
 
 compressed = zlib.compress(marshalled)
 
 code = code_formatter()
-code('''\
+code(
+    """\
 #include "python/embedded.hh"
 
 namespace gem5
@@ -84,14 +85,16 @@
 namespace
 {
 
-''')
+"""
+)
 
-bytesToCppArray(code, 'embedded_module_data', compressed)
+bytesToCppArray(code, "embedded_module_data", compressed)
 
 # The name of the EmbeddedPython object doesn't matter since it's in an
 # anonymous namespace, and it's constructor takes care of installing it into a
 # global list.
-code('''
+code(
+    """
 EmbeddedPython embedded_module_info(
     "${abspath}",
     "${modpath}",
@@ -101,6 +104,7 @@
 
 } // anonymous namespace
 } // namespace gem5
-''')
+"""
+)
 
 code.write(cpp)
diff --git a/build_tools/sim_object_param_struct_cc.py b/build_tools/sim_object_param_struct_cc.py
index 1b72e3c..0384809 100644
--- a/build_tools/sim_object_param_struct_cc.py
+++ b/build_tools/sim_object_param_struct_cc.py
@@ -46,17 +46,18 @@
 from code_formatter import code_formatter
 
 parser = argparse.ArgumentParser()
-parser.add_argument('modpath', help='module the simobject belongs to')
-parser.add_argument('param_cc', help='parameter cc file to generate')
-parser.add_argument('use_python',
-        help='whether python is enabled in gem5 (True or False)')
+parser.add_argument("modpath", help="module the simobject belongs to")
+parser.add_argument("param_cc", help="parameter cc file to generate")
+parser.add_argument(
+    "use_python", help="whether python is enabled in gem5 (True or False)"
+)
 
 args = parser.parse_args()
 
 use_python = args.use_python.lower()
-if use_python == 'true':
+if use_python == "true":
     use_python = True
-elif use_python == 'false':
+elif use_python == "false":
     use_python = False
 else:
     print(f'Unrecognized "use_python" value {use_python}', file=sys.stderr)
@@ -64,7 +65,7 @@
 
 basename = os.path.basename(args.param_cc)
 no_ext = os.path.splitext(basename)[0]
-sim_object_name = '_'.join(no_ext.split('_')[1:])
+sim_object_name = "_".join(no_ext.split("_")[1:])
 
 importer.install()
 module = importlib.import_module(args.modpath)
@@ -80,14 +81,16 @@
 # the object itself, not including inherited params (which
 # will also be inherited from the base class's param struct
 # here). Sort the params based on their key
-params = list(map(lambda k_v: k_v[1],
-                  sorted(sim_object._params.local.items())))
+params = list(
+    map(lambda k_v: k_v[1], sorted(sim_object._params.local.items()))
+)
 ports = sim_object._ports.local
 
 # only include pybind if python is enabled in the build
 if use_python:
 
-    code('''#include "pybind11/pybind11.h"
+    code(
+        """#include "pybind11/pybind11.h"
 #include "pybind11/stl.h"
 
 #include <type_traits>
@@ -99,9 +102,11 @@
 
 #include "${{sim_object.cxx_header}}"
 
-''')
+"""
+    )
 else:
-    code('''
+    code(
+        """
 #include <type_traits>
 
 #include "base/compiler.hh"
@@ -109,13 +114,15 @@
 
 #include "${{sim_object.cxx_header}}"
 
-''')
+"""
+    )
 # only include the python params code if python is enabled.
 if use_python:
     for param in params:
         param.pybind_predecls(code)
 
-    code('''namespace py = pybind11;
+    code(
+        """namespace py = pybind11;
 
 namespace gem5
 {
@@ -124,39 +131,48 @@
 module_init(py::module_ &m_internal)
 {
 py::module_ m = m_internal.def_submodule("param_${sim_object}");
-''')
+"""
+    )
     code.indent()
     if sim_object._base:
-        code('py::class_<${sim_object}Params, ' \
-             '${{sim_object._base.type}}Params, ' \
-             'std::unique_ptr<${{sim_object}}Params, py::nodelete>>(' \
-             'm, "${sim_object}Params")')
+        code(
+            "py::class_<${sim_object}Params, "
+            "${{sim_object._base.type}}Params, "
+            "std::unique_ptr<${{sim_object}}Params, py::nodelete>>("
+            'm, "${sim_object}Params")'
+        )
     else:
-        code('py::class_<${sim_object}Params, ' \
-            'std::unique_ptr<${sim_object}Params, py::nodelete>>(' \
-            'm, "${sim_object}Params")')
+        code(
+            "py::class_<${sim_object}Params, "
+            "std::unique_ptr<${sim_object}Params, py::nodelete>>("
+            'm, "${sim_object}Params")'
+        )
 
     code.indent()
-    if not hasattr(sim_object, 'abstract') or not sim_object.abstract:
-        code('.def(py::init<>())')
+    if not hasattr(sim_object, "abstract") or not sim_object.abstract:
+        code(".def(py::init<>())")
         code('.def("create", &${sim_object}Params::create)')
 
-    param_exports = sim_object.cxx_param_exports + [
-        PyBindProperty(k)
-        for k, v in sorted(sim_object._params.local.items())
-    ] + [
-        PyBindProperty(f"port_{port.name}_connection_count")
-        for port in ports.values()
-    ]
+    param_exports = (
+        sim_object.cxx_param_exports
+        + [
+            PyBindProperty(k)
+            for k, v in sorted(sim_object._params.local.items())
+        ]
+        + [
+            PyBindProperty(f"port_{port.name}_connection_count")
+            for port in ports.values()
+        ]
+    )
     for exp in param_exports:
         exp.export(code, f"{sim_object}Params")
 
-    code(';')
+    code(";")
     code()
     code.dedent()
 
     bases = []
-    if 'cxx_base' in sim_object._value_dict:
+    if "cxx_base" in sim_object._value_dict:
         # If the c++ base class implied by python inheritance was
         # overridden, use that value.
         if sim_object.cxx_base:
@@ -170,32 +186,39 @@
 
     if bases:
         base_str = ", ".join(bases)
-        code('py::class_<${{sim_object.cxx_class}}, ${base_str}, ' \
-            'std::unique_ptr<${{sim_object.cxx_class}}, py::nodelete>>(' \
-            'm, "${py_class_name}")')
+        code(
+            "py::class_<${{sim_object.cxx_class}}, ${base_str}, "
+            "std::unique_ptr<${{sim_object.cxx_class}}, py::nodelete>>("
+            'm, "${py_class_name}")'
+        )
     else:
-        code('py::class_<${{sim_object.cxx_class}}, ' \
-            'std::unique_ptr<${{sim_object.cxx_class}}, py::nodelete>>(' \
-            'm, "${py_class_name}")')
+        code(
+            "py::class_<${{sim_object.cxx_class}}, "
+            "std::unique_ptr<${{sim_object.cxx_class}}, py::nodelete>>("
+            'm, "${py_class_name}")'
+        )
     code.indent()
     for exp in sim_object.cxx_exports:
         exp.export(code, sim_object.cxx_class)
-    code(';')
+    code(";")
     code.dedent()
     code()
     code.dedent()
-    code('}')
+    code("}")
     code()
-    code('static EmbeddedPyBind '
-         'embed_obj("${0}", module_init, "${1}");',
-        sim_object, sim_object._base.type if sim_object._base else "")
+    code(
+        "static EmbeddedPyBind " 'embed_obj("${0}", module_init, "${1}");',
+        sim_object,
+        sim_object._base.type if sim_object._base else "",
+    )
     code()
-    code('} // namespace gem5')
+    code("} // namespace gem5")
 
 # include the create() methods whether or not python is enabled.
-if not hasattr(sim_object, 'abstract') or not sim_object.abstract:
-    if 'type' in sim_object.__dict__:
-        code('''
+if not hasattr(sim_object, "abstract") or not sim_object.abstract:
+    if "type" in sim_object.__dict__:
+        code(
+            """
 namespace gem5
 {
 
@@ -268,6 +291,7 @@
 }
 
 } // namespace gem5
-''')
+"""
+        )
 
 code.write(args.param_cc)
diff --git a/build_tools/sim_object_param_struct_hh.py b/build_tools/sim_object_param_struct_hh.py
index 261ac9b..bf37da2 100644
--- a/build_tools/sim_object_param_struct_hh.py
+++ b/build_tools/sim_object_param_struct_hh.py
@@ -46,8 +46,8 @@
 from code_formatter import code_formatter
 
 parser = argparse.ArgumentParser()
-parser.add_argument('modpath', help='module the simobject belongs to')
-parser.add_argument('param_hh', help='parameter header file to generate')
+parser.add_argument("modpath", help="module the simobject belongs to")
+parser.add_argument("param_hh", help="parameter header file to generate")
 
 args = parser.parse_args()
 
@@ -67,8 +67,9 @@
 # the object itself, not including inherited params (which
 # will also be inherited from the base class's param struct
 # here). Sort the params based on their key
-params = list(map(lambda k_v: k_v[1],
-    sorted(sim_object._params.local.items())))
+params = list(
+    map(lambda k_v: k_v[1], sorted(sim_object._params.local.items()))
+)
 ports = sim_object._ports.local
 try:
     ptypes = [p.ptype for p in params]
@@ -79,41 +80,44 @@
 
 warned_about_nested_templates = False
 
+
 class CxxClass(object):
     def __init__(self, sig, template_params=[]):
         # Split the signature into its constituent parts. This could
         # potentially be done with regular expressions, but
         # it's simple enough to pick appart a class signature
         # manually.
-        parts = sig.split('<', 1)
+        parts = sig.split("<", 1)
         base = parts[0]
         t_args = []
         if len(parts) > 1:
             # The signature had template arguments.
-            text = parts[1].rstrip(' \t\n>')
-            arg = ''
+            text = parts[1].rstrip(" \t\n>")
+            arg = ""
             # Keep track of nesting to avoid splitting on ","s embedded
             # in the arguments themselves.
             depth = 0
             for c in text:
-                if c == '<':
+                if c == "<":
                     depth = depth + 1
                     if depth > 0 and not warned_about_nested_templates:
                         warned_about_nested_templates = True
-                        print('Nested template argument in cxx_class.'
-                              ' This feature is largely untested and '
-                              ' may not work.')
-                elif c == '>':
+                        print(
+                            "Nested template argument in cxx_class."
+                            " This feature is largely untested and "
+                            " may not work."
+                        )
+                elif c == ">":
                     depth = depth - 1
-                elif c == ',' and depth == 0:
+                elif c == "," and depth == 0:
                     t_args.append(arg.strip())
-                    arg = ''
+                    arg = ""
                 else:
                     arg = arg + c
             if arg:
                 t_args.append(arg.strip())
         # Split the non-template part on :: boundaries.
-        class_path = base.split('::')
+        class_path = base.split("::")
 
         # The namespaces are everything except the last part of the class path.
         self.namespaces = class_path[:-1]
@@ -125,7 +129,7 @@
         # Iterate through the template arguments and their values. This
         # will likely break if parameter packs are used.
         for arg, param in zip(t_args, template_params):
-            type_keys = ('class', 'typename')
+            type_keys = ("class", "typename")
             # If a parameter is a type, parse it recursively. Otherwise
             # assume it's a constant, and store it verbatim.
             if any(param.strip().startswith(kw) for kw in type_keys):
@@ -140,21 +144,24 @@
                 arg.declare(code)
         # Re-open the target namespace.
         for ns in self.namespaces:
-            code('namespace $ns {')
+            code("namespace $ns {")
         # If this is a class template...
         if self.template_params:
             code('template <${{", ".join(self.template_params)}}>')
         # The actual class declaration.
-        code('class ${{self.name}};')
+        code("class ${{self.name}};")
         # Close the target namespaces.
         for ns in reversed(self.namespaces):
-            code('} // namespace $ns')
+            code("} // namespace $ns")
 
-code('''\
+
+code(
+    """\
 #ifndef __PARAMS__${sim_object}__
 #define __PARAMS__${sim_object}__
 
-''')
+"""
+)
 
 
 # The base SimObject has a couple of params that get
@@ -162,10 +169,12 @@
 # the normal Param mechanism; we slip them in here (needed
 # predecls now, actual declarations below)
 if sim_object == SimObject:
-    code('''#include <string>''')
+    code("""#include <string>""")
 
-cxx_class = CxxClass(sim_object._value_dict['cxx_class'],
-                     sim_object._value_dict['cxx_template_params'])
+cxx_class = CxxClass(
+    sim_object._value_dict["cxx_class"],
+    sim_object._value_dict["cxx_template_params"],
+)
 
 # A forward class declaration is sufficient since we are just
 # declaring a pointer.
@@ -186,27 +195,29 @@
         code('#include "enums/${{ptype.__name__}}.hh"')
         code()
 
-code('namespace gem5')
-code('{')
-code('')
+code("namespace gem5")
+code("{")
+code("")
 
 # now generate the actual param struct
 code("struct ${sim_object}Params")
 if sim_object._base:
     code("    : public ${{sim_object._base.type}}Params")
 code("{")
-if not hasattr(sim_object, 'abstract') or not sim_object.abstract:
-    if 'type' in sim_object.__dict__:
+if not hasattr(sim_object, "abstract") or not sim_object.abstract:
+    if "type" in sim_object.__dict__:
         code("    ${{sim_object.cxx_type}} create() const;")
 
 code.indent()
 if sim_object == SimObject:
-    code('''
+    code(
+        """
 SimObjectParams() {}
 virtual ~SimObjectParams() {}
 
 std::string name;
-    ''')
+    """
+    )
 
 for param in params:
     param.cxx_decl(code)
@@ -214,11 +225,11 @@
     port.cxx_decl(code)
 
 code.dedent()
-code('};')
+code("};")
 code()
-code('} // namespace gem5')
+code("} // namespace gem5")
 
 code()
-code('#endif // __PARAMS__${sim_object}__')
+code("#endif // __PARAMS__${sim_object}__")
 
 code.write(args.param_hh)
diff --git a/configs/common/Benchmarks.py b/configs/common/Benchmarks.py
index 591c044..c90e78e 100644
--- a/configs/common/Benchmarks.py
+++ b/configs/common/Benchmarks.py
@@ -28,9 +28,11 @@
 from os import environ as env
 from m5.defines import buildEnv
 
+
 class SysConfig:
-    def __init__(self, script=None, mem=None, disks=None, rootdev=None,
-                 os_type='linux'):
+    def __init__(
+        self, script=None, mem=None, disks=None, rootdev=None, os_type="linux"
+    ):
         self.scriptname = script
         self.disknames = disks
         self.memsize = mem
@@ -41,13 +43,13 @@
         if self.scriptname:
             return script(self.scriptname)
         else:
-            return ''
+            return ""
 
     def mem(self):
         if self.memsize:
             return self.memsize
         else:
-            return '128MB'
+            return "128MB"
 
     def disks(self):
         if self.disknames:
@@ -59,72 +61,117 @@
         if self.root:
             return self.root
         else:
-            return '/dev/sda1'
+            return "/dev/sda1"
 
     def os_type(self):
         return self.ostype
 
+
 # Benchmarks are defined as a key in a dict which is a list of SysConfigs
 # The first defined machine is the test system, the others are driving systems
 
 Benchmarks = {
-    'PovrayBench':  [SysConfig('povray-bench.rcS', '512MB', ['povray.img'])],
-    'PovrayAutumn': [SysConfig('povray-autumn.rcS', '512MB', ['povray.img'])],
-
-    'NetperfStream':    [SysConfig('netperf-stream-client.rcS'),
-                         SysConfig('netperf-server.rcS')],
-    'NetperfStreamUdp': [SysConfig('netperf-stream-udp-client.rcS'),
-                         SysConfig('netperf-server.rcS')],
-    'NetperfUdpLocal':  [SysConfig('netperf-stream-udp-local.rcS')],
-    'NetperfStreamNT':  [SysConfig('netperf-stream-nt-client.rcS'),
-                         SysConfig('netperf-server.rcS')],
-    'NetperfMaerts':    [SysConfig('netperf-maerts-client.rcS'),
-                         SysConfig('netperf-server.rcS')],
-    'SurgeStandard':    [SysConfig('surge-server.rcS', '512MB'),
-                         SysConfig('surge-client.rcS', '256MB')],
-    'SurgeSpecweb':     [SysConfig('spec-surge-server.rcS', '512MB'),
-                         SysConfig('spec-surge-client.rcS', '256MB')],
-    'Nhfsstone':        [SysConfig('nfs-server-nhfsstone.rcS', '512MB'),
-                         SysConfig('nfs-client-nhfsstone.rcS')],
-    'Nfs':              [SysConfig('nfs-server.rcS', '900MB'),
-                         SysConfig('nfs-client-dbench.rcS')],
-    'NfsTcp':           [SysConfig('nfs-server.rcS', '900MB'),
-                         SysConfig('nfs-client-tcp.rcS')],
-    'IScsiInitiator':   [SysConfig('iscsi-client.rcS', '512MB'),
-                         SysConfig('iscsi-server.rcS', '512MB')],
-    'IScsiTarget':      [SysConfig('iscsi-server.rcS', '512MB'),
-                         SysConfig('iscsi-client.rcS', '512MB')],
-    'Validation':       [SysConfig('iscsi-server.rcS', '512MB'),
-                         SysConfig('iscsi-client.rcS', '512MB')],
-    'Ping':             [SysConfig('ping-server.rcS',),
-                         SysConfig('ping-client.rcS')],
-
-    'ValAccDelay':      [SysConfig('devtime.rcS', '512MB')],
-    'ValAccDelay2':     [SysConfig('devtimewmr.rcS', '512MB')],
-    'ValMemLat':        [SysConfig('micro_memlat.rcS', '512MB')],
-    'ValMemLat2MB':     [SysConfig('micro_memlat2mb.rcS', '512MB')],
-    'ValMemLat8MB':     [SysConfig('micro_memlat8mb.rcS', '512MB')],
-    'ValMemLat':        [SysConfig('micro_memlat8.rcS', '512MB')],
-    'ValTlbLat':        [SysConfig('micro_tlblat.rcS', '512MB')],
-    'ValSysLat':        [SysConfig('micro_syscall.rcS', '512MB')],
-    'ValCtxLat':        [SysConfig('micro_ctx.rcS', '512MB')],
-    'ValStream':        [SysConfig('micro_stream.rcS', '512MB')],
-    'ValStreamScale':   [SysConfig('micro_streamscale.rcS', '512MB')],
-    'ValStreamCopy':    [SysConfig('micro_streamcopy.rcS', '512MB')],
-
-    'MutexTest':        [SysConfig('mutex-test.rcS', '128MB')],
-    'ArmAndroid-GB':    [SysConfig('null.rcS', '256MB',
-                    ['ARMv7a-Gingerbread-Android.SMP.mouse.nolock.clean.img'],
-                    None, 'android-gingerbread')],
-    'bbench-gb': [SysConfig('bbench-gb.rcS', '256MB',
-                        ['ARMv7a-Gingerbread-Android.SMP.mouse.nolock.img'],
-                            None, 'android-gingerbread')],
-    'ArmAndroid-ICS':   [SysConfig('null.rcS', '256MB',
-                            ['ARMv7a-ICS-Android.SMP.nolock.clean.img'],
-                            None, 'android-ics')],
-    'bbench-ics':       [SysConfig('bbench-ics.rcS', '256MB',
-                            ['ARMv7a-ICS-Android.SMP.nolock.img'],
-                            None, 'android-ics')]
+    "PovrayBench": [SysConfig("povray-bench.rcS", "512MB", ["povray.img"])],
+    "PovrayAutumn": [SysConfig("povray-autumn.rcS", "512MB", ["povray.img"])],
+    "NetperfStream": [
+        SysConfig("netperf-stream-client.rcS"),
+        SysConfig("netperf-server.rcS"),
+    ],
+    "NetperfStreamUdp": [
+        SysConfig("netperf-stream-udp-client.rcS"),
+        SysConfig("netperf-server.rcS"),
+    ],
+    "NetperfUdpLocal": [SysConfig("netperf-stream-udp-local.rcS")],
+    "NetperfStreamNT": [
+        SysConfig("netperf-stream-nt-client.rcS"),
+        SysConfig("netperf-server.rcS"),
+    ],
+    "NetperfMaerts": [
+        SysConfig("netperf-maerts-client.rcS"),
+        SysConfig("netperf-server.rcS"),
+    ],
+    "SurgeStandard": [
+        SysConfig("surge-server.rcS", "512MB"),
+        SysConfig("surge-client.rcS", "256MB"),
+    ],
+    "SurgeSpecweb": [
+        SysConfig("spec-surge-server.rcS", "512MB"),
+        SysConfig("spec-surge-client.rcS", "256MB"),
+    ],
+    "Nhfsstone": [
+        SysConfig("nfs-server-nhfsstone.rcS", "512MB"),
+        SysConfig("nfs-client-nhfsstone.rcS"),
+    ],
+    "Nfs": [
+        SysConfig("nfs-server.rcS", "900MB"),
+        SysConfig("nfs-client-dbench.rcS"),
+    ],
+    "NfsTcp": [
+        SysConfig("nfs-server.rcS", "900MB"),
+        SysConfig("nfs-client-tcp.rcS"),
+    ],
+    "IScsiInitiator": [
+        SysConfig("iscsi-client.rcS", "512MB"),
+        SysConfig("iscsi-server.rcS", "512MB"),
+    ],
+    "IScsiTarget": [
+        SysConfig("iscsi-server.rcS", "512MB"),
+        SysConfig("iscsi-client.rcS", "512MB"),
+    ],
+    "Validation": [
+        SysConfig("iscsi-server.rcS", "512MB"),
+        SysConfig("iscsi-client.rcS", "512MB"),
+    ],
+    "Ping": [SysConfig("ping-server.rcS"), SysConfig("ping-client.rcS")],
+    "ValAccDelay": [SysConfig("devtime.rcS", "512MB")],
+    "ValAccDelay2": [SysConfig("devtimewmr.rcS", "512MB")],
+    "ValMemLat": [SysConfig("micro_memlat.rcS", "512MB")],
+    "ValMemLat2MB": [SysConfig("micro_memlat2mb.rcS", "512MB")],
+    "ValMemLat8MB": [SysConfig("micro_memlat8mb.rcS", "512MB")],
+    "ValMemLat": [SysConfig("micro_memlat8.rcS", "512MB")],
+    "ValTlbLat": [SysConfig("micro_tlblat.rcS", "512MB")],
+    "ValSysLat": [SysConfig("micro_syscall.rcS", "512MB")],
+    "ValCtxLat": [SysConfig("micro_ctx.rcS", "512MB")],
+    "ValStream": [SysConfig("micro_stream.rcS", "512MB")],
+    "ValStreamScale": [SysConfig("micro_streamscale.rcS", "512MB")],
+    "ValStreamCopy": [SysConfig("micro_streamcopy.rcS", "512MB")],
+    "MutexTest": [SysConfig("mutex-test.rcS", "128MB")],
+    "ArmAndroid-GB": [
+        SysConfig(
+            "null.rcS",
+            "256MB",
+            ["ARMv7a-Gingerbread-Android.SMP.mouse.nolock.clean.img"],
+            None,
+            "android-gingerbread",
+        )
+    ],
+    "bbench-gb": [
+        SysConfig(
+            "bbench-gb.rcS",
+            "256MB",
+            ["ARMv7a-Gingerbread-Android.SMP.mouse.nolock.img"],
+            None,
+            "android-gingerbread",
+        )
+    ],
+    "ArmAndroid-ICS": [
+        SysConfig(
+            "null.rcS",
+            "256MB",
+            ["ARMv7a-ICS-Android.SMP.nolock.clean.img"],
+            None,
+            "android-ics",
+        )
+    ],
+    "bbench-ics": [
+        SysConfig(
+            "bbench-ics.rcS",
+            "256MB",
+            ["ARMv7a-ICS-Android.SMP.nolock.img"],
+            None,
+            "android-ics",
+        )
+    ],
 }
 
 benchs = list(Benchmarks.keys())
diff --git a/configs/common/CacheConfig.py b/configs/common/CacheConfig.py
index 61c6a30..63ffe67 100644
--- a/configs/common/CacheConfig.py
+++ b/configs/common/CacheConfig.py
@@ -42,9 +42,13 @@
 
 import m5
 from m5.objects import *
+from gem5.isas import ISA
+from gem5.runtime import get_runtime_isa
+
 from common.Caches import *
 from common import ObjectList
 
+
 def _get_hwp(hwp_option):
     if hwp_option == None:
         return NULL
@@ -52,23 +56,25 @@
     hwpClass = ObjectList.hwp_list.get(hwp_option)
     return hwpClass()
 
+
 def _get_cache_opts(level, options):
     opts = {}
 
-    size_attr = '{}_size'.format(level)
+    size_attr = "{}_size".format(level)
     if hasattr(options, size_attr):
-        opts['size'] = getattr(options, size_attr)
+        opts["size"] = getattr(options, size_attr)
 
-    assoc_attr = '{}_assoc'.format(level)
+    assoc_attr = "{}_assoc".format(level)
     if hasattr(options, assoc_attr):
-        opts['assoc'] = getattr(options, assoc_attr)
+        opts["assoc"] = getattr(options, assoc_attr)
 
-    prefetcher_attr = '{}_hwp_type'.format(level)
+    prefetcher_attr = "{}_hwp_type".format(level)
     if hasattr(options, prefetcher_attr):
-        opts['prefetcher'] = _get_hwp(getattr(options, prefetcher_attr))
+        opts["prefetcher"] = _get_hwp(getattr(options, prefetcher_attr))
 
     return opts
 
+
 def config_cache(options, system):
     if options.external_memory_system and (options.caches or options.l2cache):
         print("External caches and internal caches are exclusive options.\n")
@@ -84,10 +90,12 @@
             print("O3_ARM_v7a_3 is unavailable. Did you compile the O3 model?")
             sys.exit(1)
 
-        dcache_class, icache_class, l2_cache_class, walk_cache_class = \
-            core.O3_ARM_v7a_DCache, core.O3_ARM_v7a_ICache, \
-            core.O3_ARM_v7aL2, \
-            None
+        dcache_class, icache_class, l2_cache_class, walk_cache_class = (
+            core.O3_ARM_v7a_DCache,
+            core.O3_ARM_v7a_ICache,
+            core.O3_ARM_v7aL2,
+            None,
+        )
     elif options.cpu_type == "HPI":
         try:
             import cores.arm.HPI as core
@@ -95,13 +103,21 @@
             print("HPI is unavailable.")
             sys.exit(1)
 
-        dcache_class, icache_class, l2_cache_class, walk_cache_class = \
-            core.HPI_DCache, core.HPI_ICache, core.HPI_L2, None
+        dcache_class, icache_class, l2_cache_class, walk_cache_class = (
+            core.HPI_DCache,
+            core.HPI_ICache,
+            core.HPI_L2,
+            None,
+        )
     else:
-        dcache_class, icache_class, l2_cache_class, walk_cache_class = \
-            L1_DCache, L1_ICache, L2Cache, None
+        dcache_class, icache_class, l2_cache_class, walk_cache_class = (
+            L1_DCache,
+            L1_ICache,
+            L2Cache,
+            None,
+        )
 
-        if buildEnv['TARGET_ISA'] in ['x86', 'riscv']:
+        if get_runtime_isa() in [ISA.X86, ISA.RISCV]:
             walk_cache_class = PageTableWalkerCache
 
     # Set the cache line size of the system
@@ -118,10 +134,11 @@
         # Provide a clock for the L2 and the L1-to-L2 bus here as they
         # are not connected using addTwoLevelCacheHierarchy. Use the
         # same clock as the CPUs.
-        system.l2 = l2_cache_class(clk_domain=system.cpu_clk_domain,
-                                   **_get_cache_opts('l2', options))
+        system.l2 = l2_cache_class(
+            clk_domain=system.cpu_clk_domain, **_get_cache_opts("l2", options)
+        )
 
-        system.tol2bus = L2XBar(clk_domain = system.cpu_clk_domain)
+        system.tol2bus = L2XBar(clk_domain=system.cpu_clk_domain)
         system.l2.cpu_side = system.tol2bus.mem_side_ports
         system.l2.mem_side = system.membus.cpu_side_ports
 
@@ -130,8 +147,8 @@
 
     for i in range(options.num_cpus):
         if options.caches:
-            icache = icache_class(**_get_cache_opts('l1i', options))
-            dcache = dcache_class(**_get_cache_opts('l1d', options))
+            icache = icache_class(**_get_cache_opts("l1i", options))
+            dcache = dcache_class(**_get_cache_opts("l1d", options))
 
             # If we have a walker cache specified, instantiate two
             # instances here
@@ -159,8 +176,9 @@
 
             # When connecting the caches, the clock is also inherited
             # from the CPU in question
-            system.cpu[i].addPrivateSplitL1Caches(icache, dcache,
-                                                  iwalkcache, dwalkcache)
+            system.cpu[i].addPrivateSplitL1Caches(
+                icache, dcache, iwalkcache, dwalkcache
+            )
 
             if options.memchecker:
                 # The mem_side ports of the caches haven't been connected yet.
@@ -174,47 +192,56 @@
             # on these names.  For simplicity, we would advise configuring
             # it to use this naming scheme; if this isn't possible, change
             # the names below.
-            if buildEnv['TARGET_ISA'] in ['x86', 'arm', 'riscv']:
+            if get_runtime_isa() in [ISA.X86, ISA.ARM, ISA.RISCV]:
                 system.cpu[i].addPrivateSplitL1Caches(
-                        ExternalCache("cpu%d.icache" % i),
-                        ExternalCache("cpu%d.dcache" % i),
-                        ExternalCache("cpu%d.itb_walker_cache" % i),
-                        ExternalCache("cpu%d.dtb_walker_cache" % i))
+                    ExternalCache("cpu%d.icache" % i),
+                    ExternalCache("cpu%d.dcache" % i),
+                    ExternalCache("cpu%d.itb_walker_cache" % i),
+                    ExternalCache("cpu%d.dtb_walker_cache" % i),
+                )
             else:
                 system.cpu[i].addPrivateSplitL1Caches(
-                        ExternalCache("cpu%d.icache" % i),
-                        ExternalCache("cpu%d.dcache" % i))
+                    ExternalCache("cpu%d.icache" % i),
+                    ExternalCache("cpu%d.dcache" % i),
+                )
 
         system.cpu[i].createInterruptController()
         if options.l2cache:
             system.cpu[i].connectAllPorts(
                 system.tol2bus.cpu_side_ports,
-                system.membus.cpu_side_ports, system.membus.mem_side_ports)
+                system.membus.cpu_side_ports,
+                system.membus.mem_side_ports,
+            )
         elif options.external_memory_system:
             system.cpu[i].connectUncachedPorts(
-                system.membus.cpu_side_ports, system.membus.mem_side_ports)
+                system.membus.cpu_side_ports, system.membus.mem_side_ports
+            )
         else:
             system.cpu[i].connectBus(system.membus)
 
     return system
 
+
 # ExternalSlave provides a "port", but when that port connects to a cache,
 # the connecting CPU SimObject wants to refer to its "cpu_side".
 # The 'ExternalCache' class provides this adaptation by rewriting the name,
 # eliminating distracting changes elsewhere in the config code.
 class ExternalCache(ExternalSlave):
     def __getattr__(cls, attr):
-        if (attr == "cpu_side"):
+        if attr == "cpu_side":
             attr = "port"
         return super(ExternalSlave, cls).__getattr__(attr)
 
     def __setattr__(cls, attr, value):
-        if (attr == "cpu_side"):
+        if attr == "cpu_side":
             attr = "port"
         return super(ExternalSlave, cls).__setattr__(attr, value)
 
+
 def ExternalCacheFactory(port_type):
     def make(name):
-        return ExternalCache(port_data=name, port_type=port_type,
-                             addr_ranges=[AllMemory])
+        return ExternalCache(
+            port_data=name, port_type=port_type, addr_ranges=[AllMemory]
+        )
+
     return make
diff --git a/configs/common/Caches.py b/configs/common/Caches.py
index 1468b95..e25d16c 100644
--- a/configs/common/Caches.py
+++ b/configs/common/Caches.py
@@ -39,6 +39,8 @@
 
 from m5.defines import buildEnv
 from m5.objects import *
+from gem5.isas import ISA
+from gem5.runtime import get_runtime_isa
 
 # Base implementations of L1, L2, IO and TLB-walker caches. There are
 # used in the regressions and also as base components in the
@@ -46,6 +48,7 @@
 # starting point, and specific parameters can be overridden in the
 # specific instantiations.
 
+
 class L1Cache(Cache):
     assoc = 2
     tag_latency = 2
@@ -54,14 +57,17 @@
     mshrs = 4
     tgts_per_mshr = 20
 
+
 class L1_ICache(L1Cache):
     is_read_only = True
     # Writeback clean lines as well
     writeback_clean = True
 
+
 class L1_DCache(L1Cache):
     pass
 
+
 class L2Cache(Cache):
     assoc = 8
     tag_latency = 20
@@ -71,26 +77,28 @@
     tgts_per_mshr = 12
     write_buffers = 8
 
+
 class IOCache(Cache):
     assoc = 8
     tag_latency = 50
     data_latency = 50
     response_latency = 50
     mshrs = 20
-    size = '1kB'
+    size = "1kB"
     tgts_per_mshr = 12
 
+
 class PageTableWalkerCache(Cache):
     assoc = 2
     tag_latency = 2
     data_latency = 2
     response_latency = 2
     mshrs = 10
-    size = '1kB'
+    size = "1kB"
     tgts_per_mshr = 12
 
     # the x86 table walker actually writes to the table-walker cache
-    if buildEnv['TARGET_ISA'] in ['x86', 'riscv']:
+    if get_runtime_isa() in [ISA.X86, ISA.RISCV]:
         is_read_only = False
     else:
         is_read_only = True
diff --git a/configs/common/CpuConfig.py b/configs/common/CpuConfig.py
index d34143c..1672d43 100644
--- a/configs/common/CpuConfig.py
+++ b/configs/common/CpuConfig.py
@@ -36,6 +36,7 @@
 from m5 import fatal
 import m5.objects
 
+
 def config_etrace(cpu_cls, cpu_list, options):
     if issubclass(cpu_cls, m5.objects.DerivO3CPU):
         # Assign the same file name to all cpus for now. This must be
@@ -45,17 +46,21 @@
             # file names. Set the dependency window size equal to the cpu it
             # is attached to.
             cpu.traceListener = m5.objects.ElasticTrace(
-                                instFetchTraceFile = options.inst_trace_file,
-                                dataDepTraceFile = options.data_trace_file,
-                                depWindowSize = 3 * cpu.numROBEntries)
+                instFetchTraceFile=options.inst_trace_file,
+                dataDepTraceFile=options.data_trace_file,
+                depWindowSize=3 * cpu.numROBEntries,
+            )
             # Make the number of entries in the ROB, LQ and SQ very
             # large so that there are no stalls due to resource
             # limitation as such stalls will get captured in the trace
             # as compute delay. For replay, ROB, LQ and SQ sizes are
             # modelled in the Trace CPU.
-            cpu.numROBEntries = 512;
-            cpu.LQEntries = 128;
-            cpu.SQEntries = 128;
+            cpu.numROBEntries = 512
+            cpu.LQEntries = 128
+            cpu.SQEntries = 128
     else:
-        fatal("%s does not support data dependency tracing. Use a CPU model of"
-              " type or inherited from DerivO3CPU.", cpu_cls)
+        fatal(
+            "%s does not support data dependency tracing. Use a CPU model of"
+            " type or inherited from DerivO3CPU.",
+            cpu_cls,
+        )
diff --git a/configs/common/FSConfig.py b/configs/common/FSConfig.py
index febe146..5da951c 100644
--- a/configs/common/FSConfig.py
+++ b/configs/common/FSConfig.py
@@ -39,69 +39,87 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import m5
+import m5.defines
 from m5.objects import *
 from m5.util import *
 from common.Benchmarks import *
 from common import ObjectList
 
 # Populate to reflect supported os types per target ISA
-os_types = { 'mips'  : [ 'linux' ],
-             'riscv' : [ 'linux' ], # TODO that's a lie
-             'sparc' : [ 'linux' ],
-             'x86'   : [ 'linux' ],
-             'arm'   : [ 'linux',
-                         'android-gingerbread',
-                         'android-ics',
-                         'android-jellybean',
-                         'android-kitkat',
-                         'android-nougat', ],
-           }
+os_types = set()
+if m5.defines.buildEnv["USE_ARM_ISA"]:
+    os_types.update(
+        [
+            "linux",
+            "android-gingerbread",
+            "android-ics",
+            "android-jellybean",
+            "android-kitkat",
+            "android-nougat",
+        ]
+    )
+if m5.defines.buildEnv["USE_MIPS_ISA"]:
+    os_types.add("linux")
+if m5.defines.buildEnv["USE_POWER_ISA"]:
+    os_types.add("linux")
+if m5.defines.buildEnv["USE_RISCV_ISA"]:
+    os_types.add("linux")  # TODO that's a lie
+if m5.defines.buildEnv["USE_SPARC_ISA"]:
+    os_types.add("linux")
+if m5.defines.buildEnv["USE_X86_ISA"]:
+    os_types.add("linux")
+
 
 class CowIdeDisk(IdeDisk):
-    image = CowDiskImage(child=RawDiskImage(read_only=True),
-                         read_only=False)
+    image = CowDiskImage(child=RawDiskImage(read_only=True), read_only=False)
 
     def childImage(self, ci):
         self.image.child.image_file = ci
 
+
 class MemBus(SystemXBar):
     badaddr_responder = BadAddr()
     default = Self.badaddr_responder.pio
 
+
 def attach_9p(parent, bus):
     viopci = PciVirtIO()
     viopci.vio = VirtIO9PDiod()
-    viodir = os.path.realpath(os.path.join(m5.options.outdir, '9p'))
-    viopci.vio.root = os.path.join(viodir, 'share')
-    viopci.vio.socketPath = os.path.join(viodir, 'socket')
+    viodir = os.path.realpath(os.path.join(m5.options.outdir, "9p"))
+    viopci.vio.root = os.path.join(viodir, "share")
+    viopci.vio.socketPath = os.path.join(viodir, "socket")
     os.makedirs(viopci.vio.root, exist_ok=True)
     if os.path.exists(viopci.vio.socketPath):
         os.remove(viopci.vio.socketPath)
     parent.viopci = viopci
     parent.attachPciDevice(viopci, bus)
 
+
 def fillInCmdline(mdesc, template, **kwargs):
-    kwargs.setdefault('rootdev', mdesc.rootdev())
-    kwargs.setdefault('mem', mdesc.mem())
-    kwargs.setdefault('script', mdesc.script())
+    kwargs.setdefault("rootdev", mdesc.rootdev())
+    kwargs.setdefault("mem", mdesc.mem())
+    kwargs.setdefault("script", mdesc.script())
     return template % kwargs
 
+
 def makeCowDisks(disk_paths):
     disks = []
     for disk_path in disk_paths:
-        disk = CowIdeDisk(driveID='device0')
-        disk.childImage(disk_path);
+        disk = CowIdeDisk(driveID="device0")
+        disk.childImage(disk_path)
         disks.append(disk)
     return disks
 
+
 def makeSparcSystem(mem_mode, mdesc=None, cmdline=None):
     # Constants from iob.cc and uart8250.cc
     iob_man_addr = 0x9800000000
     uart_pio_size = 8
 
     class CowMmDisk(MmDisk):
-        image = CowDiskImage(child=RawDiskImage(read_only=True),
-                             read_only=False)
+        image = CowDiskImage(
+            child=RawDiskImage(read_only=True), read_only=False
+        )
 
         def childImage(self, ci):
             self.image.child.image_file = ci
@@ -113,12 +131,14 @@
     self.readfile = mdesc.script()
     self.iobus = IOXBar()
     self.membus = MemBus()
-    self.bridge = Bridge(delay='50ns')
+    self.bridge = Bridge(delay="50ns")
     self.t1000 = T1000()
     self.t1000.attachOnChipIO(self.membus)
     self.t1000.attachIO(self.iobus)
-    self.mem_ranges = [AddrRange(Addr('1MB'), size = '64MB'),
-                       AddrRange(Addr('2GB'), size ='256MB')]
+    self.mem_ranges = [
+        AddrRange(Addr("1MB"), size="64MB"),
+        AddrRange(Addr("2GB"), size="256MB"),
+    ]
     self.bridge.mem_side_port = self.iobus.cpu_side_ports
     self.bridge.cpu_side_port = self.membus.mem_side_ports
     self.disk0 = CowMmDisk()
@@ -128,36 +148,47 @@
     # The puart0 and hvuart are placed on the IO bus, so create ranges
     # for them. The remaining IO range is rather fragmented, so poke
     # holes for the iob and partition descriptors etc.
-    self.bridge.ranges = \
-        [
-        AddrRange(self.t1000.puart0.pio_addr,
-                  self.t1000.puart0.pio_addr + uart_pio_size - 1),
-        AddrRange(self.disk0.pio_addr,
-                  self.t1000.fake_jbi.pio_addr +
-                  self.t1000.fake_jbi.pio_size - 1),
-        AddrRange(self.t1000.fake_clk.pio_addr,
-                  iob_man_addr - 1),
-        AddrRange(self.t1000.fake_l2_1.pio_addr,
-                  self.t1000.fake_ssi.pio_addr +
-                  self.t1000.fake_ssi.pio_size - 1),
-        AddrRange(self.t1000.hvuart.pio_addr,
-                  self.t1000.hvuart.pio_addr + uart_pio_size - 1)
-        ]
+    self.bridge.ranges = [
+        AddrRange(
+            self.t1000.puart0.pio_addr,
+            self.t1000.puart0.pio_addr + uart_pio_size - 1,
+        ),
+        AddrRange(
+            self.disk0.pio_addr,
+            self.t1000.fake_jbi.pio_addr + self.t1000.fake_jbi.pio_size - 1,
+        ),
+        AddrRange(self.t1000.fake_clk.pio_addr, iob_man_addr - 1),
+        AddrRange(
+            self.t1000.fake_l2_1.pio_addr,
+            self.t1000.fake_ssi.pio_addr + self.t1000.fake_ssi.pio_size - 1,
+        ),
+        AddrRange(
+            self.t1000.hvuart.pio_addr,
+            self.t1000.hvuart.pio_addr + uart_pio_size - 1,
+        ),
+    ]
 
     workload = SparcFsWorkload()
 
     # ROM for OBP/Reset/Hypervisor
-    self.rom = SimpleMemory(image_file=binary('t1000_rom.bin'),
-            range=AddrRange(0xfff0000000, size='8MB'))
+    self.rom = SimpleMemory(
+        image_file=binary("t1000_rom.bin"),
+        range=AddrRange(0xFFF0000000, size="8MB"),
+    )
     # nvram
-    self.nvram = SimpleMemory(image_file=binary('nvram1'),
-            range=AddrRange(0x1f11000000, size='8kB'))
+    self.nvram = SimpleMemory(
+        image_file=binary("nvram1"), range=AddrRange(0x1F11000000, size="8kB")
+    )
     # hypervisor description
-    self.hypervisor_desc = SimpleMemory(image_file=binary('1up-hv.bin'),
-            range=AddrRange(0x1f12080000, size='8kB'))
+    self.hypervisor_desc = SimpleMemory(
+        image_file=binary("1up-hv.bin"),
+        range=AddrRange(0x1F12080000, size="8kB"),
+    )
     # partition description
-    self.partition_desc = SimpleMemory(image_file=binary('1up-md.bin'),
-            range=AddrRange(0x1f12000000, size='8kB'))
+    self.partition_desc = SimpleMemory(
+        image_file=binary("1up-md.bin"),
+        range=AddrRange(0x1F12000000, size="8kB"),
+    )
 
     self.rom.port = self.membus.mem_side_ports
     self.nvram.port = self.membus.mem_side_ports
@@ -170,10 +201,20 @@
 
     return self
 
-def makeArmSystem(mem_mode, machine_type, num_cpus=1, mdesc=None,
-                  dtb_filename=None, bare_metal=False, cmdline=None,
-                  external_memory="", ruby=False,
-                  vio_9p=None, bootloader=None):
+
+def makeArmSystem(
+    mem_mode,
+    machine_type,
+    num_cpus=1,
+    mdesc=None,
+    dtb_filename=None,
+    bare_metal=False,
+    cmdline=None,
+    external_memory="",
+    ruby=False,
+    vio_9p=None,
+    bootloader=None,
+):
     assert machine_type
 
     pci_devices = []
@@ -187,7 +228,7 @@
     self.readfile = mdesc.script()
     self.iobus = IOXBar()
     if not ruby:
-        self.bridge = Bridge(delay='50ns')
+        self.bridge = Bridge(delay="50ns")
         self.bridge.mem_side_port = self.iobus.cpu_side_ports
         self.membus = MemBus()
         self.membus.badaddr_responder.warn_access = "warn"
@@ -227,13 +268,17 @@
             self.mem_ranges.append(AddrRange(region.start, size=size_remain))
             size_remain = 0
             break
-        warn("Memory size specified spans more than one region. Creating" \
-             " another memory controller for that range.")
+        warn(
+            "Memory size specified spans more than one region. Creating"
+            " another memory controller for that range."
+        )
 
     if size_remain > 0:
-        fatal("The currently selected ARM platforms doesn't support" \
-              " the amount of DRAM you've selected. Please try" \
-              " another platform")
+        fatal(
+            "The currently selected ARM platforms doesn't support"
+            " the amount of DRAM you've selected. Please try"
+            " another platform"
+        )
 
     if bare_metal:
         # EOT character on UART will end the simulation
@@ -245,16 +290,19 @@
         if dtb_filename:
             workload.dtb_filename = binary(dtb_filename)
 
-        workload.machine_type = \
+        workload.machine_type = (
             machine_type if machine_type in ArmMachineType.map else "DTOnly"
+        )
 
         # Ensure that writes to the UART actually go out early in the boot
         if not cmdline:
-            cmdline = 'earlyprintk=pl011,0x1c090000 console=ttyAMA0 ' + \
-                      'lpj=19988480 norandmaps rw loglevel=8 ' + \
-                      'mem=%(mem)s root=%(rootdev)s'
+            cmdline = (
+                "earlyprintk=pl011,0x1c090000 console=ttyAMA0 "
+                + "lpj=19988480 norandmaps rw loglevel=8 "
+                + "mem=%(mem)s root=%(rootdev)s"
+            )
 
-        if hasattr(self.realview.gic, 'cpu_addr'):
+        if hasattr(self.realview.gic, "cpu_addr"):
             self.gic_cpu_addr = self.realview.gic.cpu_addr
 
         # This check is for users who have previously put 'android' in
@@ -263,30 +311,37 @@
         # behavior has been replaced with a more explicit option per
         # the error message below. The disk can have any name now and
         # doesn't need to include 'android' substring.
-        if (mdesc.disks() and
-                os.path.split(mdesc.disks()[0])[-1].lower().count('android')):
-            if 'android' not in mdesc.os_type():
-                fatal("It looks like you are trying to boot an Android " \
-                      "platform.  To boot Android, you must specify " \
-                      "--os-type with an appropriate Android release on " \
-                      "the command line.")
+        if mdesc.disks() and os.path.split(mdesc.disks()[0])[-1].lower().count(
+            "android"
+        ):
+            if "android" not in mdesc.os_type():
+                fatal(
+                    "It looks like you are trying to boot an Android "
+                    "platform.  To boot Android, you must specify "
+                    "--os-type with an appropriate Android release on "
+                    "the command line."
+                )
 
         # android-specific tweaks
-        if 'android' in mdesc.os_type():
+        if "android" in mdesc.os_type():
             # generic tweaks
             cmdline += " init=/init"
 
             # release-specific tweaks
-            if 'kitkat' in mdesc.os_type():
-                cmdline += " androidboot.hardware=gem5 qemu=1 qemu.gles=0 " + \
-                           "android.bootanim=0 "
-            elif 'nougat' in mdesc.os_type():
-                cmdline += " androidboot.hardware=gem5 qemu=1 qemu.gles=0 " + \
-                           "android.bootanim=0 " + \
-                           "vmalloc=640MB " + \
-                           "android.early.fstab=/fstab.gem5 " + \
-                           "androidboot.selinux=permissive " + \
-                           "video=Virtual-1:1920x1080-16"
+            if "kitkat" in mdesc.os_type():
+                cmdline += (
+                    " androidboot.hardware=gem5 qemu=1 qemu.gles=0 "
+                    + "android.bootanim=0 "
+                )
+            elif "nougat" in mdesc.os_type():
+                cmdline += (
+                    " androidboot.hardware=gem5 qemu=1 qemu.gles=0 "
+                    + "android.bootanim=0 "
+                    + "vmalloc=640MB "
+                    + "android.early.fstab=/fstab.gem5 "
+                    + "androidboot.selinux=permissive "
+                    + "video=Virtual-1:1920x1080-16"
+                )
 
         workload.command_line = fillInCmdline(mdesc, cmdline)
 
@@ -296,14 +351,17 @@
 
     if external_memory:
         # I/O traffic enters iobus
-        self.external_io = ExternalMaster(port_data="external_io",
-                                          port_type=external_memory)
+        self.external_io = ExternalMaster(
+            port_data="external_io", port_type=external_memory
+        )
         self.external_io.port = self.iobus.cpu_side_ports
 
         # Ensure iocache only receives traffic destined for (actual) memory.
-        self.iocache = ExternalSlave(port_data="iocache",
-                                     port_type=external_memory,
-                                     addr_ranges=self.mem_ranges)
+        self.iocache = ExternalSlave(
+            port_data="iocache",
+            port_type=external_memory,
+            addr_ranges=self.mem_ranges,
+        )
         self.iocache.port = self.iobus.mem_side_ports
 
         # Let system_port get to nvmem and nothing else.
@@ -313,10 +371,11 @@
         # Attach off-chip devices
         self.realview.attachIO(self.iobus)
     elif ruby:
-        self._dma_ports = [ ]
-        self._mem_ports = [ ]
-        self.realview.attachOnChipIO(self.iobus,
-            dma_ports=self._dma_ports, mem_ports=self._mem_ports)
+        self._dma_ports = []
+        self._mem_ports = []
+        self.realview.attachOnChipIO(
+            self.iobus, dma_ports=self._dma_ports, mem_ports=self._mem_ports
+        )
         self.realview.attachIO(self.iobus, dma_ports=self._dma_ports)
     else:
         self.realview.attachOnChipIO(self.membus, self.bridge)
@@ -325,8 +384,8 @@
 
     for dev in pci_devices:
         self.realview.attachPciDevice(
-            dev, self.iobus,
-            dma_ports=self._dma_ports if ruby else None)
+            dev, self.iobus, dma_ports=self._dma_ports if ruby else None
+        )
 
     self.terminal = Terminal()
     self.vncserver = VncServer()
@@ -338,10 +397,12 @@
         self.system_port = self.membus.cpu_side_ports
 
     if ruby:
-        if buildEnv['PROTOCOL'] == 'MI_example' and num_cpus > 1:
-            fatal("The MI_example protocol cannot implement Load/Store "
-                  "Exclusive operations. Multicore ARM systems configured "
-                  "with the MI_example protocol will not work properly.")
+        if buildEnv["PROTOCOL"] == "MI_example" and num_cpus > 1:
+            fatal(
+                "The MI_example protocol cannot implement Load/Store "
+                "Exclusive operations. Multicore ARM systems configured "
+                "with the MI_example protocol will not work properly."
+            )
 
     return self
 
@@ -349,8 +410,9 @@
 def makeLinuxMipsSystem(mem_mode, mdesc=None, cmdline=None):
     class BaseMalta(Malta):
         ethernet = NSGigE(pci_bus=0, pci_dev=1, pci_func=0)
-        ide = IdeController(disks=Parent.disks,
-                            pci_func=0, pci_dev=0, pci_bus=0)
+        ide = IdeController(
+            disks=Parent.disks, pci_func=0, pci_dev=0, pci_bus=0
+        )
 
     self = System()
     if not mdesc:
@@ -359,8 +421,8 @@
     self.readfile = mdesc.script()
     self.iobus = IOXBar()
     self.membus = MemBus()
-    self.bridge = Bridge(delay='50ns')
-    self.mem_ranges = [AddrRange('1GB')]
+    self.bridge = Bridge(delay="50ns")
+    self.mem_ranges = [AddrRange("1GB")]
     self.bridge.mem_side_port = self.iobus.cpu_side_ports
     self.bridge.cpu_side_port = self.membus.mem_side_ports
     self.disks = makeCowDisks(mdesc.disks())
@@ -370,35 +432,38 @@
     self.malta.ide.dma = self.iobus.cpu_side_ports
     self.malta.ethernet.pio = self.iobus.mem_side_ports
     self.malta.ethernet.dma = self.iobus.cpu_side_ports
-    self.simple_disk = SimpleDisk(disk=RawDiskImage(
-        image_file = mdesc.disks()[0], read_only = True))
+    self.simple_disk = SimpleDisk(
+        disk=RawDiskImage(image_file=mdesc.disks()[0], read_only=True)
+    )
     self.mem_mode = mem_mode
     self.terminal = Terminal()
-    self.console = binary('mips/console')
+    self.console = binary("mips/console")
     if not cmdline:
-        cmdline = 'root=/dev/hda1 console=ttyS0'
+        cmdline = "root=/dev/hda1 console=ttyS0"
     self.workload = KernelWorkload(command_line=fillInCmdline(mdesc, cmdline))
 
     self.system_port = self.membus.cpu_side_ports
 
     return self
 
+
 def x86IOAddress(port):
     IO_address_space_base = 0x8000000000000000
     return IO_address_space_base + port
 
+
 def connectX86ClassicSystem(x86_sys, numCPUs):
     # Constants similar to x86_traits.hh
     IO_address_space_base = 0x8000000000000000
-    pci_config_address_space_base = 0xc000000000000000
-    interrupts_address_space_base = 0xa000000000000000
-    APIC_range_size = 1 << 12;
+    pci_config_address_space_base = 0xC000000000000000
+    interrupts_address_space_base = 0xA000000000000000
+    APIC_range_size = 1 << 12
 
     x86_sys.membus = MemBus()
 
     # North Bridge
     x86_sys.iobus = IOXBar()
-    x86_sys.bridge = Bridge(delay='50ns')
+    x86_sys.bridge = Bridge(delay="50ns")
     x86_sys.bridge.mem_side_port = x86_sys.iobus.cpu_side_ports
     x86_sys.bridge.cpu_side_port = x86_sys.membus.mem_side_ports
     # Allow the bridge to pass through:
@@ -407,30 +472,30 @@
     #  2) the bridge to pass through the IO APIC (two pages, already contained in 1),
     #  3) everything in the IO address range up to the local APIC, and
     #  4) then the entire PCI address space and beyond.
-    x86_sys.bridge.ranges = \
-        [
+    x86_sys.bridge.ranges = [
         AddrRange(0xC0000000, 0xFFFF0000),
-        AddrRange(IO_address_space_base,
-                  interrupts_address_space_base - 1),
-        AddrRange(pci_config_address_space_base,
-                  Addr.max)
-        ]
+        AddrRange(IO_address_space_base, interrupts_address_space_base - 1),
+        AddrRange(pci_config_address_space_base, Addr.max),
+    ]
 
     # Create a bridge from the IO bus to the memory bus to allow access to
     # the local APIC (two pages)
-    x86_sys.apicbridge = Bridge(delay='50ns')
+    x86_sys.apicbridge = Bridge(delay="50ns")
     x86_sys.apicbridge.cpu_side_port = x86_sys.iobus.mem_side_ports
     x86_sys.apicbridge.mem_side_port = x86_sys.membus.cpu_side_ports
-    x86_sys.apicbridge.ranges = [AddrRange(interrupts_address_space_base,
-                                           interrupts_address_space_base +
-                                           numCPUs * APIC_range_size
-                                           - 1)]
+    x86_sys.apicbridge.ranges = [
+        AddrRange(
+            interrupts_address_space_base,
+            interrupts_address_space_base + numCPUs * APIC_range_size - 1,
+        )
+    ]
 
     # connect the io bus
     x86_sys.pc.attachIO(x86_sys.iobus)
 
     x86_sys.system_port = x86_sys.membus.cpu_side_ports
 
+
 def connectX86RubySystem(x86_sys):
     # North Bridge
     x86_sys.iobus = IOXBar()
@@ -444,7 +509,7 @@
 def makeX86System(mem_mode, numCPUs=1, mdesc=None, workload=None, Ruby=False):
     self = System()
 
-    self.m5ops_base = 0xffff0000
+    self.m5ops_base = 0xFFFF0000
 
     if workload is None:
         workload = X86FsWorkload()
@@ -461,17 +526,22 @@
     # On the PC platform, the memory region 0xC0000000-0xFFFFFFFF is reserved
     # for various devices.  Hence, if the physical memory size is greater than
     # 3GB, we need to split it into two parts.
-    excess_mem_size = \
-        convert.toMemorySize(mdesc.mem()) - convert.toMemorySize('3GB')
+    excess_mem_size = convert.toMemorySize(mdesc.mem()) - convert.toMemorySize(
+        "3GB"
+    )
     if excess_mem_size <= 0:
         self.mem_ranges = [AddrRange(mdesc.mem())]
     else:
-        warn("Physical memory size specified is %s which is greater than " \
-             "3GB.  Twice the number of memory controllers would be " \
-             "created."  % (mdesc.mem()))
+        warn(
+            "Physical memory size specified is %s which is greater than "
+            "3GB.  Twice the number of memory controllers would be "
+            "created." % (mdesc.mem())
+        )
 
-        self.mem_ranges = [AddrRange('3GB'),
-            AddrRange(Addr('4GB'), size = excess_mem_size)]
+        self.mem_ranges = [
+            AddrRange("3GB"),
+            AddrRange(Addr("4GB"), size=excess_mem_size),
+        ]
 
     # Platform
     self.pc = Pc()
@@ -496,78 +566,78 @@
     madt_records = []
     for i in range(numCPUs):
         bp = X86IntelMPProcessor(
-                local_apic_id = i,
-                local_apic_version = 0x14,
-                enable = True,
-                bootstrap = (i == 0))
+            local_apic_id=i,
+            local_apic_version=0x14,
+            enable=True,
+            bootstrap=(i == 0),
+        )
         base_entries.append(bp)
-        lapic = X86ACPIMadtLAPIC(
-                acpi_processor_id=i,
-                apic_id=i,
-                flags=1)
+        lapic = X86ACPIMadtLAPIC(acpi_processor_id=i, apic_id=i, flags=1)
         madt_records.append(lapic)
     io_apic = X86IntelMPIOAPIC(
-            id = numCPUs,
-            version = 0x11,
-            enable = True,
-            address = 0xfec00000)
+        id=numCPUs, version=0x11, enable=True, address=0xFEC00000
+    )
     self.pc.south_bridge.io_apic.apic_id = io_apic.id
     base_entries.append(io_apic)
-    madt_records.append(X86ACPIMadtIOAPIC(id=io_apic.id,
-        address=io_apic.address, int_base=0))
+    madt_records.append(
+        X86ACPIMadtIOAPIC(id=io_apic.id, address=io_apic.address, int_base=0)
+    )
     # In gem5 Pc::calcPciConfigAddr(), it required "assert(bus==0)",
     # but linux kernel cannot config PCI device if it was not connected to
     # PCI bus, so we fix PCI bus id to 0, and ISA bus id to 1.
-    pci_bus = X86IntelMPBus(bus_id = 0, bus_type='PCI   ')
+    pci_bus = X86IntelMPBus(bus_id=0, bus_type="PCI   ")
     base_entries.append(pci_bus)
-    isa_bus = X86IntelMPBus(bus_id = 1, bus_type='ISA   ')
+    isa_bus = X86IntelMPBus(bus_id=1, bus_type="ISA   ")
     base_entries.append(isa_bus)
-    connect_busses = X86IntelMPBusHierarchy(bus_id=1,
-            subtractive_decode=True, parent_bus=0)
+    connect_busses = X86IntelMPBusHierarchy(
+        bus_id=1, subtractive_decode=True, parent_bus=0
+    )
     ext_entries.append(connect_busses)
     pci_dev4_inta = X86IntelMPIOIntAssignment(
-            interrupt_type = 'INT',
-            polarity = 'ConformPolarity',
-            trigger = 'ConformTrigger',
-            source_bus_id = 0,
-            source_bus_irq = 0 + (4 << 2),
-            dest_io_apic_id = io_apic.id,
-            dest_io_apic_intin = 16)
+        interrupt_type="INT",
+        polarity="ConformPolarity",
+        trigger="ConformTrigger",
+        source_bus_id=0,
+        source_bus_irq=0 + (4 << 2),
+        dest_io_apic_id=io_apic.id,
+        dest_io_apic_intin=16,
+    )
     base_entries.append(pci_dev4_inta)
     pci_dev4_inta_madt = X86ACPIMadtIntSourceOverride(
-            bus_source = pci_dev4_inta.source_bus_id,
-            irq_source = pci_dev4_inta.source_bus_irq,
-            sys_int = pci_dev4_inta.dest_io_apic_intin,
-            flags = 0
-        )
+        bus_source=pci_dev4_inta.source_bus_id,
+        irq_source=pci_dev4_inta.source_bus_irq,
+        sys_int=pci_dev4_inta.dest_io_apic_intin,
+        flags=0,
+    )
     madt_records.append(pci_dev4_inta_madt)
+
     def assignISAInt(irq, apicPin):
         assign_8259_to_apic = X86IntelMPIOIntAssignment(
-                interrupt_type = 'ExtInt',
-                polarity = 'ConformPolarity',
-                trigger = 'ConformTrigger',
-                source_bus_id = 1,
-                source_bus_irq = irq,
-                dest_io_apic_id = io_apic.id,
-                dest_io_apic_intin = 0)
+            interrupt_type="ExtInt",
+            polarity="ConformPolarity",
+            trigger="ConformTrigger",
+            source_bus_id=1,
+            source_bus_irq=irq,
+            dest_io_apic_id=io_apic.id,
+            dest_io_apic_intin=0,
+        )
         base_entries.append(assign_8259_to_apic)
         assign_to_apic = X86IntelMPIOIntAssignment(
-                interrupt_type = 'INT',
-                polarity = 'ConformPolarity',
-                trigger = 'ConformTrigger',
-                source_bus_id = 1,
-                source_bus_irq = irq,
-                dest_io_apic_id = io_apic.id,
-                dest_io_apic_intin = apicPin)
+            interrupt_type="INT",
+            polarity="ConformPolarity",
+            trigger="ConformTrigger",
+            source_bus_id=1,
+            source_bus_irq=irq,
+            dest_io_apic_id=io_apic.id,
+            dest_io_apic_intin=apicPin,
+        )
         base_entries.append(assign_to_apic)
         # acpi
         assign_to_apic_acpi = X86ACPIMadtIntSourceOverride(
-                bus_source = 1,
-                irq_source = irq,
-                sys_int = apicPin,
-                flags = 0
-            )
+            bus_source=1, irq_source=irq, sys_int=apicPin, flags=0
+        )
         madt_records.append(assign_to_apic_acpi)
+
     assignISAInt(0, 2)
     assignISAInt(1, 1)
     for i in range(3, 15):
@@ -575,64 +645,78 @@
     workload.intel_mp_table.base_entries = base_entries
     workload.intel_mp_table.ext_entries = ext_entries
 
-    madt = X86ACPIMadt(local_apic_address=0,
-            records=madt_records, oem_id='madt')
+    madt = X86ACPIMadt(
+        local_apic_address=0, records=madt_records, oem_id="madt"
+    )
     workload.acpi_description_table_pointer.rsdt.entries.append(madt)
     workload.acpi_description_table_pointer.xsdt.entries.append(madt)
-    workload.acpi_description_table_pointer.oem_id = 'gem5'
-    workload.acpi_description_table_pointer.rsdt.oem_id='gem5'
-    workload.acpi_description_table_pointer.xsdt.oem_id='gem5'
+    workload.acpi_description_table_pointer.oem_id = "gem5"
+    workload.acpi_description_table_pointer.rsdt.oem_id = "gem5"
+    workload.acpi_description_table_pointer.xsdt.oem_id = "gem5"
     return self
 
-def makeLinuxX86System(mem_mode, numCPUs=1, mdesc=None, Ruby=False,
-                       cmdline=None):
+
+def makeLinuxX86System(
+    mem_mode, numCPUs=1, mdesc=None, Ruby=False, cmdline=None
+):
     # Build up the x86 system and then specialize it for Linux
     self = makeX86System(mem_mode, numCPUs, mdesc, X86FsLinux(), Ruby)
 
     # We assume below that there's at least 1MB of memory. We'll require 2
     # just to avoid corner cases.
     phys_mem_size = sum([r.size() for r in self.mem_ranges])
-    assert(phys_mem_size >= 0x200000)
-    assert(len(self.mem_ranges) <= 2)
+    assert phys_mem_size >= 0x200000
+    assert len(self.mem_ranges) <= 2
 
-    entries = \
-       [
+    entries = [
         # Mark the first megabyte of memory as reserved
-        X86E820Entry(addr = 0, size = '639kB', range_type = 1),
-        X86E820Entry(addr = 0x9fc00, size = '385kB', range_type = 2),
+        X86E820Entry(addr=0, size="639kB", range_type=1),
+        X86E820Entry(addr=0x9FC00, size="385kB", range_type=2),
         # Mark the rest of physical memory as available
-        X86E820Entry(addr = 0x100000,
-                size = '%dB' % (self.mem_ranges[0].size() - 0x100000),
-                range_type = 1),
-        ]
+        X86E820Entry(
+            addr=0x100000,
+            size="%dB" % (self.mem_ranges[0].size() - 0x100000),
+            range_type=1,
+        ),
+    ]
 
     # Mark [mem_size, 3GB) as reserved if memory less than 3GB, which force
     # IO devices to be mapped to [0xC0000000, 0xFFFF0000). Requests to this
     # specific range can pass though bridge to iobus.
     if len(self.mem_ranges) == 1:
-        entries.append(X86E820Entry(addr = self.mem_ranges[0].size(),
-            size='%dB' % (0xC0000000 - self.mem_ranges[0].size()),
-            range_type=2))
+        entries.append(
+            X86E820Entry(
+                addr=self.mem_ranges[0].size(),
+                size="%dB" % (0xC0000000 - self.mem_ranges[0].size()),
+                range_type=2,
+            )
+        )
 
     # Reserve the last 16kB of the 32-bit address space for the m5op interface
-    entries.append(X86E820Entry(addr=0xFFFF0000, size='64kB', range_type=2))
+    entries.append(X86E820Entry(addr=0xFFFF0000, size="64kB", range_type=2))
 
     # In case the physical memory is greater than 3GB, we split it into two
     # parts and add a separate e820 entry for the second part.  This entry
     # starts at 0x100000000,  which is the first address after the space
     # reserved for devices.
     if len(self.mem_ranges) == 2:
-        entries.append(X86E820Entry(addr = 0x100000000,
-            size = '%dB' % (self.mem_ranges[1].size()), range_type = 1))
+        entries.append(
+            X86E820Entry(
+                addr=0x100000000,
+                size="%dB" % (self.mem_ranges[1].size()),
+                range_type=1,
+            )
+        )
 
     self.workload.e820_table.entries = entries
 
     # Command line
     if not cmdline:
-        cmdline = 'earlyprintk=ttyS0 console=ttyS0 lpj=7999923 root=/dev/hda1'
+        cmdline = "earlyprintk=ttyS0 console=ttyS0 lpj=7999923 root=/dev/hda1"
     self.workload.command_line = fillInCmdline(mdesc, cmdline)
     return self
 
+
 def makeBareMetalRiscvSystem(mem_mode, mdesc=None, cmdline=None):
     self = System()
     if not mdesc:
@@ -646,7 +730,7 @@
     self.iobus = IOXBar()
     self.membus = MemBus()
 
-    self.bridge = Bridge(delay='50ns')
+    self.bridge = Bridge(delay="50ns")
     self.bridge.mem_side_port = self.iobus.cpu_side_ports
     self.bridge.cpu_side_port = self.membus.mem_side_ports
     # Sv39 has 56 bit physical addresses; use the upper 8 bit for the IO space
@@ -656,16 +740,17 @@
     self.system_port = self.membus.cpu_side_ports
     return self
 
+
 def makeDualRoot(full_system, testSystem, driveSystem, dumpfile):
-    self = Root(full_system = full_system)
+    self = Root(full_system=full_system)
     self.testsys = testSystem
     self.drivesys = driveSystem
     self.etherlink = EtherLink()
 
-    if hasattr(testSystem, 'realview'):
+    if hasattr(testSystem, "realview"):
         self.etherlink.int0 = Parent.testsys.realview.ethernet.interface
         self.etherlink.int1 = Parent.drivesys.realview.ethernet.interface
-    elif hasattr(testSystem, 'tsunami'):
+    elif hasattr(testSystem, "tsunami"):
         self.etherlink.int0 = Parent.testsys.tsunami.ethernet.interface
         self.etherlink.int1 = Parent.drivesys.tsunami.ethernet.interface
     else:
@@ -678,31 +763,35 @@
     return self
 
 
-def makeDistRoot(testSystem,
-                 rank,
-                 size,
-                 server_name,
-                 server_port,
-                 sync_repeat,
-                 sync_start,
-                 linkspeed,
-                 linkdelay,
-                 dumpfile):
-    self = Root(full_system = True)
+def makeDistRoot(
+    testSystem,
+    rank,
+    size,
+    server_name,
+    server_port,
+    sync_repeat,
+    sync_start,
+    linkspeed,
+    linkdelay,
+    dumpfile,
+):
+    self = Root(full_system=True)
     self.testsys = testSystem
 
-    self.etherlink = DistEtherLink(speed = linkspeed,
-                                   delay = linkdelay,
-                                   dist_rank = rank,
-                                   dist_size = size,
-                                   server_name = server_name,
-                                   server_port = server_port,
-                                   sync_start = sync_start,
-                                   sync_repeat = sync_repeat)
+    self.etherlink = DistEtherLink(
+        speed=linkspeed,
+        delay=linkdelay,
+        dist_rank=rank,
+        dist_size=size,
+        server_name=server_name,
+        server_port=server_port,
+        sync_start=sync_start,
+        sync_repeat=sync_repeat,
+    )
 
-    if hasattr(testSystem, 'realview'):
+    if hasattr(testSystem, "realview"):
         self.etherlink.int0 = Parent.testsys.realview.ethernet.interface
-    elif hasattr(testSystem, 'tsunami'):
+    elif hasattr(testSystem, "tsunami"):
         self.etherlink.int0 = Parent.testsys.tsunami.ethernet.interface
     else:
         fatal("Don't know how to connect DistEtherLink to this system")
diff --git a/configs/common/FileSystemConfig.py b/configs/common/FileSystemConfig.py
index f60bf23..066eb9a 100644
--- a/configs/common/FileSystemConfig.py
+++ b/configs/common/FileSystemConfig.py
@@ -48,21 +48,25 @@
 from os.path import isdir
 from shutil import rmtree, copyfile
 
+
 def hex_mask(terms):
     dec_mask = reduce(operator.or_, [2**i for i in terms], 0)
     return "%08x" % dec_mask
 
+
 def file_append(path, contents):
-    with open(joinpath(*path), 'a') as f:
+    with open(joinpath(*path), "a") as f:
         f.write(str(contents))
 
+
 def replace_tree(path):
     if isdir(path):
         rmtree(path)
     mkdir(path)
 
-def config_filesystem(system, options = None):
-    """ This function parses the system object to create the pseudo file system
+
+def config_filesystem(system, options=None):
+    """This function parses the system object to create the pseudo file system
     @param system: The system to create the config for
     @param options: An optional argument which contains an Options.py options
            object. This is useful if when use se.py and will set the L2 cache
@@ -79,167 +83,200 @@
 
     These files are created in the `fs` directory in the outdir path.
     """
-    fsdir = joinpath(m5.options.outdir, 'fs')
+    fsdir = joinpath(m5.options.outdir, "fs")
     replace_tree(fsdir)
 
     # Set up /proc
-    procdir = joinpath(fsdir, 'proc')
+    procdir = joinpath(fsdir, "proc")
     mkdir(procdir)
 
     try:
-        cpus = \
-            [obj for obj in system.descendants() if isinstance(obj, BaseCPU)]
+        cpus = [
+            obj for obj in system.descendants() if isinstance(obj, BaseCPU)
+        ]
     except NameError:
         # BaseCPU is not defined for the NULL ISA
         cpus = []
 
     cpu_clock = 0
-    if hasattr(options, 'cpu_clock'):
+    if hasattr(options, "cpu_clock"):
         cpu_clock = toFrequency(options.cpu_clock) / mega
 
     l2_size = 0
-    if hasattr(options, 'l2_size'):
+    if hasattr(options, "l2_size"):
         l2_size = toMemorySize(options.l2_size) / kibi
 
-    for i,cpu in enumerate(cpus):
-        one_cpu = 'processor       : {proc}\n'                    + \
-                  'vendor_id       : Generic\n'                   + \
-                  'cpu family      : 0\n'                         + \
-                  'model           : 0\n'                         + \
-                  'model name      : Generic\n'                   + \
-                  'stepping        : 0\n'                         + \
-                  'cpu MHz         : {clock:0.3f}\n'              + \
-                  'cache size:     : {l2_size}K\n'                + \
-                  'physical id     : 0\n'                         + \
-                  'siblings        : {num_cpus}\n'                + \
-                  'core id         : {proc}\n'                    + \
-                  'cpu cores       : {num_cpus}\n'                + \
-                  'fpu             : yes\n'                       + \
-                  'fpu exception   : yes\n'                       + \
-                  'cpuid level     : 1\n'                         + \
-                  'wp              : yes\n'                       + \
-                  'flags           : fpu\n'                       + \
-                  'cache alignment : {cacheline_size}\n'          + \
-                  '\n'
-        one_cpu = one_cpu.format(proc = i, num_cpus = len(cpus),
-                       # Note: it would be nice to use cpu.clock, but it hasn't
-                       # been finalized yet since m5.instantiate() isn't done.
-                       clock = cpu_clock,
-                       # Note: this assumes the L2 is private to each core
-                       l2_size = l2_size,
-                       cacheline_size=system.cache_line_size.getValue())
-        file_append((procdir, 'cpuinfo'), one_cpu)
+    for i, cpu in enumerate(cpus):
+        one_cpu = (
+            "processor       : {proc}\n"
+            + "vendor_id       : Generic\n"
+            + "cpu family      : 0\n"
+            + "model           : 0\n"
+            + "model name      : Generic\n"
+            + "stepping        : 0\n"
+            + "cpu MHz         : {clock:0.3f}\n"
+            + "cache size:     : {l2_size}K\n"
+            + "physical id     : 0\n"
+            + "siblings        : {num_cpus}\n"
+            + "core id         : {proc}\n"
+            + "cpu cores       : {num_cpus}\n"
+            + "fpu             : yes\n"
+            + "fpu exception   : yes\n"
+            + "cpuid level     : 1\n"
+            + "wp              : yes\n"
+            + "flags           : fpu\n"
+            + "cache alignment : {cacheline_size}\n"
+            + "\n"
+        )
+        one_cpu = one_cpu.format(
+            proc=i,
+            num_cpus=len(cpus),
+            # Note: it would be nice to use cpu.clock, but it hasn't
+            # been finalized yet since m5.instantiate() isn't done.
+            clock=cpu_clock,
+            # Note: this assumes the L2 is private to each core
+            l2_size=l2_size,
+            cacheline_size=system.cache_line_size.getValue(),
+        )
+        file_append((procdir, "cpuinfo"), one_cpu)
 
-    file_append((procdir, 'stat'), 'cpu 0 0 0 0 0 0 0\n')
+    file_append((procdir, "stat"), "cpu 0 0 0 0 0 0 0\n")
     for i in range(len(cpus)):
-        file_append((procdir, 'stat'), 'cpu%d 0 0 0 0 0 0 0\n' % i)
+        file_append((procdir, "stat"), "cpu%d 0 0 0 0 0 0 0\n" % i)
 
     # Set up /sys
-    sysdir = joinpath(fsdir, 'sys')
+    sysdir = joinpath(fsdir, "sys")
     mkdir(sysdir)
 
     # Set up /sys/devices/system/cpu
-    cpudir = joinpath(sysdir, 'devices', 'system', 'cpu')
+    cpudir = joinpath(sysdir, "devices", "system", "cpu")
     makedirs(cpudir, exist_ok=True)
 
-    file_append((cpudir, 'online'), '0-%d' % (len(cpus) - 1))
-    file_append((cpudir, 'possible'), '0-%d' % (len(cpus) - 1))
+    file_append((cpudir, "online"), "0-%d" % (len(cpus) - 1))
+    file_append((cpudir, "possible"), "0-%d" % (len(cpus) - 1))
 
     # Set up /tmp
-    tmpdir = joinpath(fsdir, 'tmp')
+    tmpdir = joinpath(fsdir, "tmp")
     replace_tree(tmpdir)
 
     system.redirect_paths = _redirect_paths(options)
 
     # Setting the interpreter path. This is used to load the
     # guest dynamic linker itself from the elf file.
-    interp = getattr(options, 'interp_dir', None)
+    interp = getattr(options, "interp_dir", None)
     if interp:
         from m5.core import setInterpDir
+
         setInterpDir(interp)
 
-        print("Setting the interpreter path to:", interp,
-              "\nFor dynamically linked applications you might still "
-              "need to setup the --redirects so that libraries are "
-              "found\n")
+        print(
+            "Setting the interpreter path to:",
+            interp,
+            "\nFor dynamically linked applications you might still "
+            "need to setup the --redirects so that libraries are "
+            "found\n",
+        )
+
 
 def register_node(cpu_list, mem, node_number):
-    nodebasedir = joinpath(m5.options.outdir, 'fs', 'sys', 'devices',
-                           'system', 'node')
+    nodebasedir = joinpath(
+        m5.options.outdir, "fs", "sys", "devices", "system", "node"
+    )
 
-    nodedir = joinpath(nodebasedir,'node%d' % node_number)
+    nodedir = joinpath(nodebasedir, "node%d" % node_number)
     makedirs(nodedir, exist_ok=True)
 
-    file_append((nodedir, 'cpumap'), hex_mask(cpu_list))
-    file_append((nodedir, 'meminfo'),
-                'Node %d MemTotal: %dkB' % (node_number,
-                toMemorySize(str(mem))/kibi))
+    file_append((nodedir, "cpumap"), hex_mask(cpu_list))
+    file_append(
+        (nodedir, "meminfo"),
+        "Node %d MemTotal: %dkB"
+        % (node_number, toMemorySize(str(mem)) / kibi),
+    )
 
-def register_cpu(physical_package_id, core_siblings,
-                 core_id, thread_siblings):
-    cpudir = joinpath(m5.options.outdir, 'fs',  'sys', 'devices', 'system',
-                      'cpu', 'cpu%d' % core_id)
 
-    makedirs(joinpath(cpudir, 'topology'), exist_ok=True)
-    makedirs(joinpath(cpudir, 'cache'))
+def register_cpu(physical_package_id, core_siblings, core_id, thread_siblings):
+    cpudir = joinpath(
+        m5.options.outdir,
+        "fs",
+        "sys",
+        "devices",
+        "system",
+        "cpu",
+        "cpu%d" % core_id,
+    )
 
-    file_append((cpudir, 'online'), '1')
-    file_append((cpudir, 'topology', 'physical_package_id'),
-                physical_package_id)
-    file_append((cpudir, 'topology', 'core_siblings'),
-                hex_mask(core_siblings))
-    file_append((cpudir, 'topology', 'core_id'), core_id)
-    file_append((cpudir, 'topology', 'thread_siblings'),
-                hex_mask(thread_siblings))
+    makedirs(joinpath(cpudir, "topology"), exist_ok=True)
+    makedirs(joinpath(cpudir, "cache"))
+
+    file_append((cpudir, "online"), "1")
+    file_append(
+        (cpudir, "topology", "physical_package_id"), physical_package_id
+    )
+    file_append((cpudir, "topology", "core_siblings"), hex_mask(core_siblings))
+    file_append((cpudir, "topology", "core_id"), core_id)
+    file_append(
+        (cpudir, "topology", "thread_siblings"), hex_mask(thread_siblings)
+    )
+
 
 def register_cache(level, idu_type, size, line_size, assoc, cpus):
-    fsdir = joinpath(m5.options.outdir, 'fs')
+    fsdir = joinpath(m5.options.outdir, "fs")
     for i in cpus:
-        cachedir = joinpath(fsdir, 'sys', 'devices', 'system', 'cpu',
-                            'cpu%d' % i, 'cache')
+        cachedir = joinpath(
+            fsdir, "sys", "devices", "system", "cpu", "cpu%d" % i, "cache"
+        )
 
         j = 0
-        while isdir(joinpath(cachedir, 'index%d' % j)):
+        while isdir(joinpath(cachedir, "index%d" % j)):
             j += 1
-        indexdir = joinpath(cachedir, 'index%d' % j)
+        indexdir = joinpath(cachedir, "index%d" % j)
         makedirs(indexdir, exist_ok=True)
 
-        file_append((indexdir, 'level'), level)
-        file_append((indexdir, 'type'), idu_type)
-        file_append((indexdir, 'size'), "%dK" % (toMemorySize(size)/kibi))
-        file_append((indexdir, 'coherency_line_size'), line_size)
+        file_append((indexdir, "level"), level)
+        file_append((indexdir, "type"), idu_type)
+        file_append((indexdir, "size"), "%dK" % (toMemorySize(size) / kibi))
+        file_append((indexdir, "coherency_line_size"), line_size)
 
         # Since cache size = number of indices * associativity * block size
         num_sets = toMemorySize(size) / int(assoc) * int(line_size)
 
-        file_append((indexdir, 'number_of_sets'), num_sets)
-        file_append((indexdir, 'physical_line_partition'), '1')
-        file_append((indexdir, 'shared_cpu_map'), hex_mask(cpus))
-        file_append((indexdir, 'shared_cpu_list'),
-                    ','.join(str(cpu) for cpu in cpus))
+        file_append((indexdir, "number_of_sets"), num_sets)
+        file_append((indexdir, "physical_line_partition"), "1")
+        file_append((indexdir, "shared_cpu_map"), hex_mask(cpus))
+        file_append(
+            (indexdir, "shared_cpu_list"), ",".join(str(cpu) for cpu in cpus)
+        )
+
 
 def _redirect_paths(options):
     # Redirect filesystem syscalls from src to the first matching dests
-    redirect_paths = [RedirectPath(app_path = "/proc",
-                          host_paths = ["%s/fs/proc" % m5.options.outdir]),
-                      RedirectPath(app_path = "/sys",
-                          host_paths = ["%s/fs/sys"  % m5.options.outdir]),
-                      RedirectPath(app_path = "/tmp",
-                          host_paths = ["%s/fs/tmp"  % m5.options.outdir])]
+    redirect_paths = [
+        RedirectPath(
+            app_path="/proc", host_paths=["%s/fs/proc" % m5.options.outdir]
+        ),
+        RedirectPath(
+            app_path="/sys", host_paths=["%s/fs/sys" % m5.options.outdir]
+        ),
+        RedirectPath(
+            app_path="/tmp", host_paths=["%s/fs/tmp" % m5.options.outdir]
+        ),
+    ]
 
     # Setting the redirect paths so that the guest dynamic linker
     # can point to the proper /lib collection (e.g. to load libc)
-    redirects = getattr(options, 'redirects', [])
+    redirects = getattr(options, "redirects", [])
     for redirect in redirects:
         app_path, host_path = redirect.split("=")
         redirect_paths.append(
-            RedirectPath(app_path = app_path, host_paths = [ host_path ]))
+            RedirectPath(app_path=app_path, host_paths=[host_path])
+        )
 
-    chroot = getattr(options, 'chroot', None)
+    chroot = getattr(options, "chroot", None)
     if chroot:
         redirect_paths.append(
             RedirectPath(
-                app_path = "/",
-                host_paths = ["%s" % os.path.expanduser(chroot)]))
+                app_path="/", host_paths=["%s" % os.path.expanduser(chroot)]
+            )
+        )
 
     return redirect_paths
diff --git a/configs/common/GPUTLBConfig.py b/configs/common/GPUTLBConfig.py
index 740c748..b70d6c5 100644
--- a/configs/common/GPUTLBConfig.py
+++ b/configs/common/GPUTLBConfig.py
@@ -34,10 +34,12 @@
 import m5
 from m5.objects import *
 
+
 def TLB_constructor(options, level, gpu_ctrl=None, full_system=False):
 
     if full_system:
-        constructor_call = "VegaGPUTLB(\
+        constructor_call = (
+            "VegaGPUTLB(\
                 gpu_device = gpu_ctrl, \
                 size = options.L%(level)dTLBentries, \
                 assoc = options.L%(level)dTLBassoc, \
@@ -48,9 +50,12 @@
                 clk_domain = SrcClockDomain(\
                     clock = options.gpu_clock,\
                     voltage_domain = VoltageDomain(\
-                        voltage = options.gpu_voltage)))" % locals()
+                        voltage = options.gpu_voltage)))"
+            % locals()
+        )
     else:
-        constructor_call = "X86GPUTLB(size = options.L%(level)dTLBentries, \
+        constructor_call = (
+            "X86GPUTLB(size = options.L%(level)dTLBentries, \
                 assoc = options.L%(level)dTLBassoc, \
                 hitLatency = options.L%(level)dAccessLatency,\
                 missLatency2 = options.L%(level)dMissLatency,\
@@ -59,13 +64,17 @@
                 clk_domain = SrcClockDomain(\
                     clock = options.gpu_clock,\
                     voltage_domain = VoltageDomain(\
-                        voltage = options.gpu_voltage)))" % locals()
+                        voltage = options.gpu_voltage)))"
+            % locals()
+        )
     return constructor_call
 
+
 def Coalescer_constructor(options, level, full_system):
 
     if full_system:
-        constructor_call = "VegaTLBCoalescer(probesPerCycle = \
+        constructor_call = (
+            "VegaTLBCoalescer(probesPerCycle = \
             options.L%(level)dProbesPerCycle, \
             tlb_level  = %(level)d ,\
             coalescingWindow = options.L%(level)dCoalescingWindow,\
@@ -73,30 +82,47 @@
             clk_domain = SrcClockDomain(\
                 clock = options.gpu_clock,\
                 voltage_domain = VoltageDomain(\
-                    voltage = options.gpu_voltage)))" % locals()
+                    voltage = options.gpu_voltage)))"
+            % locals()
+        )
     else:
-        constructor_call = "TLBCoalescer(probesPerCycle = \
+        constructor_call = (
+            "TLBCoalescer(probesPerCycle = \
             options.L%(level)dProbesPerCycle, \
             coalescingWindow = options.L%(level)dCoalescingWindow,\
             disableCoalescing = options.L%(level)dDisableCoalescing,\
             clk_domain = SrcClockDomain(\
                 clock = options.gpu_clock,\
                 voltage_domain = VoltageDomain(\
-                    voltage = options.gpu_voltage)))" % locals()
+                    voltage = options.gpu_voltage)))"
+            % locals()
+        )
     return constructor_call
 
-def create_TLB_Coalescer(options, my_level, my_index, tlb_name,
-                         coalescer_name, gpu_ctrl=None, full_system=False):
+
+def create_TLB_Coalescer(
+    options,
+    my_level,
+    my_index,
+    tlb_name,
+    coalescer_name,
+    gpu_ctrl=None,
+    full_system=False,
+):
     # arguments: options, TLB level, number of private structures for this
     # Level, TLB name and  Coalescer name
     for i in range(my_index):
         tlb_name.append(
-            eval(TLB_constructor(options, my_level, gpu_ctrl, full_system)))
+            eval(TLB_constructor(options, my_level, gpu_ctrl, full_system))
+        )
         coalescer_name.append(
-            eval(Coalescer_constructor(options, my_level, full_system)))
+            eval(Coalescer_constructor(options, my_level, full_system))
+        )
 
-def config_tlb_hierarchy(options, system, shader_idx, gpu_ctrl=None,
-                         full_system=False):
+
+def config_tlb_hierarchy(
+    options, system, shader_idx, gpu_ctrl=None, full_system=False
+):
     n_cu = options.num_compute_units
 
     if options.TLB_config == "perLane":
@@ -111,36 +137,50 @@
         print("Bad option for TLB Configuration.")
         sys.exit(1)
 
-    #-------------------------------------------------------------------------
+    # -------------------------------------------------------------------------
     # A visual representation of the TLB hierarchy
     # for ease of configuration
     # < Modify here the width and the number of levels if you want a different
     # configuration >
     # width is the number of TLBs of the given type (i.e., D-TLB, I-TLB etc)
     # for this level
-    L1 = [{'name': 'sqc', 'width': options.num_sqc, 'TLBarray': [],
-           'CoalescerArray': []},
-          {'name': 'scalar', 'width' : options.num_scalar_cache,
-           'TLBarray': [], 'CoalescerArray': []},
-          {'name': 'l1', 'width': num_TLBs, 'TLBarray': [],
-           'CoalescerArray': []}]
+    L1 = [
+        {
+            "name": "sqc",
+            "width": options.num_sqc,
+            "TLBarray": [],
+            "CoalescerArray": [],
+        },
+        {
+            "name": "scalar",
+            "width": options.num_scalar_cache,
+            "TLBarray": [],
+            "CoalescerArray": [],
+        },
+        {
+            "name": "l1",
+            "width": num_TLBs,
+            "TLBarray": [],
+            "CoalescerArray": [],
+        },
+    ]
 
-    L2 = [{'name': 'l2', 'width': 1, 'TLBarray': [], 'CoalescerArray': []}]
-    L3 = [{'name': 'l3', 'width': 1, 'TLBarray': [], 'CoalescerArray': []}]
+    L2 = [{"name": "l2", "width": 1, "TLBarray": [], "CoalescerArray": []}]
+    L3 = [{"name": "l3", "width": 1, "TLBarray": [], "CoalescerArray": []}]
 
     TLB_hierarchy = [L1, L2, L3]
 
-    #-------------------------------------------------------------------------
+    # -------------------------------------------------------------------------
     # Create the hiearchy
     # Call the appropriate constructors and add objects to the system
 
     for i in range(len(TLB_hierarchy)):
         hierarchy_level = TLB_hierarchy[i]
-        level = i+1
+        level = i + 1
         for TLB_type in hierarchy_level:
-            TLB_index = TLB_type['width']
-            TLB_array = TLB_type['TLBarray']
-            Coalescer_array = TLB_type['CoalescerArray']
+            TLB_index = TLB_type["width"]
+            TLB_array = TLB_type["TLBarray"]
+            Coalescer_array = TLB_type["CoalescerArray"]
             # If the sim calls for a fixed L1 TLB size across CUs,
             # override the TLB entries option
             if options.tot_L1TLB_size:
@@ -148,71 +188,96 @@
                 if options.L1TLBassoc > options.L1TLBentries:
                     options.L1TLBassoc = options.L1TLBentries
             # call the constructors for the TLB and the Coalescer
-            create_TLB_Coalescer(options, level, TLB_index,\
-                TLB_array, Coalescer_array, gpu_ctrl, full_system)
+            create_TLB_Coalescer(
+                options,
+                level,
+                TLB_index,
+                TLB_array,
+                Coalescer_array,
+                gpu_ctrl,
+                full_system,
+            )
 
-            system_TLB_name = TLB_type['name'] + '_tlb'
-            system_Coalescer_name = TLB_type['name'] + '_coalescer'
+            system_TLB_name = TLB_type["name"] + "_tlb"
+            system_Coalescer_name = TLB_type["name"] + "_coalescer"
 
             # add the different TLB levels to the system
             # Modify here if you want to make the TLB hierarchy a child of
             # the shader.
-            exec('system.%s = TLB_array' % system_TLB_name)
-            exec('system.%s = Coalescer_array' % system_Coalescer_name)
+            exec("system.%s = TLB_array" % system_TLB_name)
+            exec("system.%s = Coalescer_array" % system_Coalescer_name)
 
-    #===========================================================
+    # ===========================================================
     # Specify the TLB hierarchy (i.e., port connections)
     # All TLBs but the last level TLB need to have a memSidePort
-    #===========================================================
+    # ===========================================================
 
     # Each TLB is connected with its Coalescer through a single port.
     # There is a one-to-one mapping of TLBs to Coalescers at a given level
     # This won't be modified no matter what the hierarchy looks like.
     for i in range(len(TLB_hierarchy)):
         hierarchy_level = TLB_hierarchy[i]
-        level = i+1
+        level = i + 1
         for TLB_type in hierarchy_level:
-            name = TLB_type['name']
-            for index in range(TLB_type['width']):
-                exec('system.%s_coalescer[%d].mem_side_ports[0] = \
-                        system.%s_tlb[%d].cpu_side_ports[0]' % \
-                        (name, index, name, index))
+            name = TLB_type["name"]
+            for index in range(TLB_type["width"]):
+                exec(
+                    "system.%s_coalescer[%d].mem_side_ports[0] = \
+                        system.%s_tlb[%d].cpu_side_ports[0]"
+                    % (name, index, name, index)
+                )
 
     # Connect the cpuSidePort of all the coalescers in level 1
     # < Modify here if you want a different configuration >
     for TLB_type in L1:
-        name = TLB_type['name']
-        num_TLBs = TLB_type['width']
-        if name == 'l1':     # L1 D-TLBs
+        name = TLB_type["name"]
+        num_TLBs = TLB_type["width"]
+        if name == "l1":  # L1 D-TLBs
             tlb_per_cu = num_TLBs // n_cu
             for cu_idx in range(n_cu):
                 if tlb_per_cu:
                     for tlb in range(tlb_per_cu):
-                        exec('system.cpu[%d].CUs[%d].translation_port[%d] = \
-                                system.l1_coalescer[%d].cpu_side_ports[%d]' % \
-                                (shader_idx, cu_idx, tlb,
-                                    cu_idx*tlb_per_cu+tlb, 0))
+                        exec(
+                            "system.cpu[%d].CUs[%d].translation_port[%d] = \
+                                system.l1_coalescer[%d].cpu_side_ports[%d]"
+                            % (
+                                shader_idx,
+                                cu_idx,
+                                tlb,
+                                cu_idx * tlb_per_cu + tlb,
+                                0,
+                            )
+                        )
                 else:
-                    exec('system.cpu[%d].CUs[%d].translation_port[%d] = \
-                            system.l1_coalescer[%d].cpu_side_ports[%d]' % \
-                            (shader_idx, cu_idx, tlb_per_cu,
-                                cu_idx / (n_cu / num_TLBs),
-                                cu_idx % (n_cu / num_TLBs)))
-        elif name == 'sqc': # I-TLB
+                    exec(
+                        "system.cpu[%d].CUs[%d].translation_port[%d] = \
+                            system.l1_coalescer[%d].cpu_side_ports[%d]"
+                        % (
+                            shader_idx,
+                            cu_idx,
+                            tlb_per_cu,
+                            cu_idx / (n_cu / num_TLBs),
+                            cu_idx % (n_cu / num_TLBs),
+                        )
+                    )
+        elif name == "sqc":  # I-TLB
             for index in range(n_cu):
                 sqc_tlb_index = index / options.cu_per_sqc
                 sqc_tlb_port_id = index % options.cu_per_sqc
-                exec('system.cpu[%d].CUs[%d].sqc_tlb_port = \
-                        system.sqc_coalescer[%d].cpu_side_ports[%d]' % \
-                        (shader_idx, index, sqc_tlb_index, sqc_tlb_port_id))
-        elif name == 'scalar': # Scalar D-TLB
+                exec(
+                    "system.cpu[%d].CUs[%d].sqc_tlb_port = \
+                        system.sqc_coalescer[%d].cpu_side_ports[%d]"
+                    % (shader_idx, index, sqc_tlb_index, sqc_tlb_port_id)
+                )
+        elif name == "scalar":  # Scalar D-TLB
             for index in range(n_cu):
                 scalar_tlb_index = index / options.cu_per_scalar_cache
                 scalar_tlb_port_id = index % options.cu_per_scalar_cache
-                exec('system.cpu[%d].CUs[%d].scalar_tlb_port = \
-                        system.scalar_coalescer[%d].cpu_side_ports[%d]' % \
-                        (shader_idx, index, scalar_tlb_index,
-                         scalar_tlb_port_id))
+                exec(
+                    "system.cpu[%d].CUs[%d].scalar_tlb_port = \
+                        system.scalar_coalescer[%d].cpu_side_ports[%d]"
+                    % (shader_idx, index, scalar_tlb_index, scalar_tlb_port_id)
+                )
 
     # Connect the memSidePorts of all the TLBs with the
     # cpuSidePorts of the Coalescers of the next level
@@ -220,23 +285,28 @@
     # L1 <-> L2
     l2_coalescer_index = 0
     for TLB_type in L1:
-        name = TLB_type['name']
-        for index in range(TLB_type['width']):
-            exec('system.%s_tlb[%d].mem_side_ports[0] = \
-                    system.l2_coalescer[0].cpu_side_ports[%d]' % \
-                    (name, index, l2_coalescer_index))
+        name = TLB_type["name"]
+        for index in range(TLB_type["width"]):
+            exec(
+                "system.%s_tlb[%d].mem_side_ports[0] = \
+                    system.l2_coalescer[0].cpu_side_ports[%d]"
+                % (name, index, l2_coalescer_index)
+            )
             l2_coalescer_index += 1
 
     # L2 <-> L3
-    system.l2_tlb[0].mem_side_ports[0] = \
-        system.l3_coalescer[0].cpu_side_ports[0]
+    system.l2_tlb[0].mem_side_ports[0] = system.l3_coalescer[0].cpu_side_ports[
+        0
+    ]
 
     # L3 TLB Vega page table walker to memory for full system only
     if full_system:
         for TLB_type in L3:
-            name = TLB_type['name']
-            for index in range(TLB_type['width']):
-                exec('system._dma_ports.append(system.%s_tlb[%d].walker)' % \
-                        (name, index))
+            name = TLB_type["name"]
+            for index in range(TLB_type["width"]):
+                exec(
+                    "system._dma_ports.append(system.%s_tlb[%d].walker)"
+                    % (name, index)
+                )
 
     return system
diff --git a/configs/common/GPUTLBOptions.py b/configs/common/GPUTLBOptions.py
index 3a1f9ad..1a77a2c 100644
--- a/configs/common/GPUTLBOptions.py
+++ b/configs/common/GPUTLBOptions.py
@@ -27,77 +27,105 @@
 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.
 
+
 def tlb_options(parser):
 
-    #===================================================================
+    # ===================================================================
     # TLB Configuration
-    #===================================================================
+    # ===================================================================
 
     parser.add_argument(
-        "--TLB-config", type=str, default="perCU",
-        help="Options are: perCU (default), mono, 2CU, or perLane")
+        "--TLB-config",
+        type=str,
+        default="perCU",
+        help="Options are: perCU (default), mono, 2CU, or perLane",
+    )
 
-    #===================================================================
+    # ===================================================================
     #   L1 TLB Options (D-TLB, I-TLB, Dispatcher-TLB)
-    #===================================================================
+    # ===================================================================
 
     parser.add_argument("--L1TLBentries", type=int, default="32")
     parser.add_argument("--L1TLBassoc", type=int, default="32")
-    parser.add_argument("--L1AccessLatency", type=int, default="1",
-                        help="latency in gpu cycles")
-    parser.add_argument("--L1MissLatency", type=int, default="750",
-                        help="latency (in gpu cycles) of a page walk, "
-                        "if this is a last level TLB")
+    parser.add_argument(
+        "--L1AccessLatency",
+        type=int,
+        default="1",
+        help="latency in gpu cycles",
+    )
+    parser.add_argument(
+        "--L1MissLatency",
+        type=int,
+        default="750",
+        help="latency (in gpu cycles) of a page walk, "
+        "if this is a last level TLB",
+    )
     parser.add_argument("--L1MaxOutstandingReqs", type=int, default="64")
     parser.add_argument("--L1AccessDistanceStat", action="store_true")
     parser.add_argument("--tot-L1TLB-size", type=int, default="0")
 
-    #===================================================================
+    # ===================================================================
     #   L2 TLB Options
-    #===================================================================
+    # ===================================================================
 
     parser.add_argument("--L2TLBentries", type=int, default="4096")
     parser.add_argument("--L2TLBassoc", type=int, default="32")
-    parser.add_argument("--L2AccessLatency", type=int, default="69",
-                        help="latency in gpu cycles")
-    parser.add_argument("--L2MissLatency", type=int, default="750",
-                        help="latency (in gpu cycles) of a page walk, "
-                        "if this is a last level TLB")
+    parser.add_argument(
+        "--L2AccessLatency",
+        type=int,
+        default="69",
+        help="latency in gpu cycles",
+    )
+    parser.add_argument(
+        "--L2MissLatency",
+        type=int,
+        default="750",
+        help="latency (in gpu cycles) of a page walk, "
+        "if this is a last level TLB",
+    )
     parser.add_argument("--L2MaxOutstandingReqs", type=int, default="64")
     parser.add_argument("--L2AccessDistanceStat", action="store_true")
 
-    #===================================================================
+    # ===================================================================
     #   L3 TLB Options
-    #===================================================================
+    # ===================================================================
 
     parser.add_argument("--L3TLBentries", type=int, default="8192")
     parser.add_argument("--L3TLBassoc", type=int, default="32")
-    parser.add_argument("--L3AccessLatency", type=int, default="150",
-                        help="latency in gpu cycles")
-    parser.add_argument("--L3MissLatency", type=int, default="750",
-                        help="latency (in gpu cycles) of a page walk")
+    parser.add_argument(
+        "--L3AccessLatency",
+        type=int,
+        default="150",
+        help="latency in gpu cycles",
+    )
+    parser.add_argument(
+        "--L3MissLatency",
+        type=int,
+        default="750",
+        help="latency (in gpu cycles) of a page walk",
+    )
     parser.add_argument("--L3MaxOutstandingReqs", type=int, default="64")
     parser.add_argument("--L3AccessDistanceStat", action="store_true")
 
-    #===================================================================
+    # ===================================================================
     #   L1 TLBCoalescer Options
-    #===================================================================
+    # ===================================================================
 
     parser.add_argument("--L1ProbesPerCycle", type=int, default="2")
     parser.add_argument("--L1CoalescingWindow", type=int, default="1")
     parser.add_argument("--L1DisableCoalescing", action="store_true")
 
-    #===================================================================
+    # ===================================================================
     #   L2 TLBCoalescer Options
-    #===================================================================
+    # ===================================================================
 
     parser.add_argument("--L2ProbesPerCycle", type=int, default="2")
     parser.add_argument("--L2CoalescingWindow", type=int, default="1")
     parser.add_argument("--L2DisableCoalescing", action="store_true")
 
-    #===================================================================
+    # ===================================================================
     #   L3 TLBCoalescer Options
-    #===================================================================
+    # ===================================================================
 
     parser.add_argument("--L3ProbesPerCycle", type=int, default="2")
     parser.add_argument("--L3CoalescingWindow", type=int, default="1")
diff --git a/configs/common/HMC.py b/configs/common/HMC.py
index b12bd0a..f8321f3 100644
--- a/configs/common/HMC.py
+++ b/configs/common/HMC.py
@@ -129,159 +129,303 @@
 def add_options(parser):
     # *****************************CROSSBAR PARAMETERS*************************
     # Flit size of the main interconnect [1]
-    parser.add_argument("--xbar-width", default=32, action="store", type=int,
-                        help="Data width of the main XBar (Bytes)")
+    parser.add_argument(
+        "--xbar-width",
+        default=32,
+        action="store",
+        type=int,
+        help="Data width of the main XBar (Bytes)",
+    )
 
     # Clock frequency of the main interconnect [1]
     # This crossbar, is placed on the logic-based of the HMC and it has its
     # own voltage and clock domains, different from the DRAM dies or from the
     # host.
-    parser.add_argument("--xbar-frequency", default='1GHz', type=str,
-                        help="Clock Frequency of the main XBar")
+    parser.add_argument(
+        "--xbar-frequency",
+        default="1GHz",
+        type=str,
+        help="Clock Frequency of the main XBar",
+    )
 
     # Arbitration latency of the HMC XBar [1]
-    parser.add_argument("--xbar-frontend-latency", default=1, action="store",
-                        type=int, help="Arbitration latency of the XBar")
+    parser.add_argument(
+        "--xbar-frontend-latency",
+        default=1,
+        action="store",
+        type=int,
+        help="Arbitration latency of the XBar",
+    )
 
     # Latency to forward a packet via the interconnect [1](two levels of FIFOs
     # at the input and output of the inteconnect)
-    parser.add_argument("--xbar-forward-latency", default=2, action="store",
-                        type=int, help="Forward latency of the XBar")
+    parser.add_argument(
+        "--xbar-forward-latency",
+        default=2,
+        action="store",
+        type=int,
+        help="Forward latency of the XBar",
+    )
 
     # Latency to forward a response via the interconnect [1](two levels of
     # FIFOs at the input and output of the inteconnect)
-    parser.add_argument("--xbar-response-latency", default=2, action="store",
-                        type=int, help="Response latency of the XBar")
+    parser.add_argument(
+        "--xbar-response-latency",
+        default=2,
+        action="store",
+        type=int,
+        help="Response latency of the XBar",
+    )
 
     # number of cross which connects 16 Vaults to serial link[7]
-    parser.add_argument("--number-mem-crossbar", default=4, action="store",
-                        type=int, help="Number of crossbar in HMC")
+    parser.add_argument(
+        "--number-mem-crossbar",
+        default=4,
+        action="store",
+        type=int,
+        help="Number of crossbar in HMC",
+    )
 
     # *****************************SERIAL LINK PARAMETERS**********************
     # Number of serial links controllers [1]
-    parser.add_argument("--num-links-controllers", default=4, action="store",
-                        type=int, help="Number of serial links")
+    parser.add_argument(
+        "--num-links-controllers",
+        default=4,
+        action="store",
+        type=int,
+        help="Number of serial links",
+    )
 
     # Number of packets (not flits) to store at the request side of the serial
     #  link. This number should be adjusted to achive required bandwidth
-    parser.add_argument("--link-buffer-size-req", default=10, action="store",
-                        type=int, help="Number of packets to buffer at the\
-                        request side of the serial link")
+    parser.add_argument(
+        "--link-buffer-size-req",
+        default=10,
+        action="store",
+        type=int,
+        help="Number of packets to buffer at the\
+                        request side of the serial link",
+    )
 
     # Number of packets (not flits) to store at the response side of the serial
     #  link. This number should be adjusted to achive required bandwidth
-    parser.add_argument("--link-buffer-size-rsp", default=10, action="store",
-                        type=int, help="Number of packets to buffer at the\
-                        response side of the serial link")
+    parser.add_argument(
+        "--link-buffer-size-rsp",
+        default=10,
+        action="store",
+        type=int,
+        help="Number of packets to buffer at the\
+                        response side of the serial link",
+    )
 
     # Latency of the serial link composed by SER/DES latency (1.6ns [4]) plus
     # the PCB trace latency (3ns Estimated based on [5])
-    parser.add_argument("--link-latency", default='4.6ns', type=str,
-                        help="Latency of the serial links")
+    parser.add_argument(
+        "--link-latency",
+        default="4.6ns",
+        type=str,
+        help="Latency of the serial links",
+    )
 
     # Clock frequency of the each serial link(SerDes) [1]
-    parser.add_argument("--link-frequency", default='10GHz', type=str,
-                        help="Clock Frequency of the serial links")
+    parser.add_argument(
+        "--link-frequency",
+        default="10GHz",
+        type=str,
+        help="Clock Frequency of the serial links",
+    )
 
     # Clock frequency of serial link Controller[6]
     # clk_hmc[Mhz]= num_lanes_per_link * lane_speed [Gbits/s] /
     # data_path_width * 10^6
     # clk_hmc[Mhz]= 16 * 10 Gbps / 256 * 10^6 = 625 Mhz
-    parser.add_argument("--link-controller-frequency", default='625MHz',
-                        type=str, help="Clock Frequency of the link\
-                        controller")
+    parser.add_argument(
+        "--link-controller-frequency",
+        default="625MHz",
+        type=str,
+        help="Clock Frequency of the link\
+                        controller",
+    )
 
     # Latency of the serial link controller to process the packets[1][6]
     # (ClockDomain = 625 Mhz )
     # used here for calculations only
-    parser.add_argument("--link-ctrl-latency", default=4, action="store",
-                        type=int, help="The number of cycles required for the\
-                        controller to process the packet")
+    parser.add_argument(
+        "--link-ctrl-latency",
+        default=4,
+        action="store",
+        type=int,
+        help="The number of cycles required for the\
+                        controller to process the packet",
+    )
 
     # total_ctrl_latency = link_ctrl_latency + link_latency
     # total_ctrl_latency = 4(Cycles) * 1.6 ns +  4.6 ns
-    parser.add_argument("--total-ctrl-latency", default='11ns', type=str,
-                        help="The latency experienced by every packet\
-                        regardless of size of packet")
+    parser.add_argument(
+        "--total-ctrl-latency",
+        default="11ns",
+        type=str,
+        help="The latency experienced by every packet\
+                        regardless of size of packet",
+    )
 
     # Number of parallel lanes in each serial link [1]
-    parser.add_argument("--num-lanes-per-link", default=16, action="store",
-                        type=int, help="Number of lanes per each link")
+    parser.add_argument(
+        "--num-lanes-per-link",
+        default=16,
+        action="store",
+        type=int,
+        help="Number of lanes per each link",
+    )
 
     # Number of serial links [1]
-    parser.add_argument("--num-serial-links", default=4, action="store",
-                        type=int, help="Number of serial links")
+    parser.add_argument(
+        "--num-serial-links",
+        default=4,
+        action="store",
+        type=int,
+        help="Number of serial links",
+    )
 
     # speed of each lane of serial link - SerDes serial interface 10 Gb/s
-    parser.add_argument("--serial-link-speed", default=10, action="store",
-                        type=int, help="Gbs/s speed of each lane of serial\
-                        link")
+    parser.add_argument(
+        "--serial-link-speed",
+        default=10,
+        action="store",
+        type=int,
+        help="Gb/s speed of each lane of serial\
+                        link",
+    )
 
     # address range for each of the serial links
-    parser.add_argument("--serial-link-addr-range", default='1GB', type=str,
-                        help="memory range for each of the serial links.\
-                        Default: 1GB")
+    parser.add_argument(
+        "--serial-link-addr-range",
+        default="1GB",
+        type=str,
+        help="memory range for each of the serial links.\
+                        Default: 1GB",
+    )
 
     # *****************************PERFORMANCE MONITORING*********************
     # The main monitor behind the HMC Controller
-    parser.add_argument("--enable-global-monitor", action="store_true",
-                        help="The main monitor behind the HMC Controller")
+    parser.add_argument(
+        "--enable-global-monitor",
+        action="store_true",
+        help="The main monitor behind the HMC Controller",
+    )
 
     # The link performance monitors
-    parser.add_argument("--enable-link-monitor", action="store_true",
-                        help="The link monitors")
+    parser.add_argument(
+        "--enable-link-monitor", action="store_true", help="The link monitors"
+    )
 
     # link aggregator enable - put a cross between buffers & links
-    parser.add_argument("--enable-link-aggr", action="store_true", help="The\
-                        crossbar between port and Link Controller")
+    parser.add_argument(
+        "--enable-link-aggr",
+        action="store_true",
+        help="The\
+                        crossbar between port and Link Controller",
+    )
 
-    parser.add_argument("--enable-buff-div", action="store_true",
-                        help="Memory Range of Buffer is ivided between total\
-                        range")
+    parser.add_argument(
+        "--enable-buff-div",
+        action="store_true",
+        help="Memory Range of Buffer is divided between total\
+                        range",
+    )
 
     # *****************************HMC ARCHITECTURE **************************
     # Memory chunk for 16 vault - numbers of vault / number of crossbars
-    parser.add_argument("--mem-chunk", default=4, action="store", type=int,
-                        help="Chunk of memory range for each cross bar in\
-                        arch 0")
+    parser.add_argument(
+        "--mem-chunk",
+        default=4,
+        action="store",
+        type=int,
+        help="Chunk of memory range for each cross bar in\
+                        arch 0",
+    )
 
     # size of req buffer within crossbar, used for modelling extra latency
     # when the reuqest go to non-local vault
-    parser.add_argument("--xbar-buffer-size-req", default=10, action="store",
-                        type=int, help="Number of packets to buffer at the\
-                        request side of the crossbar")
+    parser.add_argument(
+        "--xbar-buffer-size-req",
+        default=10,
+        action="store",
+        type=int,
+        help="Number of packets to buffer at the\
+                        request side of the crossbar",
+    )
 
     # size of response buffer within crossbar, used for modelling extra latency
     # when the response received from non-local vault
-    parser.add_argument("--xbar-buffer-size-resp", default=10, action="store",
-                        type=int, help="Number of packets to buffer at the\
-                        response side of the crossbar")
+    parser.add_argument(
+        "--xbar-buffer-size-resp",
+        default=10,
+        action="store",
+        type=int,
+        help="Number of packets to buffer at the\
+                        response side of the crossbar",
+    )
     # HMC device architecture. It affects the HMC host controller as well
-    parser.add_argument("--arch", type=str, choices=["same", "distributed",
-                        "mixed"], default="distributed", help="same: HMC with\
+    parser.add_argument(
+        "--arch",
+        type=str,
+        choices=["same", "distributed", "mixed"],
+        default="distributed",
+        help="same: HMC with\
                         4 links, all with same range.\ndistributed: HMC with\
                         4 links with distributed range.\nmixed: mixed with\
-                        same and distributed range.\nDefault: distributed")
+                        same and distributed range.\nDefault: distributed",
+    )
     # HMC device - number of vaults
-    parser.add_argument("--hmc-dev-num-vaults", default=16, action="store",
-                        type=int, help="number of independent vaults within\
+    parser.add_argument(
+        "--hmc-dev-num-vaults",
+        default=16,
+        action="store",
+        type=int,
+        help="number of independent vaults within\
                         the HMC device. Note: each vault has a memory\
-                        controller (valut controller)\nDefault: 16")
+                        controller (vault controller)\nDefault: 16",
+    )
     # HMC device - vault capacity or size
-    parser.add_argument("--hmc-dev-vault-size", default='256MB', type=str,
-                        help="vault storage capacity in bytes. Default:\
-                        256MB")
-    parser.add_argument("--mem-type", type=str, choices=["HMC_2500_1x32"],
-                        default="HMC_2500_1x32", help="type of HMC memory to\
-                        use. Default: HMC_2500_1x32")
-    parser.add_argument("--mem-channels", default=1, action="store", type=int,
-                        help="Number of memory channels")
-    parser.add_argument("--mem-ranks", default=1, action="store", type=int,
-                        help="Number of ranks to iterate across")
-    parser.add_argument("--burst-length", default=256, action="store",
-                        type=int, help="burst length in bytes. Note: the\
+    parser.add_argument(
+        "--hmc-dev-vault-size",
+        default="256MB",
+        type=str,
+        help="vault storage capacity in bytes. Default:\
+                        256MB",
+    )
+    parser.add_argument(
+        "--mem-type",
+        type=str,
+        choices=["HMC_2500_1x32"],
+        default="HMC_2500_1x32",
+        help="type of HMC memory to\
+                        use. Default: HMC_2500_1x32",
+    )
+    parser.add_argument(
+        "--mem-channels",
+        default=1,
+        action="store",
+        type=int,
+        help="Number of memory channels",
+    )
+    parser.add_argument(
+        "--mem-ranks",
+        default=1,
+        action="store",
+        type=int,
+        help="Number of ranks to iterate across",
+    )
+    parser.add_argument(
+        "--burst-length",
+        default=256,
+        action="store",
+        type=int,
+        help="burst length in bytes. Note: the\
                         cache line size will be set to this value.\nDefault:\
-                        256")
+                        256",
+    )
 
 
 # configure HMC host controller
@@ -292,8 +436,8 @@
 
     # Create additional crossbar for arch1
     if opt.arch == "distributed" or opt.arch == "mixed":
-        clk = '100GHz'
-        vd = VoltageDomain(voltage='1V')
+        clk = "100GHz"
+        vd = VoltageDomain(voltage="1V")
         # Create additional crossbar for arch1
         system.membus = NoncoherentXBar(width=8)
         system.membus.badaddr_responder = BadAddr()
@@ -310,42 +454,50 @@
     # Memmory ranges of serial link for arch-0. Same as the ranges of vault
     # controllers (4 vaults to 1 serial link)
     if opt.arch == "same":
-        ser_ranges = [AddrRange(0, (4*slar)-1) for i in
-                      range(opt.num_serial_links)]
+        ser_ranges = [
+            AddrRange(0, (4 * slar) - 1) for i in range(opt.num_serial_links)
+        ]
     # Memmory ranges of serial link for arch-1. Distributed range accross
     # links
     if opt.arch == "distributed":
-        ser_ranges = [AddrRange(i*slar, ((i+1)*slar)-1) for i in
-                      range(opt.num_serial_links)]
+        ser_ranges = [
+            AddrRange(i * slar, ((i + 1) * slar) - 1)
+            for i in range(opt.num_serial_links)
+        ]
     # Memmory ranges of serial link for arch-2 'Mixed' address distribution
     # over links
     if opt.arch == "mixed":
-        ser_range0 = AddrRange(0, (1*slar)-1)
-        ser_range1 = AddrRange(1*slar, 2*slar-1)
-        ser_range2 = AddrRange(0, (4*slar)-1)
-        ser_range3 = AddrRange(0, (4*slar)-1)
+        ser_range0 = AddrRange(0, (1 * slar) - 1)
+        ser_range1 = AddrRange(1 * slar, 2 * slar - 1)
+        ser_range2 = AddrRange(0, (4 * slar) - 1)
+        ser_range3 = AddrRange(0, (4 * slar) - 1)
         ser_ranges = [ser_range0, ser_range1, ser_range2, ser_range3]
 
     # Serial link Controller with 16 SerDes links at 10 Gbps with serial link
     # ranges w.r.t to architecture
-    sl = [SerialLink(ranges=ser_ranges[i],
-                     req_size=opt.link_buffer_size_req,
-                     resp_size=opt.link_buffer_size_rsp,
-                     num_lanes=opt.num_lanes_per_link,
-                     link_speed=opt.serial_link_speed,
-                     delay=opt.total_ctrl_latency) for i in
-          range(opt.num_serial_links)]
+    sl = [
+        SerialLink(
+            ranges=ser_ranges[i],
+            req_size=opt.link_buffer_size_req,
+            resp_size=opt.link_buffer_size_rsp,
+            num_lanes=opt.num_lanes_per_link,
+            link_speed=opt.serial_link_speed,
+            delay=opt.total_ctrl_latency,
+        )
+        for i in range(opt.num_serial_links)
+    ]
     system.hmc_host.seriallink = sl
 
     # enable global monitor
     if opt.enable_global_monitor:
-        system.hmc_host.lmonitor = [CommMonitor() for i in
-                                    range(opt.num_serial_links)]
+        system.hmc_host.lmonitor = [
+            CommMonitor() for i in range(opt.num_serial_links)
+        ]
 
     # set the clock frequency for serial link
     for i in range(opt.num_serial_links):
         clk = opt.link_controller_frequency
-        vd = VoltageDomain(voltage='1V')
+        vd = VoltageDomain(voltage="1V")
         scd = SrcClockDomain(clock=clk, voltage_domain=vd)
         system.hmc_host.seriallink[i].clk_domain = scd
 
@@ -387,8 +539,10 @@
 
     # create memory ranges for the vault controllers
     arv = convert.toMemorySize(opt.hmc_dev_vault_size)
-    addr_ranges_vaults = [AddrRange(i*arv, ((i+1)*arv-1)) for i in
-                          range(opt.hmc_dev_num_vaults)]
+    addr_ranges_vaults = [
+        AddrRange(i * arv, ((i + 1) * arv - 1))
+        for i in range(opt.hmc_dev_num_vaults)
+    ]
     system.mem_ranges = addr_ranges_vaults
 
     if opt.enable_link_monitor:
@@ -396,29 +550,36 @@
         system.hmc_dev.lmonitor = lm
 
     # 4 HMC Crossbars located in its logic-base (LoB)
-    xb = [NoncoherentXBar(width=opt.xbar_width,
-                          frontend_latency=opt.xbar_frontend_latency,
-                          forward_latency=opt.xbar_forward_latency,
-                          response_latency=opt.xbar_response_latency) for i in
-          range(opt.number_mem_crossbar)]
+    xb = [
+        NoncoherentXBar(
+            width=opt.xbar_width,
+            frontend_latency=opt.xbar_frontend_latency,
+            forward_latency=opt.xbar_forward_latency,
+            response_latency=opt.xbar_response_latency,
+        )
+        for i in range(opt.number_mem_crossbar)
+    ]
     system.hmc_dev.xbar = xb
 
     for i in range(opt.number_mem_crossbar):
         clk = opt.xbar_frequency
-        vd = VoltageDomain(voltage='1V')
+        vd = VoltageDomain(voltage="1V")
         scd = SrcClockDomain(clock=clk, voltage_domain=vd)
         system.hmc_dev.xbar[i].clk_domain = scd
 
     # Attach 4 serial link to 4 crossbar/s
     for i in range(opt.num_serial_links):
         if opt.enable_link_monitor:
-            system.hmc_host.seriallink[i].mem_side_port = \
-                system.hmc_dev.lmonitor[i].cpu_side_port
-            system.hmc_dev.lmonitor[i].mem_side_port = \
-                system.hmc_dev.xbar[i].cpu_side_ports
+            system.hmc_host.seriallink[
+                i
+            ].mem_side_port = system.hmc_dev.lmonitor[i].cpu_side_port
+            system.hmc_dev.lmonitor[i].mem_side_port = system.hmc_dev.xbar[
+                i
+            ].cpu_side_ports
         else:
-            system.hmc_host.seriallink[i].mem_side_port = \
-                system.hmc_dev.xbar[i].cpu_side_ports
+            system.hmc_host.seriallink[i].mem_side_port = system.hmc_dev.xbar[
+                i
+            ].cpu_side_ports
 
     # Connecting xbar with each other for request arriving at the wrong xbar,
     # then it will be forward to correct xbar. Bridge is used to connect xbars
@@ -426,9 +587,13 @@
         numx = len(system.hmc_dev.xbar)
 
         # create a list of buffers
-        system.hmc_dev.buffers = [Bridge(req_size=opt.xbar_buffer_size_req,
-                                         resp_size=opt.xbar_buffer_size_resp)
-                                  for i in range(numx*(opt.mem_chunk-1))]
+        system.hmc_dev.buffers = [
+            Bridge(
+                req_size=opt.xbar_buffer_size_req,
+                resp_size=opt.xbar_buffer_size_resp,
+            )
+            for i in range(numx * (opt.mem_chunk - 1))
+        ]
 
         # Buffer iterator
         it = iter(list(range(len(system.hmc_dev.buffers))))
@@ -446,14 +611,18 @@
 
                     # Change the default values for ranges of bridge
                     system.hmc_dev.buffers[index].ranges = system.mem_ranges[
-                            j * int(opt.mem_chunk):
-                            (j + 1) * int(opt.mem_chunk)]
+                        j * int(opt.mem_chunk) : (j + 1) * int(opt.mem_chunk)
+                    ]
 
                     # Connect the bridge between corssbars
-                    system.hmc_dev.xbar[i].mem_side_ports = \
-                        system.hmc_dev.buffers[index].cpu_side_port
-                    system.hmc_dev.buffers[index].mem_side_port = \
-                        system.hmc_dev.xbar[j].cpu_side_ports
+                    system.hmc_dev.xbar[
+                        i
+                    ].mem_side_ports = system.hmc_dev.buffers[
+                        index
+                    ].cpu_side_port
+                    system.hmc_dev.buffers[
+                        index
+                    ].mem_side_port = system.hmc_dev.xbar[j].cpu_side_ports
                 else:
                     # Don't connect the xbar to itself
                     pass
@@ -462,37 +631,49 @@
     # can only direct traffic to it local vaults
     if opt.arch == "mixed":
         system.hmc_dev.buffer30 = Bridge(ranges=system.mem_ranges[0:4])
-        system.hmc_dev.xbar[3].mem_side_ports = \
-            system.hmc_dev.buffer30.cpu_side_port
-        system.hmc_dev.buffer30.mem_side_port = \
-            system.hmc_dev.xbar[0].cpu_side_ports
+        system.hmc_dev.xbar[
+            3
+        ].mem_side_ports = system.hmc_dev.buffer30.cpu_side_port
+        system.hmc_dev.buffer30.mem_side_port = system.hmc_dev.xbar[
+            0
+        ].cpu_side_ports
 
         system.hmc_dev.buffer31 = Bridge(ranges=system.mem_ranges[4:8])
-        system.hmc_dev.xbar[3].mem_side_ports = \
-            system.hmc_dev.buffer31.cpu_side_port
-        system.hmc_dev.buffer31.mem_side_port = \
-            system.hmc_dev.xbar[1].cpu_side_ports
+        system.hmc_dev.xbar[
+            3
+        ].mem_side_ports = system.hmc_dev.buffer31.cpu_side_port
+        system.hmc_dev.buffer31.mem_side_port = system.hmc_dev.xbar[
+            1
+        ].cpu_side_ports
 
         system.hmc_dev.buffer32 = Bridge(ranges=system.mem_ranges[8:12])
-        system.hmc_dev.xbar[3].mem_side_ports = \
-            system.hmc_dev.buffer32.cpu_side_port
-        system.hmc_dev.buffer32.mem_side_port = \
-            system.hmc_dev.xbar[2].cpu_side_ports
+        system.hmc_dev.xbar[
+            3
+        ].mem_side_ports = system.hmc_dev.buffer32.cpu_side_port
+        system.hmc_dev.buffer32.mem_side_port = system.hmc_dev.xbar[
+            2
+        ].cpu_side_ports
 
         system.hmc_dev.buffer20 = Bridge(ranges=system.mem_ranges[0:4])
-        system.hmc_dev.xbar[2].mem_side_ports = \
-            system.hmc_dev.buffer20.cpu_side_port
-        system.hmc_dev.buffer20.mem_side_port = \
-            system.hmc_dev.xbar[0].cpu_side_ports
+        system.hmc_dev.xbar[
+            2
+        ].mem_side_ports = system.hmc_dev.buffer20.cpu_side_port
+        system.hmc_dev.buffer20.mem_side_port = system.hmc_dev.xbar[
+            0
+        ].cpu_side_ports
 
         system.hmc_dev.buffer21 = Bridge(ranges=system.mem_ranges[4:8])
-        system.hmc_dev.xbar[2].mem_side_ports = \
-            system.hmc_dev.buffer21.cpu_side_port
-        system.hmc_dev.buffer21.mem_side_port = \
-            system.hmc_dev.xbar[1].cpu_side_ports
+        system.hmc_dev.xbar[
+            2
+        ].mem_side_ports = system.hmc_dev.buffer21.cpu_side_port
+        system.hmc_dev.buffer21.mem_side_port = system.hmc_dev.xbar[
+            1
+        ].cpu_side_ports
 
         system.hmc_dev.buffer23 = Bridge(ranges=system.mem_ranges[12:16])
-        system.hmc_dev.xbar[2].mem_side_ports = \
-            system.hmc_dev.buffer23.cpu_side_port
-        system.hmc_dev.buffer23.mem_side_port = \
-            system.hmc_dev.xbar[3].cpu_side_ports
+        system.hmc_dev.xbar[
+            2
+        ].mem_side_ports = system.hmc_dev.buffer23.cpu_side_port
+        system.hmc_dev.buffer23.mem_side_port = system.hmc_dev.xbar[
+            3
+        ].cpu_side_ports
diff --git a/configs/common/MemConfig.py b/configs/common/MemConfig.py
index 332fd6b..baa0d23 100644
--- a/configs/common/MemConfig.py
+++ b/configs/common/MemConfig.py
@@ -37,8 +37,8 @@
 from common import ObjectList
 from common import HMC
 
-def create_mem_intf(intf, r, i, intlv_bits, intlv_size,
-                    xor_low_bit):
+
+def create_mem_intf(intf, r, i, intlv_bits, intlv_size, xor_low_bit):
     """
     Helper function for creating a single memoy controller from the given
     options.  This function is invoked multiple times in config_mem function
@@ -46,6 +46,7 @@
     """
 
     import math
+
     intlv_low_bit = int(math.log(intlv_size, 2))
 
     # Use basic hashing for the channel selection, and preferably use
@@ -53,7 +54,7 @@
     # the details of the caches here, make an educated guess. 4 MByte
     # 4-way associative with 64 byte cache lines is 6 offset bits and
     # 14 index bits.
-    if (xor_low_bit):
+    if xor_low_bit:
         xor_high_bit = xor_low_bit + intlv_bits - 1
     else:
         xor_high_bit = 0
@@ -67,13 +68,15 @@
         # If the channel bits are appearing after the column
         # bits, we need to add the appropriate number of bits
         # for the row buffer size
-        if interface.addr_mapping.value == 'RoRaBaChCo':
+        if interface.addr_mapping.value == "RoRaBaChCo":
             # This computation only really needs to happen
             # once, but as we rely on having an instance we
             # end up having to repeat it for each and every
             # one
-            rowbuffer_size = interface.device_rowbuffer_size.value * \
-                interface.devices_per_rank.value
+            rowbuffer_size = (
+                interface.device_rowbuffer_size.value
+                * interface.devices_per_rank.value
+            )
 
             intlv_low_bit = int(math.log(rowbuffer_size, 2))
 
@@ -83,7 +86,7 @@
         # If the channel bits are appearing after the low order
         # address bits (buffer bits), we need to add the appropriate
         # number of bits for the buffer size
-        if interface.addr_mapping.value == 'RoRaBaChCo':
+        if interface.addr_mapping.value == "RoRaBaChCo":
             # This computation only really needs to happen
             # once, but as we rely on having an instance we
             # end up having to repeat it for each and every
@@ -94,14 +97,17 @@
 
     # We got all we need to configure the appropriate address
     # range
-    interface.range = m5.objects.AddrRange(r.start, size = r.size(),
-                                      intlvHighBit = \
-                                          intlv_low_bit + intlv_bits - 1,
-                                      xorHighBit = xor_high_bit,
-                                      intlvBits = intlv_bits,
-                                      intlvMatch = i)
+    interface.range = m5.objects.AddrRange(
+        r.start,
+        size=r.size(),
+        intlvHighBit=intlv_low_bit + intlv_bits - 1,
+        xorHighBit=xor_high_bit,
+        intlvBits=intlv_bits,
+        intlvMatch=i,
+    )
     return interface
 
+
 def config_mem(options, system):
     """
     Create the memory controllers based on the options and attach them.
@@ -125,8 +131,9 @@
 
     # Optional options
     opt_tlm_memory = getattr(options, "tlm_memory", None)
-    opt_external_memory_system = getattr(options, "external_memory_system",
-                                         None)
+    opt_external_memory_system = getattr(
+        options, "external_memory_system", None
+    )
     opt_elastic_trace_en = getattr(options, "elastic_trace_en", False)
     opt_mem_ranks = getattr(options, "mem_ranks", None)
     opt_nvm_ranks = getattr(options, "nvm_ranks", None)
@@ -149,15 +156,18 @@
             port_type="tlm_slave",
             port_data=opt_tlm_memory,
             port=system.membus.mem_side_ports,
-            addr_ranges=system.mem_ranges)
+            addr_ranges=system.mem_ranges,
+        )
         system.workload.addr_check = False
         return
 
     if opt_external_memory_system:
         subsystem.external_memory = m5.objects.ExternalSlave(
             port_type=opt_external_memory_system,
-            port_data="init_mem0", port=xbar.mem_side_ports,
-            addr_ranges=system.mem_ranges)
+            port_data="init_mem0",
+            port=xbar.mem_side_ports,
+            addr_ranges=system.mem_ranges,
+        )
         subsystem.workload.addr_check = False
         return
 
@@ -165,8 +175,9 @@
 
     import math
     from m5.util import fatal
+
     intlv_bits = int(math.log(nbr_mem_ctrls, 2))
-    if 2 ** intlv_bits != nbr_mem_ctrls:
+    if 2**intlv_bits != nbr_mem_ctrls:
         fatal("Number of memory channels must be a power of 2")
 
     if opt_mem_type:
@@ -178,8 +189,10 @@
     mem_ctrls = []
 
     if opt_elastic_trace_en and not issubclass(intf, m5.objects.SimpleMemory):
-        fatal("When elastic trace is enabled, configure mem-type as "
-                "simple-mem.")
+        fatal(
+            "When elastic trace is enabled, configure mem-type as "
+            "simple-mem."
+        )
 
     # The default behaviour is to interleave memory channels on 128
     # byte granularity, or cache line granularity if larger than 128
@@ -199,13 +212,16 @@
         for i in range(nbr_mem_ctrls):
             if opt_mem_type and (not opt_nvm_type or range_iter % 2 != 0):
                 # Create the DRAM interface
-                dram_intf = create_mem_intf(intf, r, i,
-                    intlv_bits, intlv_size, opt_xor_low_bit)
+                dram_intf = create_mem_intf(
+                    intf, r, i, intlv_bits, intlv_size, opt_xor_low_bit
+                )
 
                 # Set the number of ranks based on the command-line
                 # options if it was explicitly set
-                if issubclass(intf, m5.objects.DRAMInterface) and \
-                   opt_mem_ranks:
+                if (
+                    issubclass(intf, m5.objects.DRAMInterface)
+                    and opt_mem_ranks
+                ):
                     dram_intf.ranks_per_channel = opt_mem_ranks
 
                 # Enable low-power DRAM states if option is set
@@ -213,9 +229,11 @@
                     dram_intf.enable_dram_powerdown = opt_dram_powerdown
 
                 if opt_elastic_trace_en:
-                    dram_intf.latency = '1ns'
-                    print("For elastic trace, over-riding Simple Memory "
-                        "latency to 1ns.")
+                    dram_intf.latency = "1ns"
+                    print(
+                        "For elastic trace, over-riding Simple Memory "
+                        "latency to 1ns."
+                    )
 
                 # Create the controller that will drive the interface
                 mem_ctrl = dram_intf.controller()
@@ -223,13 +241,16 @@
                 mem_ctrls.append(mem_ctrl)
 
             elif opt_nvm_type and (not opt_mem_type or range_iter % 2 == 0):
-                nvm_intf = create_mem_intf(n_intf, r, i,
-                    intlv_bits, intlv_size, opt_xor_low_bit)
+                nvm_intf = create_mem_intf(
+                    n_intf, r, i, intlv_bits, intlv_size, opt_xor_low_bit
+                )
 
                 # Set the number of ranks based on the command-line
                 # options if it was explicitly set
-                if issubclass(n_intf, m5.objects.NVMInterface) and \
-                   opt_nvm_ranks:
+                if (
+                    issubclass(n_intf, m5.objects.NVMInterface)
+                    and opt_nvm_ranks
+                ):
                     nvm_intf.ranks_per_channel = opt_nvm_ranks
 
                 # Create a controller if not sharing a channel with DRAM
@@ -244,13 +265,13 @@
 
     # hook up NVM interface when channel is shared with DRAM + NVM
     for i in range(len(nvm_intfs)):
-        mem_ctrls[i].nvm = nvm_intfs[i];
+        mem_ctrls[i].nvm = nvm_intfs[i]
 
     # Connect the controller to the xbar port
     for i in range(len(mem_ctrls)):
         if opt_mem_type == "HMC_2500_1x32":
             # Connect the controllers to the membus
-            mem_ctrls[i].port = xbar[i//4].mem_side_ports
+            mem_ctrls[i].port = xbar[i // 4].mem_side_ports
             # Set memory device size. There is an independent controller
             # for each vault. All vaults are same size.
             mem_ctrls[i].dram.device_size = options.hmc_dev_vault_size
diff --git a/configs/common/ObjectList.py b/configs/common/ObjectList.py
index 685dbc1..ce52967 100644
--- a/configs/common/ObjectList.py
+++ b/configs/common/ObjectList.py
@@ -34,18 +34,20 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+from gem5.runtime import get_supported_isas
 import m5.objects
 import m5.internal.params
 import inspect
 import sys
 from textwrap import TextWrapper
 
+
 class ObjectList(object):
-    """ Creates a list of objects that are sub-classes of a given class. """
+    """Creates a list of objects that are sub-classes of a given class."""
 
     def _is_obj_class(self, cls):
         """Determine if a class is a a sub class of the provided base class
-           that can be instantiated.
+        that can be instantiated.
         """
 
         # We can't use the normal inspect.isclass because the ParamFactory
@@ -63,16 +65,20 @@
             sub_cls = self._sub_classes[real_name]
             return sub_cls
         except KeyError:
-            print("{} is not a valid sub-class of {}.".format(name, \
-                self.base_cls))
+            print(
+                "{} is not a valid sub-class of {}.".format(
+                    name, self.base_cls
+                )
+            )
             raise
 
     def print(self):
         """Print a list of available sub-classes and aliases."""
 
         print("Available {} classes:".format(self.base_cls))
-        doc_wrapper = TextWrapper(initial_indent="\t\t",
-            subsequent_indent="\t\t")
+        doc_wrapper = TextWrapper(
+            initial_indent="\t\t", subsequent_indent="\t\t"
+        )
         for name, cls in list(self._sub_classes.items()):
             print("\t{}".format(name))
 
@@ -117,6 +123,7 @@
         self._aliases = {}
         self._add_aliases(aliases)
 
+
 class CPUList(ObjectList):
     def _is_obj_class(self, cls):
         """Determine if a class is a CPU that can be instantiated"""
@@ -124,35 +131,42 @@
         # We can't use the normal inspect.isclass because the ParamFactory
         # and ProxyFactory classes have a tendency to confuse it.
         try:
-            return super(CPUList, self)._is_obj_class(cls) and \
-                not issubclass(cls, m5.objects.CheckerCPU)
+            return super(CPUList, self)._is_obj_class(cls) and not issubclass(
+                cls, m5.objects.CheckerCPU
+            )
         except (TypeError, AttributeError):
             return False
 
     def _add_objects(self):
         super(CPUList, self)._add_objects()
 
-        from m5.defines import buildEnv
         from importlib import import_module
-        for package in [ "generic", buildEnv['TARGET_ISA']]:
+
+        for isa in {
+            "generic",
+        } | {isa.name.lower() for isa in get_supported_isas()}:
             try:
-                package = import_module(".cores." + package,
-                                        package=__name__.rpartition('.')[0])
+                package = import_module(
+                    ".cores." + isa, package=__name__.rpartition(".")[0]
+                )
             except ImportError:
                 # No timing models for this ISA
                 continue
 
-            for mod_name, module in \
-                inspect.getmembers(package, inspect.ismodule):
-                for name, cls in inspect.getmembers(module,
-                    self._is_obj_class):
+            for mod_name, module in inspect.getmembers(
+                package, inspect.ismodule
+            ):
+                for name, cls in inspect.getmembers(
+                    module, self._is_obj_class
+                ):
                     self._sub_classes[name] = cls
 
+
 class EnumList(ObjectList):
-    """ Creates a list of possible values for a given enum class. """
+    """Creates a list of possible values for a given enum class."""
 
     def _add_objects(self):
-        """ Add all enum values to the ObjectList """
+        """Add all enum values to the ObjectList"""
         self._sub_classes = {}
         for (key, value) in list(self.base_cls.__members__.items()):
             # All Enums have a value Num_NAME at the end which we
@@ -160,31 +174,37 @@
             if not key.startswith("Num_"):
                 self._sub_classes[key] = value
 
-rp_list = ObjectList(getattr(m5.objects, 'BaseReplacementPolicy', None))
-bp_list = ObjectList(getattr(m5.objects, 'BranchPredictor', None))
-cpu_list = CPUList(getattr(m5.objects, 'BaseCPU', None))
-hwp_list = ObjectList(getattr(m5.objects, 'BasePrefetcher', None))
-indirect_bp_list = ObjectList(getattr(m5.objects, 'IndirectPredictor', None))
-mem_list = ObjectList(getattr(m5.objects, 'AbstractMemory', None))
-dram_addr_map_list = EnumList(getattr(m5.internal.params, 'enum_AddrMap',
-                                      None))
+
+rp_list = ObjectList(getattr(m5.objects, "BaseReplacementPolicy", None))
+bp_list = ObjectList(getattr(m5.objects, "BranchPredictor", None))
+cpu_list = CPUList(getattr(m5.objects, "BaseCPU", None))
+hwp_list = ObjectList(getattr(m5.objects, "BasePrefetcher", None))
+indirect_bp_list = ObjectList(getattr(m5.objects, "IndirectPredictor", None))
+mem_list = ObjectList(getattr(m5.objects, "AbstractMemory", None))
+dram_addr_map_list = EnumList(
+    getattr(m5.internal.params, "enum_AddrMap", None)
+)
 
 # Platform aliases. The platforms listed here might not be compiled,
 # we make sure they exist before we add them to the platform list.
-_platform_aliases_all = [
-    ("VExpress_GEM5", "VExpress_GEM5_V1"),
-    ]
-platform_list = ObjectList(getattr(m5.objects, 'Platform', None), \
-    _platform_aliases_all)
+_platform_aliases_all = [("VExpress_GEM5", "VExpress_GEM5_V1")]
+platform_list = ObjectList(
+    getattr(m5.objects, "Platform", None), _platform_aliases_all
+)
+
 
 def _subclass_tester(name):
     sub_class = getattr(m5.objects, name, None)
 
     def tester(cls):
-        return sub_class is not None and cls is not None and \
-            issubclass(cls, sub_class)
+        return (
+            sub_class is not None
+            and cls is not None
+            and issubclass(cls, sub_class)
+        )
 
     return tester
 
+
 is_kvm_cpu = _subclass_tester("BaseKvmCPU")
 is_noncaching_cpu = _subclass_tester("NonCachingSimpleCPU")
diff --git a/configs/common/Options.py b/configs/common/Options.py
index a63cc7b..81d7791 100644
--- a/configs/common/Options.py
+++ b/configs/common/Options.py
@@ -97,6 +97,7 @@
         ObjectList.platform_list.print()
         sys.exit(0)
 
+
 # Add the very basic options that work also in the case of the no ISA
 # being used, and consequently no CPUs, but rather various types of
 # testers and traffic generators.
@@ -104,41 +105,77 @@
 
 def addNoISAOptions(parser):
     parser.add_argument("-n", "--num-cpus", type=int, default=1)
-    parser.add_argument("--sys-voltage", action="store", type=str,
-                        default='1.0V',
-                        help="""Top-level voltage for blocks running at system
-                      power supply""")
-    parser.add_argument("--sys-clock", action="store", type=str,
-                        default='1GHz',
-                        help="""Top-level clock for blocks running at system
-                      speed""")
+    parser.add_argument(
+        "--sys-voltage",
+        action="store",
+        type=str,
+        default="1.0V",
+        help="""Top-level voltage for blocks running at system
+                      power supply""",
+    )
+    parser.add_argument(
+        "--sys-clock",
+        action="store",
+        type=str,
+        default="1GHz",
+        help="""Top-level clock for blocks running at system
+                      speed""",
+    )
 
     # Memory Options
-    parser.add_argument("--list-mem-types",
-                        action=ListMem, nargs=0,
-                        help="List available memory types")
-    parser.add_argument("--mem-type", default="DDR3_1600_8x8",
-                        choices=ObjectList.mem_list.get_names(),
-                        help="type of memory to use")
-    parser.add_argument("--mem-channels", type=int, default=1,
-                        help="number of memory channels")
-    parser.add_argument("--mem-ranks", type=int, default=None,
-                        help="number of memory ranks per channel")
     parser.add_argument(
-        "--mem-size", action="store", type=str, default="512MB",
-        help="Specify the physical memory size (single memory)")
-    parser.add_argument("--enable-dram-powerdown", action="store_true",
-                        help="Enable low-power states in DRAMInterface")
-    parser.add_argument("--mem-channels-intlv", type=int, default=0,
-                        help="Memory channels interleave")
+        "--list-mem-types",
+        action=ListMem,
+        nargs=0,
+        help="List available memory types",
+    )
+    parser.add_argument(
+        "--mem-type",
+        default="DDR3_1600_8x8",
+        choices=ObjectList.mem_list.get_names(),
+        help="type of memory to use",
+    )
+    parser.add_argument(
+        "--mem-channels", type=int, default=1, help="number of memory channels"
+    )
+    parser.add_argument(
+        "--mem-ranks",
+        type=int,
+        default=None,
+        help="number of memory ranks per channel",
+    )
+    parser.add_argument(
+        "--mem-size",
+        action="store",
+        type=str,
+        default="512MB",
+        help="Specify the physical memory size (single memory)",
+    )
+    parser.add_argument(
+        "--enable-dram-powerdown",
+        action="store_true",
+        help="Enable low-power states in DRAMInterface",
+    )
+    parser.add_argument(
+        "--mem-channels-intlv",
+        type=int,
+        default=0,
+        help="Memory channels interleave",
+    )
 
     parser.add_argument("--memchecker", action="store_true")
 
     # Cache Options
-    parser.add_argument("--external-memory-system", type=str,
-                        help="use external ports of this port_type for caches")
-    parser.add_argument("--tlm-memory", type=str,
-                        help="use external port for SystemC TLM cosimulation")
+    parser.add_argument(
+        "--external-memory-system",
+        type=str,
+        help="use external ports of this port_type for caches",
+    )
+    parser.add_argument(
+        "--tlm-memory",
+        type=str,
+        help="use external port for SystemC TLM cosimulation",
+    )
     parser.add_argument("--caches", action="store_true")
     parser.add_argument("--l2cache", action="store_true")
     parser.add_argument("--num-dirs", type=int, default=1)
@@ -158,26 +195,44 @@
     parser.add_argument("--ruby", action="store_true")
 
     # Run duration options
-    parser.add_argument("-m", "--abs-max-tick", type=int, default=m5.MaxTick,
-                        metavar="TICKS", help="Run to absolute simulated tick "
-                        "specified including ticks from a restored checkpoint")
     parser.add_argument(
-        "--rel-max-tick", type=int, default=None, metavar="TICKS",
+        "-m",
+        "--abs-max-tick",
+        type=int,
+        default=m5.MaxTick,
+        metavar="TICKS",
+        help="Run to absolute simulated tick "
+        "specified including ticks from a restored checkpoint",
+    )
+    parser.add_argument(
+        "--rel-max-tick",
+        type=int,
+        default=None,
+        metavar="TICKS",
         help="Simulate for specified number of"
         " ticks relative to the simulation start tick (e.g. if "
-        "restoring a checkpoint)")
-    parser.add_argument("--maxtime", type=float, default=None,
-                        help="Run to the specified absolute simulated time in "
-                        "seconds")
+        "restoring a checkpoint)",
+    )
     parser.add_argument(
-        "-P", "--param", action="append", default=[],
+        "--maxtime",
+        type=float,
+        default=None,
+        help="Run to the specified absolute simulated time in " "seconds",
+    )
+    parser.add_argument(
+        "-P",
+        "--param",
+        action="append",
+        default=[],
         help="Set a SimObject parameter relative to the root node. "
         "An extended Python multi range slicing syntax can be used "
         "for arrays. For example: "
         "'system.cpu[0,1,3:8:2].max_insts_all_threads = 42' "
         "sets max_insts_all_threads for cpus 0, 1, 3, 5 and 7 "
         "Direct parameters of the root object are not accessible, "
-        "only parameters of its children.")
+        "only parameters of its children.",
+    )
+
 
 # Add common options that assume a non-NULL ISA.
 
@@ -187,273 +242,519 @@
     addNoISAOptions(parser)
 
     # system options
-    parser.add_argument("--list-cpu-types",
-                        action=ListCpu, nargs=0,
-                        help="List available CPU types")
-    parser.add_argument("--cpu-type", default="AtomicSimpleCPU",
-                        choices=ObjectList.cpu_list.get_names(),
-                        help="type of cpu to run with")
-    parser.add_argument("--list-bp-types",
-                        action=ListBp, nargs=0,
-                        help="List available branch predictor types")
-    parser.add_argument("--list-indirect-bp-types",
-                        action=ListIndirectBP, nargs=0,
-                        help="List available indirect branch predictor types")
-    parser.add_argument("--bp-type", default=None,
-                        choices=ObjectList.bp_list.get_names(),
-                        help="""
+    parser.add_argument(
+        "--list-cpu-types",
+        action=ListCpu,
+        nargs=0,
+        help="List available CPU types",
+    )
+    parser.add_argument(
+        "--cpu-type",
+        default="AtomicSimpleCPU",
+        choices=ObjectList.cpu_list.get_names(),
+        help="type of cpu to run with",
+    )
+    parser.add_argument(
+        "--list-bp-types",
+        action=ListBp,
+        nargs=0,
+        help="List available branch predictor types",
+    )
+    parser.add_argument(
+        "--list-indirect-bp-types",
+        action=ListIndirectBP,
+        nargs=0,
+        help="List available indirect branch predictor types",
+    )
+    parser.add_argument(
+        "--bp-type",
+        default=None,
+        choices=ObjectList.bp_list.get_names(),
+        help="""
                         type of branch predictor to run with
                         (if not set, use the default branch predictor of
-                        the selected CPU)""")
-    parser.add_argument("--indirect-bp-type", default=None,
-                        choices=ObjectList.indirect_bp_list.get_names(),
-                        help="type of indirect branch predictor to run with")
+                        the selected CPU)""",
+    )
+    parser.add_argument(
+        "--indirect-bp-type",
+        default=None,
+        choices=ObjectList.indirect_bp_list.get_names(),
+        help="type of indirect branch predictor to run with",
+    )
 
-    parser.add_argument("--list-rp-types",
-                        action=ListRP, nargs=0,
-                        help="List available replacement policy types")
+    parser.add_argument(
+        "--list-rp-types",
+        action=ListRP,
+        nargs=0,
+        help="List available replacement policy types",
+    )
 
-    parser.add_argument("--list-hwp-types",
-                        action=ListHWP, nargs=0,
-                        help="List available hardware prefetcher types")
-    parser.add_argument("--l1i-hwp-type", default=None,
-                        choices=ObjectList.hwp_list.get_names(),
-                        help="""
+    parser.add_argument(
+        "--list-hwp-types",
+        action=ListHWP,
+        nargs=0,
+        help="List available hardware prefetcher types",
+    )
+    parser.add_argument(
+        "--l1i-hwp-type",
+        default=None,
+        choices=ObjectList.hwp_list.get_names(),
+        help="""
                         type of hardware prefetcher to use with the L1
                         instruction cache.
                         (if not set, use the default prefetcher of
-                        the selected cache)""")
-    parser.add_argument("--l1d-hwp-type", default=None,
-                        choices=ObjectList.hwp_list.get_names(),
-                        help="""
+                        the selected cache)""",
+    )
+    parser.add_argument(
+        "--l1d-hwp-type",
+        default=None,
+        choices=ObjectList.hwp_list.get_names(),
+        help="""
                         type of hardware prefetcher to use with the L1
                         data cache.
                         (if not set, use the default prefetcher of
-                        the selected cache)""")
-    parser.add_argument("--l2-hwp-type", default=None,
-                        choices=ObjectList.hwp_list.get_names(),
-                        help="""
+                        the selected cache)""",
+    )
+    parser.add_argument(
+        "--l2-hwp-type",
+        default=None,
+        choices=ObjectList.hwp_list.get_names(),
+        help="""
                         type of hardware prefetcher to use with the L2 cache.
                         (if not set, use the default prefetcher of
-                        the selected cache)""")
+                        the selected cache)""",
+    )
     parser.add_argument("--checker", action="store_true")
-    parser.add_argument("--cpu-clock", action="store", type=str,
-                        default='2GHz',
-                        help="Clock for blocks running at CPU speed")
-    parser.add_argument("--smt", action="store_true", default=False,
-                        help="""
+    parser.add_argument(
+        "--cpu-clock",
+        action="store",
+        type=str,
+        default="2GHz",
+        help="Clock for blocks running at CPU speed",
+    )
+    parser.add_argument(
+        "--smt",
+        action="store_true",
+        default=False,
+        help="""
                       Only used if multiple programs are specified. If true,
                       then the number of threads per cpu is same as the
-                      number of programs.""")
+                      number of programs.""",
+    )
     parser.add_argument(
-        "--elastic-trace-en", action="store_true",
+        "--elastic-trace-en",
+        action="store_true",
         help="""Enable capture of data dependency and instruction
-                      fetch traces using elastic trace probe.""")
+                      fetch traces using elastic trace probe.""",
+    )
     # Trace file paths input to trace probe in a capture simulation and input
     # to Trace CPU in a replay simulation
-    parser.add_argument("--inst-trace-file", action="store", type=str,
-                        help="""Instruction fetch trace file input to
+    parser.add_argument(
+        "--inst-trace-file",
+        action="store",
+        type=str,
+        help="""Instruction fetch trace file input to
                       Elastic Trace probe in a capture simulation and
-                      Trace CPU in a replay simulation""", default="")
-    parser.add_argument("--data-trace-file", action="store", type=str,
-                        help="""Data dependency trace file input to
+                      Trace CPU in a replay simulation""",
+        default="",
+    )
+    parser.add_argument(
+        "--data-trace-file",
+        action="store",
+        type=str,
+        help="""Data dependency trace file input to
                       Elastic Trace probe in a capture simulation and
-                      Trace CPU in a replay simulation""", default="")
+                      Trace CPU in a replay simulation""",
+        default="",
+    )
 
     # dist-gem5 options
-    parser.add_argument("--dist", action="store_true",
-                        help="Parallel distributed gem5 simulation.")
     parser.add_argument(
-        "--dist-sync-on-pseudo-op", action="store_true",
-        help="Use a pseudo-op to start dist-gem5 synchronization.")
+        "--dist",
+        action="store_true",
+        help="Parallel distributed gem5 simulation.",
+    )
     parser.add_argument(
-        "--is-switch", action="store_true",
+        "--dist-sync-on-pseudo-op",
+        action="store_true",
+        help="Use a pseudo-op to start dist-gem5 synchronization.",
+    )
+    parser.add_argument(
+        "--is-switch",
+        action="store_true",
         help="Select the network switch simulator process for a"
-        "distributed gem5 run")
-    parser.add_argument("--dist-rank", default=0, action="store", type=int,
-                        help="Rank of this system within the dist gem5 run.")
+        "distributed gem5 run",
+    )
     parser.add_argument(
-        "--dist-size", default=0, action="store", type=int,
-        help="Number of gem5 processes within the dist gem5 run.")
+        "--dist-rank",
+        default=0,
+        action="store",
+        type=int,
+        help="Rank of this system within the dist gem5 run.",
+    )
     parser.add_argument(
-        "--dist-server-name", default="127.0.0.1", action="store", type=str,
-        help="Name of the message server host\nDEFAULT: localhost")
-    parser.add_argument("--dist-server-port",
-                        default=2200,
-                        action="store", type=int,
-                        help="Message server listen port\nDEFAULT: 2200")
+        "--dist-size",
+        default=0,
+        action="store",
+        type=int,
+        help="Number of gem5 processes within the dist gem5 run.",
+    )
     parser.add_argument(
-        "--dist-sync-repeat", default="0us", action="store", type=str,
+        "--dist-server-name",
+        default="127.0.0.1",
+        action="store",
+        type=str,
+        help="Name of the message server host\nDEFAULT: localhost",
+    )
+    parser.add_argument(
+        "--dist-server-port",
+        default=2200,
+        action="store",
+        type=int,
+        help="Message server listen port\nDEFAULT: 2200",
+    )
+    parser.add_argument(
+        "--dist-sync-repeat",
+        default="0us",
+        action="store",
+        type=str,
         help="Repeat interval for synchronisation barriers among "
-        "dist-gem5 processes\nDEFAULT: --ethernet-linkdelay")
+        "dist-gem5 processes\nDEFAULT: --ethernet-linkdelay",
+    )
     parser.add_argument(
-        "--dist-sync-start", default="5200000000000t", action="store",
+        "--dist-sync-start",
+        default="5200000000000t",
+        action="store",
         type=str,
         help="Time to schedule the first dist synchronisation barrier\n"
-        "DEFAULT:5200000000000t")
-    parser.add_argument("--ethernet-linkspeed", default="10Gbps",
-                        action="store", type=str,
-                        help="Link speed in bps\nDEFAULT: 10Gbps")
-    parser.add_argument("--ethernet-linkdelay", default="10us",
-                        action="store", type=str,
-                        help="Link delay in seconds\nDEFAULT: 10us")
+        "DEFAULT:5200000000000t",
+    )
+    parser.add_argument(
+        "--ethernet-linkspeed",
+        default="10Gbps",
+        action="store",
+        type=str,
+        help="Link speed in bps\nDEFAULT: 10Gbps",
+    )
+    parser.add_argument(
+        "--ethernet-linkdelay",
+        default="10us",
+        action="store",
+        type=str,
+        help="Link delay in seconds\nDEFAULT: 10us",
+    )
 
     # Run duration options
-    parser.add_argument("-I", "--maxinsts", action="store", type=int,
-                        default=None, help="""Total number of instructions to
-                                            simulate (default: run forever)""")
-    parser.add_argument("--work-item-id", action="store", type=int,
-                        help="the specific work id for exit & checkpointing")
-    parser.add_argument("--num-work-ids", action="store", type=int,
-                        help="Number of distinct work item types")
-    parser.add_argument("--work-begin-cpu-id-exit", action="store", type=int,
-                        help="exit when work starts on the specified cpu")
-    parser.add_argument("--work-end-exit-count", action="store", type=int,
-                        help="exit at specified work end count")
-    parser.add_argument("--work-begin-exit-count", action="store", type=int,
-                        help="exit at specified work begin count")
-    parser.add_argument("--init-param", action="store", type=int, default=0,
-                        help="""Parameter available in simulation with m5
-                              initparam""")
     parser.add_argument(
-        "--initialize-only", action="store_true", default=False,
+        "-I",
+        "--maxinsts",
+        action="store",
+        type=int,
+        default=None,
+        help="""Total number of instructions to
+                                            simulate (default: run forever)""",
+    )
+    parser.add_argument(
+        "--work-item-id",
+        action="store",
+        type=int,
+        help="the specific work id for exit & checkpointing",
+    )
+    parser.add_argument(
+        "--num-work-ids",
+        action="store",
+        type=int,
+        help="Number of distinct work item types",
+    )
+    parser.add_argument(
+        "--work-begin-cpu-id-exit",
+        action="store",
+        type=int,
+        help="exit when work starts on the specified cpu",
+    )
+    parser.add_argument(
+        "--work-end-exit-count",
+        action="store",
+        type=int,
+        help="exit at specified work end count",
+    )
+    parser.add_argument(
+        "--work-begin-exit-count",
+        action="store",
+        type=int,
+        help="exit at specified work begin count",
+    )
+    parser.add_argument(
+        "--init-param",
+        action="store",
+        type=int,
+        default=0,
+        help="""Parameter available in simulation with m5
+                              initparam""",
+    )
+    parser.add_argument(
+        "--initialize-only",
+        action="store_true",
+        default=False,
         help="""Exit after initialization. Do not simulate time.
-                              Useful when gem5 is run as a library.""")
+                              Useful when gem5 is run as a library.""",
+    )
 
     # Simpoint options
-    parser.add_argument("--simpoint-profile", action="store_true",
-                        help="Enable basic block profiling for SimPoints")
-    parser.add_argument("--simpoint-interval", type=int, default=10000000,
-                        help="SimPoint interval in num of instructions")
     parser.add_argument(
-        "--take-simpoint-checkpoints", action="store", type=str,
-        help="<simpoint file,weight file,interval-length,warmup-length>")
-    parser.add_argument("--restore-simpoint-checkpoint", action="store_true",
-                        default=False,
-                        help="restore from a simpoint checkpoint taken with " +
-                        "--take-simpoint-checkpoints")
+        "--simpoint-profile",
+        action="store_true",
+        help="Enable basic block profiling for SimPoints",
+    )
+    parser.add_argument(
+        "--simpoint-interval",
+        type=int,
+        default=10000000,
+        help="SimPoint interval in num of instructions",
+    )
+    parser.add_argument(
+        "--take-simpoint-checkpoints",
+        action="store",
+        type=str,
+        help="<simpoint file,weight file,interval-length,warmup-length>",
+    )
+    parser.add_argument(
+        "--restore-simpoint-checkpoint",
+        action="store_true",
+        default=False,
+        help="restore from a simpoint checkpoint taken with "
+        + "--take-simpoint-checkpoints",
+    )
 
     # Checkpointing options
     # Note that performing checkpointing via python script files will override
     # checkpoint instructions built into binaries.
     parser.add_argument(
-        "--take-checkpoints", action="store", type=str,
-        help="<M,N> take checkpoints at tick M and every N ticks thereafter")
+        "--take-checkpoints",
+        action="store",
+        type=str,
+        help="<M,N> take checkpoints at tick M and every N ticks thereafter",
+    )
     parser.add_argument(
-        "--max-checkpoints", action="store", type=int,
-        help="the maximum number of checkpoints to drop", default=5)
+        "--max-checkpoints",
+        action="store",
+        type=int,
+        help="the maximum number of checkpoints to drop",
+        default=5,
+    )
     parser.add_argument(
-        "--checkpoint-dir", action="store", type=str,
-        help="Place all checkpoints in this absolute directory")
-    parser.add_argument("-r", "--checkpoint-restore", action="store", type=int,
-                        help="restore from checkpoint <N>")
-    parser.add_argument("--checkpoint-at-end", action="store_true",
-                        help="take a checkpoint at end of run")
+        "--checkpoint-dir",
+        action="store",
+        type=str,
+        help="Place all checkpoints in this absolute directory",
+    )
     parser.add_argument(
-        "--work-begin-checkpoint-count", action="store", type=int,
-        help="checkpoint at specified work begin count")
+        "-r",
+        "--checkpoint-restore",
+        action="store",
+        type=int,
+        help="restore from checkpoint <N>",
+    )
     parser.add_argument(
-        "--work-end-checkpoint-count", action="store", type=int,
-        help="checkpoint at specified work end count")
+        "--checkpoint-at-end",
+        action="store_true",
+        help="take a checkpoint at end of run",
+    )
     parser.add_argument(
-        "--work-cpus-checkpoint-count", action="store", type=int,
-        help="checkpoint and exit when active cpu count is reached")
-    parser.add_argument("--restore-with-cpu", action="store",
-                        default="AtomicSimpleCPU",
-                        choices=ObjectList.cpu_list.get_names(),
-                        help="cpu type for restoring from a checkpoint")
+        "--work-begin-checkpoint-count",
+        action="store",
+        type=int,
+        help="checkpoint at specified work begin count",
+    )
+    parser.add_argument(
+        "--work-end-checkpoint-count",
+        action="store",
+        type=int,
+        help="checkpoint at specified work end count",
+    )
+    parser.add_argument(
+        "--work-cpus-checkpoint-count",
+        action="store",
+        type=int,
+        help="checkpoint and exit when active cpu count is reached",
+    )
+    parser.add_argument(
+        "--restore-with-cpu",
+        action="store",
+        default="AtomicSimpleCPU",
+        choices=ObjectList.cpu_list.get_names(),
+        help="cpu type for restoring from a checkpoint",
+    )
 
     # CPU Switching - default switch model goes from a checkpoint
     # to a timing simple CPU with caches to warm up, then to detailed CPU for
     # data measurement
     parser.add_argument(
-        "--repeat-switch", action="store", type=int, default=None,
-        help="switch back and forth between CPUs with period <N>")
+        "--repeat-switch",
+        action="store",
+        type=int,
+        default=None,
+        help="switch back and forth between CPUs with period <N>",
+    )
     parser.add_argument(
-        "-s", "--standard-switch", action="store", type=int, default=None,
-        help="switch from timing to Detailed CPU after warmup period of <N>")
-    parser.add_argument("-p", "--prog-interval", type=str,
-                        help="CPU Progress Interval")
+        "-s",
+        "--standard-switch",
+        action="store",
+        type=int,
+        default=None,
+        help="switch from timing to Detailed CPU after warmup period of <N>",
+    )
+    parser.add_argument(
+        "-p", "--prog-interval", type=str, help="CPU Progress Interval"
+    )
 
     # Fastforwarding and simpoint related materials
     parser.add_argument(
-        "-W", "--warmup-insts", action="store", type=int, default=None,
-        help="Warmup period in total instructions (requires --standard-switch)")
+        "-W",
+        "--warmup-insts",
+        action="store",
+        type=int,
+        default=None,
+        help="Warmup period in total instructions (requires --standard-switch)",
+    )
     parser.add_argument(
-        "--bench", action="store", type=str, default=None,
-        help="base names for --take-checkpoint and --checkpoint-restore")
+        "--bench",
+        action="store",
+        type=str,
+        default=None,
+        help="base names for --take-checkpoint and --checkpoint-restore",
+    )
     parser.add_argument(
-        "-F", "--fast-forward", action="store", type=str, default=None,
-        help="Number of instructions to fast forward before switching")
+        "-F",
+        "--fast-forward",
+        action="store",
+        type=str,
+        default=None,
+        help="Number of instructions to fast forward before switching",
+    )
     parser.add_argument(
-        "-S", "--simpoint", action="store_true", default=False,
+        "-S",
+        "--simpoint",
+        action="store_true",
+        default=False,
         help="""Use workload simpoints as an instruction offset for
-                --checkpoint-restore or --take-checkpoint.""")
+                --checkpoint-restore or --take-checkpoint.""",
+    )
     parser.add_argument(
-        "--at-instruction", action="store_true", default=False,
+        "--at-instruction",
+        action="store_true",
+        default=False,
         help="""Treat value of --checkpoint-restore or --take-checkpoint as a
-                number of instructions.""")
-    parser.add_argument("--spec-input", default="ref",
-                        choices=["ref", "test", "train", "smred", "mdred",
-                                 "lgred"],
-                        help="Input set size for SPEC CPU2000 benchmarks.")
-    parser.add_argument("--arm-iset", default="arm",
-                        choices=["arm", "thumb", "aarch64"],
-                        help="ARM instruction set.")
+                number of instructions.""",
+    )
     parser.add_argument(
-        "--stats-root", action="append", default=[],
+        "--spec-input",
+        default="ref",
+        choices=["ref", "test", "train", "smred", "mdred", "lgred"],
+        help="Input set size for SPEC CPU2000 benchmarks.",
+    )
+    parser.add_argument(
+        "--arm-iset",
+        default="arm",
+        choices=["arm", "thumb", "aarch64"],
+        help="ARM instruction set.",
+    )
+    parser.add_argument(
+        "--stats-root",
+        action="append",
+        default=[],
         help="If given, dump only stats of objects under the given SimObject. "
         "SimObjects are identified with Python notation as in: "
         "system.cpu[0].mmu. All elements of an array can be selected at "
         "once with: system.cpu[:].mmu. If given multiple times, dump stats "
         "that are present under any of the roots. If not given, dump all "
-        "stats. ")
+        "stats. ",
+    )
+    parser.add_argument(
+        "--override-vendor-string",
+        action="store",
+        type=str,
+        default=None,
+        help="Override vendor string returned by CPUID instruction in X86.",
+    )
 
 
 def addSEOptions(parser):
     # Benchmark options
-    parser.add_argument("-c", "--cmd", default="",
-                        help="The binary to run in syscall emulation mode.")
-    parser.add_argument("-o", "--options", default="",
-                        help="""The options to pass to the binary, use " "
-                              around the entire string""")
-    parser.add_argument("-e", "--env", default="",
-                        help="Initialize workload environment from text file.")
-    parser.add_argument("-i", "--input", default="",
-                        help="Read stdin from a file.")
-    parser.add_argument("--output", default="",
-                        help="Redirect stdout to a file.")
-    parser.add_argument("--errout", default="",
-                        help="Redirect stderr to a file.")
-    parser.add_argument("--chroot", action="store", type=str, default=None,
-                        help="The chroot option allows a user to alter the "
-                        "search path for processes running in SE mode. "
-                        "Normally, the search path would begin at the "
-                        "root of the filesystem (i.e. /). With chroot, "
-                        "a user can force the process to begin looking at"
-                        "some other location (i.e. /home/user/rand_dir)."
-                        "The intended use is to trick sophisticated "
-                        "software which queries the __HOST__ filesystem "
-                        "for information or functionality. Instead of "
-                        "finding files on the __HOST__ filesystem, the "
-                        "process will find the user's replacment files.")
-    parser.add_argument("--interp-dir", action="store", type=str,
-                        default=None,
-                        help="The interp-dir option is used for "
-                        "setting the interpreter's path. This will "
-                        "allow to load the guest dynamic linker/loader "
-                        "itself from the elf binary. The option points to "
-                        "the parent folder of the guest /lib in the "
-                        "host fs")
+    parser.add_argument(
+        "-c",
+        "--cmd",
+        default="",
+        help="The binary to run in syscall emulation mode.",
+    )
+    parser.add_argument(
+        "-o",
+        "--options",
+        default="",
+        help="""The options to pass to the binary, use " "
+                              around the entire string""",
+    )
+    parser.add_argument(
+        "-e",
+        "--env",
+        default="",
+        help="Initialize workload environment from text file.",
+    )
+    parser.add_argument(
+        "-i", "--input", default="", help="Read stdin from a file."
+    )
+    parser.add_argument(
+        "--output", default="", help="Redirect stdout to a file."
+    )
+    parser.add_argument(
+        "--errout", default="", help="Redirect stderr to a file."
+    )
+    parser.add_argument(
+        "--chroot",
+        action="store",
+        type=str,
+        default=None,
+        help="The chroot option allows a user to alter the "
+        "search path for processes running in SE mode. "
+        "Normally, the search path would begin at the "
+        "root of the filesystem (i.e. /). With chroot, "
+        "a user can force the process to begin looking at "
+        "some other location (i.e. /home/user/rand_dir). "
+        "The intended use is to trick sophisticated "
+        "software which queries the __HOST__ filesystem "
+        "for information or functionality. Instead of "
+        "finding files on the __HOST__ filesystem, the "
+        "process will find the user's replacement files.",
+    )
+    parser.add_argument(
+        "--interp-dir",
+        action="store",
+        type=str,
+        default=None,
+        help="The interp-dir option is used for "
+        "setting the interpreter's path. This will "
+        "allow loading the guest dynamic linker/loader "
+        "itself from the elf binary. The option points to "
+        "the parent folder of the guest /lib in the "
+        "host fs",
+    )
 
-    parser.add_argument("--redirects", action="append", type=str,
-                        default=[],
-                        help="A collection of one or more redirect paths "
-                        "to be used in syscall emulation."
-                        "Usage: gem5.opt [...] --redirects /dir1=/path/"
-                        "to/host/dir1 --redirects /dir2=/path/to/host/dir2")
-    parser.add_argument("--wait-gdb", default=False, action='store_true',
-                        help="Wait for remote GDB to connect.")
+    parser.add_argument(
+        "--redirects",
+        action="append",
+        type=str,
+        default=[],
+        help="A collection of one or more redirect paths "
+        "to be used in syscall emulation. "
+        "Usage: gem5.opt [...] --redirects /dir1=/path/"
+        "to/host/dir1 --redirects /dir2=/path/to/host/dir2",
+    )
+    parser.add_argument(
+        "--wait-gdb",
+        default=False,
+        action="store_true",
+        help="Wait for remote GDB to connect.",
+    )
 
 
 def addFSOptions(parser):
@@ -461,73 +762,128 @@
 
     # Simulation options
     parser.add_argument(
-        "--timesync", action="store_true",
-        help="Prevent simulated time from getting ahead of real time")
+        "--timesync",
+        action="store_true",
+        help="Prevent simulated time from getting ahead of real time",
+    )
 
     # System options
     parser.add_argument("--kernel", action="store", type=str)
-    parser.add_argument("--os-type", action="store",
-                        choices=os_types[str(buildEnv['TARGET_ISA'])],
-                        default="linux",
-                        help="Specifies type of OS to boot")
+    parser.add_argument(
+        "--os-type",
+        action="store",
+        choices=os_types,
+        default="linux",
+        help="Specifies type of OS to boot",
+    )
     parser.add_argument("--script", action="store", type=str)
     parser.add_argument(
-        "--frame-capture", action="store_true",
+        "--frame-capture",
+        action="store_true",
         help="Stores changed frame buffers from the VNC server to compressed "
-        "files in the gem5 output directory")
+        "files in the gem5 output directory",
+    )
 
-    if buildEnv['TARGET_ISA'] == "arm":
+    if buildEnv["USE_ARM_ISA"]:
         parser.add_argument(
-            "--bare-metal", action="store_true",
-            help="Provide the raw system without the linux specific bits")
-        parser.add_argument("--list-machine-types",
-                            action=ListPlatform, nargs=0,
-                            help="List available platform types")
-        parser.add_argument("--machine-type", action="store",
-                            choices=ObjectList.platform_list.get_names(),
-                            default="VExpress_GEM5_V1")
+            "--bare-metal",
+            action="store_true",
+            help="Provide the raw system without the linux specific bits",
+        )
         parser.add_argument(
-            "--dtb-filename", action="store", type=str,
+            "--list-machine-types",
+            action=ListPlatform,
+            nargs=0,
+            help="List available platform types",
+        )
+        parser.add_argument(
+            "--machine-type",
+            action="store",
+            choices=ObjectList.platform_list.get_names(),
+            default="VExpress_GEM5_V1",
+        )
+        parser.add_argument(
+            "--dtb-filename",
+            action="store",
+            type=str,
             help="Specifies device tree blob file to use with device-tree-"
-            "enabled kernels")
+            "enabled kernels",
+        )
         parser.add_argument(
-            "--enable-context-switch-stats-dump", action="store_true",
+            "--enable-context-switch-stats-dump",
+            action="store_true",
             help="Enable stats dump at context "
-            "switches and dump tasks file (required for Streamline)")
+            "switches and dump tasks file (required for Streamline)",
+        )
         parser.add_argument("--vio-9p", action="store_true", help=vio_9p_help)
         parser.add_argument(
-            "--bootloader", action='append',
-            help="executable file that runs before the --kernel")
+            "--bootloader",
+            action="append",
+            help="executable file that runs before the --kernel",
+        )
 
     # Benchmark options
     parser.add_argument(
-        "--dual", action="store_true",
-        help="Simulate two systems attached with an ethernet link")
+        "--dual",
+        action="store_true",
+        help="Simulate two systems attached with an ethernet link",
+    )
     parser.add_argument(
-        "-b", "--benchmark", action="store", type=str, dest="benchmark",
-        help="Specify the benchmark to run. Available benchmarks: %s" %
-        DefinedBenchmarks)
+        "-b",
+        "--benchmark",
+        action="store",
+        type=str,
+        dest="benchmark",
+        help="Specify the benchmark to run. Available benchmarks: %s"
+        % DefinedBenchmarks,
+    )
 
     # Metafile options
     parser.add_argument(
-        "--etherdump", action="store", type=str, dest="etherdump",
+        "--etherdump",
+        action="store",
+        type=str,
+        dest="etherdump",
         help="Specify the filename to dump a pcap capture of the"
-        "ethernet traffic")
+        "ethernet traffic",
+    )
 
     # Disk Image Options
-    parser.add_argument("--disk-image", action="append", type=str,
-                        default=[], help="Path to the disk images to use.")
-    parser.add_argument("--root-device", action="store", type=str,
-                        default=None, help="OS device name for root partition")
+    parser.add_argument(
+        "--disk-image",
+        action="append",
+        type=str,
+        default=[],
+        help="Path to the disk images to use.",
+    )
+    parser.add_argument(
+        "--root-device",
+        action="store",
+        type=str,
+        default=None,
+        help="OS device name for root partition",
+    )
 
     # Command line options
-    parser.add_argument("--command-line", action="store", type=str,
-                        default=None,
-                        help="Template for the kernel command line.")
     parser.add_argument(
-        "--command-line-file", action="store", default=None, type=str,
-        help="File with a template for the kernel command line")
+        "--command-line",
+        action="store",
+        type=str,
+        default=None,
+        help="Template for the kernel command line.",
+    )
+    parser.add_argument(
+        "--command-line-file",
+        action="store",
+        default=None,
+        type=str,
+        help="File with a template for the kernel command line",
+    )
 
     # Debug option
-    parser.add_argument("--wait-gdb", default=False, action='store_true',
-                        help="Wait for remote GDB to connect.")
+    parser.add_argument(
+        "--wait-gdb",
+        default=False,
+        action="store_true",
+        help="Wait for remote GDB to connect.",
+    )
diff --git a/configs/common/SimpleOpts.py b/configs/common/SimpleOpts.py
index fabc8e0..96c73f5 100644
--- a/configs/common/SimpleOpts.py
+++ b/configs/common/SimpleOpts.py
@@ -44,21 +44,22 @@
 # add the args we want to be able to control from the command line
 parser = ArgumentParser()
 
+
 def add_option(*args, **kwargs):
-    """Call "add_option" to the global options parser
-    """
+    """Call "add_option" to the global options parser"""
 
     if called_parse_args:
         m5.fatal("Can't add an option after calling SimpleOpts.parse_args")
 
     parser.add_argument(*args, **kwargs)
 
+
 def parse_args():
     global called_parse_args
     called_parse_args = True
 
     return parser.parse_args()
 
+
 def print_help(*args, **kwargs):
     parser.print_help(*args, **kwargs)
-
diff --git a/configs/common/Simulation.py b/configs/common/Simulation.py
index 2416773..731b3fc 100644
--- a/configs/common/Simulation.py
+++ b/configs/common/Simulation.py
@@ -49,27 +49,28 @@
 from m5.objects import *
 from m5.util import *
 
-addToPath('../common')
+addToPath("../common")
+
 
 def getCPUClass(cpu_type):
     """Returns the required cpu class and the mode of operation."""
     cls = ObjectList.cpu_list.get(cpu_type)
     return cls, cls.memory_mode()
 
+
 def setCPUClass(options):
     """Returns two cpu classes and the initial mode of operation.
 
-       Restoring from a checkpoint or fast forwarding through a benchmark
-       can be done using one type of cpu, and then the actual
-       simulation can be carried out using another type. This function
-       returns these two types of cpus and the initial mode of operation
-       depending on the options provided.
+    Restoring from a checkpoint or fast forwarding through a benchmark
+    can be done using one type of cpu, and then the actual
+    simulation can be carried out using another type. This function
+    returns these two types of cpus and the initial mode of operation
+    depending on the options provided.
     """
 
     TmpClass, test_mem_mode = getCPUClass(options.cpu_type)
     CPUClass = None
-    if TmpClass.require_caches() and \
-            not options.caches and not options.ruby:
+    if TmpClass.require_caches() and not options.caches and not options.ruby:
         fatal("%s must be used with caches" % options.cpu_type)
 
     if options.checkpoint_restore != None:
@@ -79,20 +80,22 @@
     elif options.fast_forward:
         CPUClass = TmpClass
         TmpClass = AtomicSimpleCPU
-        test_mem_mode = 'atomic'
+        test_mem_mode = "atomic"
 
     # Ruby only supports atomic accesses in noncaching mode
-    if test_mem_mode == 'atomic' and options.ruby:
+    if test_mem_mode == "atomic" and options.ruby:
         warn("Memory mode will be changed to atomic_noncaching")
-        test_mem_mode = 'atomic_noncaching'
+        test_mem_mode = "atomic_noncaching"
 
     return (TmpClass, test_mem_mode, CPUClass)
 
+
 def setMemClass(options):
     """Returns a memory controller class."""
 
     return ObjectList.mem_list.get(options.mem_type)
 
+
 def setWorkCountOptions(system, options):
     if options.work_item_id != None:
         system.work_item_id = options.work_item_id
@@ -111,6 +114,7 @@
     if options.work_cpus_checkpoint_count != None:
         system.work_cpus_ckpt_count = options.work_cpus_checkpoint_count
 
+
 def findCptDir(options, cptdir, testsys):
     """Figures out the directory from which the checkpointed state is read.
 
@@ -137,7 +141,7 @@
         if options.simpoint:
             # assume workload 0 has the simpoint
             if testsys.cpu[0].workload[0].simpoint == 0:
-                fatal('Unable to find simpoint')
+                fatal("Unable to find simpoint")
             inst += int(testsys.cpu[0].workload[0].simpoint)
 
         checkpoint_dir = joinpath(cptdir, "cpt.%s.%s" % (options.bench, inst))
@@ -148,8 +152,10 @@
         # Restore from SimPoint checkpoints
         # Assumes that the checkpoint dir names are formatted as follows:
         dirs = listdir(cptdir)
-        expr = re.compile('cpt\.simpoint_(\d+)_inst_(\d+)' +
-                    '_weight_([\d\.e\-]+)_interval_(\d+)_warmup_(\d+)')
+        expr = re.compile(
+            "cpt\.simpoint_(\d+)_inst_(\d+)"
+            + "_weight_([\d\.e\-]+)_interval_(\d+)_warmup_(\d+)"
+        )
         cpts = []
         for dir in dirs:
             match = expr.match(dir)
@@ -159,7 +165,7 @@
 
         cpt_num = options.checkpoint_restore
         if cpt_num > len(cpts):
-            fatal('Checkpoint %d not found', cpt_num)
+            fatal("Checkpoint %d not found", cpt_num)
         checkpoint_dir = joinpath(cptdir, cpts[cpt_num - 1])
         match = expr.match(cpts[cpt_num - 1])
         if match:
@@ -176,30 +182,33 @@
         if testsys.switch_cpus != None:
             testsys.switch_cpus[0].simpoint_start_insts = simpoint_start_insts
 
-        print("Resuming from SimPoint", end=' ')
-        print("#%d, start_inst:%d, weight:%f, interval:%d, warmup:%d" %
-            (index, start_inst, weight_inst, interval_length, warmup_length))
+        print("Resuming from SimPoint", end=" ")
+        print(
+            "#%d, start_inst:%d, weight:%f, interval:%d, warmup:%d"
+            % (index, start_inst, weight_inst, interval_length, warmup_length)
+        )
 
     else:
         dirs = listdir(cptdir)
-        expr = re.compile('cpt\.([0-9]+)')
+        expr = re.compile("cpt\.([0-9]+)")
         cpts = []
         for dir in dirs:
             match = expr.match(dir)
             if match:
                 cpts.append(match.group(1))
 
-        cpts.sort(key = lambda a: int(a))
+        cpts.sort(key=lambda a: int(a))
 
         cpt_num = options.checkpoint_restore
         if cpt_num > len(cpts):
-            fatal('Checkpoint %d not found', cpt_num)
+            fatal("Checkpoint %d not found", cpt_num)
 
         cpt_starttick = int(cpts[cpt_num - 1])
         checkpoint_dir = joinpath(cptdir, "cpt.%s" % cpts[cpt_num - 1])
 
     return cpt_starttick, checkpoint_dir
 
+
 def scriptCheckpoints(options, maxtick, cptdir):
     if options.at_instruction or options.simpoint:
         checkpoint_inst = int(options.take_checkpoints)
@@ -219,8 +228,11 @@
             exit_cause = exit_event.getCause()
 
         if exit_cause == "a thread reached the max instruction count":
-            m5.checkpoint(joinpath(cptdir, "cpt.%s.%d" % \
-                    (options.bench, checkpoint_inst)))
+            m5.checkpoint(
+                joinpath(
+                    cptdir, "cpt.%s.%d" % (options.bench, checkpoint_inst)
+                )
+            )
             print("Checkpoint written.")
 
     else:
@@ -242,8 +254,10 @@
         sim_ticks = when
         max_checkpoints = options.max_checkpoints
 
-        while num_checkpoints < max_checkpoints and \
-                exit_cause == "simulate() limit reached":
+        while (
+            num_checkpoints < max_checkpoints
+            and exit_cause == "simulate() limit reached"
+        ):
             if (sim_ticks + period) > maxtick:
                 exit_event = m5.simulate(maxtick - sim_ticks)
                 exit_cause = exit_event.getCause()
@@ -260,6 +274,7 @@
 
     return exit_event
 
+
 def benchCheckpoints(options, maxtick, cptdir):
     exit_event = m5.simulate(maxtick - m5.curTick())
     exit_cause = exit_event.getCause()
@@ -279,13 +294,18 @@
 
     return exit_event
 
+
 # Set up environment for taking SimPoint checkpoints
 # Expecting SimPoint files generated by SimPoint 3.2
 def parseSimpointAnalysisFile(options, testsys):
     import re
 
-    simpoint_filename, weight_filename, interval_length, warmup_length = \
-        options.take_simpoint_checkpoints.split(",", 3)
+    (
+        simpoint_filename,
+        weight_filename,
+        interval_length,
+        warmup_length,
+    ) = options.take_simpoint_checkpoints.split(",", 3)
     print("simpoint analysis file:", simpoint_filename)
     print("simpoint weight file:", weight_filename)
     print("interval length:", interval_length)
@@ -309,20 +329,19 @@
         if m:
             interval = int(m.group(1))
         else:
-            fatal('unrecognized line in simpoint file!')
+            fatal("unrecognized line in simpoint file!")
 
         line = weight_file.readline()
         if not line:
-            fatal('not enough lines in simpoint weight file!')
+            fatal("not enough lines in simpoint weight file!")
         m = re.match("([0-9\.e\-]+)\s+(\d+)", line)
         if m:
             weight = float(m.group(1))
         else:
-            fatal('unrecognized line in simpoint weight file!')
+            fatal("unrecognized line in simpoint weight file!")
 
-        if (interval * interval_length - warmup_length > 0):
-            starting_inst_count = \
-                interval * interval_length - warmup_length
+        if interval * interval_length - warmup_length > 0:
+            starting_inst_count = interval * interval_length - warmup_length
             actual_warmup_length = warmup_length
         else:
             # Not enough room for proper warmup
@@ -330,15 +349,20 @@
             starting_inst_count = 0
             actual_warmup_length = interval * interval_length
 
-        simpoints.append((interval, weight, starting_inst_count,
-            actual_warmup_length))
+        simpoints.append(
+            (interval, weight, starting_inst_count, actual_warmup_length)
+        )
 
     # Sort SimPoints by starting inst count
     simpoints.sort(key=lambda obj: obj[2])
     for s in simpoints:
         interval, weight, starting_inst_count, actual_warmup_length = s
-        print(str(interval), str(weight), starting_inst_count,
-            actual_warmup_length)
+        print(
+            str(interval),
+            str(weight),
+            starting_inst_count,
+            actual_warmup_length,
+        )
         simpoint_start_insts.append(starting_inst_count)
 
     print("Total # of simpoints:", len(simpoints))
@@ -346,6 +370,7 @@
 
     return (simpoints, interval_length)
 
+
 def takeSimpointCheckpoints(simpoints, interval_length, cptdir):
     num_checkpoints = 0
     index = 0
@@ -369,22 +394,34 @@
             code = exit_event.getCode()
 
         if exit_cause == "simpoint starting point found":
-            m5.checkpoint(joinpath(cptdir,
-                "cpt.simpoint_%02d_inst_%d_weight_%f_interval_%d_warmup_%d"
-                % (index, starting_inst_count, weight, interval_length,
-                actual_warmup_length)))
-            print("Checkpoint #%d written. start inst:%d weight:%f" %
-                (num_checkpoints, starting_inst_count, weight))
+            m5.checkpoint(
+                joinpath(
+                    cptdir,
+                    "cpt.simpoint_%02d_inst_%d_weight_%f_interval_%d_warmup_%d"
+                    % (
+                        index,
+                        starting_inst_count,
+                        weight,
+                        interval_length,
+                        actual_warmup_length,
+                    ),
+                )
+            )
+            print(
+                "Checkpoint #%d written. start inst:%d weight:%f"
+                % (num_checkpoints, starting_inst_count, weight)
+            )
             num_checkpoints += 1
             last_chkpnt_inst_count = starting_inst_count
         else:
             break
         index += 1
 
-    print('Exiting @ tick %i because %s' % (m5.curTick(), exit_cause))
+    print("Exiting @ tick %i because %s" % (m5.curTick(), exit_cause))
     print("%d checkpoints taken" % num_checkpoints)
     sys.exit(code)
 
+
 def restoreSimpointCheckpoint():
     exit_event = m5.simulate()
     exit_cause = exit_event.getCause()
@@ -401,9 +438,10 @@
             print("Done running SimPoint!")
             sys.exit(exit_event.getCode())
 
-    print('Exiting @ tick %i because %s' % (m5.curTick(), exit_cause))
+    print("Exiting @ tick %i because %s" % (m5.curTick(), exit_cause))
     sys.exit(exit_event.getCode())
 
+
 def repeatSwitch(testsys, repeat_switch_cpu_list, maxtick, switch_freq):
     print("starting switch loop")
     while True:
@@ -424,6 +462,7 @@
             exit_event = m5.simulate(maxtick - m5.curTick())
             return exit_event
 
+
 def run(options, root, testsys, cpu_class):
     if options.checkpoint_dir:
         cptdir = options.checkpoint_dir
@@ -461,9 +500,17 @@
         for i in range(np):
             testsys.cpu[i].max_insts_any_thread = options.maxinsts
 
+    if options.override_vendor_string is not None:
+        for i in range(len(testsys.cpu)):
+            for j in range(len(testsys.cpu[i].isa)):
+                testsys.cpu[i].isa[
+                    j
+                ].vendor_string = options.override_vendor_string
+
     if cpu_class:
-        switch_cpus = [cpu_class(switched_out=True, cpu_id=(i))
-                       for i in range(np)]
+        switch_cpus = [
+            cpu_class(switched_out=True, cpu_id=(i)) for i in range(np)
+        ]
 
         for i in range(np):
             if options.fast_forward:
@@ -471,8 +518,7 @@
             switch_cpus[i].system = testsys
             switch_cpus[i].workload = testsys.cpu[i].workload
             switch_cpus[i].clk_domain = testsys.cpu[i].clk_domain
-            switch_cpus[i].progress_interval = \
-                testsys.cpu[i].progress_interval
+            switch_cpus[i].progress_interval = testsys.cpu[i].progress_interval
             switch_cpus[i].isa = testsys.cpu[i].isa
             # simulation period
             if options.maxinsts:
@@ -485,9 +531,11 @@
                 switch_cpus[i].branchPred = bpClass()
             if options.indirect_bp_type:
                 IndirectBPClass = ObjectList.indirect_bp_list.get(
-                    options.indirect_bp_type)
-                switch_cpus[i].branchPred.indirectBranchPred = \
-                    IndirectBPClass()
+                    options.indirect_bp_type
+                )
+                switch_cpus[
+                    i
+                ].branchPred.indirectBranchPred = IndirectBPClass()
             switch_cpus[i].createThreads()
 
         # If elastic tracing is enabled attach the elastic trace probe
@@ -500,16 +548,16 @@
 
     if options.repeat_switch:
         switch_class = getCPUClass(options.cpu_type)[0]
-        if switch_class.require_caches() and \
-                not options.caches:
+        if switch_class.require_caches() and not options.caches:
             print("%s: Must be used with caches" % str(switch_class))
             sys.exit(1)
         if not switch_class.support_take_over():
             print("%s: CPU switching not supported" % str(switch_class))
             sys.exit(1)
 
-        repeat_switch_cpus = [switch_class(switched_out=True, \
-                                               cpu_id=(i)) for i in range(np)]
+        repeat_switch_cpus = [
+            switch_class(switched_out=True, cpu_id=(i)) for i in range(np)
+        ]
 
         for i in range(np):
             repeat_switch_cpus[i].system = testsys
@@ -523,24 +571,30 @@
             if options.checker:
                 repeat_switch_cpus[i].addCheckerCpu()
 
+            repeat_switch_cpus[i].createThreads()
+
         testsys.repeat_switch_cpus = repeat_switch_cpus
 
         if cpu_class:
-            repeat_switch_cpu_list = [(switch_cpus[i], repeat_switch_cpus[i])
-                                      for i in range(np)]
+            repeat_switch_cpu_list = [
+                (switch_cpus[i], repeat_switch_cpus[i]) for i in range(np)
+            ]
         else:
-            repeat_switch_cpu_list = [(testsys.cpu[i], repeat_switch_cpus[i])
-                                      for i in range(np)]
+            repeat_switch_cpu_list = [
+                (testsys.cpu[i], repeat_switch_cpus[i]) for i in range(np)
+            ]
 
     if options.standard_switch:
-        switch_cpus = [TimingSimpleCPU(switched_out=True, cpu_id=(i))
-                       for i in range(np)]
-        switch_cpus_1 = [DerivO3CPU(switched_out=True, cpu_id=(i))
-                        for i in range(np)]
+        switch_cpus = [
+            TimingSimpleCPU(switched_out=True, cpu_id=(i)) for i in range(np)
+        ]
+        switch_cpus_1 = [
+            DerivO3CPU(switched_out=True, cpu_id=(i)) for i in range(np)
+        ]
 
         for i in range(np):
-            switch_cpus[i].system =  testsys
-            switch_cpus_1[i].system =  testsys
+            switch_cpus[i].system = testsys
+            switch_cpus_1[i].system = testsys
             switch_cpus[i].workload = testsys.cpu[i].workload
             switch_cpus_1[i].workload = testsys.cpu[i].workload
             switch_cpus[i].clk_domain = testsys.cpu[i].clk_domain
@@ -557,16 +611,17 @@
             # Fast forward to a simpoint (warning: time consuming)
             elif options.simpoint:
                 if testsys.cpu[i].workload[0].simpoint == 0:
-                    fatal('simpoint not found')
-                testsys.cpu[i].max_insts_any_thread = \
+                    fatal("simpoint not found")
+                testsys.cpu[i].max_insts_any_thread = (
                     testsys.cpu[i].workload[0].simpoint
+                )
             # No distance specified, just switch
             else:
                 testsys.cpu[i].max_insts_any_thread = 1
 
             # warmup period
             if options.warmup_insts:
-                switch_cpus[i].max_insts_any_thread =  options.warmup_insts
+                switch_cpus[i].max_insts_any_thread = options.warmup_insts
 
             # simulation period
             if options.maxinsts:
@@ -577,25 +632,29 @@
                 switch_cpus[i].addCheckerCpu()
                 switch_cpus_1[i].addCheckerCpu()
 
+            switch_cpus[i].createThreads()
+            switch_cpus_1[i].createThreads()
+
         testsys.switch_cpus = switch_cpus
         testsys.switch_cpus_1 = switch_cpus_1
-        switch_cpu_list = [
-            (testsys.cpu[i], switch_cpus[i]) for i in range(np)
-        ]
+        switch_cpu_list = [(testsys.cpu[i], switch_cpus[i]) for i in range(np)]
         switch_cpu_list1 = [
             (switch_cpus[i], switch_cpus_1[i]) for i in range(np)
         ]
 
     # set the checkpoint in the cpu before m5.instantiate is called
-    if options.take_checkpoints != None and \
-           (options.simpoint or options.at_instruction):
+    if options.take_checkpoints != None and (
+        options.simpoint or options.at_instruction
+    ):
         offset = int(options.take_checkpoints)
         # Set an instruction break point
         if options.simpoint:
             for i in range(np):
                 if testsys.cpu[i].workload[0].simpoint == 0:
-                    fatal('no simpoint for testsys.cpu[%d].workload[0]', i)
-                checkpoint_inst = int(testsys.cpu[i].workload[0].simpoint) + offset
+                    fatal("no simpoint for testsys.cpu[%d].workload[0]", i)
+                checkpoint_inst = (
+                    int(testsys.cpu[i].workload[0].simpoint) + offset
+                )
                 testsys.cpu[i].max_insts_any_thread = checkpoint_inst
                 # used for output below
                 options.take_checkpoints = checkpoint_inst
@@ -607,7 +666,9 @@
                 testsys.cpu[i].max_insts_any_thread = offset
 
     if options.take_simpoint_checkpoints != None:
-        simpoints, interval_length = parseSimpointAnalysisFile(options, testsys)
+        simpoints, interval_length = parseSimpointAnalysisFile(
+            options, testsys
+        )
 
     checkpoint_dir = None
     if options.checkpoint_restore:
@@ -640,31 +701,43 @@
             # the ticks per simulated second
             maxtick_from_rel += cpt_starttick
             if options.at_instruction or options.simpoint:
-                warn("Relative max tick specified with --at-instruction or" \
-                     " --simpoint\n      These options don't specify the " \
-                     "checkpoint start tick, so assuming\n      you mean " \
-                     "absolute max tick")
+                warn(
+                    "Relative max tick specified with --at-instruction or"
+                    " --simpoint\n      These options don't specify the "
+                    "checkpoint start tick, so assuming\n      you mean "
+                    "absolute max tick"
+                )
         explicit_maxticks += 1
     if options.maxtime:
         maxtick_from_maxtime = m5.ticks.fromSeconds(options.maxtime)
         explicit_maxticks += 1
     if explicit_maxticks > 1:
-        warn("Specified multiple of --abs-max-tick, --rel-max-tick, --maxtime."\
-             " Using least")
+        warn(
+            "Specified multiple of --abs-max-tick, --rel-max-tick, --maxtime."
+            " Using least"
+        )
     maxtick = min([maxtick_from_abs, maxtick_from_rel, maxtick_from_maxtime])
 
     if options.checkpoint_restore != None and maxtick < cpt_starttick:
-        fatal("Bad maxtick (%d) specified: " \
-              "Checkpoint starts starts from tick: %d", maxtick, cpt_starttick)
+        fatal(
+            "Bad maxtick (%d) specified: "
+            "Checkpoint starts starts from tick: %d",
+            maxtick,
+            cpt_starttick,
+        )
 
     if options.standard_switch or cpu_class:
         if options.standard_switch:
-            print("Switch at instruction count:%s" %
-                    str(testsys.cpu[0].max_insts_any_thread))
+            print(
+                "Switch at instruction count:%s"
+                % str(testsys.cpu[0].max_insts_any_thread)
+            )
             exit_event = m5.simulate()
         elif cpu_class and options.fast_forward:
-            print("Switch at instruction count:%s" %
-                    str(testsys.cpu[0].max_insts_any_thread))
+            print(
+                "Switch at instruction count:%s"
+                % str(testsys.cpu[0].max_insts_any_thread)
+            )
             exit_event = m5.simulate()
         else:
             print("Switch at curTick count:%s" % str(10000))
@@ -674,32 +747,37 @@
         m5.switchCpus(testsys, switch_cpu_list)
 
         if options.standard_switch:
-            print("Switch at instruction count:%d" %
-                    (testsys.switch_cpus[0].max_insts_any_thread))
+            print(
+                "Switch at instruction count:%d"
+                % (testsys.switch_cpus[0].max_insts_any_thread)
+            )
 
-            #warmup instruction count may have already been set
+            # warmup instruction count may have already been set
             if options.warmup_insts:
                 exit_event = m5.simulate()
             else:
                 exit_event = m5.simulate(options.standard_switch)
             print("Switching CPUS @ tick %s" % (m5.curTick()))
-            print("Simulation ends instruction count:%d" %
-                    (testsys.switch_cpus_1[0].max_insts_any_thread))
+            print(
+                "Simulation ends instruction count:%d"
+                % (testsys.switch_cpus_1[0].max_insts_any_thread)
+            )
             m5.switchCpus(testsys, switch_cpu_list1)
 
     # If we're taking and restoring checkpoints, use checkpoint_dir
     # option only for finding the checkpoints to restore from.  This
     # lets us test checkpointing by restoring from one set of
     # checkpoints, generating a second set, and then comparing them.
-    if (options.take_checkpoints or options.take_simpoint_checkpoints) \
-        and options.checkpoint_restore:
+    if (
+        options.take_checkpoints or options.take_simpoint_checkpoints
+    ) and options.checkpoint_restore:
 
         if m5.options.outdir:
             cptdir = m5.options.outdir
         else:
             cptdir = getcwd()
 
-    if options.take_checkpoints != None :
+    if options.take_checkpoints != None:
         # Checkpoints being taken via the command line at <when> and at
         # subsequent periods of <period>.  Checkpoint instructions
         # received from the benchmark running are ignored and skipped in
@@ -722,13 +800,15 @@
         # If checkpoints are being taken, then the checkpoint instruction
         # will occur in the benchmark code it self.
         if options.repeat_switch and maxtick > options.repeat_switch:
-            exit_event = repeatSwitch(testsys, repeat_switch_cpu_list,
-                                      maxtick, options.repeat_switch)
+            exit_event = repeatSwitch(
+                testsys, repeat_switch_cpu_list, maxtick, options.repeat_switch
+            )
         else:
             exit_event = benchCheckpoints(options, maxtick, cptdir)
 
-    print('Exiting @ tick %i because %s' %
-          (m5.curTick(), exit_event.getCause()))
+    print(
+        "Exiting @ tick %i because %s" % (m5.curTick(), exit_event.getCause())
+    )
     if options.checkpoint_at_end:
         m5.checkpoint(joinpath(cptdir, "cpt.%d"))
 
diff --git a/configs/common/SysPaths.py b/configs/common/SysPaths.py
index 762efaf..7c0f5bf 100644
--- a/configs/common/SysPaths.py
+++ b/configs/common/SysPaths.py
@@ -29,9 +29,10 @@
 config_path = os.path.dirname(os.path.abspath(__file__))
 config_root = os.path.dirname(config_path)
 
+
 class PathSearchFunc(object):
     _sys_paths = None
-    environment_variable = 'M5_PATH'
+    environment_variable = "M5_PATH"
 
     def __init__(self, subdirs, sys_paths=None):
         if isinstance(subdirs, str):
@@ -46,9 +47,9 @@
         else:
             if self._sys_paths is None:
                 try:
-                    paths = os.environ[self.environment_variable].split(':')
+                    paths = os.environ[self.environment_variable].split(":")
                 except KeyError:
-                    paths = [ '/dist/m5/system', '/n/poolfs/z/dist/m5/system' ]
+                    paths = ["/dist/m5/system", "/n/poolfs/z/dist/m5/system"]
 
                 # expand '~' and '~user' in paths
                 paths = list(map(os.path.expanduser, paths))
@@ -59,8 +60,10 @@
                 if not paths:
                     raise IOError(
                         "Can't find system files directory, "
-                        "check your {} environment variable"
-                        .format(self.environment_variable))
+                        "check your {} environment variable".format(
+                            self.environment_variable
+                        )
+                    )
 
                 self._sys_paths = list(paths)
 
@@ -69,9 +72,13 @@
             try:
                 return next(p for p in paths if os.path.exists(p))
             except StopIteration:
-                raise IOError("Can't find file '{}' on {}."
-                        .format(filepath, self.environment_variable))
+                raise IOError(
+                    "Can't find file '{}' on {}.".format(
+                        filepath, self.environment_variable
+                    )
+                )
 
-disk = PathSearchFunc('disks')
-binary = PathSearchFunc('binaries')
-script = PathSearchFunc('boot', sys_paths=[config_root])
+
+disk = PathSearchFunc("disks")
+binary = PathSearchFunc("binaries")
+script = PathSearchFunc("boot", sys_paths=[config_root])
diff --git a/configs/common/__init__.py b/configs/common/__init__.py
index 9b43643..4fe0002 100644
--- a/configs/common/__init__.py
+++ b/configs/common/__init__.py
@@ -32,4 +32,3 @@
 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
diff --git a/configs/common/cores/arm/HPI.py b/configs/common/cores/arm/HPI.py
index 3a11133..c7a8127 100644
--- a/configs/common/cores/arm/HPI.py
+++ b/configs/common/cores/arm/HPI.py
@@ -58,41 +58,47 @@
             ret_match <<= 1
 
         shift = True
-        if char == '_':
+        if char == "_":
             shift = False
-        elif char == '0':
+        elif char == "0":
             ret_mask |= 1
-        elif char == '1':
+        elif char == "1":
             ret_mask |= 1
             ret_match |= 1
-        elif char == 'x':
+        elif char == "x":
             pass
         else:
             print("Can't parse implicant character", char)
 
     return (ret_mask, ret_match)
 
+
 #                          ,----- 36 thumb
 #                          | ,--- 35 bigThumb
 #                          | |,-- 34 aarch64
-a64_inst = make_implicant('0_01xx__xxxx_xxxx_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx')
-a32_inst = make_implicant('0_00xx__xxxx_xxxx_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx')
-t32_inst = make_implicant('1_10xx__xxxx_xxxx_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx')
-t16_inst = make_implicant('1_00xx__xxxx_xxxx_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx')
-any_inst = make_implicant('x_xxxx__xxxx_xxxx_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx')
+a64_inst = make_implicant("0_01xx__xxxx_xxxx_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx")
+a32_inst = make_implicant("0_00xx__xxxx_xxxx_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx")
+t32_inst = make_implicant("1_10xx__xxxx_xxxx_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx")
+t16_inst = make_implicant("1_00xx__xxxx_xxxx_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx")
+any_inst = make_implicant("x_xxxx__xxxx_xxxx_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx")
 #                          | ||
-any_a64_inst = \
-           make_implicant('x_x1xx__xxxx_xxxx_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx')
-any_non_a64_inst = \
-           make_implicant('x_x0xx__xxxx_xxxx_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx')
+any_a64_inst = make_implicant(
+    "x_x1xx__xxxx_xxxx_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx"
+)
+any_non_a64_inst = make_implicant(
+    "x_x0xx__xxxx_xxxx_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx"
+)
+
 
 def encode_opcode(pattern):
     def encode(opcode_string):
         a64_mask, a64_match = pattern
         mask, match = make_implicant(opcode_string)
         return (a64_mask | mask), (a64_match | match)
+
     return encode
 
+
 a64_opcode = encode_opcode(a64_inst)
 a32_opcode = encode_opcode(a32_inst)
 t32_opcode = encode_opcode(t32_inst)
@@ -100,30 +106,37 @@
 
 # These definitions (in some form) should probably be part of TimingExpr
 
+
 def literal(value):
     def body(env):
         ret = TimingExprLiteral()
         ret.value = value
         return ret
+
     return body
 
+
 def bin(op, left, right):
     def body(env):
         ret = TimingExprBin()
-        ret.op = 'timingExpr' + op
+        ret.op = "timingExpr" + op
         ret.left = left(env)
         ret.right = right(env)
         return ret
+
     return body
 
+
 def un(op, arg):
     def body(env):
         ret = TimingExprUn()
-        ret.op = 'timingExpr' + op
+        ret.op = "timingExpr" + op
         ret.arg = arg(env)
         return ret
+
     return body
 
+
 def ref(name):
     def body(env):
         if name in env:
@@ -133,8 +146,10 @@
             print("Invalid expression name", name)
             ret = TimingExprNull()
         return ret
+
     return body
 
+
 def if_expr(cond, true_expr, false_expr):
     def body(env):
         ret = TimingExprIf()
@@ -142,21 +157,18 @@
         ret.trueExpr = true_expr(env)
         ret.falseExpr = false_expr(env)
         return ret
+
     return body
 
-def src(index):
+
+def src_reg(index):
     def body(env):
         ret = TimingExprSrcReg()
         ret.index = index
         return ret
+
     return body
 
-def int_reg(reg):
-    def body(env):
-        ret = TimingExprReadIntReg()
-        ret.reg = reg(env)
-        return ret
-    return body
 
 def let(bindings, expr):
     def body(env):
@@ -180,972 +192,1296 @@
         ret.expr = expr(new_env)
 
         return ret
+
     return body
 
+
 def expr_top(expr):
     return expr([])
 
+
 class HPI_DefaultInt(MinorFUTiming):
-    description = 'HPI_DefaultInt'
+    description = "HPI_DefaultInt"
     mask, match = any_non_a64_inst
     srcRegsRelativeLats = [3, 3, 2, 2, 2, 1, 0]
 
+
 class HPI_DefaultA64Int(MinorFUTiming):
-    description = 'HPI_DefaultA64Int'
+    description = "HPI_DefaultA64Int"
     mask, match = any_a64_inst
     # r, l, (c)
     srcRegsRelativeLats = [2, 2, 2, 0]
 
+
 class HPI_DefaultMul(MinorFUTiming):
-    description = 'HPI_DefaultMul'
+    description = "HPI_DefaultMul"
     mask, match = any_non_a64_inst
     # f, f, f, r, l, a?
     srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 0]
 
+
 class HPI_DefaultA64Mul(MinorFUTiming):
-    description = 'HPI_DefaultA64Mul'
+    description = "HPI_DefaultA64Mul"
     mask, match = any_a64_inst
     # a (zr for mul), l, r
     srcRegsRelativeLats = [0, 0, 0, 0]
     # extraCommitLat = 1
 
+
 class HPI_DefaultVfp(MinorFUTiming):
-    description = 'HPI_DefaultVfp'
+    description = "HPI_DefaultVfp"
     mask, match = any_non_a64_inst
     # cpsr, z, z, z, cpacr, fpexc, l_lo, r_lo, l_hi, r_hi (from vadd2h)
-    srcRegsRelativeLats = [5, 5, 5, 5, 5, 5,  2, 2, 2, 2, 2, 2, 2, 2, 0]
+    srcRegsRelativeLats = [5, 5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 2, 2, 2, 0]
+
 
 class HPI_DefaultA64Vfp(MinorFUTiming):
-    description = 'HPI_DefaultA64Vfp'
+    description = "HPI_DefaultA64Vfp"
     mask, match = any_a64_inst
     # cpsr, cpacr_el1, fpscr_exc, ...
     srcRegsRelativeLats = [5, 5, 5, 2]
 
+
 class HPI_FMADD_A64(MinorFUTiming):
-    description = 'HPI_FMADD_A64'
-    mask, match = a64_opcode('0001_1111_0x0x_xxxx__0xxx_xxxx_xxxx_xxxx')
+    description = "HPI_FMADD_A64"
+    mask, match = a64_opcode("0001_1111_0x0x_xxxx__0xxx_xxxx_xxxx_xxxx")
     #                                    t
     # cpsr, cpacr_el1, fpscr_exc, 1, 1, 2, 2, 3, 3, fpscr_exc, d, d, d, d
-    srcRegsRelativeLats = [5, 5, 5,  0, 0,  0, 0,  1, 1,  0,  0, 0, 0, 0]
+    srcRegsRelativeLats = [5, 5, 5, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0]
+
 
 class HPI_FMSUB_D_A64(MinorFUTiming):
-    description = 'HPI_FMSUB_D_A64'
-    mask, match = a64_opcode('0001_1111_0x0x_xxxx__1xxx_xxxx_xxxx_xxxx')
+    description = "HPI_FMSUB_D_A64"
+    mask, match = a64_opcode("0001_1111_0x0x_xxxx__1xxx_xxxx_xxxx_xxxx")
     #                                    t
     # cpsr, cpacr_el1, fpscr_exc, 1, 1, 2, 2, 3, 3, fpscr_exc, d, d, d, d
-    srcRegsRelativeLats = [5, 5, 5,  0, 0,  0, 0,  1, 1,  0,  0, 0, 0, 0]
+    srcRegsRelativeLats = [5, 5, 5, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0]
+
 
 class HPI_FMOV_A64(MinorFUTiming):
-    description = 'HPI_FMOV_A64'
-    mask, match = a64_opcode('0001_1110_0x10_0000__0100_00xx_xxxx_xxxx')
+    description = "HPI_FMOV_A64"
+    mask, match = a64_opcode("0001_1110_0x10_0000__0100_00xx_xxxx_xxxx")
     # cpsr, cpacr_el1, fpscr_exc, 1, 1, 2, 2, 3, 3, fpscr_exc, d, d, d, d
     srcRegsRelativeLats = [5, 5, 5, 0]
 
+
 class HPI_ADD_SUB_vector_scalar_A64(MinorFUTiming):
-    description = 'HPI_ADD_SUB_vector_scalar_A64'
-    mask, match = a64_opcode('01x1_1110_xx1x_xxxx__1000_01xx_xxxx_xxxx')
+    description = "HPI_ADD_SUB_vector_scalar_A64"
+    mask, match = a64_opcode("01x1_1110_xx1x_xxxx__1000_01xx_xxxx_xxxx")
     # cpsr, z, z, z, cpacr, fpexc, l0, r0, l1, r1, l2, r2, l3, r3 (for vadd2h)
     srcRegsRelativeLats = [5, 5, 5, 4]
 
 
 class HPI_ADD_SUB_vector_vector_A64(MinorFUTiming):
-    description = 'HPI_ADD_SUB_vector_vector_A64'
-    mask, match = a64_opcode('0xx0_1110_xx1x_xxxx__1000_01xx_xxxx_xxxx')
+    description = "HPI_ADD_SUB_vector_vector_A64"
+    mask, match = a64_opcode("0xx0_1110_xx1x_xxxx__1000_01xx_xxxx_xxxx")
     # cpsr, z, z, z, cpacr, fpexc, l0, r0, l1, r1, l2, r2, l3, r3 (for vadd2h)
     srcRegsRelativeLats = [5, 5, 5, 4]
 
+
 class HPI_FDIV_scalar_32_A64(MinorFUTiming):
-    description = 'HPI_FDIV_scalar_32_A64'
-    mask, match = a64_opcode('0001_1110_001x_xxxx__0001_10xx_xxxx_xxxx')
+    description = "HPI_FDIV_scalar_32_A64"
+    mask, match = a64_opcode("0001_1110_001x_xxxx__0001_10xx_xxxx_xxxx")
     extraCommitLat = 6
-    srcRegsRelativeLats = [0, 0, 0, 20,  4]
+    srcRegsRelativeLats = [0, 0, 0, 20, 4]
+
 
 class HPI_FDIV_scalar_64_A64(MinorFUTiming):
-    description = 'HPI_FDIV_scalar_64_A64'
-    mask, match = a64_opcode('0001_1110_011x_xxxx__0001_10xx_xxxx_xxxx')
+    description = "HPI_FDIV_scalar_64_A64"
+    mask, match = a64_opcode("0001_1110_011x_xxxx__0001_10xx_xxxx_xxxx")
     extraCommitLat = 15
-    srcRegsRelativeLats = [0, 0, 0, 20,  4]
+    srcRegsRelativeLats = [0, 0, 0, 20, 4]
+
 
 # CINC CINV CSEL CSET CSETM CSINC CSINC CSINV CSINV CSNEG
 class HPI_Cxxx_A64(MinorFUTiming):
-    description = 'HPI_Cxxx_A64'
-    mask, match = a64_opcode('xx01_1010_100x_xxxx_xxxx__0xxx_xxxx_xxxx')
+    description = "HPI_Cxxx_A64"
+    mask, match = a64_opcode("xx01_1010_100x_xxxx_xxxx__0xxx_xxxx_xxxx")
     srcRegsRelativeLats = [3, 3, 3, 2, 2]
 
+
 class HPI_DefaultMem(MinorFUTiming):
-    description = 'HPI_DefaultMem'
+    description = "HPI_DefaultMem"
     mask, match = any_non_a64_inst
     srcRegsRelativeLats = [1, 1, 1, 1, 1, 2]
     # Assume that LDR/STR take 2 cycles for resolving dependencies
     # (1 + 1 of the FU)
     extraAssumedLat = 2
 
+
 class HPI_DefaultMem64(MinorFUTiming):
-    description = 'HPI_DefaultMem64'
+    description = "HPI_DefaultMem64"
     mask, match = any_a64_inst
     srcRegsRelativeLats = [2]
     # Assume that LDR/STR take 2 cycles for resolving dependencies
     # (1 + 1 of the FU)
     extraAssumedLat = 3
 
+
 class HPI_DataProcessingMovShiftr(MinorFUTiming):
-    description = 'HPI_DataProcessingMovShiftr'
-    mask, match = a32_opcode('xxxx_0001_101x_xxxx__xxxx_xxxx_xxx1_xxxx')
+    description = "HPI_DataProcessingMovShiftr"
+    mask, match = a32_opcode("xxxx_0001_101x_xxxx__xxxx_xxxx_xxx1_xxxx")
     srcRegsRelativeLats = [3, 3, 2, 2, 2, 1, 0]
 
+
 class HPI_DataProcessingMayShift(MinorFUTiming):
-    description = 'HPI_DataProcessingMayShift'
-    mask, match = a32_opcode('xxxx_000x_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx')
+    description = "HPI_DataProcessingMayShift"
+    mask, match = a32_opcode("xxxx_000x_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx")
     srcRegsRelativeLats = [3, 3, 2, 2, 1, 1, 0]
 
+
 class HPI_DataProcessingNoShift(MinorFUTiming):
-    description = 'HPI_DataProcessingNoShift'
-    mask, match = a32_opcode('xxxx_000x_xxxx_xxxx__xxxx_0000_0xx0_xxxx')
+    description = "HPI_DataProcessingNoShift"
+    mask, match = a32_opcode("xxxx_000x_xxxx_xxxx__xxxx_0000_0xx0_xxxx")
     srcRegsRelativeLats = [3, 3, 2, 2, 2, 1, 0]
 
+
 class HPI_DataProcessingAllowShifti(MinorFUTiming):
-    description = 'HPI_DataProcessingAllowShifti'
-    mask, match = a32_opcode('xxxx_000x_xxxx_xxxx__xxxx_xxxx_xxx0_xxxx')
+    description = "HPI_DataProcessingAllowShifti"
+    mask, match = a32_opcode("xxxx_000x_xxxx_xxxx__xxxx_xxxx_xxx0_xxxx")
     srcRegsRelativeLats = [3, 3, 2, 2, 1, 1, 0]
 
+
 class HPI_DataProcessingSuppressShift(MinorFUTiming):
-    description = 'HPI_DataProcessingSuppressShift'
-    mask, match = a32_opcode('xxxx_000x_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx')
+    description = "HPI_DataProcessingSuppressShift"
+    mask, match = a32_opcode("xxxx_000x_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx")
     srcRegsRelativeLats = []
     suppress = True
 
+
 class HPI_DataProcessingSuppressBranch(MinorFUTiming):
-    description = 'HPI_DataProcessingSuppressBranch'
-    mask, match = a32_opcode('xxxx_1010_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx')
+    description = "HPI_DataProcessingSuppressBranch"
+    mask, match = a32_opcode("xxxx_1010_xxxx_xxxx__xxxx_xxxx_xxxx_xxxx")
     srcRegsRelativeLats = []
     suppress = True
 
+
 class HPI_BFI_T1(MinorFUTiming):
-    description = 'HPI_BFI_T1'
-    mask, match = t32_opcode('1111_0x11_0110_xxxx__0xxx_xxxx_xxxx_xxxx')
+    description = "HPI_BFI_T1"
+    mask, match = t32_opcode("1111_0x11_0110_xxxx__0xxx_xxxx_xxxx_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 1, 1, 0]
 
+
 class HPI_BFI_A1(MinorFUTiming):
-    description = 'HPI_BFI_A1'
-    mask, match = a32_opcode('xxxx_0111_110x_xxxx__xxxx_xxxx_x001_xxxx')
+    description = "HPI_BFI_A1"
+    mask, match = a32_opcode("xxxx_0111_110x_xxxx__xxxx_xxxx_x001_xxxx")
     # f, f, f, dest, src
     srcRegsRelativeLats = [0, 0, 0, 1, 1, 0]
 
+
 class HPI_CLZ_T1(MinorFUTiming):
-    description = 'HPI_CLZ_T1'
-    mask, match = t32_opcode('1111_1010_1011_xxxx__1111_xxxx_1000_xxxx')
+    description = "HPI_CLZ_T1"
+    mask, match = t32_opcode("1111_1010_1011_xxxx__1111_xxxx_1000_xxxx")
     srcRegsRelativeLats = [3, 3, 2, 2, 2, 1, 0]
 
+
 class HPI_CLZ_A1(MinorFUTiming):
-    description = 'HPI_CLZ_A1'
-    mask, match = a32_opcode('xxxx_0001_0110_xxxx__xxxx_xxxx_0001_xxxx')
+    description = "HPI_CLZ_A1"
+    mask, match = a32_opcode("xxxx_0001_0110_xxxx__xxxx_xxxx_0001_xxxx")
     srcRegsRelativeLats = [3, 3, 2, 2, 2, 1, 0]
 
+
 class HPI_CMN_immediate_A1(MinorFUTiming):
-    description = 'HPI_CMN_immediate_A1'
-    mask, match = a32_opcode('xxxx_0011_0111_xxxx__xxxx_xxxx_xxxx_xxxx')
+    description = "HPI_CMN_immediate_A1"
+    mask, match = a32_opcode("xxxx_0011_0111_xxxx__xxxx_xxxx_xxxx_xxxx")
     srcRegsRelativeLats = [3, 3, 3, 2, 2, 3, 3, 3, 0]
 
+
 class HPI_CMN_register_A1(MinorFUTiming):
-    description = 'HPI_CMN_register_A1'
-    mask, match = a32_opcode('xxxx_0001_0111_xxxx__xxxx_xxxx_xxx0_xxxx')
+    description = "HPI_CMN_register_A1"
+    mask, match = a32_opcode("xxxx_0001_0111_xxxx__xxxx_xxxx_xxx0_xxxx")
     srcRegsRelativeLats = [3, 3, 3, 2, 2, 3, 3, 3, 0]
 
+
 class HPI_CMP_immediate_A1(MinorFUTiming):
-    description = 'HPI_CMP_immediate_A1'
-    mask, match = a32_opcode('xxxx_0011_0101_xxxx__xxxx_xxxx_xxxx_xxxx')
+    description = "HPI_CMP_immediate_A1"
+    mask, match = a32_opcode("xxxx_0011_0101_xxxx__xxxx_xxxx_xxxx_xxxx")
     srcRegsRelativeLats = [3, 3, 3, 2, 2, 3, 3, 3, 0]
 
+
 class HPI_CMP_register_A1(MinorFUTiming):
-    description = 'HPI_CMP_register_A1'
-    mask, match = a32_opcode('xxxx_0001_0101_xxxx__xxxx_xxxx_xxx0_xxxx')
+    description = "HPI_CMP_register_A1"
+    mask, match = a32_opcode("xxxx_0001_0101_xxxx__xxxx_xxxx_xxx0_xxxx")
     srcRegsRelativeLats = [3, 3, 3, 2, 2, 3, 3, 3, 0]
 
+
 class HPI_MLA_T1(MinorFUTiming):
-    description = 'HPI_MLA_T1'
-    mask, match = t32_opcode('1111_1011_0000_xxxx__xxxx_xxxx_0000_xxxx')
+    description = "HPI_MLA_T1"
+    mask, match = t32_opcode("1111_1011_0000_xxxx__xxxx_xxxx_0000_xxxx")
     # z, z, z, a, l?, r?
     srcRegsRelativeLats = [0, 0, 0, 0, 0, 2, 0]
 
+
 class HPI_MLA_A1(MinorFUTiming):
-    description = 'HPI_MLA_A1'
-    mask, match = a32_opcode('xxxx_0000_001x_xxxx__xxxx_xxxx_1001_xxxx')
+    description = "HPI_MLA_A1"
+    mask, match = a32_opcode("xxxx_0000_001x_xxxx__xxxx_xxxx_1001_xxxx")
     # z, z, z, a, l?, r?
     srcRegsRelativeLats = [0, 0, 0, 0, 0, 2, 0]
 
+
 class HPI_MADD_A64(MinorFUTiming):
-    description = 'HPI_MADD_A64'
-    mask, match = a64_opcode('x001_1011_000x_xxxx__0xxx_xxxx_xxxx_xxxx')
+    description = "HPI_MADD_A64"
+    mask, match = a64_opcode("x001_1011_000x_xxxx__0xxx_xxxx_xxxx_xxxx")
     # a, l?, r?
     srcRegsRelativeLats = [1, 1, 1, 0]
     extraCommitLat = 1
 
+
 class HPI_MLS_T1(MinorFUTiming):
-    description = 'HPI_MLS_T1'
-    mask, match = t32_opcode('1111_1011_0000_xxxx__xxxx_xxxx_0001_xxxx')
+    description = "HPI_MLS_T1"
+    mask, match = t32_opcode("1111_1011_0000_xxxx__xxxx_xxxx_0001_xxxx")
     # z, z, z, l?, a, r?
     srcRegsRelativeLats = [0, 0, 0, 2, 0, 0, 0]
 
+
 class HPI_MLS_A1(MinorFUTiming):
-    description = 'HPI_MLS_A1'
-    mask, match = a32_opcode('xxxx_0000_0110_xxxx__xxxx_xxxx_1001_xxxx')
+    description = "HPI_MLS_A1"
+    mask, match = a32_opcode("xxxx_0000_0110_xxxx__xxxx_xxxx_1001_xxxx")
     # z, z, z, l?, a, r?
     srcRegsRelativeLats = [0, 0, 0, 2, 0, 0, 0]
 
+
 class HPI_MOVT_A1(MinorFUTiming):
-    description = 'HPI_MOVT_A1'
-    mask, match = t32_opcode('xxxx_0010_0100_xxxx__xxxx_xxxx_xxxx_xxxx')
+    description = "HPI_MOVT_A1"
+    mask, match = t32_opcode("xxxx_0010_0100_xxxx__xxxx_xxxx_xxxx_xxxx")
+
 
 class HPI_MUL_T1(MinorFUTiming):
-    description = 'HPI_MUL_T1'
-    mask, match = t16_opcode('0100_0011_01xx_xxxx')
+    description = "HPI_MUL_T1"
+    mask, match = t16_opcode("0100_0011_01xx_xxxx")
+
+
 class HPI_MUL_T2(MinorFUTiming):
-    description = 'HPI_MUL_T2'
-    mask, match = t32_opcode('1111_1011_0000_xxxx_1111_xxxx_0000_xxxx')
+    description = "HPI_MUL_T2"
+    mask, match = t32_opcode("1111_1011_0000_xxxx_1111_xxxx_0000_xxxx")
+
 
 class HPI_PKH_T1(MinorFUTiming):
-    description = 'HPI_PKH_T1'
-    mask, match = t32_opcode('1110_1010_110x_xxxx__xxxx_xxxx_xxxx_xxxx')
+    description = "HPI_PKH_T1"
+    mask, match = t32_opcode("1110_1010_110x_xxxx__xxxx_xxxx_xxxx_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 2, 1, 0]
 
+
 class HPI_PKH_A1(MinorFUTiming):
-    description = 'HPI_PKH_A1'
-    mask, match = a32_opcode('xxxx_0110_1000_xxxx__xxxx_xxxx_xx01_xxxx')
+    description = "HPI_PKH_A1"
+    mask, match = a32_opcode("xxxx_0110_1000_xxxx__xxxx_xxxx_xx01_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 2, 1, 0]
 
+
 class HPI_QADD_QSUB_T1(MinorFUTiming):
-    description = 'HPI_QADD_QSUB_T1'
-    mask, match = t32_opcode('1111_1010_1000_xxxx__1111_xxxx_10x0_xxxx')
+    description = "HPI_QADD_QSUB_T1"
+    mask, match = t32_opcode("1111_1010_1000_xxxx__1111_xxxx_10x0_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 1, 1, 0]
 
+
 class HPI_QADD_QSUB_A1(MinorFUTiming):
-    description = 'HPI_QADD_QSUB_A1'
-    mask, match = a32_opcode('xxxx_0001_00x0_xxxx__xxxx_xxxx_0101_xxxx')
+    description = "HPI_QADD_QSUB_A1"
+    mask, match = a32_opcode("xxxx_0001_00x0_xxxx__xxxx_xxxx_0101_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 1, 1, 0]
 
+
 # T1 QADD16 QADD8 QSUB16 QSUB8 UQADD16 UQADD8 UQSUB16 UQSUB8
 class HPI_QADD_ETC_T1(MinorFUTiming):
-    description = 'HPI_QADD_ETC_T1'
-    mask, match = t32_opcode('1111_1010_1x0x_xxxx__1111_xxxx_0x01_xxxx')
+    description = "HPI_QADD_ETC_T1"
+    mask, match = t32_opcode("1111_1010_1x0x_xxxx__1111_xxxx_0x01_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 1, 1, 0]
 
+
 # A1 QADD16 QADD8 QSAX QSUB16 QSUB8 UQADD16 UQADD8 UQASX UQSAX UQSUB16 UQSUB8
 class HPI_QADD_ETC_A1(MinorFUTiming):
-    description = 'HPI_QADD_ETC_A1'
-    mask, match = a32_opcode('xxxx_0110_0x10_xxxx__xxxx_xxxx_xxx1_xxxx')
+    description = "HPI_QADD_ETC_A1"
+    mask, match = a32_opcode("xxxx_0110_0x10_xxxx__xxxx_xxxx_xxx1_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 1, 1, 0]
 
+
 class HPI_QASX_QSAX_UQASX_UQSAX_T1(MinorFUTiming):
-    description = 'HPI_QASX_QSAX_UQASX_UQSAX_T1'
-    mask, match = t32_opcode('1111_1010_1x10_xxxx__1111_xxxx_0x01_xxxx')
+    description = "HPI_QASX_QSAX_UQASX_UQSAX_T1"
+    mask, match = t32_opcode("1111_1010_1x10_xxxx__1111_xxxx_0x01_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 1, 1, 0]
 
+
 class HPI_QDADD_QDSUB_T1(MinorFUTiming):
-    description = 'HPI_QDADD_QDSUB_T1'
-    mask, match = t32_opcode('1111_1010_1000_xxxx__1111_xxxx_10x1_xxxx')
+    description = "HPI_QDADD_QDSUB_T1"
+    mask, match = t32_opcode("1111_1010_1000_xxxx__1111_xxxx_10x1_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 0, 1, 0]
 
+
 class HPI_QDADD_QDSUB_A1(MinorFUTiming):
-    description = 'HPI_QDADD_QSUB_A1'
-    mask, match = a32_opcode('xxxx_0001_01x0_xxxx__xxxx_xxxx_0101_xxxx')
+    description = "HPI_QDADD_QDSUB_A1"
+    mask, match = a32_opcode("xxxx_0001_01x0_xxxx__xxxx_xxxx_0101_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 0, 1, 0]
 
+
 class HPI_RBIT_A1(MinorFUTiming):
-    description = 'HPI_RBIT_A1'
-    mask, match = a32_opcode('xxxx_0110_1111_xxxx__xxxx_xxxx_0011_xxxx')
+    description = "HPI_RBIT_A1"
+    mask, match = a32_opcode("xxxx_0110_1111_xxxx__xxxx_xxxx_0011_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 1, 0]
 
+
 class HPI_REV_REV16_A1(MinorFUTiming):
-    description = 'HPI_REV_REV16_A1'
-    mask, match = a32_opcode('xxxx_0110_1011_xxxx__xxxx_xxxx_x011_xxxx')
+    description = "HPI_REV_REV16_A1"
+    mask, match = a32_opcode("xxxx_0110_1011_xxxx__xxxx_xxxx_x011_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 1, 0]
 
+
 class HPI_REVSH_A1(MinorFUTiming):
-    description = 'HPI_REVSH_A1'
-    mask, match = a32_opcode('xxxx_0110_1111_xxxx__xxxx_xxxx_1011_xxxx')
+    description = "HPI_REVSH_A1"
+    mask, match = a32_opcode("xxxx_0110_1111_xxxx__xxxx_xxxx_1011_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 1, 0]
 
+
 class HPI_ADD_ETC_A1(MinorFUTiming):
-    description = 'HPI_ADD_ETC_A1'
-    mask, match = a32_opcode('xxxx_0110_0xx1_xxxx__xxxx_xxxx_x001_xxxx')
+    description = "HPI_ADD_ETC_A1"
+    mask, match = a32_opcode("xxxx_0110_0xx1_xxxx__xxxx_xxxx_x001_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 2, 2, 0]
 
+
 class HPI_ADD_ETC_T1(MinorFUTiming):
-    description = 'HPI_ADD_ETC_A1'
-    mask, match = t32_opcode('1111_1010_100x_xxxx__1111_xxxx_0xx0_xxxx')
+    description = "HPI_ADD_ETC_T1"
+    mask, match = t32_opcode("1111_1010_100x_xxxx__1111_xxxx_0xx0_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 2, 2, 0]
 
+
 class HPI_SASX_SHASX_UASX_UHASX_A1(MinorFUTiming):
-    description = 'HPI_SASX_SHASX_UASX_UHASX_A1'
-    mask, match = a32_opcode('xxxx_0110_0xx1_xxxx__xxxx_xxxx_0011_xxxx')
+    description = "HPI_SASX_SHASX_UASX_UHASX_A1"
+    mask, match = a32_opcode("xxxx_0110_0xx1_xxxx__xxxx_xxxx_0011_xxxx")
     srcRegsRelativeLats = [3, 3, 2, 2, 2, 1, 0]
 
+
 class HPI_SBFX_UBFX_A1(MinorFUTiming):
-    description = 'HPI_SBFX_UBFX_A1'
-    mask, match = a32_opcode('xxxx_0111_1x1x_xxxx__xxxx_xxxx_x101_xxxx')
+    description = "HPI_SBFX_UBFX_A1"
+    mask, match = a32_opcode("xxxx_0111_1x1x_xxxx__xxxx_xxxx_x101_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 1, 0]
 
+
 ### SDIV
 
-sdiv_lat_expr = expr_top(let([
-    ('left', un('SignExtend32To64', int_reg(src(4)))),
-    ('right', un('SignExtend32To64', int_reg(src(3)))),
-    ('either_signed', bin('Or',
-        bin('SLessThan', ref('left'), literal(0)),
-        bin('SLessThan', ref('right'), literal(0)))),
-    ('left_size', un('SizeInBits', un('Abs', ref('left')))),
-    ('signed_adjust', if_expr(ref('either_signed'), literal(1), literal(0))),
-    ('right_size', un('SizeInBits',
-        bin('UDiv', un('Abs', ref('right')),
-            if_expr(ref('either_signed'), literal(4), literal(2))))),
-    ('left_minus_right', if_expr(
-        bin('SLessThan', ref('left_size'), ref('right_size')),
-        literal(0),
-        bin('Sub', ref('left_size'), ref('right_size'))))
-    ],
-    bin('Add',
-        ref('signed_adjust'),
-        if_expr(bin('Equal', ref('right'), literal(0)),
-            literal(0),
-            bin('UDiv', ref('left_minus_right'), literal(4))))
-    ))
+sdiv_lat_expr = expr_top(
+    let(
+        [
+            ("left", un("SignExtend32To64", src_reg(4))),
+            ("right", un("SignExtend32To64", src_reg(3))),
+            (
+                "either_signed",
+                bin(
+                    "Or",
+                    bin("SLessThan", ref("left"), literal(0)),
+                    bin("SLessThan", ref("right"), literal(0)),
+                ),
+            ),
+            ("left_size", un("SizeInBits", un("Abs", ref("left")))),
+            (
+                "signed_adjust",
+                if_expr(ref("either_signed"), literal(1), literal(0)),
+            ),
+            (
+                "right_size",
+                un(
+                    "SizeInBits",
+                    bin(
+                        "UDiv",
+                        un("Abs", ref("right")),
+                        if_expr(ref("either_signed"), literal(4), literal(2)),
+                    ),
+                ),
+            ),
+            (
+                "left_minus_right",
+                if_expr(
+                    bin("SLessThan", ref("left_size"), ref("right_size")),
+                    literal(0),
+                    bin("Sub", ref("left_size"), ref("right_size")),
+                ),
+            ),
+        ],
+        bin(
+            "Add",
+            ref("signed_adjust"),
+            if_expr(
+                bin("Equal", ref("right"), literal(0)),
+                literal(0),
+                bin("UDiv", ref("left_minus_right"), literal(4)),
+            ),
+        ),
+    )
+)
 
-sdiv_lat_expr64 = expr_top(let([
-    ('left', un('SignExtend32To64', int_reg(src(0)))),
-    ('right', un('SignExtend32To64', int_reg(src(1)))),
-    ('either_signed', bin('Or',
-        bin('SLessThan', ref('left'), literal(0)),
-        bin('SLessThan', ref('right'), literal(0)))),
-    ('left_size', un('SizeInBits', un('Abs', ref('left')))),
-    ('signed_adjust', if_expr(ref('either_signed'), literal(1), literal(0))),
-    ('right_size', un('SizeInBits',
-        bin('UDiv', un('Abs', ref('right')),
-            if_expr(ref('either_signed'), literal(4), literal(2))))),
-    ('left_minus_right', if_expr(
-        bin('SLessThan', ref('left_size'), ref('right_size')),
-        literal(0),
-        bin('Sub', ref('left_size'), ref('right_size'))))
-    ],
-    bin('Add',
-        ref('signed_adjust'),
-        if_expr(bin('Equal', ref('right'), literal(0)),
-            literal(0),
-            bin('UDiv', ref('left_minus_right'), literal(4))))
-    ))
+sdiv_lat_expr64 = expr_top(
+    let(
+        [
+            ("left", un("SignExtend32To64", src_reg(0))),
+            ("right", un("SignExtend32To64", src_reg(1))),
+            (
+                "either_signed",
+                bin(
+                    "Or",
+                    bin("SLessThan", ref("left"), literal(0)),
+                    bin("SLessThan", ref("right"), literal(0)),
+                ),
+            ),
+            ("left_size", un("SizeInBits", un("Abs", ref("left")))),
+            (
+                "signed_adjust",
+                if_expr(ref("either_signed"), literal(1), literal(0)),
+            ),
+            (
+                "right_size",
+                un(
+                    "SizeInBits",
+                    bin(
+                        "UDiv",
+                        un("Abs", ref("right")),
+                        if_expr(ref("either_signed"), literal(4), literal(2)),
+                    ),
+                ),
+            ),
+            (
+                "left_minus_right",
+                if_expr(
+                    bin("SLessThan", ref("left_size"), ref("right_size")),
+                    literal(0),
+                    bin("Sub", ref("left_size"), ref("right_size")),
+                ),
+            ),
+        ],
+        bin(
+            "Add",
+            ref("signed_adjust"),
+            if_expr(
+                bin("Equal", ref("right"), literal(0)),
+                literal(0),
+                bin("UDiv", ref("left_minus_right"), literal(4)),
+            ),
+        ),
+    )
+)
+
 
 class HPI_SDIV_A1(MinorFUTiming):
-    description = 'HPI_SDIV_A1'
-    mask, match = a32_opcode('xxxx_0111_0001_xxxx__xxxx_xxxx_0001_xxxx')
+    description = "HPI_SDIV_A1"
+    mask, match = a32_opcode("xxxx_0111_0001_xxxx__xxxx_xxxx_0001_xxxx")
     extraCommitLat = 0
     srcRegsRelativeLats = []
     extraCommitLatExpr = sdiv_lat_expr
 
+
 class HPI_SDIV_A64(MinorFUTiming):
-    description = 'HPI_SDIV_A64'
-    mask, match = a64_opcode('x001_1010_110x_xxxx__0000_11xx_xxxx_xxxx')
+    description = "HPI_SDIV_A64"
+    mask, match = a64_opcode("x001_1010_110x_xxxx__0000_11xx_xxxx_xxxx")
     extraCommitLat = 0
     srcRegsRelativeLats = []
     extraCommitLatExpr = sdiv_lat_expr64
 
+
 ### SEL
 
+
 class HPI_SEL_A1(MinorFUTiming):
-    description = 'HPI_SEL_A1'
-    mask, match = a32_opcode('xxxx_0110_1000_xxxx__xxxx_xxxx_1011_xxxx')
+    description = "HPI_SEL_A1"
+    mask, match = a32_opcode("xxxx_0110_1000_xxxx__xxxx_xxxx_1011_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 0, 2, 2, 0]
 
+
 class HPI_SEL_A1_Suppress(MinorFUTiming):
-    description = 'HPI_SEL_A1_Suppress'
-    mask, match = a32_opcode('xxxx_0110_1000_xxxx__xxxx_xxxx_1011_xxxx')
+    description = "HPI_SEL_A1_Suppress"
+    mask, match = a32_opcode("xxxx_0110_1000_xxxx__xxxx_xxxx_1011_xxxx")
     srcRegsRelativeLats = []
     suppress = True
 
+
 class HPI_SHSAX_SSAX_UHSAX_USAX_A1(MinorFUTiming):
-    description = 'HPI_SHSAX_SSAX_UHSAX_USAX_A1'
-    mask, match = a32_opcode('xxxx_0110_0xx1_xxxx__xxxx_xxxx_0101_xxxx')
+    description = "HPI_SHSAX_SSAX_UHSAX_USAX_A1"
+    mask, match = a32_opcode("xxxx_0110_0xx1_xxxx__xxxx_xxxx_0101_xxxx")
     # As Default
     srcRegsRelativeLats = [3, 3, 2, 2, 2, 1, 0]
 
+
 class HPI_USUB_ETC_A1(MinorFUTiming):
-    description = 'HPI_USUB_ETC_A1'
-    mask, match = a32_opcode('xxxx_0110_0xx1_xxxx__xxxx_xxxx_x111_xxxx')
+    description = "HPI_USUB_ETC_A1"
+    mask, match = a32_opcode("xxxx_0110_0xx1_xxxx__xxxx_xxxx_x111_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 2, 2, 0]
 
+
 class HPI_SMLABB_T1(MinorFUTiming):
-    description = 'HPI_SMLABB_T1'
-    mask, match = t32_opcode('1111_1011_0001_xxxx__xxxx_xxxx_00xx_xxxx')
+    description = "HPI_SMLABB_T1"
+    mask, match = t32_opcode("1111_1011_0001_xxxx__xxxx_xxxx_00xx_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 0, 0, 2, 0]
 
+
 class HPI_SMLABB_A1(MinorFUTiming):
-    description = 'HPI_SMLABB_A1'
-    mask, match = a32_opcode('xxxx_0001_0000_xxxx__xxxx_xxxx_1xx0_xxxx')
+    description = "HPI_SMLABB_A1"
+    mask, match = a32_opcode("xxxx_0001_0000_xxxx__xxxx_xxxx_1xx0_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 0, 0, 2, 0]
 
+
 class HPI_SMLAD_T1(MinorFUTiming):
-    description = 'HPI_SMLAD_T1'
-    mask, match = t32_opcode('1111_1011_0010_xxxx__xxxx_xxxx_000x_xxxx')
+    description = "HPI_SMLAD_T1"
+    mask, match = t32_opcode("1111_1011_0010_xxxx__xxxx_xxxx_000x_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 0, 0, 2, 0]
 
+
 class HPI_SMLAD_A1(MinorFUTiming):
-    description = 'HPI_SMLAD_A1'
-    mask, match = a32_opcode('xxxx_0111_0000_xxxx__xxxx_xxxx_00x1_xxxx')
+    description = "HPI_SMLAD_A1"
+    mask, match = a32_opcode("xxxx_0111_0000_xxxx__xxxx_xxxx_00x1_xxxx")
     # z, z, z, l, r, a
     srcRegsRelativeLats = [0, 0, 0, 0, 0, 2, 0]
 
+
 class HPI_SMLAL_T1(MinorFUTiming):
-    description = 'HPI_SMLAL_T1'
-    mask, match = t32_opcode('1111_1011_1100_xxxx__xxxx_xxxx_0000_xxxx')
+    description = "HPI_SMLAL_T1"
+    mask, match = t32_opcode("1111_1011_1100_xxxx__xxxx_xxxx_0000_xxxx")
+
+
 class HPI_SMLAL_A1(MinorFUTiming):
-    description = 'HPI_SMLAL_A1'
-    mask, match = a32_opcode('xxxx_0000_111x_xxxx__xxxx_xxxx_1001_xxxx')
+    description = "HPI_SMLAL_A1"
+    mask, match = a32_opcode("xxxx_0000_111x_xxxx__xxxx_xxxx_1001_xxxx")
+
 
 class HPI_SMLALBB_T1(MinorFUTiming):
-    description = 'HPI_SMLALBB_T1'
-    mask, match = t32_opcode('1111_1011_1100_xxxx__xxxx_xxxx_10xx_xxxx')
+    description = "HPI_SMLALBB_T1"
+    mask, match = t32_opcode("1111_1011_1100_xxxx__xxxx_xxxx_10xx_xxxx")
+
+
 class HPI_SMLALBB_A1(MinorFUTiming):
-    description = 'HPI_SMLALBB_A1'
-    mask, match = a32_opcode('xxxx_0001_0100_xxxx__xxxx_xxxx_1xx0_xxxx')
+    description = "HPI_SMLALBB_A1"
+    mask, match = a32_opcode("xxxx_0001_0100_xxxx__xxxx_xxxx_1xx0_xxxx")
+
 
 class HPI_SMLALD_T1(MinorFUTiming):
-    description = 'HPI_SMLALD_T1'
-    mask, match = t32_opcode('1111_1011_1100_xxxx__xxxx_xxxx_110x_xxxx')
+    description = "HPI_SMLALD_T1"
+    mask, match = t32_opcode("1111_1011_1100_xxxx__xxxx_xxxx_110x_xxxx")
+
+
 class HPI_SMLALD_A1(MinorFUTiming):
-    description = 'HPI_SMLALD_A1'
-    mask, match = a32_opcode('xxxx_0111_0100_xxxx__xxxx_xxxx_00x1_xxxx')
+    description = "HPI_SMLALD_A1"
+    mask, match = a32_opcode("xxxx_0111_0100_xxxx__xxxx_xxxx_00x1_xxxx")
+
 
 class HPI_SMLAWB_T1(MinorFUTiming):
-    description = 'HPI_SMLAWB_T1'
-    mask, match = t32_opcode('1111_1011_0011_xxxx__xxxx_xxxx_000x_xxxx')
+    description = "HPI_SMLAWB_T1"
+    mask, match = t32_opcode("1111_1011_0011_xxxx__xxxx_xxxx_000x_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 0, 0, 2, 0]
 
+
 class HPI_SMLAWB_A1(MinorFUTiming):
-    description = 'HPI_SMLAWB_A1'
-    mask, match = a32_opcode('xxxx_0001_0010_xxxx__xxxx_xxxx_1x00_xxxx')
+    description = "HPI_SMLAWB_A1"
+    mask, match = a32_opcode("xxxx_0001_0010_xxxx__xxxx_xxxx_1x00_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 0, 0, 2, 0]
 
+
 class HPI_SMLSD_A1(MinorFUTiming):
-    description = 'HPI_SMLSD_A1'
-    mask, match = a32_opcode('xxxx_0111_0000_xxxx__xxxx_xxxx_01x1_xxxx')
+    description = "HPI_SMLSD_A1"
+    mask, match = a32_opcode("xxxx_0111_0000_xxxx__xxxx_xxxx_01x1_xxxx")
+
 
 class HPI_SMLSLD_T1(MinorFUTiming):
-    description = 'HPI_SMLSLD_T1'
-    mask, match = t32_opcode('1111_1011_1101_xxxx__xxxx_xxxx_110x_xxxx')
+    description = "HPI_SMLSLD_T1"
+    mask, match = t32_opcode("1111_1011_1101_xxxx__xxxx_xxxx_110x_xxxx")
+
+
 class HPI_SMLSLD_A1(MinorFUTiming):
-    description = 'HPI_SMLSLD_A1'
-    mask, match = a32_opcode('xxxx_0111_0100_xxxx__xxxx_xxxx_01x1_xxxx')
+    description = "HPI_SMLSLD_A1"
+    mask, match = a32_opcode("xxxx_0111_0100_xxxx__xxxx_xxxx_01x1_xxxx")
+
 
 class HPI_SMMLA_T1(MinorFUTiming):
-    description = 'HPI_SMMLA_T1'
-    mask, match = t32_opcode('1111_1011_0101_xxxx__xxxx_xxxx_000x_xxxx')
+    description = "HPI_SMMLA_T1"
+    mask, match = t32_opcode("1111_1011_0101_xxxx__xxxx_xxxx_000x_xxxx")
     #                                              ^^^^ != 1111
     srcRegsRelativeLats = [0, 0, 0, 2, 0, 0, 0]
 
+
 class HPI_SMMLA_A1(MinorFUTiming):
-    description = 'HPI_SMMLA_A1'
+    description = "HPI_SMMLA_A1"
     # Note that this must be after the encoding for SMMUL
-    mask, match = a32_opcode('xxxx_0111_0101_xxxx__xxxx_xxxx_00x1_xxxx')
+    mask, match = a32_opcode("xxxx_0111_0101_xxxx__xxxx_xxxx_00x1_xxxx")
     #                                              ^^^^ != 1111
     srcRegsRelativeLats = [0, 0, 0, 2, 0, 0, 0]
 
+
 class HPI_SMMLS_T1(MinorFUTiming):
-    description = 'HPI_SMMLS_T1'
-    mask, match = t32_opcode('1111_1011_0110_xxxx__xxxx_xxxx_000x_xxxx')
+    description = "HPI_SMMLS_T1"
+    mask, match = t32_opcode("1111_1011_0110_xxxx__xxxx_xxxx_000x_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 2, 0, 0, 0]
 
+
 class HPI_SMMLS_A1(MinorFUTiming):
-    description = 'HPI_SMMLS_A1'
-    mask, match = a32_opcode('xxxx_0111_0101_xxxx__xxxx_xxxx_11x1_xxxx')
+    description = "HPI_SMMLS_A1"
+    mask, match = a32_opcode("xxxx_0111_0101_xxxx__xxxx_xxxx_11x1_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 2, 0, 0, 0]
 
+
 class HPI_SMMUL_T1(MinorFUTiming):
-    description = 'HPI_SMMUL_T1'
-    mask, match = t32_opcode('1111_1011_0101_xxxx__1111_xxxx_000x_xxxx')
+    description = "HPI_SMMUL_T1"
+    mask, match = t32_opcode("1111_1011_0101_xxxx__1111_xxxx_000x_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 0, 0, 0]
 
+
 class HPI_SMMUL_A1(MinorFUTiming):
-    description = 'HPI_SMMUL_A1'
-    mask, match = a32_opcode('xxxx_0111_0101_xxxx__1111_xxxx_00x1_xxxx')
+    description = "HPI_SMMUL_A1"
+    mask, match = a32_opcode("xxxx_0111_0101_xxxx__1111_xxxx_00x1_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 0, 0, 0]
 
+
 class HPI_SMUAD_T1(MinorFUTiming):
-    description = 'HPI_SMUAD_T1'
-    mask, match = t32_opcode('1111_1011_0010_xxxx__1111_xxxx_000x_xxxx')
+    description = "HPI_SMUAD_T1"
+    mask, match = t32_opcode("1111_1011_0010_xxxx__1111_xxxx_000x_xxxx")
+
+
 class HPI_SMUAD_A1(MinorFUTiming):
-    description = 'HPI_SMUAD_A1'
-    mask, match = a32_opcode('xxxx_0111_0000_xxxx__1111_xxxx_00x1_xxxx')
+    description = "HPI_SMUAD_A1"
+    mask, match = a32_opcode("xxxx_0111_0000_xxxx__1111_xxxx_00x1_xxxx")
+
 
 class HPI_SMULBB_T1(MinorFUTiming):
-    description = 'HPI_SMULBB_T1'
-    mask, match = t32_opcode('1111_1011_0001_xxxx__1111_xxxx_00xx_xxxx')
+    description = "HPI_SMULBB_T1"
+    mask, match = t32_opcode("1111_1011_0001_xxxx__1111_xxxx_00xx_xxxx")
+
+
 class HPI_SMULBB_A1(MinorFUTiming):
-    description = 'HPI_SMULBB_A1'
-    mask, match = a32_opcode('xxxx_0001_0110_xxxx__xxxx_xxxx_1xx0_xxxx')
+    description = "HPI_SMULBB_A1"
+    mask, match = a32_opcode("xxxx_0001_0110_xxxx__xxxx_xxxx_1xx0_xxxx")
+
 
 class HPI_SMULL_T1(MinorFUTiming):
-    description = 'HPI_SMULL_T1'
-    mask, match = t32_opcode('1111_1011_1000_xxxx__xxxx_xxxx_0000_xxxx')
+    description = "HPI_SMULL_T1"
+    mask, match = t32_opcode("1111_1011_1000_xxxx__xxxx_xxxx_0000_xxxx")
+
+
 class HPI_SMULL_A1(MinorFUTiming):
-    description = 'HPI_SMULL_A1'
-    mask, match = a32_opcode('xxxx_0000_110x_xxxx__xxxx_xxxx_1001_xxxx')
+    description = "HPI_SMULL_A1"
+    mask, match = a32_opcode("xxxx_0000_110x_xxxx__xxxx_xxxx_1001_xxxx")
+
 
 class HPI_SMULWB_T1(MinorFUTiming):
-    description = 'HPI_SMULWB_T1'
-    mask, match = t32_opcode('1111_1011_0011_xxxx__1111_xxxx_000x_xxxx')
+    description = "HPI_SMULWB_T1"
+    mask, match = t32_opcode("1111_1011_0011_xxxx__1111_xxxx_000x_xxxx")
+
+
 class HPI_SMULWB_A1(MinorFUTiming):
-    description = 'HPI_SMULWB_A1'
-    mask, match = a32_opcode('xxxx_0001_0010_xxxx__xxxx_xxxx_1x10_xxxx')
+    description = "HPI_SMULWB_A1"
+    mask, match = a32_opcode("xxxx_0001_0010_xxxx__xxxx_xxxx_1x10_xxxx")
+
 
 class HPI_SMUSD_T1(MinorFUTiming):
-    description = 'HPI_SMUSD_T1'
-    mask, match = t32_opcode('1111_1011_0100_xxxx__1111_xxxx_000x_xxxx')
+    description = "HPI_SMUSD_T1"
+    mask, match = t32_opcode("1111_1011_0100_xxxx__1111_xxxx_000x_xxxx")
+
+
 class HPI_SMUSD_A1(MinorFUTiming):
-    description = 'HPI_SMUSD_A1'
-    mask, match = a32_opcode('xxxx_0111_0000_xxxx__1111_xxxx_01x1_xxxx')
+    description = "HPI_SMUSD_A1"
+    mask, match = a32_opcode("xxxx_0111_0000_xxxx__1111_xxxx_01x1_xxxx")
+
 
 class HPI_SSAT_USAT_no_shift_A1(MinorFUTiming):
-    description = 'HPI_SSAT_USAT_no_shift_A1'
+    description = "HPI_SSAT_USAT_no_shift_A1"
     # Order *before* shift
-    mask, match = a32_opcode('xxxx_0110_1x1x_xxxx__xxxx_0000_0001_xxxx')
+    mask, match = a32_opcode("xxxx_0110_1x1x_xxxx__xxxx_0000_0001_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 2, 0]
 
+
 class HPI_SSAT_USAT_shift_A1(MinorFUTiming):
-    description = 'HPI_SSAT_USAT_shift_A1'
+    description = "HPI_SSAT_USAT_shift_A1"
     # Order after shift
-    mask, match = a32_opcode('xxxx_0110_1x1x_xxxx__xxxx_xxxx_xx01_xxxx')
+    mask, match = a32_opcode("xxxx_0110_1x1x_xxxx__xxxx_xxxx_xx01_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 1, 0]
 
+
 class HPI_SSAT16_USAT16_A1(MinorFUTiming):
-    description = 'HPI_SSAT16_USAT16_A1'
-    mask, match = a32_opcode('xxxx_0110_1x10_xxxx__xxxx_xxxx_0011_xxxx')
+    description = "HPI_SSAT16_USAT16_A1"
+    mask, match = a32_opcode("xxxx_0110_1x10_xxxx__xxxx_xxxx_0011_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 2, 0]
 
+
 class HPI_SXTAB_T1(MinorFUTiming):
-    description = 'HPI_SXTAB_T1'
-    mask, match = t32_opcode('1111_1010_0100_xxxx__1111_xxxx_1xxx_xxxx')
+    description = "HPI_SXTAB_T1"
+    mask, match = t32_opcode("1111_1010_0100_xxxx__1111_xxxx_1xxx_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 1, 2, 0]
 
+
 class HPI_SXTAB_SXTAB16_SXTAH_UXTAB_UXTAB16_UXTAH_A1(MinorFUTiming):
-    description = 'HPI_SXTAB_SXTAB16_SXTAH_UXTAB_UXTAB16_UXTAH_A1'
+    description = "HPI_SXTAB_SXTAB16_SXTAH_UXTAB_UXTAB16_UXTAH_A1"
     # Place AFTER HPI_SXTB_SXTB16_SXTH_UXTB_UXTB16_UXTH_A1
     # e6[9d][^f]0070 are undefined
-    mask, match = a32_opcode('xxxx_0110_1xxx_xxxx__xxxx_xxxx_0111_xxxx')
+    mask, match = a32_opcode("xxxx_0110_1xxx_xxxx__xxxx_xxxx_0111_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 1, 2, 0]
 
+
 class HPI_SXTAB16_T1(MinorFUTiming):
-    description = 'HPI_SXTAB16_T1'
-    mask, match = t32_opcode('1111_1010_0010_xxxx__1111_xxxx_1xxx_xxxx')
+    description = "HPI_SXTAB16_T1"
+    mask, match = t32_opcode("1111_1010_0010_xxxx__1111_xxxx_1xxx_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 1, 2, 0]
 
+
 class HPI_SXTAH_T1(MinorFUTiming):
-    description = 'HPI_SXTAH_T1'
-    mask, match = t32_opcode('1111_1010_0000_xxxx__1111_xxxx_1xxx_xxxx')
+    description = "HPI_SXTAH_T1"
+    mask, match = t32_opcode("1111_1010_0000_xxxx__1111_xxxx_1xxx_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 1, 2, 0]
 
+
 class HPI_SXTB_T1(MinorFUTiming):
-    description = 'HPI_SXTB_T1'
-    mask, match = t16_opcode('1011_0010_01xx_xxxx')
+    description = "HPI_SXTB_T1"
+    mask, match = t16_opcode("1011_0010_01xx_xxxx")
+
+
 class HPI_SXTB_T2(MinorFUTiming):
-    description = 'HPI_SXTB_T2'
-    mask, match = t32_opcode('1111_1010_0100_1111__1111_xxxx_1xxx_xxxx')
+    description = "HPI_SXTB_T2"
+    mask, match = t32_opcode("1111_1010_0100_1111__1111_xxxx_1xxx_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 1, 2, 0]
 
+
 class HPI_SXTB_SXTB16_SXTH_UXTB_UXTB16_UXTH_A1(MinorFUTiming):
-    description = 'HPI_SXTB_SXTB16_SXTH_UXTB_UXTB16_UXTH_A1'
+    description = "HPI_SXTB_SXTB16_SXTH_UXTB_UXTB16_UXTH_A1"
     # e6[9d]f0070 are undefined
-    mask, match = a32_opcode('xxxx_0110_1xxx_1111__xxxx_xxxx_0111_xxxx')
+    mask, match = a32_opcode("xxxx_0110_1xxx_1111__xxxx_xxxx_0111_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 2, 0]
 
+
 class HPI_SXTB16_T1(MinorFUTiming):
-    description = 'HPI_SXTB16_T1'
-    mask, match = t32_opcode('1111_1010_0010_1111__1111_xxxx_1xxx_xxxx')
+    description = "HPI_SXTB16_T1"
+    mask, match = t32_opcode("1111_1010_0010_1111__1111_xxxx_1xxx_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 1, 2, 0]
 
+
 class HPI_SXTH_T1(MinorFUTiming):
-    description = 'HPI_SXTH_T1'
-    mask, match = t16_opcode('1011_0010_00xx_xxxx')
+    description = "HPI_SXTH_T1"
+    mask, match = t16_opcode("1011_0010_00xx_xxxx")
+
+
 class HPI_SXTH_T2(MinorFUTiming):
-    description = 'HPI_SXTH_T2'
-    mask, match = t32_opcode('1111_1010_0000_1111__1111_xxxx_1xxx_xxxx')
+    description = "HPI_SXTH_T2"
+    mask, match = t32_opcode("1111_1010_0000_1111__1111_xxxx_1xxx_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 1, 2, 0]
 
-class HPI_UDIV_T1(MinorFUTiming):
-    description = 'HPI_UDIV_T1'
-    mask, match = t32_opcode('1111_1011_1011_xxxx__xxxx_xxxx_1111_xxxx')
 
-udiv_lat_expr = expr_top(let([
-    ('left', int_reg(src(4))),
-    ('right', int_reg(src(3))),
-    ('left_size', un('SizeInBits', ref('left'))),
-    ('right_size', un('SizeInBits',
-        bin('UDiv', ref('right'), literal(2)))),
-    ('left_minus_right', if_expr(
-        bin('SLessThan', ref('left_size'), ref('right_size')),
-        literal(0),
-        bin('Sub', ref('left_size'), ref('right_size'))))
-    ],
-    if_expr(bin('Equal', ref('right'), literal(0)),
-        literal(0),
-        bin('UDiv', ref('left_minus_right'), literal(4)))
-    ))
+class HPI_UDIV_T1(MinorFUTiming):
+    description = "HPI_UDIV_T1"
+    mask, match = t32_opcode("1111_1011_1011_xxxx__xxxx_xxxx_1111_xxxx")
+
+
+udiv_lat_expr = expr_top(
+    let(
+        [
+            ("left", src_reg(4)),
+            ("right", src_reg(3)),
+            ("left_size", un("SizeInBits", ref("left"))),
+            (
+                "right_size",
+                un("SizeInBits", bin("UDiv", ref("right"), literal(2))),
+            ),
+            (
+                "left_minus_right",
+                if_expr(
+                    bin("SLessThan", ref("left_size"), ref("right_size")),
+                    literal(0),
+                    bin("Sub", ref("left_size"), ref("right_size")),
+                ),
+            ),
+        ],
+        if_expr(
+            bin("Equal", ref("right"), literal(0)),
+            literal(0),
+            bin("UDiv", ref("left_minus_right"), literal(4)),
+        ),
+    )
+)
+
 
 class HPI_UDIV_A1(MinorFUTiming):
-    description = 'HPI_UDIV_A1'
-    mask, match = a32_opcode('xxxx_0111_0011_xxxx__xxxx_xxxx_0001_xxxx')
+    description = "HPI_UDIV_A1"
+    mask, match = a32_opcode("xxxx_0111_0011_xxxx__xxxx_xxxx_0001_xxxx")
     extraCommitLat = 0
     srcRegsRelativeLats = []
     extraCommitLatExpr = udiv_lat_expr
 
+
 class HPI_UMAAL_T1(MinorFUTiming):
-    description = 'HPI_UMAAL_T1'
-    mask, match = t32_opcode('1111_1011_1110_xxxx__xxxx_xxxx_0110_xxxx')
+    description = "HPI_UMAAL_T1"
+    mask, match = t32_opcode("1111_1011_1110_xxxx__xxxx_xxxx_0110_xxxx")
     # z, z, z, dlo, dhi, l, r
     extraCommitLat = 1
     srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 0, 0]
 
+
 class HPI_UMAAL_A1(MinorFUTiming):
-    description = 'HPI_UMAAL_A1'
-    mask, match = a32_opcode('xxxx_0000_0100_xxxx__xxxx_xxxx_1001_xxxx')
+    description = "HPI_UMAAL_A1"
+    mask, match = a32_opcode("xxxx_0000_0100_xxxx__xxxx_xxxx_1001_xxxx")
     # z, z, z, dlo, dhi, l, r
     extraCommitLat = 1
     srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 0, 0]
 
+
 class HPI_UMLAL_T1(MinorFUTiming):
-    description = 'HPI_UMLAL_T1'
-    mask, match = t32_opcode('1111_1011_1110_xxxx__xxxx_xxxx_0000_xxxx')
+    description = "HPI_UMLAL_T1"
+    mask, match = t32_opcode("1111_1011_1110_xxxx__xxxx_xxxx_0000_xxxx")
+
 
 class HPI_UMLAL_A1(MinorFUTiming):
-    description = 'HPI_UMLAL_A1'
-    mask, match = t32_opcode('xxxx_0000_101x_xxxx__xxxx_xxxx_1001_xxxx')
+    description = "HPI_UMLAL_A1"
+    mask, match = a32_opcode("xxxx_0000_101x_xxxx__xxxx_xxxx_1001_xxxx")
+
 
 class HPI_UMULL_T1(MinorFUTiming):
-    description = 'HPI_UMULL_T1'
-    mask, match = t32_opcode('1111_1011_1010_xxxx__xxxx_xxxx_0000_xxxx')
+    description = "HPI_UMULL_T1"
+    mask, match = t32_opcode("1111_1011_1010_xxxx__xxxx_xxxx_0000_xxxx")
+
 
 class HPI_UMULL_A1(MinorFUTiming):
-    description = 'HPI_UMULL_A1'
-    mask, match = a32_opcode('xxxx_0000_100x_xxxx__xxxx_xxxx_1001_xxxx')
+    description = "HPI_UMULL_A1"
+    mask, match = a32_opcode("xxxx_0000_100x_xxxx__xxxx_xxxx_1001_xxxx")
+
 
 class HPI_USAD8_USADA8_A1(MinorFUTiming):
-    description = 'HPI_USAD8_USADA8_A1'
-    mask, match = a32_opcode('xxxx_0111_1000_xxxx__xxxx_xxxx_0001_xxxx')
+    description = "HPI_USAD8_USADA8_A1"
+    mask, match = a32_opcode("xxxx_0111_1000_xxxx__xxxx_xxxx_0001_xxxx")
     srcRegsRelativeLats = [0, 0, 0, 0, 0, 2, 0]
 
+
 class HPI_USAD8_USADA8_A1_Suppress(MinorFUTiming):
-    description = 'HPI_USAD8_USADA8_A1_Suppress'
-    mask, match = a32_opcode('xxxx_0111_1000_xxxx__xxxx_xxxx_0001_xxxx')
+    description = "HPI_USAD8_USADA8_A1_Suppress"
+    mask, match = a32_opcode("xxxx_0111_1000_xxxx__xxxx_xxxx_0001_xxxx")
     srcRegsRelativeLats = []
     suppress = True
 
+
 class HPI_VMOV_immediate_A1(MinorFUTiming):
-    description = 'HPI_VMOV_register_A1'
-    mask, match = a32_opcode('1111_0010_0x10_xxxx_xxxx_0001_xxx1_xxxx')
+    description = "HPI_VMOV_immediate_A1"
+    mask, match = a32_opcode("1111_0010_0x10_xxxx_xxxx_0001_xxx1_xxxx")
     # cpsr, z, z, z, hcptr, nsacr, cpacr, fpexc, scr
     srcRegsRelativeLats = [5, 5, 5, 5, 5, 5, 5, 5, 5, 0]
 
+
 class HPI_VMRS_A1(MinorFUTiming):
-    description = 'HPI_VMRS_A1'
-    mask, match = a32_opcode('xxxx_1110_1111_0001_xxxx_1010_xxx1_xxxx')
+    description = "HPI_VMRS_A1"
+    mask, match = a32_opcode("xxxx_1110_1111_0001_xxxx_1010_xxx1_xxxx")
     # cpsr,z,z,z,hcptr,nsacr,cpacr,scr,r42
     srcRegsRelativeLats = [5, 5, 5, 5, 5, 5, 5, 5, 5, 0]
 
+
 class HPI_VMOV_register_A2(MinorFUTiming):
-    description = 'HPI_VMOV_register_A2'
-    mask, match = a32_opcode('xxxx_1110_1x11_0000_xxxx_101x_01x0_xxxx')
+    description = "HPI_VMOV_register_A2"
+    mask, match = a32_opcode("xxxx_1110_1x11_0000_xxxx_101x_01x0_xxxx")
     # cpsr, z, r39, z, hcptr, nsacr, cpacr, fpexc, scr, f4, f5, f0, f1
-    srcRegsRelativeLats = \
-        [5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+    srcRegsRelativeLats = [
+        5,
+        5,
+        5,
+        5,
+        5,
+        5,
+        5,
+        5,
+        5,
+        4,
+        4,
+        4,
+        4,
+        4,
+        4,
+        4,
+        4,
+        0,
+    ]
+
 
 # VADD.I16 D/VADD.F32 D/VADD.I8 D/VADD.I32 D
 class HPI_VADD2H_A32(MinorFUTiming):
-    description = 'Vadd2hALU'
-    mask, match = a32_opcode('1111_0010_0xxx_xxxx__xxxx_1000_xxx0_xxxx')
+    description = "Vadd2hALU"
+    mask, match = a32_opcode("1111_0010_0xxx_xxxx__xxxx_1000_xxx0_xxxx")
     # cpsr, z, z, z, cpacr, fpexc, l0, r0, l1, r1, l2, r2, l3, r3 (for vadd2h)
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  4, 4, 4, 4, 4, 4, 4, 4, 0]
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
 
 # VAQQHN.I16 Q/VAQQHN.I32 Q/VAQQHN.I64 Q
 class HPI_VADDHN_A32(MinorFUTiming):
-    description = 'VaddhnALU'
-    mask, match = a32_opcode('1111_0010_1xxx_xxxx__xxxx_0100_x0x0_xxxx')
+    description = "VaddhnALU"
+    mask, match = a32_opcode("1111_0010_1xxx_xxxx__xxxx_0100_x0x0_xxxx")
     # cpsr, z, z, z, cpacr, fpexc, l0, l1, l2, l3, r0, r1, r2, r3
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  3, 3, 3, 3, 3, 3, 3, 3, 0]
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0]
+
 
 class HPI_VADDL_A32(MinorFUTiming):
-    description = 'VaddlALU'
-    mask, match = a32_opcode('1111_001x_1xxx_xxxx__xxxx_0000_x0x0_xxxx')
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  3, 3, 3, 3, 3, 3, 3, 3, 0]
+    description = "VaddlALU"
+    mask, match = a32_opcode("1111_001x_1xxx_xxxx__xxxx_0000_x0x0_xxxx")
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0]
+
 
 class HPI_VADDW_A32(MinorFUTiming):
-    description = 'HPI_VADDW_A32'
-    mask, match = a32_opcode('1111_001x_1xxx_xxxx__xxxx_0001_x0x0_xxxx')
+    description = "HPI_VADDW_A32"
+    mask, match = a32_opcode("1111_001x_1xxx_xxxx__xxxx_0001_x0x0_xxxx")
     # cpsr, z, z, z, cpacr, fpexc, l0, l1, l2, l3, r0, r1
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  4, 4, 4, 4, 3, 3, 0]
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 3, 3, 0]
+
 
 # VHADD/VHSUB S8,S16,S32,U8,U16,U32 Q and D
 class HPI_VHADD_A32(MinorFUTiming):
-    description = 'HPI_VHADD_A32'
-    mask, match = a32_opcode('1111_001x_0xxx_xxxx__xxxx_00x0_xxx0_xxxx')
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  4, 4, 4, 4, 4, 4, 4, 4, 0]
+    description = "HPI_VHADD_A32"
+    mask, match = a32_opcode("1111_001x_0xxx_xxxx__xxxx_00x0_xxx0_xxxx")
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
 
 class HPI_VPADAL_A32(MinorFUTiming):
-    description = 'VpadalALU'
-    mask, match = a32_opcode('1111_0011_1x11_xx00__xxxx_0110_xxx0_xxxx')
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  2, 2, 2, 2, 2, 2, 2, 2, 0]
+    description = "VpadalALU"
+    mask, match = a32_opcode("1111_0011_1x11_xx00__xxxx_0110_xxx0_xxxx")
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0]
+
 
 # VPADDH.I16
 class HPI_VPADDH_A32(MinorFUTiming):
-    description = 'VpaddhALU'
-    mask, match = a32_opcode('1111_0010_0xxx_xxxx__xxxx_1011_xxx1_xxxx')
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  3, 3, 3, 3, 3, 3, 3, 3, 0]
+    description = "VpaddhALU"
+    mask, match = a32_opcode("1111_0010_0xxx_xxxx__xxxx_1011_xxx1_xxxx")
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0]
+
 
 # VPADDH.F32
 class HPI_VPADDS_A32(MinorFUTiming):
-    description = 'VpaddsALU'
-    mask, match = a32_opcode('1111_0011_0x0x_xxxx__xxxx_1101_xxx0_xxxx')
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  2, 2, 2, 2, 2, 2, 2, 2, 0]
+    description = "VpaddsALU"
+    mask, match = a32_opcode("1111_0011_0x0x_xxxx__xxxx_1101_xxx0_xxxx")
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0]
+
 
 # VPADDL.S16
 class HPI_VPADDL_A32(MinorFUTiming):
-    description = 'VpaddlALU'
-    mask, match = a32_opcode('1111_0011_1x11_xx00__xxxx_0010_xxx0_xxxx')
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  3, 3, 3, 3, 3, 3, 3, 3, 0]
+    description = "VpaddlALU"
+    mask, match = a32_opcode("1111_0011_1x11_xx00__xxxx_0010_xxx0_xxxx")
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0]
+
 
 # VRADDHN.I16
 class HPI_VRADDHN_A32(MinorFUTiming):
-    description = 'HPI_VRADDHN_A32'
-    mask, match = a32_opcode('1111_0011_1xxx_xxxx__xxxx_0100_x0x0_xxxx')
+    description = "HPI_VRADDHN_A32"
+    mask, match = a32_opcode("1111_0011_1xxx_xxxx__xxxx_0100_x0x0_xxxx")
     # cpsr, z, z, z, cpacr, fpexc, l0, l1, l2, l3, r0, r1, r2, r3
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  4, 4, 4, 4, 4, 4, 4, 4, 0]
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
 
 class HPI_VRHADD_A32(MinorFUTiming):
-    description = 'VrhaddALU'
-    mask, match = a32_opcode('1111_001x_0xxx_xxxx__xxxx_0001_xxx0_xxxx')
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  4, 4, 4, 4, 4, 4, 4, 4, 0]
+    description = "VrhaddALU"
+    mask, match = a32_opcode("1111_001x_0xxx_xxxx__xxxx_0001_xxx0_xxxx")
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
 
 class HPI_VQADD_A32(MinorFUTiming):
-    description = 'VqaddALU'
-    mask, match = a32_opcode('1111_001x_0xxx_xxxx__xxxx_0000_xxx1_xxxx')
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  3, 3, 3, 3, 3, 3, 3, 3, 0]
+    description = "VqaddALU"
+    mask, match = a32_opcode("1111_001x_0xxx_xxxx__xxxx_0000_xxx1_xxxx")
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0]
+
 
 class HPI_VANDQ_A32(MinorFUTiming):
-    description = 'VandqALU'
-    mask, match = a32_opcode('1111_0010_0x00_xxxx__xxxx_0001_xxx1_xxxx')
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  5, 5, 5, 5, 5, 5, 5, 5, 0]
+    description = "VandqALU"
+    mask, match = a32_opcode("1111_0010_0x00_xxxx__xxxx_0001_xxx1_xxxx")
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 0]
+
 
 # VMUL (integer)
 class HPI_VMULI_A32(MinorFUTiming):
-    description = 'VmuliALU'
-    mask, match = a32_opcode('1111_001x_0xxx_xxxx__xxxx_1001_xxx1_xxxx')
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  2, 2, 2, 2, 2, 2, 2, 2, 0]
+    description = "VmuliALU"
+    mask, match = a32_opcode("1111_001x_0xxx_xxxx__xxxx_1001_xxx1_xxxx")
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0]
+
 
 # VBIC (reg)
 class HPI_VBIC_A32(MinorFUTiming):
-    description = 'VbicALU'
-    mask, match = a32_opcode('1111_0010_0x01_xxxx__xxxx_0001_xxx1_xxxx')
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  5, 5, 5, 5, 5, 5, 5, 5, 0]
+    description = "VbicALU"
+    mask, match = a32_opcode("1111_0010_0x01_xxxx__xxxx_0001_xxx1_xxxx")
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 0]
+
 
 # VBIF VBIT VBSL
 class HPI_VBIF_ETC_A32(MinorFUTiming):
-    description = 'VbifALU'
-    mask, match = a32_opcode('1111_0011_0xxx_xxxx__xxxx_0001_xxx1_xxxx')
-    srcRegsRelativeLats = \
-        [0, 0, 0, 0, 0, 0,  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0]
+    description = "VbifALU"
+    mask, match = a32_opcode("1111_0011_0xxx_xxxx__xxxx_0001_xxx1_xxxx")
+    srcRegsRelativeLats = [
+        0,
+        0,
+        0,
+        0,
+        0,
+        0,
+        5,
+        5,
+        5,
+        5,
+        5,
+        5,
+        5,
+        5,
+        5,
+        5,
+        5,
+        5,
+        0,
+    ]
+
 
 class HPI_VACGE_A32(MinorFUTiming):
-    description = 'VacgeALU'
-    mask, match = a32_opcode('1111_0011_0xxx_xxxx__xxxx_1110_xxx1_xxxx')
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+    description = "VacgeALU"
+    mask, match = a32_opcode("1111_0011_0xxx_xxxx__xxxx_1110_xxx1_xxxx")
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
 
 # VCEQ.F32
 class HPI_VCEQ_A32(MinorFUTiming):
-    description = 'VceqALU'
-    mask, match = a32_opcode('1111_0010_0x0x_xxxx__xxxx_1110_xxx0_xxxx')
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+    description = "VceqALU"
+    mask, match = a32_opcode("1111_0010_0x0x_xxxx__xxxx_1110_xxx0_xxxx")
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
 
 # VCEQ.[IS]... register
 class HPI_VCEQI_A32(MinorFUTiming):
-    description = 'VceqiALU'
-    mask, match = a32_opcode('1111_0011_0xxx_xxxx__xxxx_1000_xxx1_xxxx')
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+    description = "VceqiALU"
+    mask, match = a32_opcode("1111_0011_0xxx_xxxx__xxxx_1000_xxx1_xxxx")
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
 
 # VCEQ.[IS]... immediate
 class HPI_VCEQII_A32(MinorFUTiming):
-    description = 'HPI_VCEQII_A32'
-    mask, match = a32_opcode('1111_0011_1x11_xx01__xxxx_0x01_0xx0_xxxx')
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+    description = "HPI_VCEQII_A32"
+    mask, match = a32_opcode("1111_0011_1x11_xx01__xxxx_0x01_0xx0_xxxx")
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
 
 class HPI_VTST_A32(MinorFUTiming):
-    description = 'HPI_VTST_A32'
-    mask, match = a32_opcode('1111_0010_0xxx_xxxx__xxxx_1000_xxx1_xxxx')
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  3, 3, 3, 3, 3, 3, 3, 3, 3, 0]
+    description = "HPI_VTST_A32"
+    mask, match = a32_opcode("1111_0010_0xxx_xxxx__xxxx_1000_xxx1_xxxx")
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0]
+
 
 class HPI_VCLZ_A32(MinorFUTiming):
-    description = 'HPI_VCLZ_A32'
-    mask, match = a32_opcode('1111_0011_1x11_xx00__xxxx_0100_1xx0_xxxx')
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+    description = "HPI_VCLZ_A32"
+    mask, match = a32_opcode("1111_0011_1x11_xx00__xxxx_0100_1xx0_xxxx")
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
 
 class HPI_VCNT_A32(MinorFUTiming):
-    description = 'HPI_VCNT_A32'
-    mask, match = a32_opcode('1111_0011_1x11_xx00__xxxx_0101_0xx0_xxxx')
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+    description = "HPI_VCNT_A32"
+    mask, match = a32_opcode("1111_0011_1x11_xx00__xxxx_0101_0xx0_xxxx")
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
 
 class HPI_VEXT_A32(MinorFUTiming):
-    description = 'HPI_VCNT_A32'
-    mask, match = a32_opcode('1111_0010_1x11_xxxx__xxxx_xxxx_xxx0_xxxx')
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+    description = "HPI_VEXT_A32"
+    mask, match = a32_opcode("1111_0010_1x11_xxxx__xxxx_xxxx_xxx0_xxxx")
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
 
 # VMAX VMIN integer
 class HPI_VMAXI_A32(MinorFUTiming):
-    description = 'HPI_VMAXI_A32'
-    mask, match = a32_opcode('1111_001x_0xxx_xxxx__xxxx_0110_xxxx_xxxx')
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+    description = "HPI_VMAXI_A32"
+    mask, match = a32_opcode("1111_001x_0xxx_xxxx__xxxx_0110_xxxx_xxxx")
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
 
 # VMAX VMIN float
 class HPI_VMAXS_A32(MinorFUTiming):
-    description = 'HPI_VMAXS_A32'
-    mask, match = a32_opcode('1111_0010_0xxx_xxxx__xxxx_1111_xxx0_xxxx')
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  2, 2, 2, 2, 2, 2, 2, 2, 2, 0]
+    description = "HPI_VMAXS_A32"
+    mask, match = a32_opcode("1111_0010_0xxx_xxxx__xxxx_1111_xxx0_xxxx")
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0]
+
 
 # VNEG integer
 class HPI_VNEGI_A32(MinorFUTiming):
-    description = 'HPI_VNEGI_A32'
-    mask, match = a32_opcode('1111_0011_1x11_xx01__xxxx_0x11_1xx0_xxxx')
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+    description = "HPI_VNEGI_A32"
+    mask, match = a32_opcode("1111_0011_1x11_xx01__xxxx_0x11_1xx0_xxxx")
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
 
 # VNEG float
 class HPI_VNEGF_A32(MinorFUTiming):
-    description = 'HPI_VNEGF_A32'
-    mask, match = a32_opcode('xxxx_1110_1x11_0001__xxxx_101x_01x0_xxxx')
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  2, 2, 2, 2, 2, 2, 2, 2, 2, 0]
+    description = "HPI_VNEGF_A32"
+    mask, match = a32_opcode("xxxx_1110_1x11_0001__xxxx_101x_01x0_xxxx")
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0]
+
 
 # VREV16 VREV32 VREV64
 class HPI_VREVN_A32(MinorFUTiming):
-    description = 'HPI_VREVN_A32'
-    mask, match = a32_opcode('1111_0011_1x11_xx00__xxxx_000x_xxx0_xxxx')
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+    description = "HPI_VREVN_A32"
+    mask, match = a32_opcode("1111_0011_1x11_xx00__xxxx_000x_xxx0_xxxx")
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
 
 class HPI_VQNEG_A32(MinorFUTiming):
-    description = 'HPI_VQNEG_A32'
-    mask, match = a32_opcode('1111_0011_1x11_xx00__xxxx_0111_1xx0_xxxx')
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  3, 3, 3, 3, 3, 3, 3, 3, 3, 0]
+    description = "HPI_VQNEG_A32"
+    mask, match = a32_opcode("1111_0011_1x11_xx00__xxxx_0111_1xx0_xxxx")
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0]
+
 
 class HPI_VSWP_A32(MinorFUTiming):
-    description = 'HPI_VSWP_A32'
-    mask, match = a32_opcode('1111_0011_1x11_xx10__xxxx_0000_0xx0_xxxx')
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  4, 4, 4, 4, 4, 4, 4, 4, 0]
+    description = "HPI_VSWP_A32"
+    mask, match = a32_opcode("1111_0011_1x11_xx10__xxxx_0000_0xx0_xxxx")
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
 
 class HPI_VTRN_A32(MinorFUTiming):
-    description = 'HPI_VTRN_A32'
-    mask, match = a32_opcode('1111_0011_1x11_xx10__xxxx_0000_1xx0_xxxx')
+    description = "HPI_VTRN_A32"
+    mask, match = a32_opcode("1111_0011_1x11_xx10__xxxx_0000_1xx0_xxxx")
     # cpsr, z, z, z, cpact, fpexc, o0, d0, o1, d1, o2, d2, o3, d3
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  2, 2, 2, 2, 2, 2, 2, 2, 0]
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0]
+
 
 # VQMOVN VQMOVUN
 class HPI_VQMOVN_A32(MinorFUTiming):
-    description = 'HPI_VQMOVN_A32'
-    mask, match = a32_opcode('1111_0011_1x11_xx10__xxxx_0010_xxx0_xxxx')
+    description = "HPI_VQMOVN_A32"
+    mask, match = a32_opcode("1111_0011_1x11_xx10__xxxx_0010_xxx0_xxxx")
     # cpsr, z, z, z, cpact, fpexc, o[0], o[1], o[2], o[3], fpscr
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  2, 2, 2, 2,  2, 0]
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 0]
+
 
 # VUZP double word
 class HPI_VUZP_A32(MinorFUTiming):
-    description = 'HPI_VUZP_A32'
-    mask, match = a32_opcode('1111_0011_1x11_xx10__xxxx_0001_00x0_xxxx')
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  3, 3, 3, 3, 3, 3, 3, 3, 0]
+    description = "HPI_VUZP_A32"
+    mask, match = a32_opcode("1111_0011_1x11_xx10__xxxx_0001_00x0_xxxx")
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0]
+
 
 # VDIV.F32
 class HPI_VDIV32_A32(MinorFUTiming):
-    description = 'HPI_VDIV32_A32'
-    mask, match = a32_opcode('xxxx_1110_1x00_xxxx__xxxx_1010_x0x0_xxxx')
+    description = "HPI_VDIV32_A32"
+    mask, match = a32_opcode("xxxx_1110_1x00_xxxx__xxxx_1010_x0x0_xxxx")
     # cpsr, z, z, z, cpact, fpexc, fpscr_exc, l, r
     extraCommitLat = 9
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 20,  4, 4, 0]
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 20, 4, 4, 0]
+
 
 # VDIV.F64
 class HPI_VDIV64_A32(MinorFUTiming):
-    description = 'HPI_VDIV64_A32'
-    mask, match = a32_opcode('xxxx_1110_1x00_xxxx__xxxx_1011_x0x0_xxxx')
+    description = "HPI_VDIV64_A32"
+    mask, match = a32_opcode("xxxx_1110_1x00_xxxx__xxxx_1011_x0x0_xxxx")
     # cpsr, z, z, z, cpact, fpexc, fpscr_exc, l, r
     extraCommitLat = 18
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 20,  4, 4, 0]
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 20, 4, 4, 0]
+
 
 class HPI_VZIP_A32(MinorFUTiming):
-    description = 'HPI_VZIP_A32'
-    mask, match = a32_opcode('1111_0011_1x11_xx10__xxxx_0001_1xx0_xxxx')
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  4, 4, 4, 4, 4, 4, 4, 4, 0]
+    description = "HPI_VZIP_A32"
+    mask, match = a32_opcode("1111_0011_1x11_xx10__xxxx_0001_1xx0_xxxx")
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 0]
+
 
 # VPMAX integer
 class HPI_VPMAX_A32(MinorFUTiming):
-    description = 'HPI_VPMAX_A32'
-    mask, match = a32_opcode('1111_001x_0xxx_xxxx__xxxx_1010_xxxx_xxxx')
+    description = "HPI_VPMAX_A32"
+    mask, match = a32_opcode("1111_001x_0xxx_xxxx__xxxx_1010_xxxx_xxxx")
     # cpsr, z, z, z, cpact, fpexc, l0, r0, l1, r1, fpscr
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  4, 4, 4, 4,  4, 0]
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 0]
+
 
 # VPMAX float
 class HPI_VPMAXF_A32(MinorFUTiming):
-    description = 'HPI_VPMAXF_A32'
-    mask, match = a32_opcode('1111_0011_0xxx_xxxx__xxxx_1111_xxx0_xxxx')
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  2, 2, 2, 2, 0]
+    description = "HPI_VPMAXF_A32"
+    mask, match = a32_opcode("1111_0011_0xxx_xxxx__xxxx_1111_xxx0_xxxx")
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 0]
+
 
 class HPI_VMOVN_A32(MinorFUTiming):
-    description = 'HPI_VMOVN_A32'
-    mask, match = a32_opcode('1111_0011_1x11_xx10__xxxx_0010_00x0_xxxx')
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  4, 4, 4, 4, 0]
+    description = "HPI_VMOVN_A32"
+    mask, match = a32_opcode("1111_0011_1x11_xx10__xxxx_0010_00x0_xxxx")
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0]
+
 
 class HPI_VMOVL_A32(MinorFUTiming):
-    description = 'HPI_VMOVL_A32'
-    mask, match = a32_opcode('1111_001x_1xxx_x000__xxxx_1010_00x1_xxxx')
-    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0,  4, 4, 4, 4, 0]
+    description = "HPI_VMOVL_A32"
+    mask, match = a32_opcode("1111_001x_1xxx_x000__xxxx_1010_00x1_xxxx")
+    srcRegsRelativeLats = [0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0]
+
 
 # VSQRT.F64
 class HPI_VSQRT64_A32(MinorFUTiming):
-    description = 'HPI_VSQRT64_A32'
-    mask, match = a32_opcode('xxxx_1110_1x11_0001__xxxx_1011_11x0_xxxx')
+    description = "HPI_VSQRT64_A32"
+    mask, match = a32_opcode("xxxx_1110_1x11_0001__xxxx_1011_11x0_xxxx")
     extraCommitLat = 18
     srcRegsRelativeLats = []
 
+
 # VSQRT.F32
 class HPI_VSQRT32_A32(MinorFUTiming):
-    description = 'HPI_VSQRT32_A32'
-    mask, match = a32_opcode('xxxx_1110_1x11_0001__xxxx_1010_11x0_xxxx')
+    description = "HPI_VSQRT32_A32"
+    mask, match = a32_opcode("xxxx_1110_1x11_0001__xxxx_1010_11x0_xxxx")
     extraCommitLat = 9
     srcRegsRelativeLats = []
 
+
 class HPI_FloatSimdFU(MinorFU):
-    opClasses = minorMakeOpClassSet([
-        'FloatAdd', 'FloatCmp', 'FloatCvt', 'FloatMult', 'FloatDiv',
-        'FloatSqrt', 'FloatMisc', 'FloatMultAcc',
-        'SimdAdd', 'SimdAddAcc', 'SimdAlu', 'SimdCmp', 'SimdCvt',
-        'SimdMisc', 'SimdMult', 'SimdMultAcc', 'SimdShift', 'SimdShiftAcc',
-        'SimdSqrt', 'SimdFloatAdd', 'SimdFloatAlu', 'SimdFloatCmp',
-        'SimdFloatCvt', 'SimdFloatDiv', 'SimdFloatMisc', 'SimdFloatMult',
-        'SimdFloatMultAcc', 'SimdFloatSqrt'])
+    opClasses = minorMakeOpClassSet(
+        [
+            "FloatAdd",
+            "FloatCmp",
+            "FloatCvt",
+            "FloatMult",
+            "FloatDiv",
+            "FloatSqrt",
+            "FloatMisc",
+            "FloatMultAcc",
+            "SimdAdd",
+            "SimdAddAcc",
+            "SimdAlu",
+            "SimdCmp",
+            "SimdCvt",
+            "SimdMisc",
+            "SimdMult",
+            "SimdMultAcc",
+            "SimdShift",
+            "SimdShiftAcc",
+            "SimdSqrt",
+            "SimdFloatAdd",
+            "SimdFloatAlu",
+            "SimdFloatCmp",
+            "SimdFloatCvt",
+            "SimdFloatDiv",
+            "SimdFloatMisc",
+            "SimdFloatMult",
+            "SimdFloatMultAcc",
+            "SimdFloatSqrt",
+        ]
+    )
 
     timings = [
         # VUZP and VZIP must be before VADDW/L
-        HPI_VUZP_A32(), HPI_VZIP_A32(),
-        HPI_VADD2H_A32(), HPI_VADDHN_A32(),
-        HPI_VADDL_A32(), HPI_VADDW_A32(),
-        HPI_VHADD_A32(), HPI_VPADAL_A32(),
-        HPI_VPADDH_A32(), HPI_VPADDS_A32(),
-        HPI_VPADDL_A32(), HPI_VRADDHN_A32(),
-        HPI_VRHADD_A32(), HPI_VQADD_A32(),
-        HPI_VANDQ_A32(), HPI_VBIC_A32(),
-        HPI_VBIF_ETC_A32(), HPI_VACGE_A32(),
-        HPI_VCEQ_A32(), HPI_VCEQI_A32(),
-        HPI_VCEQII_A32(), HPI_VTST_A32(),
-        HPI_VCLZ_A32(), HPI_VCNT_A32(),
-        HPI_VEXT_A32(), HPI_VMAXI_A32(),
-        HPI_VMAXS_A32(), HPI_VNEGI_A32(),
-        HPI_VNEGF_A32(), HPI_VREVN_A32(),
-        HPI_VQNEG_A32(), HPI_VSWP_A32(),
-        HPI_VTRN_A32(), HPI_VPMAX_A32(),
-        HPI_VPMAXF_A32(), HPI_VMOVN_A32(),
+        HPI_VUZP_A32(),
+        HPI_VZIP_A32(),
+        HPI_VADD2H_A32(),
+        HPI_VADDHN_A32(),
+        HPI_VADDL_A32(),
+        HPI_VADDW_A32(),
+        HPI_VHADD_A32(),
+        HPI_VPADAL_A32(),
+        HPI_VPADDH_A32(),
+        HPI_VPADDS_A32(),
+        HPI_VPADDL_A32(),
+        HPI_VRADDHN_A32(),
+        HPI_VRHADD_A32(),
+        HPI_VQADD_A32(),
+        HPI_VANDQ_A32(),
+        HPI_VBIC_A32(),
+        HPI_VBIF_ETC_A32(),
+        HPI_VACGE_A32(),
+        HPI_VCEQ_A32(),
+        HPI_VCEQI_A32(),
+        HPI_VCEQII_A32(),
+        HPI_VTST_A32(),
+        HPI_VCLZ_A32(),
+        HPI_VCNT_A32(),
+        HPI_VEXT_A32(),
+        HPI_VMAXI_A32(),
+        HPI_VMAXS_A32(),
+        HPI_VNEGI_A32(),
+        HPI_VNEGF_A32(),
+        HPI_VREVN_A32(),
+        HPI_VQNEG_A32(),
+        HPI_VSWP_A32(),
+        HPI_VTRN_A32(),
+        HPI_VPMAX_A32(),
+        HPI_VPMAXF_A32(),
+        HPI_VMOVN_A32(),
         HPI_VMRS_A1(),
         HPI_VMOV_immediate_A1(),
         HPI_VMOV_register_A2(),
-        HPI_VQMOVN_A32(), HPI_VMOVL_A32(),
-        HPI_VDIV32_A32(), HPI_VDIV64_A32(),
-        HPI_VSQRT32_A32(), HPI_VSQRT64_A32(),
+        HPI_VQMOVN_A32(),
+        HPI_VMOVL_A32(),
+        HPI_VDIV32_A32(),
+        HPI_VDIV64_A32(),
+        HPI_VSQRT32_A32(),
+        HPI_VSQRT64_A32(),
         HPI_VMULI_A32(),
         # Add before here
         HPI_FMADD_A64(),
@@ -1153,14 +1489,17 @@
         HPI_FMOV_A64(),
         HPI_ADD_SUB_vector_scalar_A64(),
         HPI_ADD_SUB_vector_vector_A64(),
-        HPI_FDIV_scalar_32_A64(), HPI_FDIV_scalar_64_A64(),
+        HPI_FDIV_scalar_32_A64(),
+        HPI_FDIV_scalar_64_A64(),
         HPI_DefaultA64Vfp(),
-        HPI_DefaultVfp()]
+        HPI_DefaultVfp(),
+    ]
 
     opLat = 6
 
+
 class HPI_IntFU(MinorFU):
-    opClasses = minorMakeOpClassSet(['IntAlu'])
+    opClasses = minorMakeOpClassSet(["IntAlu"])
     # IMPORTANT! Keep the order below, add new entries *at the head*
     timings = [
         HPI_SSAT_USAT_no_shift_A1(),
@@ -1179,17 +1518,14 @@
         HPI_SASX_SHASX_UASX_UHASX_A1(),
         HPI_SHSAX_SSAX_UHSAX_USAX_A1(),
         HPI_SXTB_SXTB16_SXTH_UXTB_UXTB16_UXTH_A1(),
-
         # Must be after HPI_SXTB_SXTB16_SXTH_UXTB_UXTB16_UXTH_A1
         HPI_SXTAB_SXTAB16_SXTAH_UXTAB_UXTAB16_UXTAH_A1(),
-
         HPI_SXTAB_T1(),
         HPI_SXTAB16_T1(),
         HPI_SXTAH_T1(),
         HPI_SXTB_T2(),
         HPI_SXTB16_T1(),
         HPI_SXTH_T2(),
-
         HPI_PKH_A1(),
         HPI_PKH_T1(),
         HPI_SBFX_UBFX_A1(),
@@ -1200,24 +1536,22 @@
         HPI_USAD8_USADA8_A1(),
         HPI_BFI_A1(),
         HPI_BFI_T1(),
-
         HPI_CMN_register_A1(),
         HPI_CMN_immediate_A1(),
         HPI_CMP_register_A1(),
         HPI_CMP_immediate_A1(),
-
         HPI_DataProcessingNoShift(),
         HPI_DataProcessingMovShiftr(),
         HPI_DataProcessingMayShift(),
-
         HPI_Cxxx_A64(),
-
         HPI_DefaultA64Int(),
-        HPI_DefaultInt()]
+        HPI_DefaultInt(),
+    ]
     opLat = 3
 
+
 class HPI_Int2FU(MinorFU):
-    opClasses = minorMakeOpClassSet(['IntAlu'])
+    opClasses = minorMakeOpClassSet(["IntAlu"])
     # IMPORTANT! Keep the order below, add new entries *at the head*
     timings = [
         HPI_SSAT_USAT_no_shift_A1(),
@@ -1236,17 +1570,14 @@
         HPI_SASX_SHASX_UASX_UHASX_A1(),
         HPI_SHSAX_SSAX_UHSAX_USAX_A1(),
         HPI_SXTB_SXTB16_SXTH_UXTB_UXTB16_UXTH_A1(),
-
         # Must be after HPI_SXTB_SXTB16_SXTH_UXTB_UXTB16_UXTH_A1
         HPI_SXTAB_SXTAB16_SXTAH_UXTAB_UXTAB16_UXTAH_A1(),
-
         HPI_SXTAB_T1(),
         HPI_SXTAB16_T1(),
         HPI_SXTAH_T1(),
         HPI_SXTB_T2(),
         HPI_SXTB16_T1(),
         HPI_SXTH_T2(),
-
         HPI_PKH_A1(),
         HPI_PKH_T1(),
         HPI_SBFX_UBFX_A1(),
@@ -1257,16 +1588,13 @@
         HPI_USAD8_USADA8_A1_Suppress(),
         HPI_BFI_A1(),
         HPI_BFI_T1(),
-
-        HPI_CMN_register_A1(), # Need to check for shift
+        HPI_CMN_register_A1(),  # Need to check for shift
         HPI_CMN_immediate_A1(),
-        HPI_CMP_register_A1(), # Need to check for shift
+        HPI_CMP_register_A1(),  # Need to check for shift
         HPI_CMP_immediate_A1(),
-
         HPI_DataProcessingNoShift(),
         HPI_DataProcessingAllowShifti(),
         # HPI_DataProcessingAllowMovShiftr(),
-
         # Data processing ops that match SuppressShift but are *not*
         # to be suppressed here
         HPI_CLZ_A1(),
@@ -1275,63 +1603,80 @@
         # Can you dual issue a branch?
         # HPI_DataProcessingSuppressBranch(),
         HPI_Cxxx_A64(),
-
         HPI_DefaultA64Int(),
-        HPI_DefaultInt()]
+        HPI_DefaultInt(),
+    ]
     opLat = 3
 
+
 class HPI_IntMulFU(MinorFU):
-    opClasses = minorMakeOpClassSet(['IntMult'])
+    opClasses = minorMakeOpClassSet(["IntMult"])
     timings = [
-        HPI_MLA_A1(), HPI_MLA_T1(),
-        HPI_MLS_A1(), HPI_MLS_T1(),
-        HPI_SMLABB_A1(), HPI_SMLABB_T1(),
-        HPI_SMLAWB_A1(), HPI_SMLAWB_T1(),
-        HPI_SMLAD_A1(), HPI_SMLAD_T1(),
-        HPI_SMMUL_A1(), HPI_SMMUL_T1(),
+        HPI_MLA_A1(),
+        HPI_MLA_T1(),
+        HPI_MLS_A1(),
+        HPI_MLS_T1(),
+        HPI_SMLABB_A1(),
+        HPI_SMLABB_T1(),
+        HPI_SMLAWB_A1(),
+        HPI_SMLAWB_T1(),
+        HPI_SMLAD_A1(),
+        HPI_SMLAD_T1(),
+        HPI_SMMUL_A1(),
+        HPI_SMMUL_T1(),
         # SMMUL_A1 must be before SMMLA_A1
-        HPI_SMMLA_A1(), HPI_SMMLA_T1(),
-        HPI_SMMLS_A1(), HPI_SMMLS_T1(),
-        HPI_UMAAL_A1(), HPI_UMAAL_T1(),
-
+        HPI_SMMLA_A1(),
+        HPI_SMMLA_T1(),
+        HPI_SMMLS_A1(),
+        HPI_SMMLS_T1(),
+        HPI_UMAAL_A1(),
+        HPI_UMAAL_T1(),
         HPI_MADD_A64(),
         HPI_DefaultA64Mul(),
-        HPI_DefaultMul()]
+        HPI_DefaultMul(),
+    ]
     opLat = 3
-    cantForwardFromFUIndices = [0, 1, 5] # Int1, Int2, Mem
+    cantForwardFromFUIndices = [0, 1, 5]  # Int1, Int2, Mem
+
 
 class HPI_IntDivFU(MinorFU):
-    opClasses = minorMakeOpClassSet(['IntDiv'])
-    timings = [HPI_SDIV_A1(), HPI_UDIV_A1(),
-        HPI_SDIV_A64()]
+    opClasses = minorMakeOpClassSet(["IntDiv"])
+    timings = [HPI_SDIV_A1(), HPI_UDIV_A1(), HPI_SDIV_A64()]
     issueLat = 3
     opLat = 3
 
+
 class HPI_MemFU(MinorFU):
-    opClasses = minorMakeOpClassSet(['MemRead', 'MemWrite', 'FloatMemRead',
-                                     'FloatMemWrite'])
+    opClasses = minorMakeOpClassSet(
+        ["MemRead", "MemWrite", "FloatMemRead", "FloatMemWrite"]
+    )
     timings = [HPI_DefaultMem(), HPI_DefaultMem64()]
     opLat = 1
-    cantForwardFromFUIndices = [5] # Mem (this FU)
+    cantForwardFromFUIndices = [5]  # Mem (this FU)
+
 
 class HPI_MiscFU(MinorFU):
-    opClasses = minorMakeOpClassSet(['IprAccess', 'InstPrefetch'])
+    opClasses = minorMakeOpClassSet(["IprAccess", "InstPrefetch"])
     opLat = 1
 
+
 class HPI_FUPool(MinorFUPool):
-    funcUnits = [HPI_IntFU(), # 0
-        HPI_Int2FU(), # 1
-        HPI_IntMulFU(), # 2
-        HPI_IntDivFU(), # 3
-        HPI_FloatSimdFU(), # 4
-        HPI_MemFU(), # 5
-        HPI_MiscFU() # 6
-        ]
+    funcUnits = [
+        HPI_IntFU(),  # 0
+        HPI_Int2FU(),  # 1
+        HPI_IntMulFU(),  # 2
+        HPI_IntDivFU(),  # 3
+        HPI_FloatSimdFU(),  # 4
+        HPI_MemFU(),  # 5
+        HPI_MiscFU(),  # 6
+    ]
+
 
 class HPI_MMU(ArmMMU):
     itb = ArmTLB(entry_type="instruction", size=256)
     dtb = ArmTLB(entry_type="data", size=256)
 
+
 class HPI_BP(TournamentBP):
     localPredictorSize = 64
     localCtrBits = 2
@@ -1345,28 +1690,29 @@
     RASSize = 8
     instShiftAmt = 2
 
+
 class HPI_ICache(Cache):
     data_latency = 1
     tag_latency = 1
     response_latency = 1
     mshrs = 2
     tgts_per_mshr = 8
-    size = '32kB'
+    size = "32kB"
     assoc = 2
     # No prefetcher, this is handled by the core
 
+
 class HPI_DCache(Cache):
     data_latency = 1
     tag_latency = 1
     response_latency = 1
     mshrs = 4
     tgts_per_mshr = 8
-    size = '32kB'
+    size = "32kB"
     assoc = 4
     write_buffers = 4
-    prefetcher = StridePrefetcher(
-        queue_size=4,
-        degree=4)
+    prefetcher = StridePrefetcher(queue_size=4, degree=4)
+
 
 class HPI_L2(Cache):
     data_latency = 13
@@ -1374,11 +1720,12 @@
     response_latency = 5
     mshrs = 4
     tgts_per_mshr = 8
-    size = '1024kB'
+    size = "1024kB"
     assoc = 16
     write_buffers = 16
     # prefetcher FIXME
 
+
 class HPI(ArmMinorCPU):
     # Inherit the doc string from the module to avoid repeating it
     # here.
@@ -1430,9 +1777,13 @@
 
     mmu = HPI_MMU()
 
+
 __all__ = [
     "HPI_BP",
-    "HPI_ITB", "HPI_DTB",
-    "HPI_ICache", "HPI_DCache", "HPI_L2",
+    "HPI_ITB",
+    "HPI_DTB",
+    "HPI_ICache",
+    "HPI_DCache",
+    "HPI_L2",
     "HPI",
 ]
diff --git a/configs/common/cores/arm/O3_ARM_v7a.py b/configs/common/cores/arm/O3_ARM_v7a.py
index d032a1a..77dc4e4 100644
--- a/configs/common/cores/arm/O3_ARM_v7a.py
+++ b/configs/common/cores/arm/O3_ARM_v7a.py
@@ -28,65 +28,82 @@
 
 # Simple ALU Instructions have a latency of 1
 class O3_ARM_v7a_Simple_Int(FUDesc):
-    opList = [ OpDesc(opClass='IntAlu', opLat=1) ]
+    opList = [OpDesc(opClass="IntAlu", opLat=1)]
     count = 2
 
+
 # Complex ALU instructions have a variable latencies
 class O3_ARM_v7a_Complex_Int(FUDesc):
-    opList = [ OpDesc(opClass='IntMult', opLat=3, pipelined=True),
-               OpDesc(opClass='IntDiv', opLat=12, pipelined=False),
-               OpDesc(opClass='IprAccess', opLat=3, pipelined=True) ]
+    opList = [
+        OpDesc(opClass="IntMult", opLat=3, pipelined=True),
+        OpDesc(opClass="IntDiv", opLat=12, pipelined=False),
+        OpDesc(opClass="IprAccess", opLat=3, pipelined=True),
+    ]
     count = 1
 
 
 # Floating point and SIMD instructions
 class O3_ARM_v7a_FP(FUDesc):
-    opList = [ OpDesc(opClass='SimdAdd', opLat=4),
-               OpDesc(opClass='SimdAddAcc', opLat=4),
-               OpDesc(opClass='SimdAlu', opLat=4),
-               OpDesc(opClass='SimdCmp', opLat=4),
-               OpDesc(opClass='SimdCvt', opLat=3),
-               OpDesc(opClass='SimdMisc', opLat=3),
-               OpDesc(opClass='SimdMult',opLat=5),
-               OpDesc(opClass='SimdMultAcc',opLat=5),
-               OpDesc(opClass='SimdShift',opLat=3),
-               OpDesc(opClass='SimdShiftAcc', opLat=3),
-               OpDesc(opClass='SimdSqrt', opLat=9),
-               OpDesc(opClass='SimdFloatAdd',opLat=5),
-               OpDesc(opClass='SimdFloatAlu',opLat=5),
-               OpDesc(opClass='SimdFloatCmp', opLat=3),
-               OpDesc(opClass='SimdFloatCvt', opLat=3),
-               OpDesc(opClass='SimdFloatDiv', opLat=3),
-               OpDesc(opClass='SimdFloatMisc', opLat=3),
-               OpDesc(opClass='SimdFloatMult', opLat=3),
-               OpDesc(opClass='SimdFloatMultAcc',opLat=5),
-               OpDesc(opClass='SimdFloatSqrt', opLat=9),
-               OpDesc(opClass='FloatAdd', opLat=5),
-               OpDesc(opClass='FloatCmp', opLat=5),
-               OpDesc(opClass='FloatCvt', opLat=5),
-               OpDesc(opClass='FloatDiv', opLat=9, pipelined=False),
-               OpDesc(opClass='FloatSqrt', opLat=33, pipelined=False),
-               OpDesc(opClass='FloatMult', opLat=4),
-               OpDesc(opClass='FloatMultAcc', opLat=5),
-               OpDesc(opClass='FloatMisc', opLat=3) ]
+    opList = [
+        OpDesc(opClass="SimdAdd", opLat=4),
+        OpDesc(opClass="SimdAddAcc", opLat=4),
+        OpDesc(opClass="SimdAlu", opLat=4),
+        OpDesc(opClass="SimdCmp", opLat=4),
+        OpDesc(opClass="SimdCvt", opLat=3),
+        OpDesc(opClass="SimdMisc", opLat=3),
+        OpDesc(opClass="SimdMult", opLat=5),
+        OpDesc(opClass="SimdMultAcc", opLat=5),
+        OpDesc(opClass="SimdShift", opLat=3),
+        OpDesc(opClass="SimdShiftAcc", opLat=3),
+        OpDesc(opClass="SimdSqrt", opLat=9),
+        OpDesc(opClass="SimdFloatAdd", opLat=5),
+        OpDesc(opClass="SimdFloatAlu", opLat=5),
+        OpDesc(opClass="SimdFloatCmp", opLat=3),
+        OpDesc(opClass="SimdFloatCvt", opLat=3),
+        OpDesc(opClass="SimdFloatDiv", opLat=3),
+        OpDesc(opClass="SimdFloatMisc", opLat=3),
+        OpDesc(opClass="SimdFloatMult", opLat=3),
+        OpDesc(opClass="SimdFloatMultAcc", opLat=5),
+        OpDesc(opClass="SimdFloatSqrt", opLat=9),
+        OpDesc(opClass="FloatAdd", opLat=5),
+        OpDesc(opClass="FloatCmp", opLat=5),
+        OpDesc(opClass="FloatCvt", opLat=5),
+        OpDesc(opClass="FloatDiv", opLat=9, pipelined=False),
+        OpDesc(opClass="FloatSqrt", opLat=33, pipelined=False),
+        OpDesc(opClass="FloatMult", opLat=4),
+        OpDesc(opClass="FloatMultAcc", opLat=5),
+        OpDesc(opClass="FloatMisc", opLat=3),
+    ]
     count = 2
 
 
 # Load/Store Units
 class O3_ARM_v7a_Load(FUDesc):
-    opList = [ OpDesc(opClass='MemRead',opLat=2),
-               OpDesc(opClass='FloatMemRead',opLat=2) ]
+    opList = [
+        OpDesc(opClass="MemRead", opLat=2),
+        OpDesc(opClass="FloatMemRead", opLat=2),
+    ]
     count = 1
 
+
 class O3_ARM_v7a_Store(FUDesc):
-    opList = [ OpDesc(opClass='MemWrite',opLat=2),
-               OpDesc(opClass='FloatMemWrite',opLat=2) ]
+    opList = [
+        OpDesc(opClass="MemWrite", opLat=2),
+        OpDesc(opClass="FloatMemWrite", opLat=2),
+    ]
     count = 1
 
+
 # Functional Units for this CPU
 class O3_ARM_v7a_FUP(FUPool):
-    FUList = [O3_ARM_v7a_Simple_Int(), O3_ARM_v7a_Complex_Int(),
-              O3_ARM_v7a_Load(), O3_ARM_v7a_Store(), O3_ARM_v7a_FP()]
+    FUList = [
+        O3_ARM_v7a_Simple_Int(),
+        O3_ARM_v7a_Complex_Int(),
+        O3_ARM_v7a_Load(),
+        O3_ARM_v7a_Store(),
+        O3_ARM_v7a_FP(),
+    ]
+
 
 # Bi-Mode Branch Predictor
 class O3_ARM_v7a_BP(BiModeBP):
@@ -99,6 +116,7 @@
     RASSize = 16
     instShiftAmt = 2
 
+
 class O3_ARM_v7a_3(ArmO3CPU):
     LQEntries = 16
     SQEntries = 16
@@ -143,6 +161,7 @@
     switched_out = False
     branchPred = O3_ARM_v7a_BP()
 
+
 # Instruction Cache
 class O3_ARM_v7a_ICache(Cache):
     tag_latency = 1
@@ -150,12 +169,13 @@
     response_latency = 1
     mshrs = 2
     tgts_per_mshr = 8
-    size = '32kB'
+    size = "32kB"
     assoc = 2
     is_read_only = True
     # Writeback clean lines as well
     writeback_clean = True
 
+
 # Data Cache
 class O3_ARM_v7a_DCache(Cache):
     tag_latency = 2
@@ -163,12 +183,13 @@
     response_latency = 2
     mshrs = 6
     tgts_per_mshr = 8
-    size = '32kB'
+    size = "32kB"
     assoc = 2
     write_buffers = 16
     # Consider the L2 a victim cache also for clean lines
     writeback_clean = True
 
+
 # L2 Cache
 class O3_ARM_v7aL2(Cache):
     tag_latency = 12
@@ -176,12 +197,12 @@
     response_latency = 12
     mshrs = 16
     tgts_per_mshr = 8
-    size = '1MB'
+    size = "1MB"
     assoc = 16
     write_buffers = 8
     prefetch_on_access = True
-    clusivity = 'mostly_excl'
+    clusivity = "mostly_excl"
     # Simple stride prefetcher
-    prefetcher = StridePrefetcher(degree=8, latency = 1)
+    prefetcher = StridePrefetcher(degree=8, latency=1)
     tags = BaseSetAssoc()
     replacement_policy = RandomRP()
diff --git a/configs/common/cores/arm/__init__.py b/configs/common/cores/arm/__init__.py
index dbc3b3e..135b75f 100644
--- a/configs/common/cores/arm/__init__.py
+++ b/configs/common/cores/arm/__init__.py
@@ -36,9 +36,7 @@
 from pkgutil import iter_modules
 from importlib import import_module
 
-_cpu_modules = [
-    name for _, name, ispkg in iter_modules(__path__) if not ispkg
-]
+_cpu_modules = [name for _, name, ispkg in iter_modules(__path__) if not ispkg]
 
 for c in _cpu_modules:
     try:
diff --git a/configs/common/cores/arm/ex5_LITTLE.py b/configs/common/cores/arm/ex5_LITTLE.py
index 57f6a6b..6974837 100644
--- a/configs/common/cores/arm/ex5_LITTLE.py
+++ b/configs/common/cores/arm/ex5_LITTLE.py
@@ -27,70 +27,89 @@
 
 from m5.objects import *
 
-#-----------------------------------------------------------------------
+# -----------------------------------------------------------------------
 #                ex5 LITTLE core (based on the ARM Cortex-A7)
-#-----------------------------------------------------------------------
+# -----------------------------------------------------------------------
 
 # Simple ALU Instructions have a latency of 3
 class ex5_LITTLE_Simple_Int(MinorDefaultIntFU):
-    opList = [ OpDesc(opClass='IntAlu', opLat=4) ]
+    opList = [OpDesc(opClass="IntAlu", opLat=4)]
+
 
 # Complex ALU instructions have a variable latencies
 class ex5_LITTLE_Complex_IntMul(MinorDefaultIntMulFU):
-    opList = [ OpDesc(opClass='IntMult', opLat=7) ]
+    opList = [OpDesc(opClass="IntMult", opLat=7)]
+
 
 class ex5_LITTLE_Complex_IntDiv(MinorDefaultIntDivFU):
-    opList = [ OpDesc(opClass='IntDiv', opLat=9) ]
+    opList = [OpDesc(opClass="IntDiv", opLat=9)]
+
 
 # Floating point and SIMD instructions
 class ex5_LITTLE_FP(MinorDefaultFloatSimdFU):
-    opList = [ OpDesc(opClass='SimdAdd', opLat=6),
-               OpDesc(opClass='SimdAddAcc', opLat=4),
-               OpDesc(opClass='SimdAlu', opLat=4),
-               OpDesc(opClass='SimdCmp', opLat=1),
-               OpDesc(opClass='SimdCvt', opLat=3),
-               OpDesc(opClass='SimdMisc', opLat=3),
-               OpDesc(opClass='SimdMult',opLat=4),
-               OpDesc(opClass='SimdMultAcc',opLat=5),
-               OpDesc(opClass='SimdShift',opLat=3),
-               OpDesc(opClass='SimdShiftAcc', opLat=3),
-               OpDesc(opClass='SimdSqrt', opLat=9),
-               OpDesc(opClass='SimdFloatAdd',opLat=8),
-               OpDesc(opClass='SimdFloatAlu',opLat=6),
-               OpDesc(opClass='SimdFloatCmp', opLat=6),
-               OpDesc(opClass='SimdFloatCvt', opLat=6),
-               OpDesc(opClass='SimdFloatDiv', opLat=20, pipelined=False),
-               OpDesc(opClass='SimdFloatMisc', opLat=6),
-               OpDesc(opClass='SimdFloatMult', opLat=15),
-               OpDesc(opClass='SimdFloatMultAcc',opLat=6),
-               OpDesc(opClass='SimdFloatSqrt', opLat=17),
-               OpDesc(opClass='FloatAdd', opLat=8),
-               OpDesc(opClass='FloatCmp', opLat=6),
-               OpDesc(opClass='FloatCvt', opLat=6),
-               OpDesc(opClass='FloatDiv', opLat=15, pipelined=False),
-               OpDesc(opClass='FloatSqrt', opLat=33),
-               OpDesc(opClass='FloatMult', opLat=6) ]
+    opList = [
+        OpDesc(opClass="SimdAdd", opLat=6),
+        OpDesc(opClass="SimdAddAcc", opLat=4),
+        OpDesc(opClass="SimdAlu", opLat=4),
+        OpDesc(opClass="SimdCmp", opLat=1),
+        OpDesc(opClass="SimdCvt", opLat=3),
+        OpDesc(opClass="SimdMisc", opLat=3),
+        OpDesc(opClass="SimdMult", opLat=4),
+        OpDesc(opClass="SimdMultAcc", opLat=5),
+        OpDesc(opClass="SimdShift", opLat=3),
+        OpDesc(opClass="SimdShiftAcc", opLat=3),
+        OpDesc(opClass="SimdSqrt", opLat=9),
+        OpDesc(opClass="SimdFloatAdd", opLat=8),
+        OpDesc(opClass="SimdFloatAlu", opLat=6),
+        OpDesc(opClass="SimdFloatCmp", opLat=6),
+        OpDesc(opClass="SimdFloatCvt", opLat=6),
+        OpDesc(opClass="SimdFloatDiv", opLat=20, pipelined=False),
+        OpDesc(opClass="SimdFloatMisc", opLat=6),
+        OpDesc(opClass="SimdFloatMult", opLat=15),
+        OpDesc(opClass="SimdFloatMultAcc", opLat=6),
+        OpDesc(opClass="SimdFloatSqrt", opLat=17),
+        OpDesc(opClass="FloatAdd", opLat=8),
+        OpDesc(opClass="FloatCmp", opLat=6),
+        OpDesc(opClass="FloatCvt", opLat=6),
+        OpDesc(opClass="FloatDiv", opLat=15, pipelined=False),
+        OpDesc(opClass="FloatSqrt", opLat=33),
+        OpDesc(opClass="FloatMult", opLat=6),
+    ]
+
 
 # Load/Store Units
 class ex5_LITTLE_MemFU(MinorDefaultMemFU):
-    opList = [ OpDesc(opClass='MemRead',opLat=1),
-               OpDesc(opClass='MemWrite',opLat=1) ]
+    opList = [
+        OpDesc(opClass="MemRead", opLat=1),
+        OpDesc(opClass="MemWrite", opLat=1),
+    ]
+
 
 # Misc Unit
 class ex5_LITTLE_MiscFU(MinorDefaultMiscFU):
-    opList = [ OpDesc(opClass='IprAccess',opLat=1),
-               OpDesc(opClass='InstPrefetch',opLat=1) ]
+    opList = [
+        OpDesc(opClass="IprAccess", opLat=1),
+        OpDesc(opClass="InstPrefetch", opLat=1),
+    ]
+
 
 # Functional Units for this CPU
 class ex5_LITTLE_FUP(MinorFUPool):
-    funcUnits = [ex5_LITTLE_Simple_Int(), ex5_LITTLE_Simple_Int(),
-        ex5_LITTLE_Complex_IntMul(), ex5_LITTLE_Complex_IntDiv(),
-        ex5_LITTLE_FP(), ex5_LITTLE_MemFU(),
-        ex5_LITTLE_MiscFU()]
+    funcUnits = [
+        ex5_LITTLE_Simple_Int(),
+        ex5_LITTLE_Simple_Int(),
+        ex5_LITTLE_Complex_IntMul(),
+        ex5_LITTLE_Complex_IntDiv(),
+        ex5_LITTLE_FP(),
+        ex5_LITTLE_MemFU(),
+        ex5_LITTLE_MiscFU(),
+    ]
+
 
 class ex5_LITTLE(ArmMinorCPU):
     executeFuncUnits = ex5_LITTLE_FUP()
 
+
 class L1Cache(Cache):
     tag_latency = 2
     data_latency = 2
@@ -99,19 +118,22 @@
     # Consider the L2 a victim cache also for clean lines
     writeback_clean = True
 
+
 class L1I(L1Cache):
     mshrs = 2
-    size = '32kB'
+    size = "32kB"
     assoc = 2
     is_read_only = True
     tgts_per_mshr = 20
 
+
 class L1D(L1Cache):
     mshrs = 4
-    size = '32kB'
+    size = "32kB"
     assoc = 4
     write_buffers = 4
 
+
 # L2 Cache
 class L2(Cache):
     tag_latency = 9
@@ -119,12 +141,12 @@
     response_latency = 9
     mshrs = 8
     tgts_per_mshr = 12
-    size = '512kB'
+    size = "512kB"
     assoc = 8
     write_buffers = 16
     prefetch_on_access = True
-    clusivity = 'mostly_excl'
+    clusivity = "mostly_excl"
     # Simple stride prefetcher
-    prefetcher = StridePrefetcher(degree=1, latency = 1)
+    prefetcher = StridePrefetcher(degree=1, latency=1)
     tags = BaseSetAssoc()
     replacement_policy = RandomRP()
diff --git a/configs/common/cores/arm/ex5_big.py b/configs/common/cores/arm/ex5_big.py
index de7a450..70af6b8 100644
--- a/configs/common/cores/arm/ex5_big.py
+++ b/configs/common/cores/arm/ex5_big.py
@@ -27,66 +27,80 @@
 
 from m5.objects import *
 
-#-----------------------------------------------------------------------
+# -----------------------------------------------------------------------
 #                ex5 big core (based on the ARM Cortex-A15)
-#-----------------------------------------------------------------------
+# -----------------------------------------------------------------------
 
 # Simple ALU Instructions have a latency of 1
 class ex5_big_Simple_Int(FUDesc):
-    opList = [ OpDesc(opClass='IntAlu', opLat=1) ]
+    opList = [OpDesc(opClass="IntAlu", opLat=1)]
     count = 2
 
+
 # Complex ALU instructions have a variable latencies
 class ex5_big_Complex_Int(FUDesc):
-    opList = [ OpDesc(opClass='IntMult', opLat=4, pipelined=True),
-               OpDesc(opClass='IntDiv', opLat=11, pipelined=False),
-               OpDesc(opClass='IprAccess', opLat=3, pipelined=True) ]
+    opList = [
+        OpDesc(opClass="IntMult", opLat=4, pipelined=True),
+        OpDesc(opClass="IntDiv", opLat=11, pipelined=False),
+        OpDesc(opClass="IprAccess", opLat=3, pipelined=True),
+    ]
     count = 1
 
+
 # Floating point and SIMD instructions
 class ex5_big_FP(FUDesc):
-    opList = [ OpDesc(opClass='SimdAdd', opLat=3),
-               OpDesc(opClass='SimdAddAcc', opLat=4),
-               OpDesc(opClass='SimdAlu', opLat=4),
-               OpDesc(opClass='SimdCmp', opLat=4),
-               OpDesc(opClass='SimdCvt', opLat=3),
-               OpDesc(opClass='SimdMisc', opLat=3),
-               OpDesc(opClass='SimdMult',opLat=6),
-               OpDesc(opClass='SimdMultAcc',opLat=5),
-               OpDesc(opClass='SimdShift',opLat=3),
-               OpDesc(opClass='SimdShiftAcc', opLat=3),
-               OpDesc(opClass='SimdSqrt', opLat=9),
-               OpDesc(opClass='SimdFloatAdd',opLat=6),
-               OpDesc(opClass='SimdFloatAlu',opLat=5),
-               OpDesc(opClass='SimdFloatCmp', opLat=3),
-               OpDesc(opClass='SimdFloatCvt', opLat=3),
-               OpDesc(opClass='SimdFloatDiv', opLat=21),
-               OpDesc(opClass='SimdFloatMisc', opLat=3),
-               OpDesc(opClass='SimdFloatMult', opLat=6),
-               OpDesc(opClass='SimdFloatMultAcc',opLat=1),
-               OpDesc(opClass='SimdFloatSqrt', opLat=9),
-               OpDesc(opClass='FloatAdd', opLat=6),
-               OpDesc(opClass='FloatCmp', opLat=5),
-               OpDesc(opClass='FloatCvt', opLat=5),
-               OpDesc(opClass='FloatDiv', opLat=12, pipelined=False),
-               OpDesc(opClass='FloatSqrt', opLat=33, pipelined=False),
-               OpDesc(opClass='FloatMult', opLat=8) ]
+    opList = [
+        OpDesc(opClass="SimdAdd", opLat=3),
+        OpDesc(opClass="SimdAddAcc", opLat=4),
+        OpDesc(opClass="SimdAlu", opLat=4),
+        OpDesc(opClass="SimdCmp", opLat=4),
+        OpDesc(opClass="SimdCvt", opLat=3),
+        OpDesc(opClass="SimdMisc", opLat=3),
+        OpDesc(opClass="SimdMult", opLat=6),
+        OpDesc(opClass="SimdMultAcc", opLat=5),
+        OpDesc(opClass="SimdShift", opLat=3),
+        OpDesc(opClass="SimdShiftAcc", opLat=3),
+        OpDesc(opClass="SimdSqrt", opLat=9),
+        OpDesc(opClass="SimdFloatAdd", opLat=6),
+        OpDesc(opClass="SimdFloatAlu", opLat=5),
+        OpDesc(opClass="SimdFloatCmp", opLat=3),
+        OpDesc(opClass="SimdFloatCvt", opLat=3),
+        OpDesc(opClass="SimdFloatDiv", opLat=21),
+        OpDesc(opClass="SimdFloatMisc", opLat=3),
+        OpDesc(opClass="SimdFloatMult", opLat=6),
+        OpDesc(opClass="SimdFloatMultAcc", opLat=1),
+        OpDesc(opClass="SimdFloatSqrt", opLat=9),
+        OpDesc(opClass="FloatAdd", opLat=6),
+        OpDesc(opClass="FloatCmp", opLat=5),
+        OpDesc(opClass="FloatCvt", opLat=5),
+        OpDesc(opClass="FloatDiv", opLat=12, pipelined=False),
+        OpDesc(opClass="FloatSqrt", opLat=33, pipelined=False),
+        OpDesc(opClass="FloatMult", opLat=8),
+    ]
     count = 2
 
 
 # Load/Store Units
 class ex5_big_Load(FUDesc):
-    opList = [ OpDesc(opClass='MemRead',opLat=2) ]
+    opList = [OpDesc(opClass="MemRead", opLat=2)]
     count = 1
 
+
 class ex5_big_Store(FUDesc):
-    opList = [OpDesc(opClass='MemWrite',opLat=2) ]
+    opList = [OpDesc(opClass="MemWrite", opLat=2)]
     count = 1
 
+
 # Functional Units for this CPU
 class ex5_big_FUP(FUPool):
-    FUList = [ex5_big_Simple_Int(), ex5_big_Complex_Int(),
-              ex5_big_Load(), ex5_big_Store(), ex5_big_FP()]
+    FUList = [
+        ex5_big_Simple_Int(),
+        ex5_big_Complex_Int(),
+        ex5_big_Load(),
+        ex5_big_Store(),
+        ex5_big_FP(),
+    ]
+
 
 # Bi-Mode Branch Predictor
 class ex5_big_BP(BiModeBP):
@@ -99,6 +113,7 @@
     RASSize = 48
     instShiftAmt = 2
 
+
 class ex5_big(ArmO3CPU):
     LQEntries = 16
     SQEntries = 16
@@ -142,6 +157,7 @@
     switched_out = False
     branchPred = ex5_big_BP()
 
+
 class L1Cache(Cache):
     tag_latency = 2
     data_latency = 2
@@ -150,20 +166,23 @@
     # Consider the L2 a victim cache also for clean lines
     writeback_clean = True
 
+
 # Instruction Cache
 class L1I(L1Cache):
     mshrs = 2
-    size = '32kB'
+    size = "32kB"
     assoc = 2
     is_read_only = True
 
+
 # Data Cache
 class L1D(L1Cache):
     mshrs = 6
-    size = '32kB'
+    size = "32kB"
     assoc = 2
     write_buffers = 16
 
+
 # L2 Cache
 class L2(Cache):
     tag_latency = 15
@@ -171,12 +190,12 @@
     response_latency = 15
     mshrs = 16
     tgts_per_mshr = 8
-    size = '2MB'
+    size = "2MB"
     assoc = 16
     write_buffers = 8
     prefetch_on_access = True
-    clusivity = 'mostly_excl'
+    clusivity = "mostly_excl"
     # Simple stride prefetcher
-    prefetcher = StridePrefetcher(degree=8, latency = 1)
+    prefetcher = StridePrefetcher(degree=8, latency=1)
     tags = BaseSetAssoc()
     replacement_policy = RandomRP()
diff --git a/configs/common/cpu2000.py b/configs/common/cpu2000.py
index 266bba0..3b1b390 100644
--- a/configs/common/cpu2000.py
+++ b/configs/common/cpu2000.py
@@ -29,7 +29,8 @@
 from os.path import basename, exists, join as joinpath, normpath
 from os.path import isdir, isfile, islink
 
-spec_dist = os.environ.get('M5_CPU2000', '/dist/m5/cpu2000')
+spec_dist = os.environ.get("M5_CPU2000", "/dist/m5/cpu2000")
+
 
 def copyfiles(srcdir, dstdir):
     from filecmp import cmp as filecmp
@@ -45,8 +46,8 @@
         root = normpath(root)
         prefix = os.path.commonprefix([root, srcdir])
 
-        root = root[len(prefix):]
-        if root.startswith('/'):
+        root = root[len(prefix) :]
+        if root.startswith("/"):
             root = root[1:]
 
         for entry in dirs:
@@ -62,68 +63,70 @@
 
     # some of the spec benchmarks expect to be run from one directory up.
     # just create some symlinks that solve the problem
-    inlink = joinpath(dstdir, 'input')
-    outlink = joinpath(dstdir, 'output')
+    inlink = joinpath(dstdir, "input")
+    outlink = joinpath(dstdir, "output")
     if not exists(inlink):
-        os.symlink('.', inlink)
+        os.symlink(".", inlink)
     if not exists(outlink):
-        os.symlink('.', outlink)
+        os.symlink(".", outlink)
+
 
 class Benchmark(object):
     def __init__(self, isa, os, input_set):
-        if not hasattr(self.__class__, 'name'):
+        if not hasattr(self.__class__, "name"):
             self.name = self.__class__.__name__
 
-        if not hasattr(self.__class__, 'binary'):
+        if not hasattr(self.__class__, "binary"):
             self.binary = self.name
 
-        if not hasattr(self.__class__, 'args'):
+        if not hasattr(self.__class__, "args"):
             self.args = []
 
-        if not hasattr(self.__class__, 'output'):
-            self.output = '%s.out' % self.name
+        if not hasattr(self.__class__, "output"):
+            self.output = "%s.out" % self.name
 
-        if not hasattr(self.__class__, 'simpoint'):
+        if not hasattr(self.__class__, "simpoint"):
             self.simpoint = None
 
         try:
             func = getattr(self.__class__, input_set)
         except AttributeError:
             raise AttributeError(
-                'The benchmark %s does not have the %s input set' % \
-                (self.name, input_set))
+                "The benchmark %s does not have the %s input set"
+                % (self.name, input_set)
+            )
 
-        executable = joinpath(spec_dist, 'binaries', isa, os, self.binary)
+        executable = joinpath(spec_dist, "binaries", isa, os, self.binary)
         if not isfile(executable):
-            raise AttributeError('%s not found' % executable)
+            raise AttributeError("%s not found" % executable)
         self.executable = executable
 
         # root of tree for input & output data files
-        data_dir = joinpath(spec_dist, 'data', self.name)
+        data_dir = joinpath(spec_dist, "data", self.name)
         # optional subtree with files shared across input sets
-        all_dir = joinpath(data_dir, 'all')
+        all_dir = joinpath(data_dir, "all")
         # dirs for input & output files for this input set
-        inputs_dir = joinpath(data_dir, input_set, 'input')
-        outputs_dir = joinpath(data_dir, input_set, 'output')
+        inputs_dir = joinpath(data_dir, input_set, "input")
+        outputs_dir = joinpath(data_dir, input_set, "output")
         # keep around which input set was specified
         self.input_set = input_set
 
         if not isdir(inputs_dir):
-            raise AttributeError('%s not found' % inputs_dir)
+            raise AttributeError("%s not found" % inputs_dir)
 
-        self.inputs_dir = [ inputs_dir ]
+        self.inputs_dir = [inputs_dir]
         if isdir(all_dir):
-            self.inputs_dir += [ joinpath(all_dir, 'input') ]
+            self.inputs_dir += [joinpath(all_dir, "input")]
         if isdir(outputs_dir):
             self.outputs_dir = outputs_dir
 
-        if not hasattr(self.__class__, 'stdin'):
-            self.stdin = joinpath(inputs_dir, '%s.in' % self.name)
+        if not hasattr(self.__class__, "stdin"):
+            self.stdin = joinpath(inputs_dir, "%s.in" % self.name)
             if not isfile(self.stdin):
                 self.stdin = None
 
-        if not hasattr(self.__class__, 'stdout'):
-            self.stdout = joinpath(outputs_dir, '%s.out' % self.name)
+        if not hasattr(self.__class__, "stdout"):
+            self.stdout = joinpath(outputs_dir, "%s.out" % self.name)
             if not isfile(self.stdout):
                 self.stdout = None
 
@@ -132,14 +135,14 @@
     def makeProcessArgs(self, **kwargs):
         # set up default args for Process object
         process_args = {}
-        process_args['cmd'] = [ self.name ] + self.args
-        process_args['executable'] = self.executable
+        process_args["cmd"] = [self.name] + self.args
+        process_args["executable"] = self.executable
         if self.stdin:
-            process_args['input'] = self.stdin
+            process_args["input"] = self.stdin
         if self.stdout:
-            process_args['output'] = self.stdout
+            process_args["output"] = self.stdout
         if self.simpoint:
-            process_args['simpoint'] = self.simpoint
+            process_args["simpoint"] = self.simpoint
         # explicit keywords override defaults
         process_args.update(kwargs)
 
@@ -150,12 +153,13 @@
 
         # figure out working directory: use m5's outdir unless
         # overridden by Process's cwd param
-        cwd = process_args.get('cwd')
+        cwd = process_args.get("cwd")
 
         if not cwd:
             from m5 import options
+
             cwd = options.outdir
-            process_args['cwd'] = cwd
+            process_args["cwd"] = cwd
         if not isdir(cwd):
             os.makedirs(cwd)
         # copy input files to working directory
@@ -163,590 +167,872 @@
             copyfiles(d, cwd)
         # generate Process object
         from m5.objects import Process
+
         return Process(**process_args)
 
     def __str__(self):
         return self.name
 
+
 class DefaultBenchmark(Benchmark):
-    def ref(self, isa, os): pass
-    def test(self, isa, os): pass
-    def train(self, isa, os): pass
-
-class MinneDefaultBenchmark(DefaultBenchmark):
-    def smred(self, isa, os): pass
-    def mdred(self, isa, os): pass
-    def lgred(self, isa, os): pass
-
-class ammp(MinneDefaultBenchmark):
-    name = 'ammp'
-    number = 188
-    lang = 'C'
-    simpoint = 108*100E6
-
-class applu(MinneDefaultBenchmark):
-    name = 'applu'
-    number = 173
-    lang = 'F77'
-    simpoint = 2179*100E6
-
-class apsi(MinneDefaultBenchmark):
-    name = 'apsi'
-    number = 301
-    lang = 'F77'
-    simpoint = 3408*100E6
-
-class art(DefaultBenchmark):
-    name = 'art'
-    number = 179
-    lang = 'C'
+    def ref(self, isa, os):
+        pass
 
     def test(self, isa, os):
-        self.args = [ '-scanfile', 'c756hel.in',
-                      '-trainfile1', 'a10.img',
-                      '-stride', '2',
-                      '-startx', '134',
-                      '-starty', '220',
-                      '-endx', '139',
-                      '-endy', '225',
-                      '-objects', '1' ]
-        self.output = 'test.out'
+        pass
 
     def train(self, isa, os):
-        self.args = [ '-scanfile', 'c756hel.in',
-                      '-trainfile1', 'a10.img',
-                      '-stride', '2',
-                      '-startx', '134',
-                      '-starty', '220',
-                      '-endx', '184',
-                      '-endy', '240',
-                      '-objects', '3' ]
-        self.output = 'train.out'
+        pass
+
+
+class MinneDefaultBenchmark(DefaultBenchmark):
+    def smred(self, isa, os):
+        pass
+
+    def mdred(self, isa, os):
+        pass
 
     def lgred(self, isa, os):
-        self.args = ['-scanfile', 'c756hel.in',
-                     '-trainfile1', 'a10.img',
-                     '-stride', '5',
-                     '-startx', '134',
-                     '-starty', '220',
-                     '-endx', '184',
-                     '-endy', '240',
-                     '-objects', '1' ]
-        self.output = 'lgred.out'
+        pass
+
+
+class ammp(MinneDefaultBenchmark):
+    name = "ammp"
+    number = 188
+    lang = "C"
+    simpoint = 108 * 100e6
+
+
+class applu(MinneDefaultBenchmark):
+    name = "applu"
+    number = 173
+    lang = "F77"
+    simpoint = 2179 * 100e6
+
+
+class apsi(MinneDefaultBenchmark):
+    name = "apsi"
+    number = 301
+    lang = "F77"
+    simpoint = 3408 * 100e6
+
+
+class art(DefaultBenchmark):
+    name = "art"
+    number = 179
+    lang = "C"
+
+    def test(self, isa, os):
+        self.args = [
+            "-scanfile",
+            "c756hel.in",
+            "-trainfile1",
+            "a10.img",
+            "-stride",
+            "2",
+            "-startx",
+            "134",
+            "-starty",
+            "220",
+            "-endx",
+            "139",
+            "-endy",
+            "225",
+            "-objects",
+            "1",
+        ]
+        self.output = "test.out"
+
+    def train(self, isa, os):
+        self.args = [
+            "-scanfile",
+            "c756hel.in",
+            "-trainfile1",
+            "a10.img",
+            "-stride",
+            "2",
+            "-startx",
+            "134",
+            "-starty",
+            "220",
+            "-endx",
+            "184",
+            "-endy",
+            "240",
+            "-objects",
+            "3",
+        ]
+        self.output = "train.out"
+
+    def lgred(self, isa, os):
+        self.args = [
+            "-scanfile",
+            "c756hel.in",
+            "-trainfile1",
+            "a10.img",
+            "-stride",
+            "5",
+            "-startx",
+            "134",
+            "-starty",
+            "220",
+            "-endx",
+            "184",
+            "-endy",
+            "240",
+            "-objects",
+            "1",
+        ]
+        self.output = "lgred.out"
 
 
 class art110(art):
     def ref(self, isa, os):
-        self.args = [ '-scanfile', 'c756hel.in',
-                      '-trainfile1', 'a10.img',
-                      '-trainfile2', 'hc.img',
-                      '-stride', '2',
-                      '-startx', '110',
-                      '-starty', '200',
-                      '-endx', '160',
-                      '-endy', '240',
-                      '-objects', '10' ]
-        self.output = 'ref.1.out'
-        self.simpoint = 340*100E6
+        self.args = [
+            "-scanfile",
+            "c756hel.in",
+            "-trainfile1",
+            "a10.img",
+            "-trainfile2",
+            "hc.img",
+            "-stride",
+            "2",
+            "-startx",
+            "110",
+            "-starty",
+            "200",
+            "-endx",
+            "160",
+            "-endy",
+            "240",
+            "-objects",
+            "10",
+        ]
+        self.output = "ref.1.out"
+        self.simpoint = 340 * 100e6
+
 
 class art470(art):
     def ref(self, isa, os):
-        self.args = [ '-scanfile', 'c756hel.in',
-                      '-trainfile1', 'a10.img',
-                      '-trainfile2', 'hc.img',
-                      '-stride', '2',
-                      '-startx', '470',
-                      '-starty', '140',
-                      '-endx', '520',
-                      '-endy', '180',
-                      '-objects', '10' ]
-        self.output = 'ref.2.out'
-        self.simpoint = 365*100E6
+        self.args = [
+            "-scanfile",
+            "c756hel.in",
+            "-trainfile1",
+            "a10.img",
+            "-trainfile2",
+            "hc.img",
+            "-stride",
+            "2",
+            "-startx",
+            "470",
+            "-starty",
+            "140",
+            "-endx",
+            "520",
+            "-endy",
+            "180",
+            "-objects",
+            "10",
+        ]
+        self.output = "ref.2.out"
+        self.simpoint = 365 * 100e6
+
 
 class equake(DefaultBenchmark):
-    name = 'equake'
+    name = "equake"
     number = 183
-    lang = 'C'
-    simpoint = 812*100E6
+    lang = "C"
+    simpoint = 812 * 100e6
 
-    def lgred(self, isa, os): pass
+    def lgred(self, isa, os):
+        pass
+
 
 class facerec(MinneDefaultBenchmark):
-    name = 'facerec'
+    name = "facerec"
     number = 187
-    lang = 'F'
-    simpoint = 375*100E6
+    lang = "F"
+    simpoint = 375 * 100e6
+
 
 class fma3d(MinneDefaultBenchmark):
-    name = 'fma3d'
+    name = "fma3d"
     number = 191
-    lang = 'F'
-    simpoint = 2541*100E6
+    lang = "F"
+    simpoint = 2541 * 100e6
+
 
 class galgel(MinneDefaultBenchmark):
-    name = 'galgel'
+    name = "galgel"
     number = 178
-    lang = 'F'
-    simpoint = 2491*100E6
+    lang = "F"
+    simpoint = 2491 * 100e6
+
 
 class lucas(MinneDefaultBenchmark):
-    name = 'lucas'
+    name = "lucas"
     number = 189
-    lang = 'F'
-    simpoint = 545*100E6
+    lang = "F"
+    simpoint = 545 * 100e6
+
 
 class mesa(Benchmark):
-    name = 'mesa'
+    name = "mesa"
     number = 177
-    lang = 'C'
+    lang = "C"
     stdin = None
 
     def __set_args(self, frames):
-        self.args = [ '-frames', frames, '-meshfile', '%s.in' % self.name,
-                      '-ppmfile', '%s.ppm' % self.name ]
+        self.args = [
+            "-frames",
+            frames,
+            "-meshfile",
+            "%s.in" % self.name,
+            "-ppmfile",
+            "%s.ppm" % self.name,
+        ]
 
     def test(self, isa, os):
-        self.__set_args('10')
+        self.__set_args("10")
 
     def train(self, isa, os):
-        self.__set_args('500')
+        self.__set_args("500")
 
     def ref(self, isa, os):
-        self.__set_args('1000')
-        self.simpoint = 1135*100E6
+        self.__set_args("1000")
+        self.simpoint = 1135 * 100e6
 
     def lgred(self, isa, os):
-        self.__set_args('1')
+        self.__set_args("1")
+
 
 class mgrid(MinneDefaultBenchmark):
-    name = 'mgrid'
+    name = "mgrid"
     number = 172
-    lang = 'F77'
-    simpoint = 3292*100E6
+    lang = "F77"
+    simpoint = 3292 * 100e6
+
 
 class sixtrack(DefaultBenchmark):
-    name = 'sixtrack'
+    name = "sixtrack"
     number = 200
-    lang = 'F77'
-    simpoint = 3043*100E6
+    lang = "F77"
+    simpoint = 3043 * 100e6
 
-    def lgred(self, isa, os): pass
+    def lgred(self, isa, os):
+        pass
+
 
 class swim(MinneDefaultBenchmark):
-    name = 'swim'
+    name = "swim"
     number = 171
-    lang = 'F77'
-    simpoint = 2079*100E6
+    lang = "F77"
+    simpoint = 2079 * 100e6
+
 
 class wupwise(DefaultBenchmark):
-    name = 'wupwise'
+    name = "wupwise"
     number = 168
-    lang = 'F77'
-    simpoint = 3237*100E6
+    lang = "F77"
+    simpoint = 3237 * 100e6
 
-    def lgred(self, isa, os): pass
+    def lgred(self, isa, os):
+        pass
+
 
 class bzip2(DefaultBenchmark):
-    name = 'bzip2'
+    name = "bzip2"
     number = 256
-    lang = 'C'
+    lang = "C"
 
     def test(self, isa, os):
-        self.args = [ 'input.random' ]
+        self.args = ["input.random"]
 
     def train(self, isa, os):
-        self.args = [ 'input.compressed' ]
+        self.args = ["input.compressed"]
+
 
 class bzip2_source(bzip2):
     def ref(self, isa, os):
-        self.simpoint = 977*100E6
-        self.args = [ 'input.source', '58' ]
+        self.simpoint = 977 * 100e6
+        self.args = ["input.source", "58"]
 
     def lgred(self, isa, os):
-        self.args = [ 'input.source', '1' ]
+        self.args = ["input.source", "1"]
+
 
 class bzip2_graphic(bzip2):
     def ref(self, isa, os):
-        self.simpoint = 718*100E6
-        self.args = [ 'input.graphic', '58' ]
+        self.simpoint = 718 * 100e6
+        self.args = ["input.graphic", "58"]
 
     def lgred(self, isa, os):
-        self.args = [ 'input.graphic', '1' ]
+        self.args = ["input.graphic", "1"]
+
 
 class bzip2_program(bzip2):
     def ref(self, isa, os):
-        self.simpoint = 458*100E6
-        self.args = [ 'input.program', '58' ]
+        self.simpoint = 458 * 100e6
+        self.args = ["input.program", "58"]
 
     def lgred(self, isa, os):
-        self.args = [ 'input.program', '1' ]
+        self.args = ["input.program", "1"]
+
 
 class crafty(MinneDefaultBenchmark):
-    name = 'crafty'
+    name = "crafty"
     number = 186
-    lang = 'C'
-    simpoint = 774*100E6
+    lang = "C"
+    simpoint = 774 * 100e6
+
 
 class eon(MinneDefaultBenchmark):
-    name = 'eon'
+    name = "eon"
     number = 252
-    lang = 'CXX'
+    lang = "CXX"
     stdin = None
 
+
 class eon_kajiya(eon):
-    args = [ 'chair.control.kajiya', 'chair.camera', 'chair.surfaces',
-             'chair.kajiya.ppm', 'ppm', 'pixels_out.kajiya']
-    output = 'kajiya_log.out'
+    args = [
+        "chair.control.kajiya",
+        "chair.camera",
+        "chair.surfaces",
+        "chair.kajiya.ppm",
+        "ppm",
+        "pixels_out.kajiya",
+    ]
+    output = "kajiya_log.out"
 
 
 class eon_cook(eon):
-    args = [ 'chair.control.cook', 'chair.camera', 'chair.surfaces',
-             'chair.cook.ppm', 'ppm', 'pixels_out.cook' ]
-    output = 'cook_log.out'
+    args = [
+        "chair.control.cook",
+        "chair.camera",
+        "chair.surfaces",
+        "chair.cook.ppm",
+        "ppm",
+        "pixels_out.cook",
+    ]
+    output = "cook_log.out"
+
 
 class eon_rushmeier(eon):
-    args = [ 'chair.control.rushmeier', 'chair.camera', 'chair.surfaces',
-             'chair.rushmeier.ppm', 'ppm', 'pixels_out.rushmeier' ]
-    output = 'rushmeier_log.out'
-    simpoint = 403*100E6
+    args = [
+        "chair.control.rushmeier",
+        "chair.camera",
+        "chair.surfaces",
+        "chair.rushmeier.ppm",
+        "ppm",
+        "pixels_out.rushmeier",
+    ]
+    output = "rushmeier_log.out"
+    simpoint = 403 * 100e6
+
 
 class gap(DefaultBenchmark):
-    name = 'gap'
+    name = "gap"
     number = 254
-    lang = 'C'
+    lang = "C"
 
     def __set_args(self, size):
-        self.args = [ '-l', './', '-q', '-m', size ]
+        self.args = ["-l", "./", "-q", "-m", size]
 
     def test(self, isa, os):
-        self.__set_args('64M')
+        self.__set_args("64M")
 
     def train(self, isa, os):
-        self.__set_args('128M')
+        self.__set_args("128M")
 
     def ref(self, isa, os):
-        self.__set_args('192M')
-        self.simpoint = 674*100E6
+        self.__set_args("192M")
+        self.simpoint = 674 * 100e6
 
     def lgred(self, isa, os):
-        self.__set_args('64M')
+        self.__set_args("64M")
 
     def mdred(self, isa, os):
-        self.__set_args('64M')
+        self.__set_args("64M")
 
     def smred(self, isa, os):
-        self.__set_args('64M')
+        self.__set_args("64M")
+
 
 class gcc(DefaultBenchmark):
-    name = 'gcc'
+    name = "gcc"
     number = 176
-    lang = 'C'
+    lang = "C"
 
     def test(self, isa, os):
-        self.args = [ 'cccp.i', '-o', 'cccp.s' ]
+        self.args = ["cccp.i", "-o", "cccp.s"]
 
     def train(self, isa, os):
-        self.args = [ 'cp-decl.i', '-o', 'cp-decl.s' ]
+        self.args = ["cp-decl.i", "-o", "cp-decl.s"]
 
     def smred(self, isa, os):
-        self.args = [ 'c-iterate.i', '-o', 'c-iterate.s' ]
+        self.args = ["c-iterate.i", "-o", "c-iterate.s"]
 
     def mdred(self, isa, os):
-        self.args = [ 'rdlanal.i', '-o', 'rdlanal.s' ]
+        self.args = ["rdlanal.i", "-o", "rdlanal.s"]
 
     def lgred(self, isa, os):
-        self.args = [ 'cp-decl.i', '-o', 'cp-decl.s' ]
+        self.args = ["cp-decl.i", "-o", "cp-decl.s"]
+
 
 class gcc_166(gcc):
     def ref(self, isa, os):
-        self.simpoint = 389*100E6
-        self.args = [ '166.i', '-o', '166.s' ]
+        self.simpoint = 389 * 100e6
+        self.args = ["166.i", "-o", "166.s"]
+
 
 class gcc_200(gcc):
     def ref(self, isa, os):
-        self.simpoint = 736*100E6
-        self.args = [ '200.i', '-o', '200.s' ]
+        self.simpoint = 736 * 100e6
+        self.args = ["200.i", "-o", "200.s"]
+
 
 class gcc_expr(gcc):
     def ref(self, isa, os):
-        self.simpoint = 36*100E6
-        self.args = [ 'expr.i', '-o', 'expr.s' ]
+        self.simpoint = 36 * 100e6
+        self.args = ["expr.i", "-o", "expr.s"]
+
 
 class gcc_integrate(gcc):
     def ref(self, isa, os):
-        self.simpoint = 4*100E6
-        self.args = [ 'integrate.i', '-o', 'integrate.s' ]
+        self.simpoint = 4 * 100e6
+        self.args = ["integrate.i", "-o", "integrate.s"]
+
 
 class gcc_scilab(gcc):
     def ref(self, isa, os):
-        self.simpoint = 207*100E6
-        self.args = [ 'scilab.i', '-o', 'scilab.s' ]
+        self.simpoint = 207 * 100e6
+        self.args = ["scilab.i", "-o", "scilab.s"]
+
 
 class gzip(DefaultBenchmark):
-    name = 'gzip'
+    name = "gzip"
     number = 164
-    lang = 'C'
+    lang = "C"
 
     def test(self, isa, os):
-        self.args = [ 'input.compressed', '2' ]
+        self.args = ["input.compressed", "2"]
 
     def train(self, isa, os):
-        self.args = [ 'input.combined', '32' ]
+        self.args = ["input.combined", "32"]
+
 
 class gzip_source(gzip):
     def ref(self, isa, os):
-        self.simpoint = 334*100E6
-        self.args = [ 'input.source', '1' ]
+        self.simpoint = 334 * 100e6
+        self.args = ["input.source", "1"]
+
     def smred(self, isa, os):
-        self.args = [ 'input.source', '1' ]
+        self.args = ["input.source", "1"]
+
     def mdred(self, isa, os):
-        self.args = [ 'input.source', '1' ]
+        self.args = ["input.source", "1"]
+
     def lgred(self, isa, os):
-        self.args = [ 'input.source', '1' ]
+        self.args = ["input.source", "1"]
+
 
 class gzip_log(gzip):
     def ref(self, isa, os):
-        self.simpoint = 265*100E6
-        self.args = [ 'input.log', '60' ]
+        self.simpoint = 265 * 100e6
+        self.args = ["input.log", "60"]
+
     def smred(self, isa, os):
-        self.args = [ 'input.log', '1' ]
+        self.args = ["input.log", "1"]
+
     def mdred(self, isa, os):
-        self.args = [ 'input.log', '1' ]
+        self.args = ["input.log", "1"]
+
     def lgred(self, isa, os):
-        self.args = [ 'input.log', '1' ]
+        self.args = ["input.log", "1"]
+
 
 class gzip_graphic(gzip):
     def ref(self, isa, os):
-        self.simpoint = 653*100E6
-        self.args = [ 'input.graphic', '60' ]
+        self.simpoint = 653 * 100e6
+        self.args = ["input.graphic", "60"]
+
     def smred(self, isa, os):
-        self.args = [ 'input.graphic', '1' ]
+        self.args = ["input.graphic", "1"]
+
     def mdred(self, isa, os):
-        self.args = [ 'input.graphic', '1' ]
+        self.args = ["input.graphic", "1"]
+
     def lgred(self, isa, os):
-        self.args = [ 'input.graphic', '1' ]
+        self.args = ["input.graphic", "1"]
+
 
 class gzip_random(gzip):
     def ref(self, isa, os):
-        self.simpoint = 623*100E6
-        self.args = [ 'input.random', '60' ]
+        self.simpoint = 623 * 100e6
+        self.args = ["input.random", "60"]
+
     def smred(self, isa, os):
-        self.args = [ 'input.random', '1' ]
+        self.args = ["input.random", "1"]
+
     def mdred(self, isa, os):
-        self.args = [ 'input.random', '1' ]
+        self.args = ["input.random", "1"]
+
     def lgred(self, isa, os):
-        self.args = [ 'input.random', '1' ]
+        self.args = ["input.random", "1"]
+
 
 class gzip_program(gzip):
     def ref(self, isa, os):
-        self.simpoint = 1189*100E6
-        self.args = [ 'input.program', '60' ]
+        self.simpoint = 1189 * 100e6
+        self.args = ["input.program", "60"]
+
     def smred(self, isa, os):
-        self.args = [ 'input.program', '1' ]
+        self.args = ["input.program", "1"]
+
     def mdred(self, isa, os):
-        self.args = [ 'input.program', '1' ]
+        self.args = ["input.program", "1"]
+
     def lgred(self, isa, os):
-        self.args = [ 'input.program', '1' ]
+        self.args = ["input.program", "1"]
+
 
 class mcf(MinneDefaultBenchmark):
-    name = 'mcf'
+    name = "mcf"
     number = 181
-    lang = 'C'
-    args = [ 'mcf.in' ]
-    simpoint = 553*100E6
+    lang = "C"
+    args = ["mcf.in"]
+    simpoint = 553 * 100e6
+
 
 class parser(MinneDefaultBenchmark):
-    name = 'parser'
+    name = "parser"
     number = 197
-    lang = 'C'
-    args = [ '2.1.dict', '-batch' ]
-    simpoint = 1146*100E6
+    lang = "C"
+    args = ["2.1.dict", "-batch"]
+    simpoint = 1146 * 100e6
+
 
 class perlbmk(DefaultBenchmark):
-    name = 'perlbmk'
+    name = "perlbmk"
     number = 253
-    lang = 'C'
+    lang = "C"
 
     def test(self, isa, os):
-        self.args = [ '-I.', '-I', 'lib', 'test.pl' ]
-        self.stdin = 'test.in'
+        self.args = ["-I.", "-I", "lib", "test.pl"]
+        self.stdin = "test.in"
+
 
 class perlbmk_diffmail(perlbmk):
     def ref(self, isa, os):
-        self.simpoint = 141*100E6
-        self.args = [ '-I', 'lib', 'diffmail.pl', '2', '550', '15', '24',
-                      '23', '100' ]
+        self.simpoint = 141 * 100e6
+        self.args = [
+            "-I",
+            "lib",
+            "diffmail.pl",
+            "2",
+            "550",
+            "15",
+            "24",
+            "23",
+            "100",
+        ]
 
     def train(self, isa, os):
-        self.args = [ '-I', 'lib', 'diffmail.pl', '2', '350', '15', '24',
-                      '23', '150' ]
+        self.args = [
+            "-I",
+            "lib",
+            "diffmail.pl",
+            "2",
+            "350",
+            "15",
+            "24",
+            "23",
+            "150",
+        ]
+
 
 class perlbmk_scrabbl(perlbmk):
     def train(self, isa, os):
-        self.args = [ '-I.', '-I', 'lib', 'scrabbl.pl' ]
-        self.stdin = 'scrabbl.in'
+        self.args = ["-I.", "-I", "lib", "scrabbl.pl"]
+        self.stdin = "scrabbl.in"
+
 
 class perlbmk_makerand(perlbmk):
     def ref(self, isa, os):
-        self.simpoint = 11*100E6
-        self.args = [ '-I', 'lib',  'makerand.pl' ]
+        self.simpoint = 11 * 100e6
+        self.args = ["-I", "lib", "makerand.pl"]
 
     def lgred(self, isa, os):
-        self.args = [ '-I.', '-I', 'lib', 'lgred.makerand.pl' ]
+        self.args = ["-I.", "-I", "lib", "lgred.makerand.pl"]
 
     def mdred(self, isa, os):
-        self.args = [ '-I.', '-I', 'lib', 'mdred.makerand.pl' ]
+        self.args = ["-I.", "-I", "lib", "mdred.makerand.pl"]
 
     def smred(self, isa, os):
-        self.args = [ '-I.', '-I', 'lib', 'smred.makerand.pl' ]
+        self.args = ["-I.", "-I", "lib", "smred.makerand.pl"]
+
 
 class perlbmk_perfect(perlbmk):
     def ref(self, isa, os):
-        self.simpoint = 5*100E6
-        self.args = [ '-I', 'lib',  'perfect.pl', 'b', '3', 'm', '4' ]
+        self.simpoint = 5 * 100e6
+        self.args = ["-I", "lib", "perfect.pl", "b", "3", "m", "4"]
 
     def train(self, isa, os):
-        self.args = [ '-I', 'lib', 'perfect.pl', 'b',  '3' ]
+        self.args = ["-I", "lib", "perfect.pl", "b", "3"]
+
 
 class perlbmk_splitmail1(perlbmk):
     def ref(self, isa, os):
-        self.simpoint = 405*100E6
-        self.args = [ '-I', 'lib', 'splitmail.pl', '850', '5', '19',
-                      '18', '1500' ]
+        self.simpoint = 405 * 100e6
+        self.args = [
+            "-I",
+            "lib",
+            "splitmail.pl",
+            "850",
+            "5",
+            "19",
+            "18",
+            "1500",
+        ]
+
 
 class perlbmk_splitmail2(perlbmk):
     def ref(self, isa, os):
-        self.args = [ '-I', 'lib', 'splitmail.pl', '704', '12', '26',
-                      '16', '836' ]
+        self.args = [
+            "-I",
+            "lib",
+            "splitmail.pl",
+            "704",
+            "12",
+            "26",
+            "16",
+            "836",
+        ]
+
 
 class perlbmk_splitmail3(perlbmk):
     def ref(self, isa, os):
-        self.args = [ '-I', 'lib', 'splitmail.pl', '535', '13', '25',
-                      '24', '1091' ]
+        self.args = [
+            "-I",
+            "lib",
+            "splitmail.pl",
+            "535",
+            "13",
+            "25",
+            "24",
+            "1091",
+        ]
+
 
 class perlbmk_splitmail4(perlbmk):
     def ref(self, isa, os):
-        self.args = [ '-I', 'lib', 'splitmail.pl', '957', '12', '23',
-                      '26', '1014' ]
+        self.args = [
+            "-I",
+            "lib",
+            "splitmail.pl",
+            "957",
+            "12",
+            "23",
+            "26",
+            "1014",
+        ]
+
 
 class twolf(Benchmark):
-    name = 'twolf'
+    name = "twolf"
     number = 300
-    lang = 'C'
+    lang = "C"
     stdin = None
 
     def test(self, isa, os):
-        self.args = [ 'test' ]
+        self.args = ["test"]
 
     def train(self, isa, os):
-        self.args = [ 'train' ]
+        self.args = ["train"]
 
     def ref(self, isa, os):
-        self.simpoint = 1066*100E6
-        self.args = [ 'ref' ]
+        self.simpoint = 1066 * 100e6
+        self.args = ["ref"]
 
     def smred(self, isa, os):
-        self.args = [ 'smred' ]
+        self.args = ["smred"]
 
     def mdred(self, isa, os):
-        self.args = [ 'mdred' ]
+        self.args = ["mdred"]
 
     def lgred(self, isa, os):
-        self.args = [ 'lgred' ]
+        self.args = ["lgred"]
+
 
 class vortex(Benchmark):
-    name = 'vortex'
+    name = "vortex"
     number = 255
-    lang = 'C'
+    lang = "C"
     stdin = None
 
     def __init__(self, isa, os, input_set):
-        if (isa in ('arm', 'thumb', 'aarch64')):
-            self.endian = 'lendian'
-        elif (isa == 'sparc' or isa == 'sparc32'):
-            self.endian = 'bendian'
+        if isa in ("arm", "thumb", "aarch64"):
+            self.endian = "lendian"
+        elif isa == "sparc" or isa == "sparc32":
+            self.endian = "bendian"
         else:
             raise AttributeError("unknown ISA %s" % isa)
 
         super(vortex, self).__init__(isa, os, input_set)
 
     def test(self, isa, os):
-        self.args = [ '%s.raw' % self.endian ]
-        self.output = 'vortex.out'
+        self.args = ["%s.raw" % self.endian]
+        self.output = "vortex.out"
 
     def train(self, isa, os):
-        self.args = [ '%s.raw' % self.endian ]
-        self.output = 'vortex.out'
+        self.args = ["%s.raw" % self.endian]
+        self.output = "vortex.out"
 
     def smred(self, isa, os):
-        self.args = [ '%s.raw' % self.endian ]
-        self.output = 'vortex.out'
+        self.args = ["%s.raw" % self.endian]
+        self.output = "vortex.out"
 
     def mdred(self, isa, os):
-        self.args = [ '%s.raw' % self.endian ]
-        self.output = 'vortex.out'
+        self.args = ["%s.raw" % self.endian]
+        self.output = "vortex.out"
 
     def lgred(self, isa, os):
-        self.args = [ '%s.raw' % self.endian ]
-        self.output = 'vortex.out'
+        self.args = ["%s.raw" % self.endian]
+        self.output = "vortex.out"
+
 
 class vortex1(vortex):
     def ref(self, isa, os):
-        self.args = [ '%s1.raw' % self.endian ]
-        self.output = 'vortex1.out'
-        self.simpoint = 271*100E6
+        self.args = ["%s1.raw" % self.endian]
+        self.output = "vortex1.out"
+        self.simpoint = 271 * 100e6
 
 
 class vortex2(vortex):
     def ref(self, isa, os):
-        self.simpoint = 1024*100E6
-        self.args = [ '%s2.raw' % self.endian ]
-        self.output = 'vortex2.out'
+        self.simpoint = 1024 * 100e6
+        self.args = ["%s2.raw" % self.endian]
+        self.output = "vortex2.out"
+
 
 class vortex3(vortex):
     def ref(self, isa, os):
-        self.simpoint = 564*100E6
-        self.args = [ '%s3.raw' % self.endian ]
-        self.output = 'vortex3.out'
+        self.simpoint = 564 * 100e6
+        self.args = ["%s3.raw" % self.endian]
+        self.output = "vortex3.out"
+
 
 class vpr(MinneDefaultBenchmark):
-    name = 'vpr'
+    name = "vpr"
     number = 175
-    lang = 'C'
+    lang = "C"
+
 
 # not sure about vpr minnespec place.in
 class vpr_place(vpr):
-    args = [ 'net.in', 'arch.in', 'place.out', 'dum.out', '-nodisp',
-             '-place_only', '-init_t', '5', '-exit_t', '0.005',
-             '-alpha_t', '0.9412', '-inner_num', '2' ]
-    output = 'place_log.out'
+    args = [
+        "net.in",
+        "arch.in",
+        "place.out",
+        "dum.out",
+        "-nodisp",
+        "-place_only",
+        "-init_t",
+        "5",
+        "-exit_t",
+        "0.005",
+        "-alpha_t",
+        "0.9412",
+        "-inner_num",
+        "2",
+    ]
+    output = "place_log.out"
+
 
 class vpr_route(vpr):
-    simpoint = 476*100E6
-    args = [ 'net.in', 'arch.in', 'place.in', 'route.out', '-nodisp',
-             '-route_only', '-route_chan_width', '15',
-             '-pres_fac_mult', '2', '-acc_fac', '1',
-             '-first_iter_pres_fac', '4', '-initial_pres_fac', '8' ]
-    output = 'route_log.out'
+    simpoint = 476 * 100e6
+    args = [
+        "net.in",
+        "arch.in",
+        "place.in",
+        "route.out",
+        "-nodisp",
+        "-route_only",
+        "-route_chan_width",
+        "15",
+        "-pres_fac_mult",
+        "2",
+        "-acc_fac",
+        "1",
+        "-first_iter_pres_fac",
+        "4",
+        "-initial_pres_fac",
+        "8",
+    ]
+    output = "route_log.out"
 
-all = [ ammp, applu, apsi, art, art110, art470, equake, facerec, fma3d, galgel,
-        lucas, mesa, mgrid, sixtrack, swim, wupwise, bzip2_source,
-        bzip2_graphic, bzip2_program, crafty, eon_kajiya, eon_cook,
-        eon_rushmeier, gap, gcc_166, gcc_200, gcc_expr, gcc_integrate,
-        gcc_scilab, gzip_source, gzip_log, gzip_graphic, gzip_random,
-        gzip_program, mcf, parser, perlbmk_diffmail, perlbmk_makerand,
-        perlbmk_perfect, perlbmk_splitmail1, perlbmk_splitmail2,
-        perlbmk_splitmail3, perlbmk_splitmail4, twolf, vortex1, vortex2,
-        vortex3, vpr_place, vpr_route ]
 
-__all__ = [ x.__name__ for x in all ]
+all = [
+    ammp,
+    applu,
+    apsi,
+    art,
+    art110,
+    art470,
+    equake,
+    facerec,
+    fma3d,
+    galgel,
+    lucas,
+    mesa,
+    mgrid,
+    sixtrack,
+    swim,
+    wupwise,
+    bzip2_source,
+    bzip2_graphic,
+    bzip2_program,
+    crafty,
+    eon_kajiya,
+    eon_cook,
+    eon_rushmeier,
+    gap,
+    gcc_166,
+    gcc_200,
+    gcc_expr,
+    gcc_integrate,
+    gcc_scilab,
+    gzip_source,
+    gzip_log,
+    gzip_graphic,
+    gzip_random,
+    gzip_program,
+    mcf,
+    parser,
+    perlbmk_diffmail,
+    perlbmk_makerand,
+    perlbmk_perfect,
+    perlbmk_splitmail1,
+    perlbmk_splitmail2,
+    perlbmk_splitmail3,
+    perlbmk_splitmail4,
+    twolf,
+    vortex1,
+    vortex2,
+    vortex3,
+    vpr_place,
+    vpr_route,
+]
 
-if __name__ == '__main__':
+__all__ = [x.__name__ for x in all]
+
+if __name__ == "__main__":
     from pprint import pprint
+
     for bench in all:
-        for input_set in 'ref', 'test', 'train':
-            print('class: %s' % bench.__name__)
-            x = bench('x86', 'linux', input_set)
-            print('%s: %s' % (x, input_set))
+        for input_set in "ref", "test", "train":
+            print("class: %s" % bench.__name__)
+            x = bench("x86", "linux", input_set)
+            print("%s: %s" % (x, input_set))
             pprint(x.makeProcessArgs())
             print()
diff --git a/configs/dist/sw.py b/configs/dist/sw.py
index 41edf9e..7267357 100644
--- a/configs/dist/sw.py
+++ b/configs/dist/sw.py
@@ -35,33 +35,39 @@
 from m5.objects import *
 from m5.util import addToPath, fatal
 
-addToPath('../')
+addToPath("../")
 
 from common import Simulation
 from common import Options
 
+
 def build_switch(args):
     # instantiate an EtherSwitch
     switch = EtherSwitch()
     # instantiate distEtherLinks to connect switch ports
     # to other gem5 instances
-    switch.portlink = [DistEtherLink(speed = args.ethernet_linkspeed,
-                                      delay = args.ethernet_linkdelay,
-                                      dist_rank = args.dist_rank,
-                                      dist_size = args.dist_size,
-                                      server_name = args.dist_server_name,
-                                      server_port = args.dist_server_port,
-                                      sync_start = args.dist_sync_start,
-                                      sync_repeat = args.dist_sync_repeat,
-                                      is_switch = True,
-                                      num_nodes = args.dist_size)
-                       for i in range(args.dist_size)]
+    switch.portlink = [
+        DistEtherLink(
+            speed=args.ethernet_linkspeed,
+            delay=args.ethernet_linkdelay,
+            dist_rank=args.dist_rank,
+            dist_size=args.dist_size,
+            server_name=args.dist_server_name,
+            server_port=args.dist_server_port,
+            sync_start=args.dist_sync_start,
+            sync_repeat=args.dist_sync_repeat,
+            is_switch=True,
+            num_nodes=args.dist_size,
+        )
+        for i in range(args.dist_size)
+    ]
 
     for (i, link) in enumerate(switch.portlink):
         link.int0 = switch.interface[i]
 
     return switch
 
+
 def main():
     # Add options
     parser = argparse.ArgumentParser()
@@ -70,8 +76,9 @@
     args = parser.parse_args()
 
     system = build_switch(args)
-    root = Root(full_system = True, system = system)
+    root = Root(full_system=True, system=system)
     Simulation.run(args, root, None, None)
 
+
 if __name__ == "__m5_main__":
     main()
diff --git a/configs/dram/lat_mem_rd.py b/configs/dram/lat_mem_rd.py
index d69457d..74a9499 100644
--- a/configs/dram/lat_mem_rd.py
+++ b/configs/dram/lat_mem_rd.py
@@ -42,11 +42,11 @@
 from m5.util import addToPath
 from m5.stats import periodicStatDump
 
-addToPath('../')
+addToPath("../")
 from common import ObjectList
 from common import MemConfig
 
-addToPath('../../util')
+addToPath("../../util")
 import protolib
 
 # this script is helpful to observe the memory latency for various
@@ -61,8 +61,15 @@
 except:
     print("Did not find packet proto definitions, attempting to generate")
     from subprocess import call
-    error = call(['protoc', '--python_out=configs/dram',
-                  '--proto_path=src/proto', 'src/proto/packet.proto'])
+
+    error = call(
+        [
+            "protoc",
+            "--python_out=configs/dram",
+            "--proto_path=src/proto",
+            "src/proto/packet.proto",
+        ]
+    )
     if not error:
         print("Generated packet proto definitions")
 
@@ -79,24 +86,34 @@
 
 parser = argparse.ArgumentParser()
 
-parser.add_argument("--mem-type", default="DDR3_1600_8x8",
-                    choices=ObjectList.mem_list.get_names(),
-                    help = "type of memory to use")
-parser.add_argument("--mem-size", action="store", type=str,
-                    default="16MB",
-                    help="Specify the memory size")
-parser.add_argument("--reuse-trace", action="store_true",
-                    help="Prevent generation of traces and reuse existing")
+parser.add_argument(
+    "--mem-type",
+    default="DDR3_1600_8x8",
+    choices=ObjectList.mem_list.get_names(),
+    help="type of memory to use",
+)
+parser.add_argument(
+    "--mem-size",
+    action="store",
+    type=str,
+    default="16MB",
+    help="Specify the memory size",
+)
+parser.add_argument(
+    "--reuse-trace",
+    action="store_true",
+    help="Prevent generation of traces and reuse existing",
+)
 
 args = parser.parse_args()
 
 # start by creating the system itself, using a multi-layer 2.0 GHz
 # crossbar, delivering 64 bytes / 3 cycles (one header cycle) which
 # amounts to 42.7 GByte/s per layer and thus per port
-system = System(membus = SystemXBar(width = 32))
-system.clk_domain = SrcClockDomain(clock = '2.0GHz',
-                                   voltage_domain =
-                                   VoltageDomain(voltage = '1V'))
+system = System(membus=SystemXBar(width=32))
+system.clk_domain = SrcClockDomain(
+    clock="2.0GHz", voltage_domain=VoltageDomain(voltage="1V")
+)
 
 mem_range = AddrRange(args.mem_size)
 system.mem_ranges = [mem_range]
@@ -122,12 +139,12 @@
     if isinstance(ctrl, m5.objects.MemCtrl):
         # make the DRAM refresh interval sufficiently infinite to avoid
         # latency spikes
-        ctrl.tREFI = '100s'
+        ctrl.tREFI = "100s"
 
 # use the same concept as the utilisation sweep, and print the config
 # so that we can later read it in
 cfg_file_name = os.path.join(m5.options.outdir, "lat_mem_rd.cfg")
-cfg_file = open(cfg_file_name, 'w')
+cfg_file = open(cfg_file_name, "w")
 
 # set an appropriate burst length in bytes
 burst_size = 64
@@ -137,6 +154,7 @@
 def is_pow2(num):
     return num != 0 and ((num & (num - 1)) == 0)
 
+
 # assume we start every range at 0
 max_range = int(mem_range.end)
 
@@ -164,7 +182,7 @@
 # the actual measurement
 def create_trace(filename, max_addr, burst_size, itt):
     try:
-        proto_out = gzip.open(filename, 'wb')
+        proto_out = gzip.open(filename, "wb")
     except IOError:
         print("Failed to open ", filename, " for writing")
         exit(-1)
@@ -184,6 +202,7 @@
     addrs = list(range(0, max_addr, burst_size))
 
     import random
+
     random.shuffle(addrs)
 
     tick = 0
@@ -202,6 +221,7 @@
 
     proto_out.close()
 
+
 # this will take a while, so keep the user informed
 print("Generating traces, please wait...")
 
@@ -211,22 +231,23 @@
 
 # now we create the states for each range
 for r in ranges:
-    filename = os.path.join(m5.options.outdir,
-                            'lat_mem_rd%d.trc.gz' % nxt_range)
+    filename = os.path.join(
+        m5.options.outdir, "lat_mem_rd%d.trc.gz" % nxt_range
+    )
 
     if not args.reuse_trace:
         # create the actual random trace for this range
         create_trace(filename, r, burst_size, itt)
 
     # the warming state
-    cfg_file.write("STATE %d %d TRACE %s 0\n" %
-                   (nxt_state, period, filename))
+    cfg_file.write("STATE %d %d TRACE %s 0\n" % (nxt_state, period, filename))
     nxt_state = nxt_state + 1
 
     # the measuring states
     for i in range(iterations):
-        cfg_file.write("STATE %d %d TRACE %s 0\n" %
-                       (nxt_state, period, filename))
+        cfg_file.write(
+            "STATE %d %d TRACE %s 0\n" % (nxt_state, period, filename)
+        )
         nxt_state = nxt_state + 1
 
     nxt_range = nxt_range + 1
@@ -242,8 +263,7 @@
 cfg_file.close()
 
 # create a traffic generator, and point it to the file we just created
-system.tgen = TrafficGen(config_file = cfg_file_name,
-                         progress_check = '10s')
+system.tgen = TrafficGen(config_file=cfg_file_name, progress_check="10s")
 
 # add a communication monitor
 system.monitor = CommMonitor()
@@ -267,19 +287,20 @@
     tgts_per_mshr = 12
     write_buffers = 16
 
+
 # note that everything is in the same clock domain, 2.0 GHz as
 # specified above
-system.l1cache = L1_DCache(size = '64kB')
+system.l1cache = L1_DCache(size="64kB")
 system.monitor.mem_side_port = system.l1cache.cpu_side
 
-system.l2cache = L2Cache(size = '512kB', writeback_clean = True)
+system.l2cache = L2Cache(size="512kB", writeback_clean=True)
 system.l2cache.xbar = L2XBar()
 system.l1cache.mem_side = system.l2cache.xbar.cpu_side_ports
 system.l2cache.cpu_side = system.l2cache.xbar.mem_side_ports
 
 # make the L3 mostly exclusive, and correspondingly ensure that the L2
 # writes back also clean lines to the L3
-system.l3cache = L3Cache(size = '4MB', clusivity = 'mostly_excl')
+system.l3cache = L3Cache(size="4MB", clusivity="mostly_excl")
 system.l3cache.xbar = L2XBar()
 system.l2cache.mem_side = system.l3cache.xbar.cpu_side_ports
 system.l3cache.cpu_side = system.l3cache.xbar.mem_side_ports
@@ -292,8 +313,8 @@
 periodicStatDump(period)
 
 # run Forrest, run!
-root = Root(full_system = False, system = system)
-root.system.mem_mode = 'timing'
+root = Root(full_system=False, system=system)
+root.system.mem_mode = "timing"
 
 m5.instantiate()
 m5.simulate(nxt_state * period)
diff --git a/configs/dram/low_power_sweep.py b/configs/dram/low_power_sweep.py
index 5147007..7f8591b 100644
--- a/configs/dram/low_power_sweep.py
+++ b/configs/dram/low_power_sweep.py
@@ -40,7 +40,7 @@
 from m5.util import addToPath
 from m5.stats import periodicStatDump
 
-addToPath('../')
+addToPath("../")
 
 from common import ObjectList
 from common import MemConfig
@@ -52,46 +52,70 @@
 # through an idle state with no requests to enforce self-refresh.
 
 parser = argparse.ArgumentParser(
-  formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    formatter_class=argparse.ArgumentDefaultsHelpFormatter
+)
 
 # Use a single-channel DDR4-2400 in 16x4 configuration by default
-parser.add_argument("--mem-type", default="DDR4_2400_16x4",
-                    choices=ObjectList.mem_list.get_names(),
-                    help = "type of memory to use")
+parser.add_argument(
+    "--mem-type",
+    default="DDR4_2400_16x4",
+    choices=ObjectList.mem_list.get_names(),
+    help="type of memory to use",
+)
 
-parser.add_argument("--mem-ranks", "-r", type=int, default=1,
-                    help = "Number of ranks to iterate across")
+parser.add_argument(
+    "--mem-ranks",
+    "-r",
+    type=int,
+    default=1,
+    help="Number of ranks to iterate across",
+)
 
-parser.add_argument("--page-policy", "-p",
-                    choices=["close_adaptive", "open_adaptive"],
-                    default="close_adaptive", help="controller page policy")
+parser.add_argument(
+    "--page-policy",
+    "-p",
+    choices=["close_adaptive", "open_adaptive"],
+    default="close_adaptive",
+    help="controller page policy",
+)
 
-parser.add_argument("--itt-list", "-t", default="1 20 100",
-                    help="a list of multipliers for the max value of itt, " \
-                    "e.g. \"1 20 100\"")
+parser.add_argument(
+    "--itt-list",
+    "-t",
+    default="1 20 100",
+    help="a list of multipliers for the max value of itt, " 'e.g. "1 20 100"',
+)
 
-parser.add_argument("--rd-perc", type=int, default=100,
-                    help = "Percentage of read commands")
+parser.add_argument(
+    "--rd-perc", type=int, default=100, help="Percentage of read commands"
+)
 
-parser.add_argument("--addr-map",
-                    choices=m5.objects.AddrMap.vals,
-                    default="RoRaBaCoCh", help = "DRAM address map policy")
+parser.add_argument(
+    "--addr-map",
+    choices=m5.objects.AddrMap.vals,
+    default="RoRaBaCoCh",
+    help="DRAM address map policy",
+)
 
-parser.add_argument("--idle-end", type=int, default=50000000,
-                    help = "time in ps of an idle period at the end ")
+parser.add_argument(
+    "--idle-end",
+    type=int,
+    default=50000000,
+    help="time in ps of an idle period at the end ",
+)
 
 args = parser.parse_args()
 
 # Start with the system itself, using a multi-layer 2.0 GHz
 # crossbar, delivering 64 bytes / 3 cycles (one header cycle)
 # which amounts to 42.7 GByte/s per layer and thus per port.
-system = System(membus = IOXBar(width = 32))
-system.clk_domain = SrcClockDomain(clock = '2.0GHz',
-                                   voltage_domain =
-                                   VoltageDomain(voltage = '1V'))
+system = System(membus=IOXBar(width=32))
+system.clk_domain = SrcClockDomain(
+    clock="2.0GHz", voltage_domain=VoltageDomain(voltage="1V")
+)
 
 # We are fine with 256 MB memory for now.
-mem_range = AddrRange('256MB')
+mem_range = AddrRange("256MB")
 # Start address is 0
 system.mem_ranges = [mem_range]
 
@@ -130,20 +154,27 @@
 
 # We specify the states in a config file input to the traffic generator.
 cfg_file_name = "lowp_sweep.cfg"
-cfg_file_path = os.path.dirname(__file__) + "/" +cfg_file_name
-cfg_file = open(cfg_file_path, 'w')
+cfg_file_path = os.path.dirname(__file__) + "/" + cfg_file_name
+cfg_file = open(cfg_file_path, "w")
 
 # Get the number of banks
 nbr_banks = int(system.mem_ctrls[0].dram.banks_per_rank.value)
 
 # determine the burst size in bytes
-burst_size = int((system.mem_ctrls[0].dram.devices_per_rank.value *
-                  system.mem_ctrls[0].dram.device_bus_width.value *
-                  system.mem_ctrls[0].dram.burst_length.value) / 8)
+burst_size = int(
+    (
+        system.mem_ctrls[0].dram.devices_per_rank.value
+        * system.mem_ctrls[0].dram.device_bus_width.value
+        * system.mem_ctrls[0].dram.burst_length.value
+    )
+    / 8
+)
 
 # next, get the page size in bytes (the rowbuffer size is already in bytes)
-page_size = system.mem_ctrls[0].dram.devices_per_rank.value * \
-    system.mem_ctrls[0].dram.device_rowbuffer_size.value
+page_size = (
+    system.mem_ctrls[0].dram.devices_per_rank.value
+    * system.mem_ctrls[0].dram.device_rowbuffer_size.value
+)
 
 # Inter-request delay should be such that we can hit as many transitions
 # to/from low power states as possible to. We provide a min and max itt to the
@@ -151,23 +182,25 @@
 # seconds and we need it in ticks (ps).
 itt_min = system.mem_ctrls[0].dram.tBURST.value * 1000000000000
 
-#The itt value when set to (tRAS + tRP + tCK) covers the case where
+# The itt value when set to (tRAS + tRP + tCK) covers the case where
 # a read command is delayed beyond the delay from ACT to PRE_PDN entry of the
 # previous command. For write command followed by precharge, this delay
 # between a write and power down entry will be tRCD + tCL + tWR + tRP + tCK.
 # As we use this delay as a unit and create multiples of it as bigger delays
 # for the sweep, this parameter works for reads, writes and mix of them.
-pd_entry_time = (system.mem_ctrls[0].dram.tRAS.value +
-                 system.mem_ctrls[0].dram.tRP.value +
-                 system.mem_ctrls[0].dram.tCK.value) * 1000000000000
+pd_entry_time = (
+    system.mem_ctrls[0].dram.tRAS.value
+    + system.mem_ctrls[0].dram.tRP.value
+    + system.mem_ctrls[0].dram.tCK.value
+) * 1000000000000
 
 # We sweep itt max using the multipliers specified by the user.
 itt_max_str = args.itt_list.strip().split()
-itt_max_multiples = [ int(x) for x in itt_max_str ]
+itt_max_multiples = [int(x) for x in itt_max_str]
 if len(itt_max_multiples) == 0:
     fatal("String for itt-max-list detected empty\n")
 
-itt_max_values = [ pd_entry_time * m for m in itt_max_multiples ]
+itt_max_values = [pd_entry_time * m for m in itt_max_multiples]
 
 # Generate request addresses in the entire range, assume we start at 0
 max_addr = mem_range.end
@@ -180,12 +213,14 @@
 
 # be selective about bank utilization instead of going from 1 to the number of
 # banks
-bank_util_values = [1, int(nbr_banks/2), nbr_banks]
+bank_util_values = [1, int(nbr_banks / 2), nbr_banks]
 
 # Next we create the config file, but first a comment
-cfg_file.write("""# STATE state# period mode=DRAM
+cfg_file.write(
+    """# STATE state# period mode=DRAM
 # read_percent start_addr end_addr req_size min_itt max_itt data_limit
-# stride_size page_size #banks #banks_util addr_map #ranks\n""")
+# stride_size page_size #banks #banks_util addr_map #ranks\n"""
+)
 
 addr_map = m5.objects.AddrMap.map[args.addr_map]
 
@@ -193,12 +228,27 @@
 for itt_max in itt_max_values:
     for bank in bank_util_values:
         for stride_size in stride_values:
-            cfg_file.write("STATE %d %d %s %d 0 %d %d "
-                           "%d %d %d %d %d %d %d %d %d\n" %
-                           (nxt_state, period, "DRAM", args.rd_perc, max_addr,
-                            burst_size, itt_min, itt_max, 0, stride_size,
-                            page_size, nbr_banks, bank, addr_map,
-                            args.mem_ranks))
+            cfg_file.write(
+                "STATE %d %d %s %d 0 %d %d "
+                "%d %d %d %d %d %d %d %d %d\n"
+                % (
+                    nxt_state,
+                    period,
+                    "DRAM",
+                    args.rd_perc,
+                    max_addr,
+                    burst_size,
+                    itt_min,
+                    itt_max,
+                    0,
+                    stride_size,
+                    page_size,
+                    nbr_banks,
+                    bank,
+                    addr_map,
+                    args.mem_ranks,
+                )
+            )
             nxt_state = nxt_state + 1
 
 # State for idle period
@@ -217,7 +267,7 @@
 cfg_file.close()
 
 # create a traffic generator, and point it to the file we just created
-system.tgen = TrafficGen(config_file = cfg_file_path)
+system.tgen = TrafficGen(config_file=cfg_file_path)
 
 # add a communication monitor
 system.monitor = CommMonitor()
@@ -232,8 +282,8 @@
 # every period, dump and reset all stats
 periodicStatDump(period)
 
-root = Root(full_system = False, system = system)
-root.system.mem_mode = 'timing'
+root = Root(full_system=False, system=system)
+root.system.mem_mode = "timing"
 
 m5.instantiate()
 
@@ -242,8 +292,10 @@
 m5.simulate(nxt_state * period + idle_period)
 print("--- Done DRAM low power sweep ---")
 print("Fixed params - ")
-print("\tburst: %d, banks: %d, max stride: %d, itt min: %s ns" %  \
-  (burst_size, nbr_banks, max_stride, itt_min))
+print(
+    "\tburst: %d, banks: %d, max stride: %d, itt min: %s ns"
+    % (burst_size, nbr_banks, max_stride, itt_min)
+)
 print("Swept params - ")
 print("\titt max multiples input:", itt_max_multiples)
 print("\titt max values", itt_max_values)
diff --git a/configs/dram/sweep.py b/configs/dram/sweep.py
index 0205f0d..ca7b70d 100644
--- a/configs/dram/sweep.py
+++ b/configs/dram/sweep.py
@@ -41,7 +41,7 @@
 from m5.util import addToPath
 from m5.stats import periodicStatDump
 
-addToPath('../')
+addToPath("../")
 
 from common import ObjectList
 from common import MemConfig
@@ -54,29 +54,44 @@
 parser = argparse.ArgumentParser()
 
 dram_generators = {
-    "DRAM" : lambda x: x.createDram,
-    "DRAM_ROTATE" : lambda x: x.createDramRot,
+    "DRAM": lambda x: x.createDram,
+    "DRAM_ROTATE": lambda x: x.createDramRot,
 }
 
 # Use a single-channel DDR3-1600 x64 (8x8 topology) by default
-parser.add_argument("--mem-type", default="DDR3_1600_8x8",
-                    choices=ObjectList.mem_list.get_names(),
-                    help = "type of memory to use")
+parser.add_argument(
+    "--mem-type",
+    default="DDR3_1600_8x8",
+    choices=ObjectList.mem_list.get_names(),
+    help="type of memory to use",
+)
 
-parser.add_argument("--mem-ranks", "-r", type=int, default=1,
-                    help = "Number of ranks to iterate across")
+parser.add_argument(
+    "--mem-ranks",
+    "-r",
+    type=int,
+    default=1,
+    help="Number of ranks to iterate across",
+)
 
-parser.add_argument("--rd_perc", type=int, default=100,
-                    help = "Percentage of read commands")
+parser.add_argument(
+    "--rd_perc", type=int, default=100, help="Percentage of read commands"
+)
 
-parser.add_argument("--mode", default="DRAM",
-                    choices=list(dram_generators.keys()),
-                    help = "DRAM: Random traffic; \
-                          DRAM_ROTATE: Traffic rotating across banks and ranks")
+parser.add_argument(
+    "--mode",
+    default="DRAM",
+    choices=list(dram_generators.keys()),
+    help="DRAM: Random traffic; \
+                          DRAM_ROTATE: Traffic rotating across banks and ranks",
+)
 
-parser.add_argument("--addr-map",
-                    choices=ObjectList.dram_addr_map_list.get_names(),
-                    default="RoRaBaCoCh", help = "DRAM address map policy")
+parser.add_argument(
+    "--addr-map",
+    choices=ObjectList.dram_addr_map_list.get_names(),
+    default="RoRaBaCoCh",
+    help="DRAM address map policy",
+)
 
 args = parser.parse_args()
 
@@ -86,13 +101,13 @@
 # start with the system itself, using a multi-layer 2.0 GHz
 # crossbar, delivering 64 bytes / 3 cycles (one header cycle)
 # which amounts to 42.7 GByte/s per layer and thus per port
-system = System(membus = IOXBar(width = 32))
-system.clk_domain = SrcClockDomain(clock = '2.0GHz',
-                                   voltage_domain =
-                                   VoltageDomain(voltage = '1V'))
+system = System(membus=IOXBar(width=32))
+system.clk_domain = SrcClockDomain(
+    clock="2.0GHz", voltage_domain=VoltageDomain(voltage="1V")
+)
 
 # we are fine with 256 MB memory for now
-mem_range = AddrRange('256MB')
+mem_range = AddrRange("256MB")
 system.mem_ranges = [mem_range]
 
 # do not worry about reserving space for the backing store
@@ -131,18 +146,31 @@
 nbr_banks = system.mem_ctrls[0].dram.banks_per_rank.value
 
 # determine the burst length in bytes
-burst_size = int((system.mem_ctrls[0].dram.devices_per_rank.value *
-                  system.mem_ctrls[0].dram.device_bus_width.value *
-                  system.mem_ctrls[0].dram.burst_length.value) / 8)
+burst_size = int(
+    (
+        system.mem_ctrls[0].dram.devices_per_rank.value
+        * system.mem_ctrls[0].dram.device_bus_width.value
+        * system.mem_ctrls[0].dram.burst_length.value
+    )
+    / 8
+)
 
 # next, get the page size in bytes
-page_size = system.mem_ctrls[0].dram.devices_per_rank.value * \
-    system.mem_ctrls[0].dram.device_rowbuffer_size.value
+page_size = (
+    system.mem_ctrls[0].dram.devices_per_rank.value
+    * system.mem_ctrls[0].dram.device_rowbuffer_size.value
+)
 
 # match the maximum bandwidth of the memory, the parameter is in seconds
 # and we need it in ticks (ps)
-itt =  getattr(system.mem_ctrls[0].dram.tBURST_MIN, 'value',
-               system.mem_ctrls[0].dram.tBURST.value) * 1000000000000
+itt = (
+    getattr(
+        system.mem_ctrls[0].dram.tBURST_MIN,
+        "value",
+        system.mem_ctrls[0].dram.tBURST.value,
+    )
+    * 1000000000000
+)
 
 # assume we start at 0
 max_addr = mem_range.end
@@ -168,27 +196,43 @@
 periodicStatDump(period)
 
 # run Forrest, run!
-root = Root(full_system = False, system = system)
-root.system.mem_mode = 'timing'
+root = Root(full_system=False, system=system)
+root.system.mem_mode = "timing"
 
 m5.instantiate()
 
+
 def trace():
     addr_map = ObjectList.dram_addr_map_list.get(args.addr_map)
     generator = dram_generators[args.mode](system.tgen)
     for stride_size in range(burst_size, max_stride + 1, burst_size):
         for bank in range(1, nbr_banks + 1):
             num_seq_pkts = int(math.ceil(float(stride_size) / burst_size))
-            yield generator(period,
-                            0, max_addr, burst_size, int(itt), int(itt),
-                            args.rd_perc, 0,
-                            num_seq_pkts, page_size, nbr_banks, bank,
-                            addr_map, args.mem_ranks)
+            yield generator(
+                period,
+                0,
+                max_addr,
+                burst_size,
+                int(itt),
+                int(itt),
+                args.rd_perc,
+                0,
+                num_seq_pkts,
+                page_size,
+                nbr_banks,
+                bank,
+                addr_map,
+                args.mem_ranks,
+            )
     yield system.tgen.createExit(0)
 
+
 system.tgen.start(trace())
 
 m5.simulate()
 
-print("DRAM sweep with burst: %d, banks: %d, max stride: %d, request \
-       generation period: %d" % (burst_size, nbr_banks, max_stride, itt))
+print(
+    "DRAM sweep with burst: %d, banks: %d, max stride: %d, request \
+       generation period: %d"
+    % (burst_size, nbr_banks, max_stride, itt)
+)
diff --git a/configs/example/apu_se.py b/configs/example/apu_se.py
index b5fb9ff..acf527b 100644
--- a/configs/example/apu_se.py
+++ b/configs/example/apu_se.py
@@ -35,8 +35,10 @@
 import m5
 from m5.objects import *
 from m5.util import addToPath
+from gem5.isas import ISA
+from gem5.runtime import get_runtime_isa
 
-addToPath('../')
+addToPath("../")
 
 from ruby import Ruby
 
@@ -53,143 +55,307 @@
 Options.addCommonOptions(parser)
 Options.addSEOptions(parser)
 
-parser.add_argument("--cpu-only-mode", action="store_true", default=False,
-                    help="APU mode. Used to take care of problems in "
-                    "Ruby.py while running APU protocols")
-parser.add_argument("-u", "--num-compute-units", type=int, default=4,
-                    help="number of GPU compute units"),
-parser.add_argument("--num-cp", type=int, default=0,
-                    help="Number of GPU Command Processors (CP)")
-parser.add_argument("--benchmark-root",
-                    help="Root of benchmark directory tree")
+parser.add_argument(
+    "--cpu-only-mode",
+    action="store_true",
+    default=False,
+    help="APU mode. Used to take care of problems in "
+    "Ruby.py while running APU protocols",
+)
+parser.add_argument(
+    "-u",
+    "--num-compute-units",
+    type=int,
+    default=4,
+    help="number of GPU compute units",
+),
+parser.add_argument(
+    "--num-cp",
+    type=int,
+    default=0,
+    help="Number of GPU Command Processors (CP)",
+)
+parser.add_argument(
+    "--benchmark-root", help="Root of benchmark directory tree"
+)
 
 # not super important now, but to avoid putting the number 4 everywhere, make
 # it an option/knob
-parser.add_argument("--cu-per-sqc", type=int, default=4, help="number of CUs"
-                    "sharing an SQC (icache, and thus icache TLB)")
-parser.add_argument('--cu-per-scalar-cache', type=int, default=4,
-                    help='Number of CUs sharing a scalar cache')
-parser.add_argument("--simds-per-cu", type=int, default=4, help="SIMD units"
-                    "per CU")
-parser.add_argument('--cu-per-sa', type=int, default=4,
-                    help='Number of CUs per shader array. This must be a '
-                    'multiple of options.cu-per-sqc and options.cu-per-scalar')
-parser.add_argument('--sa-per-complex', type=int, default=1,
-                    help='Number of shader arrays per complex')
-parser.add_argument('--num-gpu-complexes', type=int, default=1,
-                    help='Number of GPU complexes')
-parser.add_argument("--wf-size", type=int, default=64,
-                    help="Wavefront size(in workitems)")
-parser.add_argument("--sp-bypass-path-length", type=int, default=4,
-                    help="Number of stages of bypass path in vector ALU for "
-                    "Single Precision ops")
-parser.add_argument("--dp-bypass-path-length", type=int, default=4,
-                    help="Number of stages of bypass path in vector ALU for "
-                    "Double Precision ops")
+parser.add_argument(
+    "--cu-per-sqc",
+    type=int,
+    default=4,
+    help="number of CUs" "sharing an SQC (icache, and thus icache TLB)",
+)
+parser.add_argument(
+    "--cu-per-scalar-cache",
+    type=int,
+    default=4,
+    help="Number of CUs sharing a scalar cache",
+)
+parser.add_argument(
+    "--simds-per-cu", type=int, default=4, help="SIMD units" "per CU"
+)
+parser.add_argument(
+    "--cu-per-sa",
+    type=int,
+    default=4,
+    help="Number of CUs per shader array. This must be a "
+    "multiple of options.cu-per-sqc and options.cu-per-scalar",
+)
+parser.add_argument(
+    "--sa-per-complex",
+    type=int,
+    default=1,
+    help="Number of shader arrays per complex",
+)
+parser.add_argument(
+    "--num-gpu-complexes", type=int, default=1, help="Number of GPU complexes"
+)
+parser.add_argument(
+    "--wf-size", type=int, default=64, help="Wavefront size(in workitems)"
+)
+parser.add_argument(
+    "--sp-bypass-path-length",
+    type=int,
+    default=4,
+    help="Number of stages of bypass path in vector ALU for "
+    "Single Precision ops",
+)
+parser.add_argument(
+    "--dp-bypass-path-length",
+    type=int,
+    default=4,
+    help="Number of stages of bypass path in vector ALU for "
+    "Double Precision ops",
+)
 # issue period per SIMD unit: number of cycles before issuing another vector
 parser.add_argument(
-    "--issue-period", type=int, default=4,
-    help="Number of cycles per vector instruction issue period")
-parser.add_argument("--glbmem-wr-bus-width", type=int, default=32,
-                    help="VGPR to Coalescer (Global Memory) data bus width "
-                    "in bytes")
-parser.add_argument("--glbmem-rd-bus-width", type=int, default=32,
-                    help="Coalescer to VGPR (Global Memory) data bus width in "
-                    "bytes")
-# Currently we only support 1 local memory pipe
-parser.add_argument("--shr-mem-pipes-per-cu", type=int, default=1,
-                    help="Number of Shared Memory pipelines per CU")
-# Currently we only support 1 global memory pipe
-parser.add_argument("--glb-mem-pipes-per-cu", type=int, default=1,
-                    help="Number of Global Memory pipelines per CU")
-parser.add_argument("--wfs-per-simd", type=int, default=10, help="Number of "
-                    "WF slots per SIMD")
-
-parser.add_argument("--registerManagerPolicy", type=str, default="static",
-                    help="Register manager policy")
-parser.add_argument("--vreg-file-size", type=int, default=2048,
-                    help="number of physical vector registers per SIMD")
-parser.add_argument("--vreg-min-alloc", type=int, default=4,
-                    help="Minimum number of registers that can be allocated "
-                    "from the VRF. The total number of registers will be "
-                    "aligned to this value.")
-
-parser.add_argument("--sreg-file-size", type=int, default=2048,
-                    help="number of physical vector registers per SIMD")
-parser.add_argument("--sreg-min-alloc", type=int, default=4,
-                    help="Minimum number of registers that can be allocated "
-                    "from the SRF. The total number of registers will be "
-                    "aligned to this value.")
-
-parser.add_argument("--bw-scalor", type=int, default=0,
-                    help="bandwidth scalor for scalability analysis")
-parser.add_argument("--CPUClock", type=str, default="2GHz",
-                    help="CPU clock")
-parser.add_argument("--gpu-clock", type=str, default="1GHz",
-                    help="GPU clock")
-parser.add_argument("--cpu-voltage", action="store", type=str,
-                    default='1.0V',
-                    help="""CPU  voltage domain""")
-parser.add_argument("--gpu-voltage", action="store", type=str,
-                    default='1.0V',
-                    help="""CPU  voltage domain""")
-parser.add_argument("--CUExecPolicy", type=str, default="OLDEST-FIRST",
-                    help="WF exec policy (OLDEST-FIRST, ROUND-ROBIN)")
-parser.add_argument("--SegFaultDebug", action="store_true",
-                    help="checks for GPU seg fault before TLB access")
-parser.add_argument("--FunctionalTLB", action="store_true",
-                    help="Assumes TLB has no latency")
-parser.add_argument("--LocalMemBarrier", action="store_true",
-                    help="Barrier does not wait for writethroughs to complete")
+    "--issue-period",
+    type=int,
+    default=4,
+    help="Number of cycles per vector instruction issue period",
+)
 parser.add_argument(
-    "--countPages", action="store_true",
-    help="Count Page Accesses and output in per-CU output files")
-parser.add_argument("--TLB-prefetch", type=int, help="prefetch depth for"
-                    "TLBs")
-parser.add_argument("--pf-type", type=str, help="type of prefetch: "
-                    "PF_CU, PF_WF, PF_PHASE, PF_STRIDE")
-parser.add_argument("--pf-stride", type=int, help="set prefetch stride")
-parser.add_argument("--numLdsBanks", type=int, default=32,
-                    help="number of physical banks per LDS module")
-parser.add_argument("--ldsBankConflictPenalty", type=int, default=1,
-                    help="number of cycles per LDS bank conflict")
-parser.add_argument("--lds-size", type=int, default=65536,
-                    help="Size of the LDS in bytes")
-parser.add_argument('--fast-forward-pseudo-op', action='store_true',
-                    help='fast forward using kvm until the m5_switchcpu'
-                    ' pseudo-op is encountered, then switch cpus. subsequent'
-                    ' m5_switchcpu pseudo-ops will toggle back and forth')
-parser.add_argument("--num-hw-queues", type=int, default=10,
-                    help="number of hw queues in packet processor")
-parser.add_argument("--reg-alloc-policy", type=str, default="dynamic",
-                    help="register allocation policy (simple/dynamic)")
+    "--glbmem-wr-bus-width",
+    type=int,
+    default=32,
+    help="VGPR to Coalescer (Global Memory) data bus width " "in bytes",
+)
+parser.add_argument(
+    "--glbmem-rd-bus-width",
+    type=int,
+    default=32,
+    help="Coalescer to VGPR (Global Memory) data bus width in " "bytes",
+)
+# Currently we only support 1 local memory pipe
+parser.add_argument(
+    "--shr-mem-pipes-per-cu",
+    type=int,
+    default=1,
+    help="Number of Shared Memory pipelines per CU",
+)
+# Currently we only support 1 global memory pipe
+parser.add_argument(
+    "--glb-mem-pipes-per-cu",
+    type=int,
+    default=1,
+    help="Number of Global Memory pipelines per CU",
+)
+parser.add_argument(
+    "--wfs-per-simd",
+    type=int,
+    default=10,
+    help="Number of " "WF slots per SIMD",
+)
 
-parser.add_argument("--dgpu", action="store_true", default=False,
-                    help="Configure the system as a dGPU instead of an APU. "
-                    "The dGPU config has its own local memory pool and is not "
-                    "coherent with the host through hardware.  Data is "
-                    "transfered from host to device memory using runtime calls "
-                    "that copy data over a PCIe-like IO bus.")
+parser.add_argument(
+    "--registerManagerPolicy",
+    type=str,
+    default="static",
+    help="Register manager policy",
+)
+parser.add_argument(
+    "--vreg-file-size",
+    type=int,
+    default=2048,
+    help="number of physical vector registers per SIMD",
+)
+parser.add_argument(
+    "--vreg-min-alloc",
+    type=int,
+    default=4,
+    help="Minimum number of registers that can be allocated "
+    "from the VRF. The total number of registers will be "
+    "aligned to this value.",
+)
+
+parser.add_argument(
+    "--sreg-file-size",
+    type=int,
+    default=2048,
+    help="number of physical vector registers per SIMD",
+)
+parser.add_argument(
+    "--sreg-min-alloc",
+    type=int,
+    default=4,
+    help="Minimum number of registers that can be allocated "
+    "from the SRF. The total number of registers will be "
+    "aligned to this value.",
+)
+
+parser.add_argument(
+    "--bw-scalor",
+    type=int,
+    default=0,
+    help="bandwidth scalor for scalability analysis",
+)
+parser.add_argument("--CPUClock", type=str, default="2GHz", help="CPU clock")
+parser.add_argument("--gpu-clock", type=str, default="1GHz", help="GPU clock")
+parser.add_argument(
+    "--cpu-voltage",
+    action="store",
+    type=str,
+    default="1.0V",
+    help="""CPU  voltage domain""",
+)
+parser.add_argument(
+    "--gpu-voltage",
+    action="store",
+    type=str,
+    default="1.0V",
+    help="""CPU  voltage domain""",
+)
+parser.add_argument(
+    "--CUExecPolicy",
+    type=str,
+    default="OLDEST-FIRST",
+    help="WF exec policy (OLDEST-FIRST, ROUND-ROBIN)",
+)
+parser.add_argument(
+    "--SegFaultDebug",
+    action="store_true",
+    help="checks for GPU seg fault before TLB access",
+)
+parser.add_argument(
+    "--FunctionalTLB", action="store_true", help="Assumes TLB has no latency"
+)
+parser.add_argument(
+    "--LocalMemBarrier",
+    action="store_true",
+    help="Barrier does not wait for writethroughs to complete",
+)
+parser.add_argument(
+    "--countPages",
+    action="store_true",
+    help="Count Page Accesses and output in per-CU output files",
+)
+parser.add_argument(
+    "--max-cu-tokens",
+    type=int,
+    default=4,
+    help="Number of coalescer tokens per CU",
+)
+parser.add_argument(
+    "--vrf_lm_bus_latency",
+    type=int,
+    default=1,
+    help="Latency while accessing shared memory",
+)
+parser.add_argument(
+    "--mem-req-latency",
+    type=int,
+    default=50,
+    help="Latency for requests from the cu to ruby.",
+)
+parser.add_argument(
+    "--mem-resp-latency",
+    type=int,
+    default=50,
+    help="Latency for responses from ruby to the cu.",
+)
+parser.add_argument(
+    "--TLB-prefetch", type=int, help="prefetch depth for" "TLBs"
+)
+parser.add_argument(
+    "--pf-type",
+    type=str,
+    help="type of prefetch: " "PF_CU, PF_WF, PF_PHASE, PF_STRIDE",
+)
+parser.add_argument("--pf-stride", type=int, help="set prefetch stride")
+parser.add_argument(
+    "--numLdsBanks",
+    type=int,
+    default=32,
+    help="number of physical banks per LDS module",
+)
+parser.add_argument(
+    "--ldsBankConflictPenalty",
+    type=int,
+    default=1,
+    help="number of cycles per LDS bank conflict",
+)
+parser.add_argument(
+    "--lds-size", type=int, default=65536, help="Size of the LDS in bytes"
+)
+parser.add_argument(
+    "--fast-forward-pseudo-op",
+    action="store_true",
+    help="fast forward using kvm until the m5_switchcpu"
+    " pseudo-op is encountered, then switch cpus. subsequent"
+    " m5_switchcpu pseudo-ops will toggle back and forth",
+)
+parser.add_argument(
+    "--num-hw-queues",
+    type=int,
+    default=10,
+    help="number of hw queues in packet processor",
+)
+parser.add_argument(
+    "--reg-alloc-policy",
+    type=str,
+    default="dynamic",
+    help="register allocation policy (simple/dynamic)",
+)
+
+parser.add_argument(
+    "--dgpu",
+    action="store_true",
+    default=False,
+    help="Configure the system as a dGPU instead of an APU. "
+    "The dGPU config has its own local memory pool and is not "
+    "coherent with the host through hardware.  Data is "
+    "transfered from host to device memory using runtime calls "
+    "that copy data over a PCIe-like IO bus.",
+)
 
 # Mtype option
-#--     1   1   1   C_RW_S  (Cached-ReadWrite-Shared)
-#--     1   1   0   C_RW_US (Cached-ReadWrite-Unshared)
-#--     1   0   1   C_RO_S  (Cached-ReadOnly-Shared)
-#--     1   0   0   C_RO_US (Cached-ReadOnly-Unshared)
-#--     0   1   x   UC_L2   (Uncached_GL2)
-#--     0   0   x   UC_All  (Uncached_All_Load)
+# --     1   1   1   C_RW_S  (Cached-ReadWrite-Shared)
+# --     1   1   0   C_RW_US (Cached-ReadWrite-Unshared)
+# --     1   0   1   C_RO_S  (Cached-ReadOnly-Shared)
+# --     1   0   0   C_RO_US (Cached-ReadOnly-Unshared)
+# --     0   1   x   UC_L2   (Uncached_GL2)
+# --     0   0   x   UC_All  (Uncached_All_Load)
 # default value: 5/C_RO_S (only allow caching in GL2 for read. Shared)
-parser.add_argument("--m-type", type=int, default=5,
-                    help="Default Mtype for GPU memory accesses.  This is the "
-                    "value used for all memory accesses on an APU and is the "
-                    "default mode for dGPU unless explicitly overwritten by "
-                    "the driver on a per-page basis.  Valid values are "
-                    "between 0-7")
+parser.add_argument(
+    "--m-type",
+    type=int,
+    default=5,
+    help="Default Mtype for GPU memory accesses.  This is the "
+    "value used for all memory accesses on an APU and is the "
+    "default mode for dGPU unless explicitly overwritten by "
+    "the driver on a per-page basis.  Valid values are "
+    "between 0-7",
+)
 
-parser.add_argument("--gfx-version", type=str, default='gfx801',
-                    choices=GfxVersion.vals,
-                    help="Gfx version for gpu"
-                    "Note: gfx902 is not fully supported by ROCm")
+parser.add_argument(
+    "--gfx-version",
+    type=str,
+    default="gfx801",
+    choices=GfxVersion.vals,
+    help="Gfx version for gpu" "Note: gfx902 is not fully supported by ROCm",
+)
 
 Ruby.define_options(parser)
 
@@ -206,21 +372,20 @@
     benchmark_path = [args.benchmark_root]
 else:
     # Set default benchmark search path to current dir
-    benchmark_path = ['.']
+    benchmark_path = ["."]
 
 ########################## Sanity Check ########################
 
 # Currently the gpu model requires ruby
-if buildEnv['PROTOCOL'] == 'None':
+if buildEnv["PROTOCOL"] == "None":
     fatal("GPU model requires ruby")
 
 # Currently the gpu model requires only timing or detailed CPU
-if not (args.cpu_type == "TimingSimpleCPU" or
-   args.cpu_type == "DerivO3CPU"):
+if not (args.cpu_type == "TimingSimpleCPU" or args.cpu_type == "DerivO3CPU"):
     fatal("GPU model requires TimingSimpleCPU or DerivO3CPU")
 
 # This file can support multiple compute units
-assert(args.num_compute_units >= 1)
+assert args.num_compute_units >= 1
 
 # Currently, the sqc (I-Cache of GPU) is shared by
 # multiple compute units(CUs). The protocol works just fine
@@ -229,20 +394,28 @@
 # sharing sqc is the common usage)
 n_cu = args.num_compute_units
 num_sqc = int(math.ceil(float(n_cu) / args.cu_per_sqc))
-args.num_sqc = num_sqc # pass this to Ruby
+args.num_sqc = num_sqc  # pass this to Ruby
 num_scalar_cache = int(math.ceil(float(n_cu) / args.cu_per_scalar_cache))
 args.num_scalar_cache = num_scalar_cache
 
-print('Num SQC = ', num_sqc, 'Num scalar caches = ', num_scalar_cache,
-      'Num CU = ', n_cu)
+print(
+    "Num SQC = ",
+    num_sqc,
+    "Num scalar caches = ",
+    num_scalar_cache,
+    "Num CU = ",
+    n_cu,
+)
 
 ########################## Creating the GPU system ########################
 # shader is the GPU
-shader = Shader(n_wf = args.wfs_per_simd,
-                clk_domain = SrcClockDomain(
-                    clock = args.gpu_clock,
-                    voltage_domain = VoltageDomain(
-                        voltage = args.gpu_voltage)))
+shader = Shader(
+    n_wf=args.wfs_per_simd,
+    clk_domain=SrcClockDomain(
+        clock=args.gpu_clock,
+        voltage_domain=VoltageDomain(voltage=args.gpu_voltage),
+    ),
+)
 
 # VIPER GPU protocol implements release consistency at GPU side. So,
 # we make their writes visible to the global memory and should read
@@ -252,7 +425,7 @@
 # means pipeline initiates a acquire/release operation at kernel launch/end.
 # VIPER protocol is write-through based, and thus only impl_kern_launch_acq
 # needs to set.
-if (buildEnv['PROTOCOL'] == 'GPU_VIPER'):
+if buildEnv["PROTOCOL"] == "GPU_VIPER":
     shader.impl_kern_launch_acq = True
     shader.impl_kern_end_rel = False
 else:
@@ -267,33 +440,36 @@
 # List of compute units; one GPU can have multiple compute units
 compute_units = []
 for i in range(n_cu):
-    compute_units.append(ComputeUnit(cu_id = i, perLaneTLB = per_lane,
-                                     num_SIMDs = args.simds_per_cu,
-                                     wf_size = args.wf_size,
-                                     spbypass_pipe_length = \
-                                     args.sp_bypass_path_length,
-                                     dpbypass_pipe_length = \
-                                     args.dp_bypass_path_length,
-                                     issue_period = args.issue_period,
-                                     coalescer_to_vrf_bus_width = \
-                                     args.glbmem_rd_bus_width,
-                                     vrf_to_coalescer_bus_width = \
-                                     args.glbmem_wr_bus_width,
-                                     num_global_mem_pipes = \
-                                     args.glb_mem_pipes_per_cu,
-                                     num_shared_mem_pipes = \
-                                     args.shr_mem_pipes_per_cu,
-                                     n_wf = args.wfs_per_simd,
-                                     execPolicy = args.CUExecPolicy,
-                                     debugSegFault = args.SegFaultDebug,
-                                     functionalTLB = args.FunctionalTLB,
-                                     localMemBarrier = args.LocalMemBarrier,
-                                     countPages = args.countPages,
-                                     localDataStore = \
-                                     LdsState(banks = args.numLdsBanks,
-                                              bankConflictPenalty = \
-                                              args.ldsBankConflictPenalty,
-                                              size = args.lds_size)))
+    compute_units.append(
+        ComputeUnit(
+            cu_id=i,
+            perLaneTLB=per_lane,
+            num_SIMDs=args.simds_per_cu,
+            wf_size=args.wf_size,
+            spbypass_pipe_length=args.sp_bypass_path_length,
+            dpbypass_pipe_length=args.dp_bypass_path_length,
+            issue_period=args.issue_period,
+            coalescer_to_vrf_bus_width=args.glbmem_rd_bus_width,
+            vrf_to_coalescer_bus_width=args.glbmem_wr_bus_width,
+            num_global_mem_pipes=args.glb_mem_pipes_per_cu,
+            num_shared_mem_pipes=args.shr_mem_pipes_per_cu,
+            n_wf=args.wfs_per_simd,
+            execPolicy=args.CUExecPolicy,
+            debugSegFault=args.SegFaultDebug,
+            functionalTLB=args.FunctionalTLB,
+            localMemBarrier=args.LocalMemBarrier,
+            countPages=args.countPages,
+            max_cu_tokens=args.max_cu_tokens,
+            vrf_lm_bus_latency=args.vrf_lm_bus_latency,
+            mem_req_latency=args.mem_req_latency,
+            mem_resp_latency=args.mem_resp_latency,
+            localDataStore=LdsState(
+                banks=args.numLdsBanks,
+                bankConflictPenalty=args.ldsBankConflictPenalty,
+                size=args.lds_size,
+            ),
+        )
+    )
     wavefronts = []
     vrfs = []
     vrf_pool_mgrs = []
@@ -301,48 +477,65 @@
     srf_pool_mgrs = []
     for j in range(args.simds_per_cu):
         for k in range(shader.n_wf):
-            wavefronts.append(Wavefront(simdId = j, wf_slot_id = k,
-                                        wf_size = args.wf_size))
+            wavefronts.append(
+                Wavefront(simdId=j, wf_slot_id=k, wf_size=args.wf_size)
+            )
 
         if args.reg_alloc_policy == "simple":
-            vrf_pool_mgrs.append(SimplePoolManager(pool_size = \
-                                               args.vreg_file_size,
-                                               min_alloc = \
-                                               args.vreg_min_alloc))
-            srf_pool_mgrs.append(SimplePoolManager(pool_size = \
-                                               args.sreg_file_size,
-                                               min_alloc = \
-                                               args.vreg_min_alloc))
+            vrf_pool_mgrs.append(
+                SimplePoolManager(
+                    pool_size=args.vreg_file_size,
+                    min_alloc=args.vreg_min_alloc,
+                )
+            )
+            srf_pool_mgrs.append(
+                SimplePoolManager(
+                    pool_size=args.sreg_file_size,
+                    min_alloc=args.vreg_min_alloc,
+                )
+            )
         elif args.reg_alloc_policy == "dynamic":
-            vrf_pool_mgrs.append(DynPoolManager(pool_size = \
-                                               args.vreg_file_size,
-                                               min_alloc = \
-                                               args.vreg_min_alloc))
-            srf_pool_mgrs.append(DynPoolManager(pool_size = \
-                                               args.sreg_file_size,
-                                               min_alloc = \
-                                               args.vreg_min_alloc))
+            vrf_pool_mgrs.append(
+                DynPoolManager(
+                    pool_size=args.vreg_file_size,
+                    min_alloc=args.vreg_min_alloc,
+                )
+            )
+            srf_pool_mgrs.append(
+                DynPoolManager(
+                    pool_size=args.sreg_file_size,
+                    min_alloc=args.vreg_min_alloc,
+                )
+            )
 
-        vrfs.append(VectorRegisterFile(simd_id=j, wf_size=args.wf_size,
-                                       num_regs=args.vreg_file_size))
-        srfs.append(ScalarRegisterFile(simd_id=j, wf_size=args.wf_size,
-                                       num_regs=args.sreg_file_size))
+        vrfs.append(
+            VectorRegisterFile(
+                simd_id=j, wf_size=args.wf_size, num_regs=args.vreg_file_size
+            )
+        )
+        srfs.append(
+            ScalarRegisterFile(
+                simd_id=j, wf_size=args.wf_size, num_regs=args.sreg_file_size
+            )
+        )
 
     compute_units[-1].wavefronts = wavefronts
     compute_units[-1].vector_register_file = vrfs
     compute_units[-1].scalar_register_file = srfs
-    compute_units[-1].register_manager = \
-        RegisterManager(policy=args.registerManagerPolicy,
-                        vrf_pool_managers=vrf_pool_mgrs,
-                        srf_pool_managers=srf_pool_mgrs)
+    compute_units[-1].register_manager = RegisterManager(
+        policy=args.registerManagerPolicy,
+        vrf_pool_managers=vrf_pool_mgrs,
+        srf_pool_managers=srf_pool_mgrs,
+    )
     if args.TLB_prefetch:
         compute_units[-1].prefetch_depth = args.TLB_prefetch
         compute_units[-1].prefetch_prev_type = args.pf_type
 
     # attach the LDS and the CU to the bus (actually a Bridge)
     compute_units[-1].ldsPort = compute_units[-1].ldsBus.cpu_side_port
-    compute_units[-1].ldsBus.mem_side_port = \
-        compute_units[-1].localDataStore.cuPort
+    compute_units[-1].ldsBus.mem_side_port = compute_units[
+        -1
+    ].localDataStore.cuPort
 
 # Attach compute units to GPU
 shader.CUs = compute_units
@@ -361,20 +554,22 @@
 CpuClass, mem_mode = Simulation.getCPUClass(args.cpu_type)
 if CpuClass == AtomicSimpleCPU:
     fatal("AtomicSimpleCPU is not supported")
-if mem_mode != 'timing':
+if mem_mode != "timing":
     fatal("Only the timing memory mode is supported")
 shader.timing = True
 
 if args.fast_forward and args.fast_forward_pseudo_op:
-    fatal("Cannot fast-forward based both on the number of instructions and"
-          " on pseudo-ops")
+    fatal(
+        "Cannot fast-forward based both on the number of instructions and"
+        " on pseudo-ops"
+    )
 fast_forward = args.fast_forward or args.fast_forward_pseudo_op
 
 if fast_forward:
     FutureCpuClass, future_mem_mode = CpuClass, mem_mode
 
     CpuClass = X86KvmCPU
-    mem_mode = 'atomic_noncaching'
+    mem_mode = "atomic_noncaching"
     # Leave shader.timing untouched, because its value only matters at the
     # start of the simulation and because we require switching cpus
     # *before* the first kernel launch.
@@ -383,11 +578,13 @@
 
     # Initial CPUs to be used during fast-forwarding.
     for i in range(args.num_cpus):
-        cpu = CpuClass(cpu_id = i,
-                       clk_domain = SrcClockDomain(
-                           clock = args.CPUClock,
-                           voltage_domain = VoltageDomain(
-                               voltage = args.cpu_voltage)))
+        cpu = CpuClass(
+            cpu_id=i,
+            clk_domain=SrcClockDomain(
+                clock=args.CPUClock,
+                voltage_domain=VoltageDomain(voltage=args.cpu_voltage),
+            ),
+        )
         cpu_list.append(cpu)
 
         if args.fast_forward:
@@ -400,20 +597,24 @@
 
 # CPs to be used throughout the simulation.
 for i in range(args.num_cp):
-    cp = MainCpuClass(cpu_id = args.num_cpus + i,
-                      clk_domain = SrcClockDomain(
-                          clock = args.CPUClock,
-                          voltage_domain = VoltageDomain(
-                              voltage = args.cpu_voltage)))
+    cp = MainCpuClass(
+        cpu_id=args.num_cpus + i,
+        clk_domain=SrcClockDomain(
+            clock=args.CPUClock,
+            voltage_domain=VoltageDomain(voltage=args.cpu_voltage),
+        ),
+    )
     cp_list.append(cp)
 
 # Main CPUs (to be used after fast-forwarding if fast-forwarding is specified).
 for i in range(args.num_cpus):
-    cpu = MainCpuClass(cpu_id = i,
-                       clk_domain = SrcClockDomain(
-                           clock = args.CPUClock,
-                           voltage_domain = VoltageDomain(
-                               voltage = args.cpu_voltage)))
+    cpu = MainCpuClass(
+        cpu_id=i,
+        clk_domain=SrcClockDomain(
+            clock=args.CPUClock,
+            voltage_domain=VoltageDomain(voltage=args.cpu_voltage),
+        ),
+    )
     if fast_forward:
         cpu.switched_out = True
         future_cpu_list.append(cpu)
@@ -434,21 +635,25 @@
 
 # HSA kernel mode driver
 # dGPUPoolID is 0 because we only have one memory pool
-gpu_driver = GPUComputeDriver(filename = "kfd", isdGPU = args.dgpu,
-                              gfxVersion = args.gfx_version,
-                              dGPUPoolID = 0, m_type = args.m_type)
+gpu_driver = GPUComputeDriver(
+    filename="kfd",
+    isdGPU=args.dgpu,
+    gfxVersion=args.gfx_version,
+    dGPUPoolID=0,
+    m_type=args.m_type,
+)
 
 renderDriNum = 128
-render_driver = GPURenderDriver(filename = f'dri/renderD{renderDriNum}')
+render_driver = GPURenderDriver(filename=f"dri/renderD{renderDriNum}")
 
 # Creating the GPU kernel launching components: that is the HSA
 # packet processor (HSAPP), GPU command processor (CP), and the
 # dispatcher.
-gpu_hsapp = HSAPacketProcessor(pioAddr=hsapp_gpu_map_paddr,
-                               numHWQueues=args.num_hw_queues)
+gpu_hsapp = HSAPacketProcessor(
+    pioAddr=hsapp_gpu_map_paddr, numHWQueues=args.num_hw_queues
+)
 dispatcher = GPUDispatcher()
-gpu_cmd_proc = GPUCommandProcessor(hsapp=gpu_hsapp,
-                                   dispatcher=dispatcher)
+gpu_cmd_proc = GPUCommandProcessor(hsapp=gpu_hsapp, dispatcher=dispatcher)
 gpu_driver.device = gpu_cmd_proc
 shader.dispatcher = dispatcher
 shader.gpu_cmd_proc = gpu_cmd_proc
@@ -465,9 +670,11 @@
             return full_path
     fatal("%s not found in %s" % (rel_path, base_list))
 
+
 def find_file(base_list, rel_path):
     return find_path(base_list, rel_path, os.path.isfile)
 
+
 executable = find_path(benchmark_path, args.cmd, os.path.exists)
 # It's common for a benchmark to be in a directory with the same
 # name as the executable, so we handle that automatically
@@ -476,35 +683,43 @@
     executable = find_file(benchmark_path, args.cmd)
 
 if args.env:
-    with open(args.env, 'r') as f:
+    with open(args.env, "r") as f:
         env = [line.rstrip() for line in f]
 else:
-    env = ['LD_LIBRARY_PATH=%s' % ':'.join([
-               os.getenv('ROCM_PATH','/opt/rocm')+'/lib',
-               os.getenv('HCC_HOME','/opt/rocm/hcc')+'/lib',
-               os.getenv('HSA_PATH','/opt/rocm/hsa')+'/lib',
-               os.getenv('HIP_PATH','/opt/rocm/hip')+'/lib',
-               os.getenv('ROCM_PATH','/opt/rocm')+'/libhsakmt/lib',
-               os.getenv('ROCM_PATH','/opt/rocm')+'/miopen/lib',
-               os.getenv('ROCM_PATH','/opt/rocm')+'/miopengemm/lib',
-               os.getenv('ROCM_PATH','/opt/rocm')+'/hipblas/lib',
-               os.getenv('ROCM_PATH','/opt/rocm')+'/rocblas/lib',
-               "/usr/lib/x86_64-linux-gnu"
-           ]),
-           'HOME=%s' % os.getenv('HOME','/'),
-           # Disable the VM fault handler signal creation for dGPUs also
-           # forces the use of DefaultSignals instead of driver-controlled
-           # InteruptSignals throughout the runtime.  DefaultSignals poll
-           # on memory in the runtime, while InteruptSignals call into the
-           # driver.
-           "HSA_ENABLE_INTERRUPT=1",
-           # We don't have an SDMA hardware model, so need to fallback to
-           # vector copy kernels for dGPU memcopies to/from host and device.
-           "HSA_ENABLE_SDMA=0"]
+    env = [
+        "LD_LIBRARY_PATH=%s"
+        % ":".join(
+            [
+                os.getenv("ROCM_PATH", "/opt/rocm") + "/lib",
+                os.getenv("HCC_HOME", "/opt/rocm/hcc") + "/lib",
+                os.getenv("HSA_PATH", "/opt/rocm/hsa") + "/lib",
+                os.getenv("HIP_PATH", "/opt/rocm/hip") + "/lib",
+                os.getenv("ROCM_PATH", "/opt/rocm") + "/libhsakmt/lib",
+                os.getenv("ROCM_PATH", "/opt/rocm") + "/miopen/lib",
+                os.getenv("ROCM_PATH", "/opt/rocm") + "/miopengemm/lib",
+                os.getenv("ROCM_PATH", "/opt/rocm") + "/hipblas/lib",
+                os.getenv("ROCM_PATH", "/opt/rocm") + "/rocblas/lib",
+                "/usr/lib/x86_64-linux-gnu",
+            ]
+        ),
+        "HOME=%s" % os.getenv("HOME", "/"),
+        # Disable the VM fault handler signal creation for dGPUs also
+        # forces the use of DefaultSignals instead of driver-controlled
+        # InteruptSignals throughout the runtime.  DefaultSignals poll
+        # on memory in the runtime, while InteruptSignals call into the
+        # driver.
+        "HSA_ENABLE_INTERRUPT=1",
+        # We don't have an SDMA hardware model, so need to fallback to
+        # vector copy kernels for dGPU memcopies to/from host and device.
+        "HSA_ENABLE_SDMA=0",
+    ]
 
-process = Process(executable = executable, cmd = [args.cmd]
-                  + args.options.split(),
-                  drivers = [gpu_driver, render_driver], env = env)
+process = Process(
+    executable=executable,
+    cmd=[args.cmd] + args.options.split(),
+    drivers=[gpu_driver, render_driver],
+    env=env,
+)
 
 for cpu in cpu_list:
     cpu.createThreads()
@@ -521,30 +736,39 @@
 ########################## Create the overall system ########################
 # List of CPUs that must be switched when moving between KVM and simulation
 if fast_forward:
-    switch_cpu_list = \
-        [(cpu_list[i], future_cpu_list[i]) for i in range(args.num_cpus)]
+    switch_cpu_list = [
+        (cpu_list[i], future_cpu_list[i]) for i in range(args.num_cpus)
+    ]
+
+# Other CPU strings cause bad addresses in ROCm. Revert back to M5 Simulator.
+for (i, cpu) in enumerate(cpu_list):
+    for j in range(len(cpu)):
+        cpu.isa[j].vendor_string = "M5 Simulator"
 
 # Full list of processing cores in the system.
 cpu_list = cpu_list + [shader] + cp_list
 
 # creating the overall system
 # notice the cpu list is explicitly added as a parameter to System
-system = System(cpu = cpu_list,
-                mem_ranges = [AddrRange(args.mem_size)],
-                cache_line_size = args.cacheline_size,
-                mem_mode = mem_mode,
-                workload = SEWorkload.init_compatible(executable))
+system = System(
+    cpu=cpu_list,
+    mem_ranges=[AddrRange(args.mem_size)],
+    cache_line_size=args.cacheline_size,
+    mem_mode=mem_mode,
+    workload=SEWorkload.init_compatible(executable),
+)
 if fast_forward:
     system.future_cpu = future_cpu_list
-system.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
-system.clk_domain = SrcClockDomain(clock =  args.sys_clock,
-                                   voltage_domain = system.voltage_domain)
+system.voltage_domain = VoltageDomain(voltage=args.sys_voltage)
+system.clk_domain = SrcClockDomain(
+    clock=args.sys_clock, voltage_domain=system.voltage_domain
+)
 
 if fast_forward:
-    have_kvm_support = 'BaseKvmCPU' in globals()
-    if have_kvm_support and buildEnv['TARGET_ISA'] == "x86":
+    have_kvm_support = "BaseKvmCPU" in globals()
+    if have_kvm_support and get_runtime_isa() == ISA.X86:
         system.vm = KvmVM()
-        system.m5ops_base = 0xffff0000
+        system.m5ops_base = 0xFFFF0000
         for i in range(len(host_cpu.workload)):
             host_cpu.workload[i].useArchPT = True
             host_cpu.workload[i].kvmInSE = True
@@ -555,17 +779,19 @@
 GPUTLBConfig.config_tlb_hierarchy(args, system, shader_idx)
 
 # create Ruby system
-system.piobus = IOXBar(width=32, response_latency=0,
-                       frontend_latency=0, forward_latency=0)
+system.piobus = IOXBar(
+    width=32, response_latency=0, frontend_latency=0, forward_latency=0
+)
 dma_list = [gpu_hsapp, gpu_cmd_proc]
 Ruby.create_system(args, None, system, None, dma_list, None)
-system.ruby.clk_domain = SrcClockDomain(clock = args.ruby_clock,
-                                    voltage_domain = system.voltage_domain)
+system.ruby.clk_domain = SrcClockDomain(
+    clock=args.ruby_clock, voltage_domain=system.voltage_domain
+)
 gpu_cmd_proc.pio = system.piobus.mem_side_ports
 gpu_hsapp.pio = system.piobus.mem_side_ports
 
 for i, dma_device in enumerate(dma_list):
-    exec('system.dma_cntrl%d.clk_domain = system.ruby.clk_domain' % i)
+    exec("system.dma_cntrl%d.clk_domain = system.ruby.clk_domain" % i)
 
 # attach the CPU ports to Ruby
 for i in range(args.num_cpus):
@@ -579,15 +805,18 @@
     system.cpu[i].dcache_port = ruby_port.in_ports
 
     ruby_port.mem_request_port = system.piobus.cpu_side_ports
-    if buildEnv['TARGET_ISA'] == "x86":
+    if get_runtime_isa() == ISA.X86:
         system.cpu[i].interrupts[0].pio = system.piobus.mem_side_ports
-        system.cpu[i].interrupts[0].int_requestor = \
-            system.piobus.cpu_side_ports
-        system.cpu[i].interrupts[0].int_responder = \
-            system.piobus.mem_side_ports
+        system.cpu[i].interrupts[
+            0
+        ].int_requestor = system.piobus.cpu_side_ports
+        system.cpu[i].interrupts[
+            0
+        ].int_responder = system.piobus.mem_side_ports
         if fast_forward:
             system.cpu[i].mmu.connectWalkerPorts(
-                ruby_port.in_ports, ruby_port.in_ports)
+                ruby_port.in_ports, ruby_port.in_ports
+            )
 
 # attach CU ports to Ruby
 # Because of the peculiarities of the CP core, you may have 1 CPU but 2
@@ -596,9 +825,12 @@
 # the index as below, but note that this assumes there is one sequencer
 # per compute unit and one sequencer per SQC for the math to work out
 # correctly.
-gpu_port_idx = len(system.ruby._cpu_ports) \
-               - args.num_compute_units - args.num_sqc \
-               - args.num_scalar_cache
+gpu_port_idx = (
+    len(system.ruby._cpu_ports)
+    - args.num_compute_units
+    - args.num_sqc
+    - args.num_scalar_cache
+)
 gpu_port_idx = gpu_port_idx - args.num_cp * 2
 
 # Connect token ports. For this we need to search through the list of all
@@ -607,8 +839,9 @@
 token_port_idx = 0
 for i in range(len(system.ruby._cpu_ports)):
     if isinstance(system.ruby._cpu_ports[i], VIPERCoalescer):
-        system.cpu[shader_idx].CUs[token_port_idx].gmTokenPort = \
-            system.ruby._cpu_ports[i].gmTokenPort
+        system.cpu[shader_idx].CUs[
+            token_port_idx
+        ].gmTokenPort = system.ruby._cpu_ports[i].gmTokenPort
         token_port_idx += 1
 
 wavefront_size = args.wf_size
@@ -616,38 +849,45 @@
     # The pipeline issues wavefront_size number of uncoalesced requests
     # in one GPU issue cycle. Hence wavefront_size mem ports.
     for j in range(wavefront_size):
-        system.cpu[shader_idx].CUs[i].memory_port[j] = \
-                  system.ruby._cpu_ports[gpu_port_idx].in_ports[j]
+        system.cpu[shader_idx].CUs[i].memory_port[j] = system.ruby._cpu_ports[
+            gpu_port_idx
+        ].in_ports[j]
     gpu_port_idx += 1
 
 for i in range(n_cu):
     if i > 0 and not i % args.cu_per_sqc:
         print("incrementing idx on ", i)
         gpu_port_idx += 1
-    system.cpu[shader_idx].CUs[i].sqc_port = \
-            system.ruby._cpu_ports[gpu_port_idx].in_ports
+    system.cpu[shader_idx].CUs[i].sqc_port = system.ruby._cpu_ports[
+        gpu_port_idx
+    ].in_ports
 gpu_port_idx = gpu_port_idx + 1
 
 for i in range(n_cu):
     if i > 0 and not i % args.cu_per_scalar_cache:
         print("incrementing idx on ", i)
         gpu_port_idx += 1
-    system.cpu[shader_idx].CUs[i].scalar_port = \
-        system.ruby._cpu_ports[gpu_port_idx].in_ports
+    system.cpu[shader_idx].CUs[i].scalar_port = system.ruby._cpu_ports[
+        gpu_port_idx
+    ].in_ports
 gpu_port_idx = gpu_port_idx + 1
 
 # attach CP ports to Ruby
 for i in range(args.num_cp):
     system.cpu[cp_idx].createInterruptController()
-    system.cpu[cp_idx].dcache_port = \
-                system.ruby._cpu_ports[gpu_port_idx + i * 2].in_ports
-    system.cpu[cp_idx].icache_port = \
-                system.ruby._cpu_ports[gpu_port_idx + i * 2 + 1].in_ports
+    system.cpu[cp_idx].dcache_port = system.ruby._cpu_ports[
+        gpu_port_idx + i * 2
+    ].in_ports
+    system.cpu[cp_idx].icache_port = system.ruby._cpu_ports[
+        gpu_port_idx + i * 2 + 1
+    ].in_ports
     system.cpu[cp_idx].interrupts[0].pio = system.piobus.mem_side_ports
-    system.cpu[cp_idx].interrupts[0].int_requestor = \
-        system.piobus.cpu_side_ports
-    system.cpu[cp_idx].interrupts[0].int_responder = \
-        system.piobus.mem_side_ports
+    system.cpu[cp_idx].interrupts[
+        0
+    ].int_requestor = system.piobus.cpu_side_ports
+    system.cpu[cp_idx].interrupts[
+        0
+    ].int_responder = system.piobus.mem_side_ports
     cp_idx = cp_idx + 1
 
 ################# Connect the CPU and GPU via GPU Dispatcher ##################
@@ -665,15 +905,17 @@
 
 ########################## Start simulation ########################
 
-redirect_paths = [RedirectPath(app_path = "/proc",
-                               host_paths =
-                                ["%s/fs/proc" % m5.options.outdir]),
-                  RedirectPath(app_path = "/sys",
-                               host_paths =
-                                ["%s/fs/sys"  % m5.options.outdir]),
-                  RedirectPath(app_path = "/tmp",
-                               host_paths =
-                                ["%s/fs/tmp"  % m5.options.outdir])]
+redirect_paths = [
+    RedirectPath(
+        app_path="/proc", host_paths=["%s/fs/proc" % m5.options.outdir]
+    ),
+    RedirectPath(
+        app_path="/sys", host_paths=["%s/fs/sys" % m5.options.outdir]
+    ),
+    RedirectPath(
+        app_path="/tmp", host_paths=["%s/fs/tmp" % m5.options.outdir]
+    ),
+]
 
 system.redirect_paths = redirect_paths
 
@@ -682,18 +924,22 @@
 # Create the /sys/devices filesystem for the simulator so that the HSA Runtime
 # knows what type of GPU hardware we are simulating
 if args.dgpu:
-    assert (args.gfx_version in ['gfx803', 'gfx900']),\
-            "Incorrect gfx version for dGPU"
-    if args.gfx_version == 'gfx803':
+    assert args.gfx_version in [
+        "gfx803",
+        "gfx900",
+    ], "Incorrect gfx version for dGPU"
+    if args.gfx_version == "gfx803":
         hsaTopology.createFijiTopology(args)
-    elif args.gfx_version == 'gfx900':
+    elif args.gfx_version == "gfx900":
         hsaTopology.createVegaTopology(args)
 else:
-    assert (args.gfx_version in ['gfx801', 'gfx902']),\
-            "Incorrect gfx version for APU"
+    assert args.gfx_version in [
+        "gfx801",
+        "gfx902",
+    ], "Incorrect gfx version for APU"
     hsaTopology.createCarrizoTopology(args)
 
-m5.ticks.setGlobalFrequency('1THz')
+m5.ticks.setGlobalFrequency("1THz")
 if args.abs_max_tick:
     maxtick = args.abs_max_tick
 else:
@@ -703,8 +949,7 @@
 Simulation.setWorkCountOptions(system, args)
 
 # Checkpointing is not supported by APU model
-if (args.checkpoint_dir != None or
-    args.checkpoint_restore != None):
+if args.checkpoint_dir != None or args.checkpoint_restore != None:
     fatal("Checkpointing not supported by apu model")
 
 checkpoint_dir = None
@@ -739,6 +984,6 @@
         exit_event = m5.simulate(maxtick - m5.curTick())
 
 print("Ticks:", m5.curTick())
-print('Exiting because ', exit_event.getCause())
+print("Exiting because ", exit_event.getCause())
 
 sys.exit(exit_event.getCode())
diff --git a/configs/example/arm/baremetal.py b/configs/example/arm/baremetal.py
index 44e3fd1..9eeba37 100644
--- a/configs/example/arm/baremetal.py
+++ b/configs/example/arm/baremetal.py
@@ -46,7 +46,7 @@
 from m5.options import *
 import argparse
 
-m5.util.addToPath('../..')
+m5.util.addToPath("../..")
 
 from common import SysPaths
 from common import MemConfig
@@ -60,25 +60,21 @@
 # l1_icache_class, l1_dcache_class, walk_cache_class, l2_Cache_class). Any of
 # the cache class may be 'None' if the particular cache is not present.
 cpu_types = {
-
-    "atomic" : ( AtomicSimpleCPU, None, None, None),
-    "minor" : (MinorCPU,
-               devices.L1I, devices.L1D,
-               devices.L2),
-    "hpi" : ( HPI.HPI,
-              HPI.HPI_ICache, HPI.HPI_DCache,
-              HPI.HPI_L2)
+    "atomic": (AtomicSimpleCPU, None, None, None),
+    "minor": (MinorCPU, devices.L1I, devices.L1D, devices.L2),
+    "hpi": (HPI.HPI, HPI.HPI_ICache, HPI.HPI_DCache, HPI.HPI_L2),
 }
 
+
 def create_cow_image(name):
     """Helper function to create a Copy-on-Write disk image"""
     image = CowDiskImage()
     image.child.image_file = name
-    return image;
+    return image
 
 
 def create(args):
-    ''' Create and configure the system object. '''
+    """Create and configure the system object."""
 
     if args.readfile and not os.path.isfile(args.readfile):
         print("Error: Bootscript %s does not exist" % args.readfile)
@@ -93,11 +89,13 @@
 
     platform = ObjectList.platform_list.get(args.machine_type)
 
-    system = devices.SimpleSystem(want_caches,
-                                  args.mem_size,
-                                  platform=platform(),
-                                  mem_mode=mem_mode,
-                                  readfile=args.readfile)
+    system = devices.SimpleSystem(
+        want_caches,
+        args.mem_size,
+        platform=platform(),
+        mem_mode=mem_mode,
+        readfile=args.readfile,
+    )
 
     MemConfig.config_mem(args, system)
 
@@ -107,7 +105,7 @@
             stdout=args.semi_stdout,
             stderr=args.semi_stderr,
             files_root_dir=args.semi_path,
-            cmd_line = " ".join([ object_file ] + args.args)
+            cmd_line=" ".join([object_file] + args.args),
         )
 
     if args.disk_image:
@@ -116,17 +114,17 @@
         # functionality to avoid writing changes to the stored copy of
         # the disk image.
         system.realview.vio[0].vio = VirtIOBlock(
-            image=create_cow_image(args.disk_image))
+            image=create_cow_image(args.disk_image)
+        )
 
     # Wire up the system's memory system
     system.connect()
 
     # Add CPU clusters to the system
     system.cpu_cluster = [
-        devices.CpuCluster(system,
-                           args.num_cores,
-                           args.cpu_freq, "1.0V",
-                           *cpu_types[args.cpu]),
+        devices.CpuCluster(
+            system, args.num_cores, args.cpu_freq, "1.0V", *cpu_types[args.cpu]
+        )
     ]
 
     # Create a cache hierarchy for the cluster. We are assuming that
@@ -143,11 +141,11 @@
     system.highest_el_is_64 = True
 
     workload_class = workloads.workload_list.get(args.workload)
-    system.workload = workload_class(
-        object_file, system)
+    system.workload = workload_class(object_file, system)
 
     return system
 
+
 def run(args):
     cptdir = m5.options.outdir
     if args.checkpoint:
@@ -171,67 +169,118 @@
 def main():
     parser = argparse.ArgumentParser(epilog=__doc__)
 
-    parser.add_argument("--kernel", type=str,
-                        default=None,
-                        help="Binary to run")
-    parser.add_argument("--workload", type=str,
-                        default="ArmBaremetal",
-                        choices=workloads.workload_list.get_names(),
-                        help="Workload type")
-    parser.add_argument("--disk-image", type=str,
-                        default=None,
-                        help="Disk to instantiate")
-    parser.add_argument("--readfile", type=str, default="",
-                        help = "File to return with the m5 readfile command")
-    parser.add_argument("--cpu", type=str, choices=list(cpu_types.keys()),
-                        default="atomic",
-                        help="CPU model to use")
+    parser.add_argument(
+        "--kernel", type=str, default=None, help="Binary to run"
+    )
+    parser.add_argument(
+        "--workload",
+        type=str,
+        default="ArmBaremetal",
+        choices=workloads.workload_list.get_names(),
+        help="Workload type",
+    )
+    parser.add_argument(
+        "--disk-image", type=str, default=None, help="Disk to instantiate"
+    )
+    parser.add_argument(
+        "--readfile",
+        type=str,
+        default="",
+        help="File to return with the m5 readfile command",
+    )
+    parser.add_argument(
+        "--cpu",
+        type=str,
+        choices=list(cpu_types.keys()),
+        default="atomic",
+        help="CPU model to use",
+    )
     parser.add_argument("--cpu-freq", type=str, default="4GHz")
-    parser.add_argument("--num-cores", type=int, default=1,
-                        help="Number of CPU cores")
-    parser.add_argument("--machine-type", type=str,
-                        choices=ObjectList.platform_list.get_names(),
-                        default="VExpress_GEM5_V2",
-                        help="Hardware platform class")
-    parser.add_argument("--mem-type", default="DDR3_1600_8x8",
-                        choices=ObjectList.mem_list.get_names(),
-                        help = "type of memory to use")
-    parser.add_argument("--mem-channels", type=int, default=1,
-                        help = "number of memory channels")
-    parser.add_argument("--mem-ranks", type=int, default=None,
-                        help = "number of memory ranks per channel")
-    parser.add_argument("--mem-size", action="store", type=str,
-                        default="2GB",
-                        help="Specify the physical memory size")
+    parser.add_argument(
+        "--num-cores", type=int, default=1, help="Number of CPU cores"
+    )
+    parser.add_argument(
+        "--machine-type",
+        type=str,
+        choices=ObjectList.platform_list.get_names(),
+        default="VExpress_GEM5_V2",
+        help="Hardware platform class",
+    )
+    parser.add_argument(
+        "--mem-type",
+        default="DDR3_1600_8x8",
+        choices=ObjectList.mem_list.get_names(),
+        help="type of memory to use",
+    )
+    parser.add_argument(
+        "--mem-channels", type=int, default=1, help="number of memory channels"
+    )
+    parser.add_argument(
+        "--mem-ranks",
+        type=int,
+        default=None,
+        help="number of memory ranks per channel",
+    )
+    parser.add_argument(
+        "--mem-size",
+        action="store",
+        type=str,
+        default="2GB",
+        help="Specify the physical memory size",
+    )
     parser.add_argument("--checkpoint", action="store_true")
     parser.add_argument("--restore", type=str, default=None)
-    parser.add_argument("--dtb-gen", action="store_true",
-                        help="Doesn't run simulation, it generates a DTB only")
-    parser.add_argument("--semi-enable", action="store_true",
-                        help="Enable semihosting support")
-    parser.add_argument("--semi-stdin", type=str, default="stdin",
-                        help="Standard input for semihosting " \
-                        "(default: gem5's stdin)")
-    parser.add_argument("--semi-stdout", type=str, default="stdout",
-                        help="Standard output for semihosting " \
-                        "(default: gem5's stdout)")
-    parser.add_argument("--semi-stderr", type=str, default="stderr",
-                        help="Standard error for semihosting " \
-                        "(default: gem5's stderr)")
-    parser.add_argument('--semi-path', type=str,
-                        default="",
-                        help=('Search path for files to be loaded through '
-                              'Arm Semihosting'))
-    parser.add_argument("args", default=[], nargs="*",
-                        help="Semihosting arguments to pass to benchmark")
-    parser.add_argument("-P", "--param", action="append", default=[],
+    parser.add_argument(
+        "--dtb-gen",
+        action="store_true",
+        help="Doesn't run simulation, it generates a DTB only",
+    )
+    parser.add_argument(
+        "--semi-enable", action="store_true", help="Enable semihosting support"
+    )
+    parser.add_argument(
+        "--semi-stdin",
+        type=str,
+        default="stdin",
+        help="Standard input for semihosting " "(default: gem5's stdin)",
+    )
+    parser.add_argument(
+        "--semi-stdout",
+        type=str,
+        default="stdout",
+        help="Standard output for semihosting " "(default: gem5's stdout)",
+    )
+    parser.add_argument(
+        "--semi-stderr",
+        type=str,
+        default="stderr",
+        help="Standard error for semihosting " "(default: gem5's stderr)",
+    )
+    parser.add_argument(
+        "--semi-path",
+        type=str,
+        default="",
+        help=("Search path for files to be loaded through " "Arm Semihosting"),
+    )
+    parser.add_argument(
+        "args",
+        default=[],
+        nargs="*",
+        help="Semihosting arguments to pass to benchmark",
+    )
+    parser.add_argument(
+        "-P",
+        "--param",
+        action="append",
+        default=[],
         help="Set a SimObject parameter relative to the root node. "
-             "An extended Python multi range slicing syntax can be used "
-             "for arrays. For example: "
-             "'system.cpu[0,1,3:8:2].max_insts_all_threads = 42' "
-             "sets max_insts_all_threads for cpus 0, 1, 3, 5 and 7 "
-             "Direct parameters of the root object are not accessible, "
-             "only parameters of its children.")
+        "An extended Python multi range slicing syntax can be used "
+        "for arrays. For example: "
+        "'system.cpu[0,1,3:8:2].max_insts_all_threads = 42' "
+        "sets max_insts_all_threads for cpus 0, 1, 3, 5 and 7 "
+        "Direct parameters of the root object are not accessible, "
+        "only parameters of its children.",
+    )
 
     args = parser.parse_args()
 
@@ -247,9 +296,10 @@
 
     if args.dtb_gen:
         # No run, autogenerate DTB and exit
-        root.system.generateDtb(os.path.join(m5.options.outdir, 'system.dtb'))
+        root.system.generateDtb(os.path.join(m5.options.outdir, "system.dtb"))
     else:
         run(args)
 
+
 if __name__ == "__m5_main__":
     main()
diff --git a/configs/example/arm/devices.py b/configs/example/arm/devices.py
index a488ab3..c6560d7 100644
--- a/configs/example/arm/devices.py
+++ b/configs/example/arm/devices.py
@@ -37,20 +37,22 @@
 
 import m5
 from m5.objects import *
-m5.util.addToPath('../../')
+
+m5.util.addToPath("../../")
 from common.Caches import *
 from common import ObjectList
 
 have_kvm = "ArmV8KvmCPU" in ObjectList.cpu_list.get_names()
 have_fastmodel = "FastModelCortexA76" in ObjectList.cpu_list.get_names()
 
+
 class L1I(L1_ICache):
     tag_latency = 1
     data_latency = 1
     response_latency = 1
     mshrs = 4
     tgts_per_mshr = 8
-    size = '48kB'
+    size = "48kB"
     assoc = 3
 
 
@@ -60,7 +62,7 @@
     response_latency = 1
     mshrs = 16
     tgts_per_mshr = 16
-    size = '32kB'
+    size = "32kB"
     assoc = 2
     write_buffers = 16
 
@@ -71,21 +73,21 @@
     response_latency = 5
     mshrs = 32
     tgts_per_mshr = 8
-    size = '1MB'
+    size = "1MB"
     assoc = 16
     write_buffers = 8
-    clusivity='mostly_excl'
+    clusivity = "mostly_excl"
 
 
 class L3(Cache):
-    size = '16MB'
+    size = "16MB"
     assoc = 16
     tag_latency = 20
     data_latency = 20
     response_latency = 20
     mshrs = 20
     tgts_per_mshr = 12
-    clusivity='mostly_excl'
+    clusivity = "mostly_excl"
 
 
 class MemBus(SystemXBar):
@@ -94,8 +96,17 @@
 
 
 class CpuCluster(SubSystem):
-    def __init__(self, system,  num_cpus, cpu_clock, cpu_voltage,
-                 cpu_type, l1i_type, l1d_type, l2_type):
+    def __init__(
+        self,
+        system,
+        num_cpus,
+        cpu_clock,
+        cpu_voltage,
+        cpu_type,
+        l1i_type,
+        l1d_type,
+        l2_type,
+    ):
         super(CpuCluster, self).__init__()
         self._cpu_type = cpu_type
         self._l1i_type = l1i_type
@@ -105,12 +116,16 @@
         assert num_cpus > 0
 
         self.voltage_domain = VoltageDomain(voltage=cpu_voltage)
-        self.clk_domain = SrcClockDomain(clock=cpu_clock,
-                                         voltage_domain=self.voltage_domain)
+        self.clk_domain = SrcClockDomain(
+            clock=cpu_clock, voltage_domain=self.voltage_domain
+        )
 
-        self.cpus = [ self._cpu_type(cpu_id=system.numCpus() + idx,
-                                     clk_domain=self.clk_domain)
-                      for idx in range(num_cpus) ]
+        self.cpus = [
+            self._cpu_type(
+                cpu_id=system.numCpus() + idx, clk_domain=self.clk_domain
+            )
+            for idx in range(num_cpus)
+        ]
 
         for cpu in self.cpus:
             cpu.createThreads()
@@ -157,11 +172,14 @@
             int_cls = ArmPPI if pint < 32 else ArmSPI
             for isa in cpu.isa:
                 isa.pmu = ArmPMU(interrupt=int_cls(num=pint))
-                isa.pmu.addArchEvents(cpu=cpu,
-                                      itb=cpu.mmu.itb, dtb=cpu.mmu.dtb,
-                                      icache=getattr(cpu, 'icache', None),
-                                      dcache=getattr(cpu, 'dcache', None),
-                                      l2cache=getattr(self, 'l2', None))
+                isa.pmu.addArchEvents(
+                    cpu=cpu,
+                    itb=cpu.mmu.itb,
+                    dtb=cpu.mmu.dtb,
+                    icache=getattr(cpu, "icache", None),
+                    dcache=getattr(cpu, "dcache", None),
+                    l2cache=getattr(self, "l2", None),
+                )
                 for ev in events:
                     isa.pmu.addEvent(ev)
 
@@ -175,42 +193,55 @@
 
 class AtomicCluster(CpuCluster):
     def __init__(self, system, num_cpus, cpu_clock, cpu_voltage="1.0V"):
-        cpu_config = [ ObjectList.cpu_list.get("AtomicSimpleCPU"), None,
-                       None, None ]
-        super(AtomicCluster, self).__init__(system, num_cpus, cpu_clock,
-                                            cpu_voltage, *cpu_config)
+        cpu_config = [
+            ObjectList.cpu_list.get("AtomicSimpleCPU"),
+            None,
+            None,
+            None,
+        ]
+        super(AtomicCluster, self).__init__(
+            system, num_cpus, cpu_clock, cpu_voltage, *cpu_config
+        )
+
     def addL1(self):
         pass
 
+
 class KvmCluster(CpuCluster):
     def __init__(self, system, num_cpus, cpu_clock, cpu_voltage="1.0V"):
-        cpu_config = [ ObjectList.cpu_list.get("ArmV8KvmCPU"), None, None,
-            None ]
-        super(KvmCluster, self).__init__(system, num_cpus, cpu_clock,
-                                         cpu_voltage, *cpu_config)
+        cpu_config = [ObjectList.cpu_list.get("ArmV8KvmCPU"), None, None, None]
+        super(KvmCluster, self).__init__(
+            system, num_cpus, cpu_clock, cpu_voltage, *cpu_config
+        )
+
     def addL1(self):
         pass
 
+
 class FastmodelCluster(SubSystem):
-    def __init__(self, system,  num_cpus, cpu_clock, cpu_voltage="1.0V"):
+    def __init__(self, system, num_cpus, cpu_clock, cpu_voltage="1.0V"):
         super(FastmodelCluster, self).__init__()
 
         # Setup GIC
         gic = system.realview.gic
-        gic.sc_gic.cpu_affinities = ','.join(
-            [ '0.0.%d.0' % i for i in range(num_cpus) ])
+        gic.sc_gic.cpu_affinities = ",".join(
+            ["0.0.%d.0" % i for i in range(num_cpus)]
+        )
 
         # Parse the base address of redistributor.
         redist_base = gic.get_redist_bases()[0]
         redist_frame_size = 0x40000 if gic.sc_gic.has_gicv4_1 else 0x20000
-        gic.sc_gic.reg_base_per_redistributor = ','.join([
-            '0.0.%d.0=%#x' % (i, redist_base + redist_frame_size * i)
-            for i in range(num_cpus)
-        ])
+        gic.sc_gic.reg_base_per_redistributor = ",".join(
+            [
+                "0.0.%d.0=%#x" % (i, redist_base + redist_frame_size * i)
+                for i in range(num_cpus)
+            ]
+        )
 
         gic_a2t = AmbaToTlmBridge64(amba=gic.amba_m)
-        gic_t2g = TlmToGem5Bridge64(tlm=gic_a2t.tlm,
-                                    gem5=system.iobus.cpu_side_ports)
+        gic_t2g = TlmToGem5Bridge64(
+            tlm=gic_a2t.tlm, gem5=system.iobus.cpu_side_ports
+        )
         gic_g2t = Gem5ToTlmBridge64(gem5=system.membus.mem_side_ports)
         gic_g2t.addr_ranges = gic.get_addr_ranges()
         gic_t2a = AmbaFromTlmBridge64(tlm=gic_g2t.tlm)
@@ -223,28 +254,36 @@
         system.gic_hub.gic_t2a = gic_t2a
 
         self.voltage_domain = VoltageDomain(voltage=cpu_voltage)
-        self.clk_domain = SrcClockDomain(clock=cpu_clock,
-                                         voltage_domain=self.voltage_domain)
+        self.clk_domain = SrcClockDomain(
+            clock=cpu_clock, voltage_domain=self.voltage_domain
+        )
 
         # Setup CPU
         assert num_cpus <= 4
-        CpuClasses = [FastModelCortexA76x1, FastModelCortexA76x2,
-                      FastModelCortexA76x3, FastModelCortexA76x4]
+        CpuClasses = [
+            FastModelCortexA76x1,
+            FastModelCortexA76x2,
+            FastModelCortexA76x3,
+            FastModelCortexA76x4,
+        ]
         CpuClass = CpuClasses[num_cpus - 1]
 
-        cpu = CpuClass(GICDISABLE=False)
+        cpu = CpuClass(
+            GICDISABLE=False, BROADCASTATOMIC=False, BROADCASTOUTER=False
+        )
         for core in cpu.cores:
             core.semihosting_enable = False
             core.RVBARADDR = 0x10
             core.redistributor = gic.redistributor
             core.createThreads()
             core.createInterruptController()
-        self.cpus = [ cpu ]
+        self.cpus = [cpu]
 
+        self.cpu_hub = SubSystem()
         a2t = AmbaToTlmBridge64(amba=cpu.amba)
         t2g = TlmToGem5Bridge64(tlm=a2t.tlm, gem5=system.membus.cpu_side_ports)
-        system.gic_hub.a2t = a2t
-        system.gic_hub.t2g = t2g
+        self.cpu_hub.a2t = a2t
+        self.cpu_hub.t2g = t2g
 
         system.addCpuCluster(self, num_cpus)
 
@@ -252,7 +291,7 @@
         return False
 
     def memoryMode(self):
-        return 'atomic_noncaching'
+        return "atomic_noncaching"
 
     def addL1(self):
         pass
@@ -263,6 +302,7 @@
     def connectMemSide(self, bus):
         pass
 
+
 class BaseSimpleSystem(ArmSystem):
     cache_line_size = 64
 
@@ -271,15 +311,15 @@
 
         self.voltage_domain = VoltageDomain(voltage="1.0V")
         self.clk_domain = SrcClockDomain(
-            clock="1GHz",
-            voltage_domain=Parent.voltage_domain)
+            clock="1GHz", voltage_domain=Parent.voltage_domain
+        )
 
         if platform is None:
             self.realview = VExpress_GEM5_V1()
         else:
             self.realview = platform
 
-        if hasattr(self.realview.gic, 'cpu_addr'):
+        if hasattr(self.realview.gic, "cpu_addr"):
             self.gic_cpu_addr = self.realview.gic.cpu_addr
 
         self.terminal = Terminal()
@@ -305,7 +345,8 @@
             size_in_range = min(mem_size, mem_range.size())