stats: Add support for partial stat dumps

Add support for partial stat dumps by passing an optional 'root'
keyword argument to m5.stats.dump(). Specifying root slightly changes
the semantics of the dump command. For legacy reasons, gem5 only
allows one stat dump per tick. This is likely a limitation introduced
as a hack to prevent automatic dumping at the end of simulation from
interfering with explicit dumping from a simulation script. This
restriction does not apply when specifying a root. However, a stat
dump with a root will still prevent an additional stat dump with an
unspecified root in the same tick.

N.B.: This new API /only/ works for new-style stats that have an
explicit hierarchy. Legacy stats will not be dumped if a root is
specified.

Change-Id: Idc8ff448b9f70a796427b4a5231e7371485130b4
Signed-off-by: Andreas Sandberg <andreas.sandberg@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/19369
Reviewed-by: Jason Lowe-Power <jason@lowepower.com>
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Maintainer: Jason Lowe-Power <jason@lowepower.com>
Tested-by: kokoro <noreply+kokoro@google.com>
diff --git a/src/python/m5/SimObject.py b/src/python/m5/SimObject.py
index 3d2f123..37a13a3 100644
--- a/src/python/m5/SimObject.py
+++ b/src/python/m5/SimObject.py
@@ -1413,6 +1413,13 @@
             return self._name
         return ppath + "." + self._name
 
+    def path_list(self):
+        if self._parent:
+            return self._parent.path_list() + [ self._name, ]
+        else:
+            # Don't include the root node
+            return []
+
     def __str__(self):
         return self.path()
 
diff --git a/src/python/m5/stats/__init__.py b/src/python/m5/stats/__init__.py
index d9174d3..1480144 100644
--- a/src/python/m5/stats/__init__.py
+++ b/src/python/m5/stats/__init__.py
@@ -158,16 +158,18 @@
     _m5.stats.initSimStats()
     _m5.stats.registerPythonStatsHandlers()
 
-def _visit_groups(root, visitor):
+def _visit_groups(visitor, root=None):
+    if root is None:
+        root = Root.getInstance()
     for group in root.getStatGroups().values():
         visitor(group)
-        _visit_groups(group, visitor)
+        _visit_groups(visitor, root=group)
 
-def _visit_stats(root, visitor):
+def _visit_stats(visitor, root=None):
     def for_each_stat(g):
         for stat in g.getStats():
             visitor(g, stat)
-    _visit_groups(root, for_each_stat)
+    _visit_groups(for_each_stat, root=root)
 
 def _bindStatHierarchy(root):
     def _bind_obj(name, obj):
@@ -212,8 +214,8 @@
 
 
     # New stats
-    _visit_stats(Root.getInstance(), check_stat)
-    _visit_stats(Root.getInstance(), lambda g, s: s.enable())
+    _visit_stats(check_stat)
+    _visit_stats(lambda g, s: s.enable())
 
     _m5.stats.enable();
 
@@ -226,14 +228,13 @@
         stat.prepare()
 
     # New stats
-    _visit_stats(Root.getInstance(), lambda g, s: s.prepare())
+    _visit_stats(lambda g, s: s.prepare())
 
-lastDump = 0
-
-def _dump_to_visitor(visitor):
+def _dump_to_visitor(visitor, root=None):
     # Legacy stats
-    for stat in stats_list:
-        stat.visit(visitor)
+    if root is None:
+        for stat in stats_list:
+            stat.visit(visitor)
 
     # New stats
     def dump_group(group):
@@ -245,28 +246,39 @@
             dump_group(g)
             visitor.endGroup()
 
-    dump_group(Root.getInstance())
+    if root is not None:
+        for p in root.path_list():
+            visitor.beginGroup(p)
+    dump_group(root if root is not None else Root.getInstance())
+    if root is not None:
+        for p in reversed(root.path_list()):
+            visitor.endGroup()
 
+lastDump = 0
 
-def dump():
+def dump(root=None):
     '''Dump all statistics data to the registered outputs'''
 
-    curTick = m5.curTick()
-
+    now = m5.curTick()
     global lastDump
-    assert lastDump <= curTick
-    if lastDump == curTick:
+    assert lastDump <= now
+    new_dump = lastDump != now
+    lastDump = now
+
+    # Don't allow multiple global stat dumps in the same tick. It's
+    # still possible to dump multiple sub-trees.
+    if not new_dump and root is None:
         return
-    lastDump = curTick
 
-    _m5.stats.processDumpQueue()
-
-    prepare()
+    # Only prepare stats the first time we dump them in the same tick.
+    if new_dump:
+        _m5.stats.processDumpQueue()
+        prepare()
 
     for output in outputList:
         if output.valid():
             output.begin()
-            _dump_to_visitor(output)
+            _dump_to_visitor(output, root=root)
             output.end()
 
 def reset():