mem-cache: Switch to new-style stats

This change puts cache and tag stats into a Stats::Group struct. This
makes it easier to identify stat updates (they are prefixed with
"stats.") and adds hierarchy information for output formats that need
it.

Change-Id: I2b8e9138f1cb977abb445ec864d80a79b588481d
Signed-off-by: Andreas Sandberg <andreas.sandberg@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/21140
Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
Maintainer: Nikos Nikoleris <nikos.nikoleris@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc
index bc29c8c..2026696 100644
--- a/src/mem/cache/base.cc
+++ b/src/mem/cache/base.cc
@@ -107,7 +107,8 @@
       noTargetMSHR(nullptr),
       missCount(p->max_miss_count),
       addrRanges(p->addr_ranges.begin(), p->addr_ranges.end()),
-      system(p->system)
+      system(p->system),
+      stats(*this)
 {
     // the MSHR queue has no reserve entries as we check the MSHR
     // queue on every single allocation, whereas the write queue has
@@ -273,7 +274,7 @@
                         pkt->print());
 
                 assert(pkt->req->masterId() < system->maxMasters());
-                mshr_hits[pkt->cmdToIndex()][pkt->req->masterId()]++;
+                stats.cmdStats(pkt).mshr_hits[pkt->req->masterId()]++;
 
                 // We use forward_time here because it is the same
                 // considering new targets. We have multiple
@@ -297,7 +298,7 @@
     } else {
         // no MSHR
         assert(pkt->req->masterId() < system->maxMasters());
-        mshr_misses[pkt->cmdToIndex()][pkt->req->masterId()]++;
+        stats.cmdStats(pkt).mshr_misses[pkt->req->masterId()]++;
 
         if (pkt->isEviction() || pkt->cmd == MemCmd::WriteClean) {
             // We use forward_time here because there is an
@@ -440,18 +441,16 @@
     }
 
     // Initial target is used just for stats
-    QueueEntry::Target *initial_tgt = mshr->getTarget();
-    int stats_cmd_idx = initial_tgt->pkt->cmdToIndex();
-    Tick miss_latency = curTick() - initial_tgt->recvTime;
-
+    const QueueEntry::Target *initial_tgt = mshr->getTarget();
+    const Tick miss_latency = curTick() - initial_tgt->recvTime;
     if (pkt->req->isUncacheable()) {
         assert(pkt->req->masterId() < system->maxMasters());
-        mshr_uncacheable_lat[stats_cmd_idx][pkt->req->masterId()] +=
-            miss_latency;
+        stats.cmdStats(initial_tgt->pkt)
+            .mshr_uncacheable_lat[pkt->req->masterId()] += miss_latency;
     } else {
         assert(pkt->req->masterId() < system->maxMasters());
-        mshr_miss_latency[stats_cmd_idx][pkt->req->masterId()] +=
-            miss_latency;
+        stats.cmdStats(initial_tgt->pkt)
+            .mshr_miss_latency[pkt->req->masterId()] += miss_latency;
     }
 
     PacketList writebacks;
@@ -779,7 +778,7 @@
                 // Update statistic on number of prefetches issued
                 // (hwpf_mshr_misses)
                 assert(pkt->req->masterId() < system->maxMasters());
-                mshr_misses[pkt->cmdToIndex()][pkt->req->masterId()]++;
+                stats.cmdStats(pkt).mshr_misses[pkt->req->masterId()]++;
 
                 // allocate an MSHR and return it, note
                 // that we send the packet straight away, so do not
@@ -866,7 +865,7 @@
         }
 
         // Update the number of data expansions
-        dataExpansions++;
+        stats.dataExpansions++;
 
         DPRINTF(CacheComp, "Data expansion: expanding [%s] from %d to %d bits"
                 "\n", blk->print(), prev_size, compression_size);
@@ -885,7 +884,7 @@
     for (const auto& evict_blk : evict_blks) {
         if (evict_blk->isValid()) {
             if (evict_blk->wasPrefetched()) {
-                unusedPrefetches++;
+                stats.unusedPrefetches++;
             }
             evictBlock(evict_blk, writebacks);
         }
@@ -1499,14 +1498,14 @@
                         addr, is_secure);
 
                 if (blk->wasPrefetched()) {
-                    unusedPrefetches++;
+                    stats.unusedPrefetches++;
                 }
 
                 evictBlock(blk, writebacks);
             }
         }
 
-        replacements++;
+        stats.replacements++;
     }
 
     // If using a compressor, set compression data. This must be done before
@@ -1550,7 +1549,7 @@
                   "Writeback from read-only cache");
     assert(blk && blk->isValid() && (blk->isDirty() || writebackClean));
 
-    writebacks[Request::wbMasterId]++;
+    stats.writebacks[Request::wbMasterId]++;
 
     RequestPtr req = std::make_shared<Request>(
         regenerateBlkAddr(blk), blkSize, 0, Request::wbMasterId);
@@ -1873,248 +1872,350 @@
     }
 }
 
-void
-BaseCache::regStats()
-{
-    ClockedObject::regStats();
 
+BaseCache::CacheCmdStats::CacheCmdStats(BaseCache &c,
+                                        const std::string &name)
+    : Stats::Group(&c), cache(c),
+
+    hits(
+        this, (name + "_hits").c_str(),
+        ("number of " + name + " hits").c_str()),
+    misses(
+        this, (name + "_misses").c_str(),
+        ("number of " + name + " misses").c_str()),
+    missLatency(
+        this, (name + "_miss_latency").c_str(),
+        ("number of " + name + " miss cycles").c_str()),
+    accesses(
+        this, (name + "_accesses").c_str(),
+        ("number of " + name + " accesses(hits+misses)").c_str()),
+    missRate(
+        this, (name + "_miss_rate").c_str(),
+        ("miss rate for " + name + " accesses").c_str()),
+    avgMissLatency(
+        this, (name + "_avg_miss_latency").c_str(),
+        ("average " + name + " miss latency").c_str()),
+    mshr_hits(
+        this, (name + "_mshr_hits").c_str(),
+        ("number of " + name + " MSHR hits").c_str()),
+    mshr_misses(
+        this, (name + "_mshr_misses").c_str(),
+        ("number of " + name + " MSHR misses").c_str()),
+    mshr_uncacheable(
+        this, (name + "_mshr_uncacheable").c_str(),
+        ("number of " + name + " MSHR uncacheable").c_str()),
+    mshr_miss_latency(
+        this, (name + "_mshr_miss_latency").c_str(),
+        ("number of " + name + " MSHR miss cycles").c_str()),
+    mshr_uncacheable_lat(
+        this, (name + "_mshr_uncacheable_latency").c_str(),
+        ("number of " + name + " MSHR uncacheable cycles").c_str()),
+    mshrMissRate(
+        this, (name + "_mshr_miss_rate").c_str(),
+        ("mshr miss rate for " + name + " accesses").c_str()),
+    avgMshrMissLatency(
+        this, (name + "_avg_mshr_miss_latency").c_str(),
+        ("average " + name + " mshr miss latency").c_str()),
+    avgMshrUncacheableLatency(
+        this, (name + "_avg_mshr_uncacheable_latency").c_str(),
+        ("average " + name + " mshr uncacheable latency").c_str())
+{
+}
+
+void
+BaseCache::CacheCmdStats::regStatsFromParent()
+{
     using namespace Stats;
 
-    // Hit statistics
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
+    Stats::Group::regStats();
+    System *system = cache.system;
+    const auto max_masters = system->maxMasters();
 
-        hits[access_idx]
-            .init(system->maxMasters())
-            .name(name() + "." + cstr + "_hits")
-            .desc("number of " + cstr + " hits")
-            .flags(total | nozero | nonan)
-            ;
-        for (int i = 0; i < system->maxMasters(); i++) {
-            hits[access_idx].subname(i, system->getMasterName(i));
-        }
+    hits
+        .init(max_masters)
+        .flags(total | nozero | nonan)
+        ;
+    for (int i = 0; i < max_masters; i++) {
+        hits.subname(i, system->getMasterName(i));
     }
 
+    // Miss statistics
+    misses
+        .init(max_masters)
+        .flags(total | nozero | nonan)
+        ;
+    for (int i = 0; i < max_masters; i++) {
+        misses.subname(i, system->getMasterName(i));
+    }
+
+    // Miss latency statistics
+    missLatency
+        .init(max_masters)
+        .flags(total | nozero | nonan)
+        ;
+    for (int i = 0; i < max_masters; i++) {
+        missLatency.subname(i, system->getMasterName(i));
+    }
+
+    // access formulas
+    accesses.flags(total | nozero | nonan);
+    accesses = hits + misses;
+    for (int i = 0; i < max_masters; i++) {
+        accesses.subname(i, system->getMasterName(i));
+    }
+
+    // miss rate formulas
+    missRate.flags(total | nozero | nonan);
+    missRate = misses / accesses;
+    for (int i = 0; i < max_masters; i++) {
+        missRate.subname(i, system->getMasterName(i));
+    }
+
+    // miss latency formulas
+    avgMissLatency.flags(total | nozero | nonan);
+    avgMissLatency = missLatency / misses;
+    for (int i = 0; i < max_masters; i++) {
+        avgMissLatency.subname(i, system->getMasterName(i));
+    }
+
+    // MSHR statistics
+    // MSHR hit statistics
+    mshr_hits
+        .init(max_masters)
+        .flags(total | nozero | nonan)
+        ;
+    for (int i = 0; i < max_masters; i++) {
+        mshr_hits.subname(i, system->getMasterName(i));
+    }
+
+    // MSHR miss statistics
+    mshr_misses
+        .init(max_masters)
+        .flags(total | nozero | nonan)
+        ;
+    for (int i = 0; i < max_masters; i++) {
+        mshr_misses.subname(i, system->getMasterName(i));
+    }
+
+    // MSHR miss latency statistics
+    mshr_miss_latency
+        .init(max_masters)
+        .flags(total | nozero | nonan)
+        ;
+    for (int i = 0; i < max_masters; i++) {
+        mshr_miss_latency.subname(i, system->getMasterName(i));
+    }
+
+    // MSHR uncacheable statistics
+    mshr_uncacheable
+        .init(max_masters)
+        .flags(total | nozero | nonan)
+        ;
+    for (int i = 0; i < max_masters; i++) {
+        mshr_uncacheable.subname(i, system->getMasterName(i));
+    }
+
+    // MSHR uncacheable latency statistics
+    mshr_uncacheable_lat
+        .init(max_masters)
+        .flags(total | nozero | nonan)
+        ;
+    for (int i = 0; i < max_masters; i++) {
+        mshr_uncacheable_lat.subname(i, system->getMasterName(i));
+    }
+
+    // MSHR miss rate formulas
+    mshrMissRate.flags(total | nozero | nonan);
+    mshrMissRate = mshr_misses / accesses;
+
+    for (int i = 0; i < max_masters; i++) {
+        mshrMissRate.subname(i, system->getMasterName(i));
+    }
+
+    // mshrMiss latency formulas
+    avgMshrMissLatency.flags(total | nozero | nonan);
+    avgMshrMissLatency = mshr_miss_latency / mshr_misses;
+    for (int i = 0; i < max_masters; i++) {
+        avgMshrMissLatency.subname(i, system->getMasterName(i));
+    }
+
+    // mshrUncacheable latency formulas
+    avgMshrUncacheableLatency.flags(total | nozero | nonan);
+    avgMshrUncacheableLatency = mshr_uncacheable_lat / mshr_uncacheable;
+    for (int i = 0; i < max_masters; i++) {
+        avgMshrUncacheableLatency.subname(i, system->getMasterName(i));
+    }
+}
+
+BaseCache::CacheStats::CacheStats(BaseCache &c)
+    : Stats::Group(&c), cache(c),
+
+    demandHits(this, "demand_hits", "number of demand (read+write) hits"),
+
+    overallHits(this, "overall_hits", "number of overall hits"),
+    demandMisses(this, "demand_misses",
+                 "number of demand (read+write) misses"),
+    overallMisses(this, "overall_misses", "number of overall misses"),
+    demandMissLatency(this, "demand_miss_latency",
+                      "number of demand (read+write) miss cycles"),
+    overallMissLatency(this, "overall_miss_latency",
+                       "number of overall miss cycles"),
+    demandAccesses(this, "demand_accesses",
+                   "number of demand (read+write) accesses"),
+    overallAccesses(this, "overall_accesses",
+                    "number of overall (read+write) accesses"),
+    demandMissRate(this, "demand_miss_rate",
+                   "miss rate for demand accesses"),
+    overallMissRate(this, "overall_miss_rate",
+                    "miss rate for overall accesses"),
+    demandAvgMissLatency(this, "demand_avg_miss_latency",
+                         "average overall miss latency"),
+    overallAvgMissLatency(this, "overall_avg_miss_latency",
+                          "average overall miss latency"),
+    blocked_cycles(this, "blocked_cycles",
+                   "number of cycles access was blocked"),
+    blocked_causes(this, "blocked", "number of cycles access was blocked"),
+    avg_blocked(this, "avg_blocked_cycles",
+                "average number of cycles each access was blocked"),
+    unusedPrefetches(this, "unused_prefetches",
+                     "number of HardPF blocks evicted w/o reference"),
+    writebacks(this, "writebacks", "number of writebacks"),
+    demandMshrHits(this, "demand_mshr_hits",
+                   "number of demand (read+write) MSHR hits"),
+    overallMshrHits(this, "overall_mshr_hits",
+                    "number of overall MSHR hits"),
+    demandMshrMisses(this, "demand_mshr_misses",
+                     "number of demand (read+write) MSHR misses"),
+    overallMshrMisses(this, "overall_mshr_misses",
+                      "number of overall MSHR misses"),
+    overallMshrUncacheable(this, "overall_mshr_uncacheable_misses",
+                           "number of overall MSHR uncacheable misses"),
+    demandMshrMissLatency(this, "demand_mshr_miss_latency",
+                          "number of demand (read+write) MSHR miss cycles"),
+    overallMshrMissLatency(this, "overall_mshr_miss_latency",
+                           "number of overall MSHR miss cycles"),
+    overallMshrUncacheableLatency(this, "overall_mshr_uncacheable_latency",
+                                  "number of overall MSHR uncacheable cycles"),
+    demandMshrMissRate(this, "demand_mshr_miss_rate",
+                       "mshr miss rate for demand accesses"),
+    overallMshrMissRate(this, "overall_mshr_miss_rate",
+                        "mshr miss rate for overall accesses"),
+    demandAvgMshrMissLatency(this, "demand_avg_mshr_miss_latency",
+                             "average overall mshr miss latency"),
+    overallAvgMshrMissLatency(this, "overall_avg_mshr_miss_latency",
+                              "average overall mshr miss latency"),
+    overallAvgMshrUncacheableLatency(
+        this, "overall_avg_mshr_uncacheable_latency",
+        "average overall mshr uncacheable latency"),
+    replacements(this, "replacements", "number of replacements"),
+
+    dataExpansions(this, "data_expansions", "number of data expansions"),
+    cmd(MemCmd::NUM_MEM_CMDS)
+{
+    for (int idx = 0; idx < MemCmd::NUM_MEM_CMDS; ++idx)
+        cmd[idx].reset(new CacheCmdStats(c, MemCmd(idx).toString()));
+}
+
+void
+BaseCache::CacheStats::regStats()
+{
+    using namespace Stats;
+
+    Stats::Group::regStats();
+
+    System *system = cache.system;
+    const auto max_masters = system->maxMasters();
+
+    for (auto &cs : cmd)
+        cs->regStatsFromParent();
+
 // These macros make it easier to sum the right subset of commands and
 // to change the subset of commands that are considered "demand" vs
 // "non-demand"
-#define SUM_DEMAND(s) \
-    (s[MemCmd::ReadReq] + s[MemCmd::WriteReq] + s[MemCmd::WriteLineReq] + \
-     s[MemCmd::ReadExReq] + s[MemCmd::ReadCleanReq] + s[MemCmd::ReadSharedReq])
+#define SUM_DEMAND(s)                                                   \
+    (cmd[MemCmd::ReadReq]->s + cmd[MemCmd::WriteReq]->s +               \
+     cmd[MemCmd::WriteLineReq]->s + cmd[MemCmd::ReadExReq]->s +         \
+     cmd[MemCmd::ReadCleanReq]->s + cmd[MemCmd::ReadSharedReq]->s)
 
 // should writebacks be included here?  prior code was inconsistent...
-#define SUM_NON_DEMAND(s) \
-    (s[MemCmd::SoftPFReq] + s[MemCmd::HardPFReq] + s[MemCmd::SoftPFExReq])
+#define SUM_NON_DEMAND(s)                                       \
+    (cmd[MemCmd::SoftPFReq]->s + cmd[MemCmd::HardPFReq]->s +    \
+     cmd[MemCmd::SoftPFExReq]->s)
 
-    demandHits
-        .name(name() + ".demand_hits")
-        .desc("number of demand (read+write) hits")
-        .flags(total | nozero | nonan)
-        ;
+    demandHits.flags(total | nozero | nonan);
     demandHits = SUM_DEMAND(hits);
-    for (int i = 0; i < system->maxMasters(); i++) {
+    for (int i = 0; i < max_masters; i++) {
         demandHits.subname(i, system->getMasterName(i));
     }
 
-    overallHits
-        .name(name() + ".overall_hits")
-        .desc("number of overall hits")
-        .flags(total | nozero | nonan)
-        ;
+    overallHits.flags(total | nozero | nonan);
     overallHits = demandHits + SUM_NON_DEMAND(hits);
-    for (int i = 0; i < system->maxMasters(); i++) {
+    for (int i = 0; i < max_masters; i++) {
         overallHits.subname(i, system->getMasterName(i));
     }
 
-    // Miss statistics
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
-
-        misses[access_idx]
-            .init(system->maxMasters())
-            .name(name() + "." + cstr + "_misses")
-            .desc("number of " + cstr + " misses")
-            .flags(total | nozero | nonan)
-            ;
-        for (int i = 0; i < system->maxMasters(); i++) {
-            misses[access_idx].subname(i, system->getMasterName(i));
-        }
-    }
-
-    demandMisses
-        .name(name() + ".demand_misses")
-        .desc("number of demand (read+write) misses")
-        .flags(total | nozero | nonan)
-        ;
+    demandMisses.flags(total | nozero | nonan);
     demandMisses = SUM_DEMAND(misses);
-    for (int i = 0; i < system->maxMasters(); i++) {
+    for (int i = 0; i < max_masters; i++) {
         demandMisses.subname(i, system->getMasterName(i));
     }
 
-    overallMisses
-        .name(name() + ".overall_misses")
-        .desc("number of overall misses")
-        .flags(total | nozero | nonan)
-        ;
+    overallMisses.flags(total | nozero | nonan);
     overallMisses = demandMisses + SUM_NON_DEMAND(misses);
-    for (int i = 0; i < system->maxMasters(); i++) {
+    for (int i = 0; i < max_masters; i++) {
         overallMisses.subname(i, system->getMasterName(i));
     }
 
-    // Miss latency statistics
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
-
-        missLatency[access_idx]
-            .init(system->maxMasters())
-            .name(name() + "." + cstr + "_miss_latency")
-            .desc("number of " + cstr + " miss cycles")
-            .flags(total | nozero | nonan)
-            ;
-        for (int i = 0; i < system->maxMasters(); i++) {
-            missLatency[access_idx].subname(i, system->getMasterName(i));
-        }
-    }
-
-    demandMissLatency
-        .name(name() + ".demand_miss_latency")
-        .desc("number of demand (read+write) miss cycles")
-        .flags(total | nozero | nonan)
-        ;
+    demandMissLatency.flags(total | nozero | nonan);
     demandMissLatency = SUM_DEMAND(missLatency);
-    for (int i = 0; i < system->maxMasters(); i++) {
+    for (int i = 0; i < max_masters; i++) {
         demandMissLatency.subname(i, system->getMasterName(i));
     }
 
-    overallMissLatency
-        .name(name() + ".overall_miss_latency")
-        .desc("number of overall miss cycles")
-        .flags(total | nozero | nonan)
-        ;
+    overallMissLatency.flags(total | nozero | nonan);
     overallMissLatency = demandMissLatency + SUM_NON_DEMAND(missLatency);
-    for (int i = 0; i < system->maxMasters(); i++) {
+    for (int i = 0; i < max_masters; i++) {
         overallMissLatency.subname(i, system->getMasterName(i));
     }
 
-    // access formulas
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
-
-        accesses[access_idx]
-            .name(name() + "." + cstr + "_accesses")
-            .desc("number of " + cstr + " accesses(hits+misses)")
-            .flags(total | nozero | nonan)
-            ;
-        accesses[access_idx] = hits[access_idx] + misses[access_idx];
-
-        for (int i = 0; i < system->maxMasters(); i++) {
-            accesses[access_idx].subname(i, system->getMasterName(i));
-        }
-    }
-
-    demandAccesses
-        .name(name() + ".demand_accesses")
-        .desc("number of demand (read+write) accesses")
-        .flags(total | nozero | nonan)
-        ;
+    demandAccesses.flags(total | nozero | nonan);
     demandAccesses = demandHits + demandMisses;
-    for (int i = 0; i < system->maxMasters(); i++) {
+    for (int i = 0; i < max_masters; i++) {
         demandAccesses.subname(i, system->getMasterName(i));
     }
 
-    overallAccesses
-        .name(name() + ".overall_accesses")
-        .desc("number of overall (read+write) accesses")
-        .flags(total | nozero | nonan)
-        ;
+    overallAccesses.flags(total | nozero | nonan);
     overallAccesses = overallHits + overallMisses;
-    for (int i = 0; i < system->maxMasters(); i++) {
+    for (int i = 0; i < max_masters; i++) {
         overallAccesses.subname(i, system->getMasterName(i));
     }
 
-    // miss rate formulas
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
-
-        missRate[access_idx]
-            .name(name() + "." + cstr + "_miss_rate")
-            .desc("miss rate for " + cstr + " accesses")
-            .flags(total | nozero | nonan)
-            ;
-        missRate[access_idx] = misses[access_idx] / accesses[access_idx];
-
-        for (int i = 0; i < system->maxMasters(); i++) {
-            missRate[access_idx].subname(i, system->getMasterName(i));
-        }
-    }
-
-    demandMissRate
-        .name(name() + ".demand_miss_rate")
-        .desc("miss rate for demand accesses")
-        .flags(total | nozero | nonan)
-        ;
+    demandMissRate.flags(total | nozero | nonan);
     demandMissRate = demandMisses / demandAccesses;
-    for (int i = 0; i < system->maxMasters(); i++) {
+    for (int i = 0; i < max_masters; i++) {
         demandMissRate.subname(i, system->getMasterName(i));
     }
 
-    overallMissRate
-        .name(name() + ".overall_miss_rate")
-        .desc("miss rate for overall accesses")
-        .flags(total | nozero | nonan)
-        ;
+    overallMissRate.flags(total | nozero | nonan);
     overallMissRate = overallMisses / overallAccesses;
-    for (int i = 0; i < system->maxMasters(); i++) {
+    for (int i = 0; i < max_masters; i++) {
         overallMissRate.subname(i, system->getMasterName(i));
     }
 
-    // miss latency formulas
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
-
-        avgMissLatency[access_idx]
-            .name(name() + "." + cstr + "_avg_miss_latency")
-            .desc("average " + cstr + " miss latency")
-            .flags(total | nozero | nonan)
-            ;
-        avgMissLatency[access_idx] =
-            missLatency[access_idx] / misses[access_idx];
-
-        for (int i = 0; i < system->maxMasters(); i++) {
-            avgMissLatency[access_idx].subname(i, system->getMasterName(i));
-        }
-    }
-
-    demandAvgMissLatency
-        .name(name() + ".demand_avg_miss_latency")
-        .desc("average overall miss latency")
-        .flags(total | nozero | nonan)
-        ;
+    demandAvgMissLatency.flags(total | nozero | nonan);
     demandAvgMissLatency = demandMissLatency / demandMisses;
-    for (int i = 0; i < system->maxMasters(); i++) {
+    for (int i = 0; i < max_masters; i++) {
         demandAvgMissLatency.subname(i, system->getMasterName(i));
     }
 
-    overallAvgMissLatency
-        .name(name() + ".overall_avg_miss_latency")
-        .desc("average overall miss latency")
-        .flags(total | nozero | nonan)
-        ;
+    overallAvgMissLatency.flags(total | nozero | nonan);
     overallAvgMissLatency = overallMissLatency / overallMisses;
-    for (int i = 0; i < system->maxMasters(); i++) {
+    for (int i = 0; i < max_masters; i++) {
         overallAvgMissLatency.subname(i, system->getMasterName(i));
     }
 
     blocked_cycles.init(NUM_BLOCKED_CAUSES);
     blocked_cycles
-        .name(name() + ".blocked_cycles")
-        .desc("number of cycles access was blocked")
         .subname(Blocked_NoMSHRs, "no_mshrs")
         .subname(Blocked_NoTargets, "no_targets")
         ;
@@ -2122,320 +2223,111 @@
 
     blocked_causes.init(NUM_BLOCKED_CAUSES);
     blocked_causes
-        .name(name() + ".blocked")
-        .desc("number of cycles access was blocked")
         .subname(Blocked_NoMSHRs, "no_mshrs")
         .subname(Blocked_NoTargets, "no_targets")
         ;
 
     avg_blocked
-        .name(name() + ".avg_blocked_cycles")
-        .desc("average number of cycles each access was blocked")
         .subname(Blocked_NoMSHRs, "no_mshrs")
         .subname(Blocked_NoTargets, "no_targets")
         ;
-
     avg_blocked = blocked_cycles / blocked_causes;
 
-    unusedPrefetches
-        .name(name() + ".unused_prefetches")
-        .desc("number of HardPF blocks evicted w/o reference")
-        .flags(nozero)
-        ;
+    unusedPrefetches.flags(nozero);
 
     writebacks
-        .init(system->maxMasters())
-        .name(name() + ".writebacks")
-        .desc("number of writebacks")
+        .init(max_masters)
         .flags(total | nozero | nonan)
         ;
-    for (int i = 0; i < system->maxMasters(); i++) {
+    for (int i = 0; i < max_masters; i++) {
         writebacks.subname(i, system->getMasterName(i));
     }
 
-    // MSHR statistics
-    // MSHR hit statistics
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
-
-        mshr_hits[access_idx]
-            .init(system->maxMasters())
-            .name(name() + "." + cstr + "_mshr_hits")
-            .desc("number of " + cstr + " MSHR hits")
-            .flags(total | nozero | nonan)
-            ;
-        for (int i = 0; i < system->maxMasters(); i++) {
-            mshr_hits[access_idx].subname(i, system->getMasterName(i));
-        }
-    }
-
-    demandMshrHits
-        .name(name() + ".demand_mshr_hits")
-        .desc("number of demand (read+write) MSHR hits")
-        .flags(total | nozero | nonan)
-        ;
+    demandMshrHits.flags(total | nozero | nonan);
     demandMshrHits = SUM_DEMAND(mshr_hits);
-    for (int i = 0; i < system->maxMasters(); i++) {
+    for (int i = 0; i < max_masters; i++) {
         demandMshrHits.subname(i, system->getMasterName(i));
     }
 
-    overallMshrHits
-        .name(name() + ".overall_mshr_hits")
-        .desc("number of overall MSHR hits")
-        .flags(total | nozero | nonan)
-        ;
+    overallMshrHits.flags(total | nozero | nonan);
     overallMshrHits = demandMshrHits + SUM_NON_DEMAND(mshr_hits);
-    for (int i = 0; i < system->maxMasters(); i++) {
+    for (int i = 0; i < max_masters; i++) {
         overallMshrHits.subname(i, system->getMasterName(i));
     }
 
-    // MSHR miss statistics
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
-
-        mshr_misses[access_idx]
-            .init(system->maxMasters())
-            .name(name() + "." + cstr + "_mshr_misses")
-            .desc("number of " + cstr + " MSHR misses")
-            .flags(total | nozero | nonan)
-            ;
-        for (int i = 0; i < system->maxMasters(); i++) {
-            mshr_misses[access_idx].subname(i, system->getMasterName(i));
-        }
-    }
-
-    demandMshrMisses
-        .name(name() + ".demand_mshr_misses")
-        .desc("number of demand (read+write) MSHR misses")
-        .flags(total | nozero | nonan)
-        ;
+    demandMshrMisses.flags(total | nozero | nonan);
     demandMshrMisses = SUM_DEMAND(mshr_misses);
-    for (int i = 0; i < system->maxMasters(); i++) {
+    for (int i = 0; i < max_masters; i++) {
         demandMshrMisses.subname(i, system->getMasterName(i));
     }
 
-    overallMshrMisses
-        .name(name() + ".overall_mshr_misses")
-        .desc("number of overall MSHR misses")
-        .flags(total | nozero | nonan)
-        ;
+    overallMshrMisses.flags(total | nozero | nonan);
     overallMshrMisses = demandMshrMisses + SUM_NON_DEMAND(mshr_misses);
-    for (int i = 0; i < system->maxMasters(); i++) {
+    for (int i = 0; i < max_masters; i++) {
         overallMshrMisses.subname(i, system->getMasterName(i));
     }
 
-    // MSHR miss latency statistics
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
-
-        mshr_miss_latency[access_idx]
-            .init(system->maxMasters())
-            .name(name() + "." + cstr + "_mshr_miss_latency")
-            .desc("number of " + cstr + " MSHR miss cycles")
-            .flags(total | nozero | nonan)
-            ;
-        for (int i = 0; i < system->maxMasters(); i++) {
-            mshr_miss_latency[access_idx].subname(i, system->getMasterName(i));
-        }
-    }
-
-    demandMshrMissLatency
-        .name(name() + ".demand_mshr_miss_latency")
-        .desc("number of demand (read+write) MSHR miss cycles")
-        .flags(total | nozero | nonan)
-        ;
+    demandMshrMissLatency.flags(total | nozero | nonan);
     demandMshrMissLatency = SUM_DEMAND(mshr_miss_latency);
-    for (int i = 0; i < system->maxMasters(); i++) {
+    for (int i = 0; i < max_masters; i++) {
         demandMshrMissLatency.subname(i, system->getMasterName(i));
     }
 
-    overallMshrMissLatency
-        .name(name() + ".overall_mshr_miss_latency")
-        .desc("number of overall MSHR miss cycles")
-        .flags(total | nozero | nonan)
-        ;
+    overallMshrMissLatency.flags(total | nozero | nonan);
     overallMshrMissLatency =
         demandMshrMissLatency + SUM_NON_DEMAND(mshr_miss_latency);
-    for (int i = 0; i < system->maxMasters(); i++) {
+    for (int i = 0; i < max_masters; i++) {
         overallMshrMissLatency.subname(i, system->getMasterName(i));
     }
 
-    // MSHR uncacheable statistics
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
-
-        mshr_uncacheable[access_idx]
-            .init(system->maxMasters())
-            .name(name() + "." + cstr + "_mshr_uncacheable")
-            .desc("number of " + cstr + " MSHR uncacheable")
-            .flags(total | nozero | nonan)
-            ;
-        for (int i = 0; i < system->maxMasters(); i++) {
-            mshr_uncacheable[access_idx].subname(i, system->getMasterName(i));
-        }
-    }
-
-    overallMshrUncacheable
-        .name(name() + ".overall_mshr_uncacheable_misses")
-        .desc("number of overall MSHR uncacheable misses")
-        .flags(total | nozero | nonan)
-        ;
+    overallMshrUncacheable.flags(total | nozero | nonan);
     overallMshrUncacheable =
         SUM_DEMAND(mshr_uncacheable) + SUM_NON_DEMAND(mshr_uncacheable);
-    for (int i = 0; i < system->maxMasters(); i++) {
+    for (int i = 0; i < max_masters; i++) {
         overallMshrUncacheable.subname(i, system->getMasterName(i));
     }
 
-    // MSHR miss latency statistics
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
 
-        mshr_uncacheable_lat[access_idx]
-            .init(system->maxMasters())
-            .name(name() + "." + cstr + "_mshr_uncacheable_latency")
-            .desc("number of " + cstr + " MSHR uncacheable cycles")
-            .flags(total | nozero | nonan)
-            ;
-        for (int i = 0; i < system->maxMasters(); i++) {
-            mshr_uncacheable_lat[access_idx].subname(
-                i, system->getMasterName(i));
-        }
-    }
-
-    overallMshrUncacheableLatency
-        .name(name() + ".overall_mshr_uncacheable_latency")
-        .desc("number of overall MSHR uncacheable cycles")
-        .flags(total | nozero | nonan)
-        ;
+    overallMshrUncacheableLatency.flags(total | nozero | nonan);
     overallMshrUncacheableLatency =
         SUM_DEMAND(mshr_uncacheable_lat) +
         SUM_NON_DEMAND(mshr_uncacheable_lat);
-    for (int i = 0; i < system->maxMasters(); i++) {
+    for (int i = 0; i < max_masters; i++) {
         overallMshrUncacheableLatency.subname(i, system->getMasterName(i));
     }
 
-    // MSHR miss rate formulas
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
-
-        mshrMissRate[access_idx]
-            .name(name() + "." + cstr + "_mshr_miss_rate")
-            .desc("mshr miss rate for " + cstr + " accesses")
-            .flags(total | nozero | nonan)
-            ;
-        mshrMissRate[access_idx] =
-            mshr_misses[access_idx] / accesses[access_idx];
-
-        for (int i = 0; i < system->maxMasters(); i++) {
-            mshrMissRate[access_idx].subname(i, system->getMasterName(i));
-        }
-    }
-
-    demandMshrMissRate
-        .name(name() + ".demand_mshr_miss_rate")
-        .desc("mshr miss rate for demand accesses")
-        .flags(total | nozero | nonan)
-        ;
+    demandMshrMissRate.flags(total | nozero | nonan);
     demandMshrMissRate = demandMshrMisses / demandAccesses;
-    for (int i = 0; i < system->maxMasters(); i++) {
+    for (int i = 0; i < max_masters; i++) {
         demandMshrMissRate.subname(i, system->getMasterName(i));
     }
 
-    overallMshrMissRate
-        .name(name() + ".overall_mshr_miss_rate")
-        .desc("mshr miss rate for overall accesses")
-        .flags(total | nozero | nonan)
-        ;
+    overallMshrMissRate.flags(total | nozero | nonan);
     overallMshrMissRate = overallMshrMisses / overallAccesses;
-    for (int i = 0; i < system->maxMasters(); i++) {
+    for (int i = 0; i < max_masters; i++) {
         overallMshrMissRate.subname(i, system->getMasterName(i));
     }
 
-    // mshrMiss latency formulas
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
-
-        avgMshrMissLatency[access_idx]
-            .name(name() + "." + cstr + "_avg_mshr_miss_latency")
-            .desc("average " + cstr + " mshr miss latency")
-            .flags(total | nozero | nonan)
-            ;
-        avgMshrMissLatency[access_idx] =
-            mshr_miss_latency[access_idx] / mshr_misses[access_idx];
-
-        for (int i = 0; i < system->maxMasters(); i++) {
-            avgMshrMissLatency[access_idx].subname(
-                i, system->getMasterName(i));
-        }
-    }
-
-    demandAvgMshrMissLatency
-        .name(name() + ".demand_avg_mshr_miss_latency")
-        .desc("average overall mshr miss latency")
-        .flags(total | nozero | nonan)
-        ;
+    demandAvgMshrMissLatency.flags(total | nozero | nonan);
     demandAvgMshrMissLatency = demandMshrMissLatency / demandMshrMisses;
-    for (int i = 0; i < system->maxMasters(); i++) {
+    for (int i = 0; i < max_masters; i++) {
         demandAvgMshrMissLatency.subname(i, system->getMasterName(i));
     }
 
-    overallAvgMshrMissLatency
-        .name(name() + ".overall_avg_mshr_miss_latency")
-        .desc("average overall mshr miss latency")
-        .flags(total | nozero | nonan)
-        ;
+    overallAvgMshrMissLatency.flags(total | nozero | nonan);
     overallAvgMshrMissLatency = overallMshrMissLatency / overallMshrMisses;
-    for (int i = 0; i < system->maxMasters(); i++) {
+    for (int i = 0; i < max_masters; i++) {
         overallAvgMshrMissLatency.subname(i, system->getMasterName(i));
     }
 
-    // mshrUncacheable latency formulas
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
-
-        avgMshrUncacheableLatency[access_idx]
-            .name(name() + "." + cstr + "_avg_mshr_uncacheable_latency")
-            .desc("average " + cstr + " mshr uncacheable latency")
-            .flags(total | nozero | nonan)
-            ;
-        avgMshrUncacheableLatency[access_idx] =
-            mshr_uncacheable_lat[access_idx] / mshr_uncacheable[access_idx];
-
-        for (int i = 0; i < system->maxMasters(); i++) {
-            avgMshrUncacheableLatency[access_idx].subname(
-                i, system->getMasterName(i));
-        }
-    }
-
-    overallAvgMshrUncacheableLatency
-        .name(name() + ".overall_avg_mshr_uncacheable_latency")
-        .desc("average overall mshr uncacheable latency")
-        .flags(total | nozero | nonan)
-        ;
+    overallAvgMshrUncacheableLatency.flags(total | nozero | nonan);
     overallAvgMshrUncacheableLatency =
         overallMshrUncacheableLatency / overallMshrUncacheable;
-    for (int i = 0; i < system->maxMasters(); i++) {
+    for (int i = 0; i < max_masters; i++) {
         overallAvgMshrUncacheableLatency.subname(i, system->getMasterName(i));
     }
 
-    replacements
-        .name(name() + ".replacements")
-        .desc("number of replacements")
-        ;
-
-    dataExpansions
-        .name(name() + ".data_expansions")
-        .desc("number of data expansions")
-        .flags(nozero | nonan)
-        ;
+    dataExpansions.flags(nozero | nonan);
 }
 
 void
diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh
index ceb356a..cd467c8 100644
--- a/src/mem/cache/base.hh
+++ b/src/mem/cache/base.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2013, 2015-2016, 2018 ARM Limited
+ * Copyright (c) 2012-2013, 2015-2016, 2018-2019 ARM Limited
  * All rights reserved.
  *
  * The license below extends only to copyright in the software and shall
@@ -912,137 +912,155 @@
     /** System we are currently operating in. */
     System *system;
 
-    // Statistics
-    /**
-     * @addtogroup CacheStatistics
-     * @{
-     */
+    struct CacheCmdStats : public Stats::Group
+    {
+        CacheCmdStats(BaseCache &c, const std::string &name);
 
-    /** Number of hits per thread for each type of command.
-        @sa Packet::Command */
-    Stats::Vector hits[MemCmd::NUM_MEM_CMDS];
-    /** Number of hits for demand accesses. */
-    Stats::Formula demandHits;
-    /** Number of hit for all accesses. */
-    Stats::Formula overallHits;
+        /**
+         * Callback to register stats from parent
+         * CacheStats::regStats(). We can't use the normal flow since
+         * there is no guaranteed order and CacheStats::regStats()
+         * needs to rely on these stats being initialised.
+         */
+        void regStatsFromParent();
 
-    /** Number of misses per thread for each type of command.
-        @sa Packet::Command */
-    Stats::Vector misses[MemCmd::NUM_MEM_CMDS];
-    /** Number of misses for demand accesses. */
-    Stats::Formula demandMisses;
-    /** Number of misses for all accesses. */
-    Stats::Formula overallMisses;
+        const BaseCache &cache;
 
-    /**
-     * Total number of cycles per thread/command spent waiting for a miss.
-     * Used to calculate the average miss latency.
-     */
-    Stats::Vector missLatency[MemCmd::NUM_MEM_CMDS];
-    /** Total number of cycles spent waiting for demand misses. */
-    Stats::Formula demandMissLatency;
-    /** Total number of cycles spent waiting for all misses. */
-    Stats::Formula overallMissLatency;
+        /** Number of hits per thread for each type of command.
+            @sa Packet::Command */
+        Stats::Vector hits;
+        /** Number of misses per thread for each type of command.
+            @sa Packet::Command */
+        Stats::Vector misses;
+        /**
+         * Total number of cycles per thread/command spent waiting for a miss.
+         * Used to calculate the average miss latency.
+         */
+        Stats::Vector missLatency;
+        /** The number of accesses per command and thread. */
+        Stats::Formula accesses;
+        /** The miss rate per command and thread. */
+        Stats::Formula missRate;
+        /** The average miss latency per command and thread. */
+        Stats::Formula avgMissLatency;
+        /** Number of misses that hit in the MSHRs per command and thread. */
+        Stats::Vector mshr_hits;
+        /** Number of misses that miss in the MSHRs, per command and thread. */
+        Stats::Vector mshr_misses;
+        /** Number of MSHR uncacheable accesses, per command and thread. */
+        Stats::Vector mshr_uncacheable;
+        /** Total cycle latency of each MSHR miss, per command and thread. */
+        Stats::Vector mshr_miss_latency;
+        /** Total MSHR uncacheable cycles, per command and thread. */
+        Stats::Vector mshr_uncacheable_lat;
+        /** The miss rate in the MSHRs per command and thread. */
+        Stats::Formula mshrMissRate;
+        /** The average latency of an MSHR miss, per command and thread. */
+        Stats::Formula avgMshrMissLatency;
+        /** The average MSHR uncacheable latency, per command and thread. */
+        Stats::Formula avgMshrUncacheableLatency;
+    };
 
-    /** The number of accesses per command and thread. */
-    Stats::Formula accesses[MemCmd::NUM_MEM_CMDS];
-    /** The number of demand accesses. */
-    Stats::Formula demandAccesses;
-    /** The number of overall accesses. */
-    Stats::Formula overallAccesses;
+    struct CacheStats : public Stats::Group
+    {
+        CacheStats(BaseCache &c);
 
-    /** The miss rate per command and thread. */
-    Stats::Formula missRate[MemCmd::NUM_MEM_CMDS];
-    /** The miss rate of all demand accesses. */
-    Stats::Formula demandMissRate;
-    /** The miss rate for all accesses. */
-    Stats::Formula overallMissRate;
+        void regStats() override;
 
-    /** The average miss latency per command and thread. */
-    Stats::Formula avgMissLatency[MemCmd::NUM_MEM_CMDS];
-    /** The average miss latency for demand misses. */
-    Stats::Formula demandAvgMissLatency;
-    /** The average miss latency for all misses. */
-    Stats::Formula overallAvgMissLatency;
+        CacheCmdStats &cmdStats(const PacketPtr p) {
+            return *cmd[p->cmdToIndex()];
+        }
 
-    /** The total number of cycles blocked for each blocked cause. */
-    Stats::Vector blocked_cycles;
-    /** The number of times this cache blocked for each blocked cause. */
-    Stats::Vector blocked_causes;
+        const BaseCache &cache;
 
-    /** The average number of cycles blocked for each blocked cause. */
-    Stats::Formula avg_blocked;
+        /** Number of hits for demand accesses. */
+        Stats::Formula demandHits;
+        /** Number of hits for all accesses. */
+        Stats::Formula overallHits;
 
-    /** The number of times a HW-prefetched block is evicted w/o reference. */
-    Stats::Scalar unusedPrefetches;
+        /** Number of misses for demand accesses. */
+        Stats::Formula demandMisses;
+        /** Number of misses for all accesses. */
+        Stats::Formula overallMisses;
 
-    /** Number of blocks written back per thread. */
-    Stats::Vector writebacks;
+        /** Total number of cycles spent waiting for demand misses. */
+        Stats::Formula demandMissLatency;
+        /** Total number of cycles spent waiting for all misses. */
+        Stats::Formula overallMissLatency;
 
-    /** Number of misses that hit in the MSHRs per command and thread. */
-    Stats::Vector mshr_hits[MemCmd::NUM_MEM_CMDS];
-    /** Demand misses that hit in the MSHRs. */
-    Stats::Formula demandMshrHits;
-    /** Total number of misses that hit in the MSHRs. */
-    Stats::Formula overallMshrHits;
+        /** The number of demand accesses. */
+        Stats::Formula demandAccesses;
+        /** The number of overall accesses. */
+        Stats::Formula overallAccesses;
 
-    /** Number of misses that miss in the MSHRs, per command and thread. */
-    Stats::Vector mshr_misses[MemCmd::NUM_MEM_CMDS];
-    /** Demand misses that miss in the MSHRs. */
-    Stats::Formula demandMshrMisses;
-    /** Total number of misses that miss in the MSHRs. */
-    Stats::Formula overallMshrMisses;
+        /** The miss rate of all demand accesses. */
+        Stats::Formula demandMissRate;
+        /** The miss rate for all accesses. */
+        Stats::Formula overallMissRate;
 
-    /** Number of misses that miss in the MSHRs, per command and thread. */
-    Stats::Vector mshr_uncacheable[MemCmd::NUM_MEM_CMDS];
-    /** Total number of misses that miss in the MSHRs. */
-    Stats::Formula overallMshrUncacheable;
+        /** The average miss latency for demand misses. */
+        Stats::Formula demandAvgMissLatency;
+        /** The average miss latency for all misses. */
+        Stats::Formula overallAvgMissLatency;
 
-    /** Total cycle latency of each MSHR miss, per command and thread. */
-    Stats::Vector mshr_miss_latency[MemCmd::NUM_MEM_CMDS];
-    /** Total cycle latency of demand MSHR misses. */
-    Stats::Formula demandMshrMissLatency;
-    /** Total cycle latency of overall MSHR misses. */
-    Stats::Formula overallMshrMissLatency;
+        /** The total number of cycles blocked for each blocked cause. */
+        Stats::Vector blocked_cycles;
+        /** The number of times this cache blocked for each blocked cause. */
+        Stats::Vector blocked_causes;
 
-    /** Total cycle latency of each MSHR miss, per command and thread. */
-    Stats::Vector mshr_uncacheable_lat[MemCmd::NUM_MEM_CMDS];
-    /** Total cycle latency of overall MSHR misses. */
-    Stats::Formula overallMshrUncacheableLatency;
+        /** The average number of cycles blocked for each blocked cause. */
+        Stats::Formula avg_blocked;
 
-    /** The miss rate in the MSHRs pre command and thread. */
-    Stats::Formula mshrMissRate[MemCmd::NUM_MEM_CMDS];
-    /** The demand miss rate in the MSHRs. */
-    Stats::Formula demandMshrMissRate;
-    /** The overall miss rate in the MSHRs. */
-    Stats::Formula overallMshrMissRate;
+        /** The number of times a HW-prefetched block is evicted w/o
+         * reference. */
+        Stats::Scalar unusedPrefetches;
 
-    /** The average latency of an MSHR miss, per command and thread. */
-    Stats::Formula avgMshrMissLatency[MemCmd::NUM_MEM_CMDS];
-    /** The average latency of a demand MSHR miss. */
-    Stats::Formula demandAvgMshrMissLatency;
-    /** The average overall latency of an MSHR miss. */
-    Stats::Formula overallAvgMshrMissLatency;
+        /** Number of blocks written back per thread. */
+        Stats::Vector writebacks;
 
-    /** The average latency of an MSHR miss, per command and thread. */
-    Stats::Formula avgMshrUncacheableLatency[MemCmd::NUM_MEM_CMDS];
-    /** The average overall latency of an MSHR miss. */
-    Stats::Formula overallAvgMshrUncacheableLatency;
+        /** Demand misses that hit in the MSHRs. */
+        Stats::Formula demandMshrHits;
+        /** Total number of misses that hit in the MSHRs. */
+        Stats::Formula overallMshrHits;
 
-    /** Number of replacements of valid blocks. */
-    Stats::Scalar replacements;
+        /** Demand misses that miss in the MSHRs. */
+        Stats::Formula demandMshrMisses;
+        /** Total number of misses that miss in the MSHRs. */
+        Stats::Formula overallMshrMisses;
 
-    /** Number of data expansions. */
-    Stats::Scalar dataExpansions;
+        /** Total number of MSHR uncacheable misses. */
+        Stats::Formula overallMshrUncacheable;
 
-    /**
-     * @}
-     */
+        /** Total cycle latency of demand MSHR misses. */
+        Stats::Formula demandMshrMissLatency;
+        /** Total cycle latency of overall MSHR misses. */
+        Stats::Formula overallMshrMissLatency;
 
-    /**
-     * Register stats for this object.
-     */
-    void regStats() override;
+        /** Total number of overall MSHR uncacheable cycles. */
+        Stats::Formula overallMshrUncacheableLatency;
+
+        /** The demand miss rate in the MSHRs. */
+        Stats::Formula demandMshrMissRate;
+        /** The overall miss rate in the MSHRs. */
+        Stats::Formula overallMshrMissRate;
+
+        /** The average latency of a demand MSHR miss. */
+        Stats::Formula demandAvgMshrMissLatency;
+        /** The average overall latency of an MSHR miss. */
+        Stats::Formula overallAvgMshrMissLatency;
+
+        /** The average overall MSHR uncacheable latency. */
+        Stats::Formula overallAvgMshrUncacheableLatency;
+
+        /** Number of replacements of valid blocks. */
+        Stats::Scalar replacements;
+
+        /** Number of data expansions. */
+        Stats::Scalar dataExpansions;
+
+        /** Per-command statistics */
+        std::vector<std::unique_ptr<CacheCmdStats>> cmd;
+    } stats;
 
     /** Registers probes. */
     void regProbePoints() override;
@@ -1135,7 +1153,7 @@
     {
         uint8_t flag = 1 << cause;
         if (blocked == 0) {
-            blocked_causes[cause]++;
+            stats.blocked_causes[cause]++;
             blockedCycle = curCycle();
             cpuSidePort.setBlocked();
         }
@@ -1156,7 +1174,7 @@
         blocked &= ~flag;
         DPRINTF(Cache,"Unblocking for cause %d, mask=%d\n", cause, blocked);
         if (blocked == 0) {
-            blocked_cycles[cause] += curCycle() - blockedCycle;
+            stats.blocked_cycles[cause] += curCycle() - blockedCycle;
             cpuSidePort.clearBlocked();
         }
     }
@@ -1194,7 +1212,7 @@
     void incMissCount(PacketPtr pkt)
     {
         assert(pkt->req->masterId() < system->maxMasters());
-        misses[pkt->cmdToIndex()][pkt->req->masterId()]++;
+        stats.cmdStats(pkt).misses[pkt->req->masterId()]++;
         pkt->req->incAccessDepth();
         if (missCount) {
             --missCount;
@@ -1205,8 +1223,7 @@
     void incHitCount(PacketPtr pkt)
     {
         assert(pkt->req->masterId() < system->maxMasters());
-        hits[pkt->cmdToIndex()][pkt->req->masterId()]++;
-
+        stats.cmdStats(pkt).hits[pkt->req->masterId()]++;
     }
 
     /**
diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc
index b054cd4..e7dd5ef 100644
--- a/src/mem/cache/cache.cc
+++ b/src/mem/cache/cache.cc
@@ -334,7 +334,7 @@
         // should have flushed and have no valid block
         assert(!blk || !blk->isValid());
 
-        mshr_uncacheable[pkt->cmdToIndex()][pkt->req->masterId()]++;
+        stats.cmdStats(pkt).mshr_uncacheable[pkt->req->masterId()]++;
 
         if (pkt->isWrite()) {
             allocateWriteBuffer(pkt, forward_time);
@@ -776,7 +776,8 @@
                 assert(!tgt_pkt->req->isUncacheable());
 
                 assert(tgt_pkt->req->masterId() < system->maxMasters());
-                missLatency[tgt_pkt->cmdToIndex()][tgt_pkt->req->masterId()] +=
+                stats.cmdStats(tgt_pkt)
+                    .missLatency[tgt_pkt->req->masterId()] +=
                     completion_time - target.recvTime;
             } else if (pkt->cmd == MemCmd::UpgradeFailResp) {
                 // failed StoreCond upgrade
diff --git a/src/mem/cache/noncoherent_cache.cc b/src/mem/cache/noncoherent_cache.cc
index 9a2a1db..f25f768 100644
--- a/src/mem/cache/noncoherent_cache.cc
+++ b/src/mem/cache/noncoherent_cache.cc
@@ -278,7 +278,7 @@
                 (transfer_offset ? pkt->payloadDelay : 0);
 
             assert(tgt_pkt->req->masterId() < system->maxMasters());
-            missLatency[tgt_pkt->cmdToIndex()][tgt_pkt->req->masterId()] +=
+            stats.cmdStats(tgt_pkt).missLatency[tgt_pkt->req->masterId()] +=
                 completion_time - target.recvTime;
 
             tgt_pkt->makeTimingResponse();
diff --git a/src/mem/cache/tags/base.cc b/src/mem/cache/tags/base.cc
index 4855ebd..cbfdff2 100644
--- a/src/mem/cache/tags/base.cc
+++ b/src/mem/cache/tags/base.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013,2016,2018 ARM Limited
+ * Copyright (c) 2013,2016,2018-2019 ARM Limited
  * All rights reserved.
  *
  * The license below extends only to copyright in the software and shall
@@ -64,8 +64,10 @@
       system(p->system), indexingPolicy(p->indexing_policy),
       warmupBound((p->warmup_percentage/100.0) * (p->size / p->block_size)),
       warmedUp(false), numBlocks(p->size / p->block_size),
-      dataBlks(new uint8_t[p->size]) // Allocate data storage in one big chunk
+      dataBlks(new uint8_t[p->size]), // Allocate data storage in one big chunk
+      stats(*this)
 {
+    registerExitCallback(new BaseTagsCallback(this));
 }
 
 ReplaceableEntry*
@@ -108,21 +110,21 @@
     // Deal with what we are bringing in
     MasterID master_id = pkt->req->masterId();
     assert(master_id < system->maxMasters());
-    occupancies[master_id]++;
+    stats.occupancies[master_id]++;
 
     // Insert block with tag, src master id and task id
     blk->insert(extractTag(pkt->getAddr()), pkt->isSecure(), master_id,
                 pkt->req->taskId());
 
     // Check if cache warm up is done
-    if (!warmedUp && tagsInUse.value() >= warmupBound) {
+    if (!warmedUp && stats.tagsInUse.value() >= warmupBound) {
         warmedUp = true;
-        warmupCycle = curTick();
+        stats.warmupCycle = curTick();
     }
 
     // We only need to write into one tag and one data block.
-    tagAccesses += 1;
-    dataAccesses += 1;
+    stats.tagAccesses += 1;
+    stats.dataAccesses += 1;
 }
 
 Addr
@@ -135,8 +137,8 @@
 BaseTags::cleanupRefsVisitor(CacheBlk &blk)
 {
     if (blk.isValid()) {
-        totalRefs += blk.refCount;
-        ++sampledRefs;
+        stats.totalRefs += blk.refCount;
+        ++stats.sampledRefs;
     }
 }
 
@@ -151,7 +153,7 @@
 {
     if (blk.isValid()) {
         assert(blk.task_id < ContextSwitchTaskId::NumTaskId);
-        occupanciesTaskId[blk.task_id]++;
+        stats.occupanciesTaskId[blk.task_id]++;
         assert(blk.tickInserted <= curTick());
         Tick age = curTick() - blk.tickInserted;
 
@@ -167,7 +169,7 @@
         } else
             age_index = 4; // >10ms
 
-        ageTaskId[blk.task_id][age_index]++;
+        stats.ageTaskId[blk.task_id][age_index]++;
     }
 }
 
@@ -175,9 +177,9 @@
 BaseTags::computeStats()
 {
     for (unsigned i = 0; i < ContextSwitchTaskId::NumTaskId; ++i) {
-        occupanciesTaskId[i] = 0;
+        stats.occupanciesTaskId[i] = 0;
         for (unsigned j = 0; j < 5; ++j) {
-            ageTaskId[i][j] = 0;
+            stats.ageTaskId[i][j] = 0;
         }
     }
 
@@ -201,93 +203,79 @@
     return str;
 }
 
-void
-BaseTags::regStats()
-{
-    ClockedObject::regStats();
+BaseTags::BaseTagStats::BaseTagStats(BaseTags &_tags)
+    : Stats::Group(&_tags),
+    tags(_tags),
 
+    tagsInUse(this, "tagsinuse",
+              "Cycle average of tags in use"),
+    totalRefs(this, "total_refs",
+              "Total number of references to valid blocks."),
+    sampledRefs(this, "sampled_refs",
+                "Sample count of references to valid blocks."),
+    avgRefs(this, "avg_refs",
+            "Average number of references to valid blocks."),
+    warmupCycle(this, "warmup_cycle",
+                "Cycle when the warmup percentage was hit."),
+    occupancies(this, "occ_blocks",
+                "Average occupied blocks per requestor"),
+    avgOccs(this, "occ_percent",
+            "Average percentage of cache occupancy"),
+    occupanciesTaskId(this, "occ_task_id_blocks",
+                      "Occupied blocks per task id"),
+    ageTaskId(this, "age_task_id_blocks", "Occupied blocks per task id"),
+    percentOccsTaskId(this, "occ_task_id_percent",
+                      "Percentage of cache occupancy per task id"),
+    tagAccesses(this, "tag_accesses", "Number of tag accesses"),
+    dataAccesses(this, "data_accesses", "Number of data accesses")
+{
+}
+
+void
+BaseTags::BaseTagStats::regStats()
+{
     using namespace Stats;
 
-    tagsInUse
-        .name(name() + ".tagsinuse")
-        .desc("Cycle average of tags in use")
-        ;
+    Stats::Group::regStats();
 
-    totalRefs
-        .name(name() + ".total_refs")
-        .desc("Total number of references to valid blocks.")
-        ;
+    System *system = tags.system;
 
-    sampledRefs
-        .name(name() + ".sampled_refs")
-        .desc("Sample count of references to valid blocks.")
-        ;
-
-    avgRefs
-        .name(name() + ".avg_refs")
-        .desc("Average number of references to valid blocks.")
-        ;
-
-    avgRefs = totalRefs/sampledRefs;
-
-    warmupCycle
-        .name(name() + ".warmup_cycle")
-        .desc("Cycle when the warmup percentage was hit.")
-        ;
+    avgRefs = totalRefs / sampledRefs;
 
     occupancies
         .init(system->maxMasters())
-        .name(name() + ".occ_blocks")
-        .desc("Average occupied blocks per requestor")
         .flags(nozero | nonan)
         ;
     for (int i = 0; i < system->maxMasters(); i++) {
         occupancies.subname(i, system->getMasterName(i));
     }
 
-    avgOccs
-        .name(name() + ".occ_percent")
-        .desc("Average percentage of cache occupancy")
-        .flags(nozero | total)
-        ;
+    avgOccs.flags(nozero | total);
     for (int i = 0; i < system->maxMasters(); i++) {
         avgOccs.subname(i, system->getMasterName(i));
     }
 
-    avgOccs = occupancies / Stats::constant(numBlocks);
+    avgOccs = occupancies / Stats::constant(tags.numBlocks);
 
     occupanciesTaskId
         .init(ContextSwitchTaskId::NumTaskId)
-        .name(name() + ".occ_task_id_blocks")
-        .desc("Occupied blocks per task id")
         .flags(nozero | nonan)
         ;
 
     ageTaskId
         .init(ContextSwitchTaskId::NumTaskId, 5)
-        .name(name() + ".age_task_id_blocks")
-        .desc("Occupied blocks per task id")
         .flags(nozero | nonan)
         ;
 
-    percentOccsTaskId
-        .name(name() + ".occ_task_id_percent")
-        .desc("Percentage of cache occupancy per task id")
-        .flags(nozero)
-        ;
+    percentOccsTaskId.flags(nozero);
 
-    percentOccsTaskId = occupanciesTaskId / Stats::constant(numBlocks);
+    percentOccsTaskId = occupanciesTaskId / Stats::constant(tags.numBlocks);
+}
 
-    tagAccesses
-        .name(name() + ".tag_accesses")
-        .desc("Number of tag accesses")
-        ;
+void
+BaseTags::BaseTagStats::preDumpStats()
+{
+    Stats::Group::preDumpStats();
 
-    dataAccesses
-        .name(name() + ".data_accesses")
-        .desc("Number of data accesses")
-        ;
-
-    registerDumpCallback(new BaseTagsDumpCallback(this));
-    registerExitCallback(new BaseTagsCallback(this));
+    tags.computeStats();
 }
diff --git a/src/mem/cache/tags/base.hh b/src/mem/cache/tags/base.hh
index ae9cab8..0a9f16b 100644
--- a/src/mem/cache/tags/base.hh
+++ b/src/mem/cache/tags/base.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2014,2016-2018 ARM Limited
+ * Copyright (c) 2012-2014,2016-2019 ARM Limited
  * All rights reserved.
  *
  * The license below extends only to copyright in the software and shall
@@ -102,58 +102,60 @@
     /** The data blocks, 1 per cache block. */
     std::unique_ptr<uint8_t[]> dataBlks;
 
-    // Statistics
     /**
      * TODO: It would be good if these stats were acquired after warmup.
-     * @addtogroup CacheStatistics
-     * @{
      */
+    struct BaseTagStats : public Stats::Group
+    {
+        BaseTagStats(BaseTags &tags);
 
-    /** Per cycle average of the number of tags that hold valid data. */
-    Stats::Average tagsInUse;
+        void regStats() override;
+        void preDumpStats() override;
 
-    /** The total number of references to a block before it is replaced. */
-    Stats::Scalar totalRefs;
+        BaseTags &tags;
 
-    /**
-     * The number of reference counts sampled. This is different from
-     * replacements because we sample all the valid blocks when the simulator
-     * exits.
-     */
-    Stats::Scalar sampledRefs;
+        /** Per cycle average of the number of tags that hold valid data. */
+        Stats::Average tagsInUse;
 
-    /**
-     * Average number of references to a block before is was replaced.
-     * @todo This should change to an average stat once we have them.
-     */
-    Stats::Formula avgRefs;
+        /** The total number of references to a block before it is replaced. */
+        Stats::Scalar totalRefs;
 
-    /** The cycle that the warmup percentage was hit. 0 on failure. */
-    Stats::Scalar warmupCycle;
+        /**
+         * The number of reference counts sampled. This is different
+         * from replacements because we sample all the valid blocks
+         * when the simulator exits.
+         */
+        Stats::Scalar sampledRefs;
 
-    /** Average occupancy of each requestor using the cache */
-    Stats::AverageVector occupancies;
+        /**
+         * Average number of references to a block before it was replaced.
+         * @todo This should change to an average stat once we have them.
+         */
+        Stats::Formula avgRefs;
 
-    /** Average occ % of each requestor using the cache */
-    Stats::Formula avgOccs;
+        /** The cycle that the warmup percentage was hit. 0 on failure. */
+        Stats::Scalar warmupCycle;
 
-    /** Occupancy of each context/cpu using the cache */
-    Stats::Vector occupanciesTaskId;
+        /** Average occupancy of each requestor using the cache */
+        Stats::AverageVector occupancies;
 
-    /** Occupancy of each context/cpu using the cache */
-    Stats::Vector2d ageTaskId;
+        /** Average occ % of each requestor using the cache */
+        Stats::Formula avgOccs;
 
-    /** Occ % of each context/cpu using the cache */
-    Stats::Formula percentOccsTaskId;
+        /** Occupancy of each context/cpu using the cache */
+        Stats::Vector occupanciesTaskId;
 
-    /** Number of tags consulted over all accesses. */
-    Stats::Scalar tagAccesses;
-    /** Number of data blocks consulted over all accesses. */
-    Stats::Scalar dataAccesses;
+        /** Occupied blocks per task id, split by block age bucket */
+        Stats::Vector2d ageTaskId;
 
-    /**
-     * @}
-     */
+        /** Occ % of each context/cpu using the cache */
+        Stats::Formula percentOccsTaskId;
+
+        /** Number of tags consulted over all accesses. */
+        Stats::Scalar tagAccesses;
+        /** Number of data blocks consulted over all accesses. */
+        Stats::Scalar dataAccesses;
+    } stats;
 
   public:
     typedef BaseTagsParams Params;
@@ -172,11 +174,6 @@
     virtual void tagsInit() = 0;
 
     /**
-     * Register local statistics.
-     */
-    void regStats();
-
-    /**
      * Average in the reference count for valid blocks when the simulation
      * exits.
      */
@@ -259,9 +256,9 @@
         assert(blk);
         assert(blk->isValid());
 
-        occupancies[blk->srcMasterId]--;
-        totalRefs += blk->refCount;
-        sampledRefs++;
+        stats.occupancies[blk->srcMasterId]--;
+        stats.totalRefs += blk->refCount;
+        stats.sampledRefs++;
 
         blk->invalidate();
     }
@@ -367,12 +364,4 @@
     virtual void process() { tags->cleanupRefs(); };
 };
 
-class BaseTagsDumpCallback : public Callback
-{
-    BaseTags *tags;
-  public:
-    BaseTagsDumpCallback(BaseTags *t) : tags(t) {}
-    virtual void process() { tags->computeStats(); };
-};
-
 #endif //__MEM_CACHE_TAGS_BASE_HH__
diff --git a/src/mem/cache/tags/base_set_assoc.cc b/src/mem/cache/tags/base_set_assoc.cc
index 1b53ef0..1934ef4 100644
--- a/src/mem/cache/tags/base_set_assoc.cc
+++ b/src/mem/cache/tags/base_set_assoc.cc
@@ -87,7 +87,7 @@
     BaseTags::invalidate(blk);
 
     // Decrease the number of tags in use
-    tagsInUse--;
+    stats.tagsInUse--;
 
     // Invalidate replacement data
     replacementPolicy->invalidate(blk->replacementData);
diff --git a/src/mem/cache/tags/base_set_assoc.hh b/src/mem/cache/tags/base_set_assoc.hh
index f58f939..efb08ca 100644
--- a/src/mem/cache/tags/base_set_assoc.hh
+++ b/src/mem/cache/tags/base_set_assoc.hh
@@ -131,13 +131,13 @@
         // Access all tags in parallel, hence one in each way.  The data side
         // either accesses all blocks in parallel, or one block sequentially on
         // a hit.  Sequential access with a miss doesn't access data.
-        tagAccesses += allocAssoc;
+        stats.tagAccesses += allocAssoc;
         if (sequentialAccess) {
             if (blk != nullptr) {
-                dataAccesses += 1;
+                stats.dataAccesses += 1;
             }
         } else {
-            dataAccesses += allocAssoc;
+            stats.dataAccesses += allocAssoc;
         }
 
         // If a cache hit
@@ -195,7 +195,7 @@
         BaseTags::insertBlock(pkt, blk);
 
         // Increment tag counter
-        tagsInUse++;
+        stats.tagsInUse++;
 
         // Update replacement policy
         replacementPolicy->reset(blk->replacementData);
diff --git a/src/mem/cache/tags/fa_lru.cc b/src/mem/cache/tags/fa_lru.cc
index 0ebef4b..5738dcb 100644
--- a/src/mem/cache/tags/fa_lru.cc
+++ b/src/mem/cache/tags/fa_lru.cc
@@ -131,7 +131,7 @@
     BaseTags::invalidate(blk);
 
     // Decrease the number of tags in use
-    tagsInUse--;
+    stats.tagsInUse--;
 
     // Move the block to the tail to make it the next victim
     moveToTail((FALRUBlk*)blk);
@@ -220,7 +220,7 @@
     BaseTags::insertBlock(pkt, blk);
 
     // Increment tag counter
-    tagsInUse++;
+    stats.tagsInUse++;
 
     // New block is the MRU
     moveToHead(falruBlk);
diff --git a/src/mem/cache/tags/sector_tags.cc b/src/mem/cache/tags/sector_tags.cc
index 1098885..77fb53c 100644
--- a/src/mem/cache/tags/sector_tags.cc
+++ b/src/mem/cache/tags/sector_tags.cc
@@ -125,7 +125,7 @@
     // in the sector.
     if (!sector_blk->isValid()) {
         // Decrease the number of tags in use
-        tagsInUse--;
+        stats.tagsInUse--;
 
         // Invalidate replacement data, as we're invalidating the sector
         replacementPolicy->invalidate(sector_blk->replacementData);
@@ -140,13 +140,13 @@
     // Access all tags in parallel, hence one in each way.  The data side
     // either accesses all blocks in parallel, or one block sequentially on
     // a hit.  Sequential access with a miss doesn't access data.
-    tagAccesses += allocAssoc;
+    stats.tagAccesses += allocAssoc;
     if (sequentialAccess) {
         if (blk != nullptr) {
-            dataAccesses += 1;
+            stats.dataAccesses += 1;
         }
     } else {
-        dataAccesses += allocAssoc*numBlocksPerSector;
+        stats.dataAccesses += allocAssoc*numBlocksPerSector;
     }
 
     // If a cache hit
@@ -183,7 +183,7 @@
         replacementPolicy->touch(sector_blk->replacementData);
     } else {
         // Increment tag counter
-        tagsInUse++;
+        stats.tagsInUse++;
 
         // A new entry resets the replacement data
         replacementPolicy->reset(sector_blk->replacementData);