arch: Bump MaxVecRegLenInBytes to 4096

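The GPU model uses the generic vector register
containers; however, the maximum vector register
length is fixed at 256 bytes, which is an invalid
assumption for the GPU model, as it can operate on
vectors of up to 4096 bytes.

Because MaxVecRegLenInBytes no longer equals 256,
VecRegContainer<256> is also added explicitly to the
list of friend classes.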

Change-Id: Id85e0ed45c9a9c1a4bb6e712c44eaeec2d628fce
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/17908
Reviewed-by: Anthony Gutierrez <anthony.gutierrez@amd.com>
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
Tested-by: kokoro <noreply+kokoro@google.com>
diff --git a/src/arch/generic/vec_reg.hh b/src/arch/generic/vec_reg.hh
index ed2545c..aab307b 100644
--- a/src/arch/generic/vec_reg.hh
+++ b/src/arch/generic/vec_reg.hh
@@ -154,7 +154,7 @@
 #include "base/cprintf.hh"
 #include "base/logging.hh"
 
-constexpr unsigned MaxVecRegLenInBytes = 256;
+constexpr unsigned MaxVecRegLenInBytes = 4096;
 
 template <size_t Sz>
 class VecRegContainer;
@@ -523,6 +523,7 @@
     friend class VecRegContainer<32>;
     friend class VecRegContainer<64>;
     friend class VecRegContainer<128>;
+    friend class VecRegContainer<256>;
     friend class VecRegContainer<MaxVecRegLenInBytes>;
 
     /** My type alias. */