base, python: Allow dirname selection for the interpreter

This is the second step towards being able to run dynamically linked
applications when the guest ISA differs from the host ISA.

Once the guest interpreter is loaded to memory, we are able to redirect
shared object loads through the redirectPath interface.
How do we load the guest interpreter?
The ELF file asks, for example, for the /lib/ld-linux-aarch64.so
interpreter.
That would point to a valid dynamic linker/loader if guest ISA == host
ISA, but if we are running on X86 we should point to the guest
(aarch64 in the example) toolchain wherever it is installed.

This patch adds the --interp-dir option, which points to the parent
folder of the guest /lib in the host fs.

Change-Id: Id27b97c060008d2e847776a49323d45c8809a27f
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/23066
Reviewed-by: Jason Lowe-Power <jason@lowepower.com>
Maintainer: Jason Lowe-Power <jason@lowepower.com>
Tested-by: kokoro <noreply+kokoro@google.com>
diff --git a/configs/common/FileSystemConfig.py b/configs/common/FileSystemConfig.py
index 76ea5ff..33515bc 100644
--- a/configs/common/FileSystemConfig.py
+++ b/configs/common/FileSystemConfig.py
@@ -154,6 +154,18 @@
 
     system.redirect_paths = _redirect_paths(options)
 
+    # Setting the interpreter path. This is used to load the
+    # guest dynamic linker itself from the elf file.
+    interp = getattr(options, 'interp_dir', None)
+    if interp:
+        from m5.core import setInterpDir
+        setInterpDir(interp)
+
+        print("Setting the interpreter path to:", interp,
+              "\nFor dynamically linked applications you might still "
+              "need to setup the --redirects so that libraries are "
+              "found\n")
+
 def register_node(cpu_list, mem, node_number):
     nodebasedir = joinpath(m5.options.outdir, 'fs', 'sys', 'devices',
                            'system', 'node')
diff --git a/configs/common/Options.py b/configs/common/Options.py
index 855c006..c47d4f7 100644
--- a/configs/common/Options.py
+++ b/configs/common/Options.py
@@ -381,6 +381,15 @@
                            "for information or functionality. Instead of "    \
                            "finding files on the __HOST__ filesystem, the "   \
                            "process will find the user's replacment files.")
+    parser.add_option("--interp-dir", action="store", type="string",
+                      default=None,
+                      help="The interp-dir option is used for "
+                           "setting the interpreter's path. This will "
+                           "allow to load the guest dynamic linker/loader "
+                           "itself from the elf binary. The option points to "
+                           "the parent folder of the guest /lib in the "
+                           "host fs")
+
     parser.add_option("--redirects", action="append", type="string",
                       default=[],
                       help="A collection of one or more redirect paths "
diff --git a/src/base/loader/elf_object.cc b/src/base/loader/elf_object.cc
index bbaa27b..06cc33b 100644
--- a/src/base/loader/elf_object.cc
+++ b/src/base/loader/elf_object.cc
@@ -90,9 +90,18 @@
 {
 
 ElfObjectFormat elfObjectFormat;
+std::string interpDir;
 
 } // anonymous namespace
 
+void
+setInterpDir(const std::string &dirname)
+{
+    fatal_if(!interpDir.empty(),
+        "Error: setInterpDir has already been called once\n");
+    interpDir = dirname;
+}
+
 ElfObject::ElfObject(ImageFileDataPtr ifd) : ObjectFile(ifd)
 {
     // get a pointer to elf structure
@@ -119,7 +128,7 @@
             handleLoadableSegment(phdr, i);
         if (phdr.p_type == PT_INTERP) {
             // Make sure the interpreter is an valid ELF file.
-            char *interp_path = (char *)imageData->data() + phdr.p_offset;
+            auto interp_path = getInterpPath(phdr);
             ObjectFile *obj = createObjectFile(interp_path);
             interpreter = dynamic_cast<ElfObject *>(obj);
             assert(interpreter != nullptr);
@@ -137,6 +146,17 @@
     // We will actually read the sections when we need to load them
 }
 
+std::string
+ElfObject::getInterpPath(const GElf_Phdr &phdr) const
+{
+    // This is the interpreter path as specified in the elf file
+    const std::string elf_path = (char *)imageData->data() + phdr.p_offset;
+    if (!interpDir.empty())
+        return interpDir + elf_path;
+    else
+        return elf_path;
+}
+
 void
 ElfObject::determineArch()
 {
diff --git a/src/base/loader/elf_object.hh b/src/base/loader/elf_object.hh
index 2cc2016..8b72cee 100644
--- a/src/base/loader/elf_object.hh
+++ b/src/base/loader/elf_object.hh
@@ -116,6 +116,8 @@
 
 
     ObjectFile *getInterpreter() const override { return interpreter; }
+    std::string getInterpPath(const GElf_Phdr &phdr) const;
+
     Addr bias() const override { return ldBias; }
     bool relocatable() const override { return relocate; }
     Addr mapSize() const override { return ldMax - ldMin; }
@@ -128,4 +130,13 @@
     uint16_t programHeaderCount() {return _programHeaderCount;}
 };
 
+/**
+ * This is the interface for setting up a base path for the
+ * elf interpreter. This is needed when loading a
+ * cross-compiled (guest ISA != host ISA) dynamically
+ * linked application.
+ * @param dirname base path for the interpreter
+ */
+void setInterpDir(const std::string &dirname);
+
 #endif // __BASE_LOADER_ELF_OBJECT_HH__
diff --git a/src/python/m5/core.py b/src/python/m5/core.py
index 4c94353..c8c57fc 100644
--- a/src/python/m5/core.py
+++ b/src/python/m5/core.py
@@ -1,3 +1,15 @@
+# Copyright (c) 2019 ARM Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
 # Copyright (c) 2008 The Hewlett-Packard Development Company
 # All rights reserved.
 #
@@ -30,3 +42,4 @@
 from __future__ import absolute_import
 
 from _m5.core import setOutputDir
+from _m5.loader import setInterpDir
diff --git a/src/python/pybind11/core.cc b/src/python/pybind11/core.cc
index 3523c06..2cfaecd 100644
--- a/src/python/pybind11/core.cc
+++ b/src/python/pybind11/core.cc
@@ -53,6 +53,7 @@
 
 #include "base/addr_range.hh"
 #include "base/inet.hh"
+#include "base/loader/elf_object.hh"
 #include "base/logging.hh"
 #include "base/random.hh"
 #include "base/socket.hh"
@@ -202,6 +203,14 @@
         ;
 }
 
+static void
+init_loader(py::module &m_native)
+{
+    py::module m = m_native.def_submodule("loader");
+
+    m.def("setInterpDir", &setInterpDir);
+}
+
 void
 pybind_init_core(py::module &m_native)
 {
@@ -281,5 +290,6 @@
     init_serialize(m_native);
     init_range(m_native);
     init_net(m_native);
+    init_loader(m_native);
 }