scons: Marshal Python sources using the same Python as gem5

We currently use the Python version used by scons to marshal Python
code. This doesn't work when building gem5 with Python 3 support since
scons typically runs in Python 2.7. Add a custom marshal helper that
links with the same library as gem5 to generate byte code that is
guaranteed to work in gem5's Python interpreter.

Change-Id: I665b0f2078726d4c055d74a3e668a580fc613b59
Signed-off-by: Andreas Sandberg <andreas.sandberg@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/16422
Reviewed-by: Gabe Black <gabeblack@google.com>
diff --git a/src/SConscript b/src/SConscript
index ab0e1fc..1d65649 100644
--- a/src/SConscript
+++ b/src/SConscript
@@ -45,10 +45,8 @@
 import bisect
 import functools
 import imp
-import marshal
 import os
 import re
-import subprocess
 import sys
 import zlib
 
@@ -1130,6 +1128,11 @@
                        Transform("VER TAGS")))
 env.AlwaysBuild(tags)
 
+# Build a small helper that marshals the Python code using the same
+# version of Python as gem5. This is in an unorthodox location to
+# avoid building it for every variant.
+py_marshal = env.Program('python/marshal.cc')[0]
+
 # Embed python files.  All .py files that have been indicated by a
 # PySource() call in a SConscript need to be embedded into the M5
 # library.  To do that, we compile the file to byte code, marshal the
@@ -1141,17 +1144,22 @@
             return "0"
         return '"%s"' % string
 
-    '''Action function to compile a .py into a code object, marshal
-    it, compress it, and stick it into an asm file so the code appears
-    as just bytes with a label in the data section'''
+    '''Action function to compile a .py into a code object, marshal it,
+    compress it, and stick it into an asm file so the code appears as
+    just bytes with a label in the data section. The action takes two
+    sources:
 
-    src = file(str(source[0]), 'r').read()
+    source[0]: Binary used to marshal Python sources
+    source[1]: Python script to marshal
+    '''
 
-    pysource = PySource.tnodes[source[0]]
-    compiled = compile(src, pysource.abspath, 'exec')
-    marshalled = marshal.dumps(compiled)
+    import subprocess
+
+    marshalled = subprocess.check_output([source[0].abspath, str(source[1])])
+
     compressed = zlib.compress(marshalled)
     data = compressed
+    pysource = PySource.tnodes[source[1]]
     sym = pysource.symname
 
     code = code_formatter()
@@ -1178,7 +1186,7 @@
     code.write(str(target[0]))
 
 for source in PySource.all:
-    env.Command(source.cpp, source.tnode,
+    env.Command(source.cpp, [ py_marshal, source.tnode ],
                 MakeAction(embedPyFile, Transform("EMBED PY")))
     Source(source.cpp, tags=source.tags, add_tags='python')
 
diff --git a/src/python/marshal.cc b/src/python/marshal.cc
new file mode 100644
index 0000000..f6324dc
--- /dev/null
+++ b/src/python/marshal.cc
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2019 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Andreas Sandberg
+ */
+
+#include <pybind11/embed.h>
+
+#include <iostream>
+
+namespace py = pybind11;
+using namespace pybind11::literals;
+
+// Marshal the Python file argv[1] to stdout; returns non-zero on failure.
+int
+main(int argc, char **argv) {
+    if (argc != 2) {
+        std::cerr << "Usage: marshal PYSOURCE\n" << std::endl;
+        return 1;
+    }
+    py::scoped_interpreter guard{}; // must outlive the catch handler
+    try {
+        auto locals = py::dict("source"_a=argv[1]);
+        py::exec(
+            "import marshal\n"
+            "with open(source, 'r') as f: src = f.read()\n"
+            "compiled = compile(src, source, 'exec')\n"
+            "marshalled = marshal.dumps(compiled)\n",
+            py::globals(), locals);
+        std::cout << locals["marshalled"].cast<std::string>() << std::flush;
+    } catch (const std::exception &e) {
+        std::cerr << "marshal: " << e.what() << std::endl;
+        return 1;
+    }
+    return std::cout ? 0 : 1;
+}