python,stdlib: Add multiprocessing module

This changeset replicates some of the multiprocessing module
implementation from the python standard library in gem5. The goal of
this and following changesets is to enable users to use a *single* set
of python scripts to run and analyze a suite of gem5 simulations.

We must reimplement some of the multiprocessing module because it is not
flexible enough to allow for customized command line parameters to the
"python" executable (gem5 in our case). To get around this, I extended
the Process and context objects to be gem5 specific.

The next step is to wrap the Process and Pool types with gem5-specific
versions that will improve their usability for our needs. With this
changeset, these objects are usable, but it will require significant
user effort to reach the goal of running/analyzing many different gem5
simulations.

There are some limitations:
- The pool will only work if the max tasks per child is 1
- The functions that are executed must come from another module

As an example, the following code should work after applying this
change.

test.py:
```python
from gem5.utils.multiprocessing import Process, Pool
from sim import info, run_sim
if __name__ == '__m5_main__' or __name__ == '__main__':
    info('main line')
    p1 = Process(target=run_sim, args=('bob',))
    p2 = Process(target=run_sim, args=('jane',))
    p1.start()
    p2.start()
    p2.join()
    p1.join()
    with Pool(processes=4, maxtasksperchild=1) as pool:
        pool.map(run_sim, range(10))
```

sim.py:
```
import os
def info(title):
    print(title)
    print('module name:', __name__)
    print('parent process:', os.getppid())
    print('process id:', os.getpid())
def run_sim(name):
    info('function g')
    from gem5.prebuilt.demo.x86_demo_board import X86DemoBoard
    from gem5.resources.resource import Resource
    from gem5.simulate.simulator import Simulator
    board = X86DemoBoard()
    board.set_kernel_disk_workload(
        kernel=Resource("x86-linux-kernel-5.4.49"),
        disk_image=Resource("x86-ubuntu-18.04-img"),
    )
    simulator = Simulator(board=board)
    simulator.run(max_ticks=10000000)
```

Change-Id: I4348ebaa75d006949ec96e732f5dc2a5173c6048
Signed-off-by: Jason Lowe-Power <jason@lowepower.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/63432
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
diff --git a/src/python/SConscript b/src/python/SConscript
index cbf88c9..6e0f6d7 100644
--- a/src/python/SConscript
+++ b/src/python/SConscript
@@ -241,6 +241,14 @@
 PySource('gem5.utils', 'gem5/utils/filelock.py')
 PySource('gem5.utils', 'gem5/utils/override.py')
 PySource('gem5.utils', 'gem5/utils/requires.py')
+PySource('gem5.utils.multiprocessing',
+    'gem5/utils/multiprocessing/__init__.py')
+PySource('gem5.utils.multiprocessing',
+    'gem5/utils/multiprocessing/_command_line.py')
+PySource('gem5.utils.multiprocessing',
+    'gem5/utils/multiprocessing/context.py')
+PySource('gem5.utils.multiprocessing',
+    'gem5/utils/multiprocessing/popen_spawn_gem5.py')
 
 PySource('', 'importer.py')
 PySource('m5', 'm5/__init__.py')
diff --git a/src/python/gem5/utils/multiprocessing/README.md b/src/python/gem5/utils/multiprocessing/README.md
new file mode 100644
index 0000000..da2116c
--- /dev/null
+++ b/src/python/gem5/utils/multiprocessing/README.md
@@ -0,0 +1,71 @@
+# gem5's wrapper around python multiprocessing
+
+This module wraps python's multiprocessing module so that it works with gem5.
+The multiprocessing module creates new python processes, but there is no way to customize the way these processes are created.
+This wrapper extends the python multiprocessing to support passing new arguments to the python (or gem5 in this case) executable when a new process is created.
+
+This code replicates some of the multiprocessing module implementation from the python standard library in gem5.
+The goal of this code is to enable users to use a *single* set of python scripts to run and analyze a suite of gem5 simulations.
+
+We must reimplement some of the multiprocessing module because it is not flexible enough to allow for customized command line parameters to the "python" executable (gem5 in our case).
+To get around this, I extended the Process and context objects to be gem5 specific.
+
+The next step is to wrap the Process and Pool types with gem5-specific versions that will improve their usability for our needs.
+With this changeset, these objects are usable, but it will require significant user effort to reach the goal of running/analyzing many different gem5 simulations.
+
+## Example use
+
+test.py:
+
+```python
+from gem5.utils.multiprocessing import Process, Pool
+from sim import info, run_sim
+if __name__ == '__m5_main__' or __name__ == '__main__':
+    info('main line')
+    p1 = Process(target=run_sim, args=('bob',))
+    p2 = Process(target=run_sim, args=('jane',))
+    p1.start()
+    p2.start()
+    p2.join()
+    p1.join()
+    with Pool(processes=4, maxtasksperchild=1) as pool:
+        pool.map(run_sim, range(10))
+```
+
+sim.py:
+
+```python
+import os
+def info(title):
+    print(title)
+    print('module name:', __name__)
+    print('parent process:', os.getppid())
+    print('process id:', os.getpid())
+def run_sim(name):
+    info('function g')
+    from gem5.prebuilt.demo.x86_demo_board import X86DemoBoard
+    from gem5.resources.resource import Resource
+    from gem5.simulate.simulator import Simulator
+    board = X86DemoBoard()
+    board.set_kernel_disk_workload(
+        kernel=Resource("x86-linux-kernel-5.4.49"),
+        disk_image=Resource("x86-ubuntu-18.04-img"),
+    )
+    simulator = Simulator(board=board)
+    simulator.run(max_ticks=10000000)
+```
+
+Then, you can run `gem5 test.py`.
+This will execute `run_sim` 12 times.
+The first two will run in parallel, then the last 10 will run in parallel with up to 4 running at once.
+
+## Limitations
+
+- This only supports the spawn context. This is important because we need a fresh gem5 process for every subprocess.
+- When using `Pool`, the `maxtasksperchild` must be 1.
+- Process synchronization (queues, pipes, etc.) hasn't been tested
+- Functions that are used to execute in the subprocess must be imported from another module. In other words, we cannot pickle functions in the main/runner module.
+
+## Implementation notes
+
+- The `_start_method` must be `None` for the `Spawn_gem5Process` class. Otherwise, in `_bootstrap` in the `BaseProcess` it will try to force the `_start_method` to be gem5-specific, which the `multiprocessing` module doesn't understand.
diff --git a/src/python/gem5/utils/multiprocessing/__init__.py b/src/python/gem5/utils/multiprocessing/__init__.py
new file mode 100644
index 0000000..680aeac
--- /dev/null
+++ b/src/python/gem5/utils/multiprocessing/__init__.py
@@ -0,0 +1,33 @@
+# Copyright (c) 2022 The Regents of The University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from .context import Process
+
+from .context import gem5Context
+
+Pool = gem5Context().Pool
+
+__all__ = ["Process", "Pool"]
diff --git a/src/python/gem5/utils/multiprocessing/_command_line.py b/src/python/gem5/utils/multiprocessing/_command_line.py
new file mode 100644
index 0000000..f682775
--- /dev/null
+++ b/src/python/gem5/utils/multiprocessing/_command_line.py
@@ -0,0 +1,101 @@
+# Copyright (c) 2022 The Regents of The University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+This file contains extensions of the multiprocessing module to be used with gem5.
+Specifically, it contains the code to produce the command line for spawned processes.
+Some code inspired by the Python standard library implementation of the
+multiprocessing module (i.e., cpython/Lib/multiprocessing/).
+"""
+
+import sys
+from multiprocessing import spawn, util
+
+
+def _gem5_args_for_multiprocessing(name):
+    from m5 import options
+
+    # Options that are disallowed with multiprocessing
+    disallowed = [
+        options.build_info,
+        options.copyright,
+        options.readme,
+        options.interactive,
+        options.pdb,
+        options.verbose,
+        options.debug_break,
+        options.debug_help,
+        options.debug_flags,
+        options.debug_start,
+        options.debug_end,
+        options.debug_ignore,
+        options.list_sim_objects,
+    ]
+    if any(disallowed):
+        raise Exception(
+            f"Disallowed option for multiprocessing. "
+            f"See {__file__} for details."
+        )
+
+    # Options not forwarded:
+    # --allow-remote-connections, --listener-mode, --dump-config, --json-config
+    # --dot-config, --dot-dvfs-config, --debug-file, --remote-gdb-port, -c
+
+    arguments = [
+        f"--outdir={options.outdir}/{name}",
+        f"--stdout-file={options.stdout_file}",
+        f"--stderr-file={options.stderr_file}",
+        f"--stats-file={options.stats_file}",
+    ]
+    if options.redirect_stdout:
+        arguments.append("--redirect-stdout")
+    if options.redirect_stderr:
+        arguments.append("--redirect-stderr")
+    if options.silent_redirect:
+        arguments.append("--silent-redirect")
+    if options.path:
+        arguments.append(f"--path={':'.join(options.path)}")
+    if options.quiet:
+        arguments.append("--quiet")
+
+    return arguments
+
+
+def get_command_line(name, **kwds):
+    """
+    Returns prefix of command line used for spawning a child process
+    """
+    if getattr(sys, "frozen", False):
+        return [sys.executable, "--multiprocessing-fork"] + [
+            "%s=%r" % item for item in kwds.items()
+        ]
+    else:
+        prog = "from multiprocessing.spawn import spawn_main; spawn_main(%s)"
+        prog %= ", ".join("%s=%r" % item for item in kwds.items())
+        opts = util._args_from_interpreter_flags()
+        opts.extend(_gem5_args_for_multiprocessing(name))
+        exe = spawn.get_executable()
+        return [exe] + opts + ["-c", prog, "--multiprocessing-fork"]
diff --git a/src/python/gem5/utils/multiprocessing/context.py b/src/python/gem5/utils/multiprocessing/context.py
new file mode 100644
index 0000000..2108bc6
--- /dev/null
+++ b/src/python/gem5/utils/multiprocessing/context.py
@@ -0,0 +1,75 @@
+# Copyright (c) 2022 The Regents of The University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+This file contains extensions of the multiprocessing module to be used with gem5
+Some code inspired by the Python standard library implementation of the
+multiprocessing module (i.e., cpython/Lib/multiprocessing/).
+"""
+
+from multiprocessing import context, process
+from multiprocessing.context import DefaultContext
+
+# The `_start_method` must be `None` for the `Spawn_gem5Process` class.
+# Otherwise, in `_bootstrap` in the `BaseProcess` it will try to force the
+# `_start_method` to be gem5-specific, which the `multiprocessing` module
+# doesn't understand.
+class Spawn_gem5Process(process.BaseProcess):
+    _start_method = None
+
+    @staticmethod
+    def _Popen(process_obj):
+        from .popen_spawn_gem5 import Popen
+
+        return Popen(process_obj)
+
+
+class Process(process.BaseProcess):
+    _start_method = None
+
+    @staticmethod
+    def _Popen(process_obj):
+        return _default_context.get_context().Process._Popen(process_obj)
+
+
+class gem5Context(context.BaseContext):
+    _name = "spawn_gem5"
+    Process = Spawn_gem5Process
+
+    def get_context(self, method=None):
+        if method is None:
+            return self
+        try:
+            ctx = _concrete_contexts[method]
+        except KeyError:
+            raise ValueError("cannot find context for %r" % method) from None
+        ctx._check_available()
+        return ctx
+
+
+_concrete_contexts = {"spawn_gem5": gem5Context()}
+
+_default_context = DefaultContext(_concrete_contexts["spawn_gem5"])
diff --git a/src/python/gem5/utils/multiprocessing/popen_spawn_gem5.py b/src/python/gem5/utils/multiprocessing/popen_spawn_gem5.py
new file mode 100644
index 0000000..13fb336
--- /dev/null
+++ b/src/python/gem5/utils/multiprocessing/popen_spawn_gem5.py
@@ -0,0 +1,95 @@
+# Copyright (c) 2022 The Regents of The University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+This file contains extensions of the multiprocessing module to be used with gem5.
+Specifically, it contains the code to spawn a new gem5 process with Popen.
+Some code is from the Python standard library implementation of the
+multiprocessing module (i.e., cpython/Lib/multiprocessing/).
+"""
+
+import io
+import os
+
+from multiprocessing.context import reduction, set_spawning_popen
+from multiprocessing import popen_spawn_posix
+from multiprocessing import spawn
+from multiprocessing import util
+
+from ._command_line import get_command_line
+
+__all__ = ["Popen"]
+
+
+class Popen(popen_spawn_posix.Popen):
+    method = "spawn_gem5"
+
+    def __init__(self, process_obj):
+        super().__init__(process_obj)
+
+    # Copyright (c) 2001-2022 Python Software Foundation; All Rights Reserved
+    # from cpython/Lib/multiprocessing/popen_spawn_posix.py
+    def _launch(self, process_obj):
+        from multiprocessing import resource_tracker
+
+        tracker_fd = resource_tracker.getfd()
+        self._fds.append(tracker_fd)
+        prep_data = spawn.get_preparation_data(process_obj._name)
+        fp = io.BytesIO()
+        set_spawning_popen(self)
+        try:
+            reduction.dump(prep_data, fp)
+            reduction.dump(process_obj, fp)
+        finally:
+            set_spawning_popen(None)
+
+        parent_r = child_w = child_r = parent_w = None
+        try:
+            parent_r, child_w = os.pipe()
+            child_r, parent_w = os.pipe()
+            # Note: This next line is the only modification
+            cmd = get_command_line(
+                tracker_fd=tracker_fd,
+                pipe_handle=child_r,
+                name=process_obj.name,
+            )
+            self._fds.extend([child_r, child_w])
+            self.pid = util.spawnv_passfds(
+                spawn.get_executable(), cmd, self._fds
+            )
+            self.sentinel = parent_r
+            with open(parent_w, "wb", closefd=False) as f:
+                f.write(fp.getbuffer())
+        finally:
+            fds_to_close = []
+            for fd in (parent_r, parent_w):
+                if fd is not None:
+                    fds_to_close.append(fd)
+            self.finalizer = util.Finalize(self, util.close_fds, fds_to_close)
+
+            for fd in (child_r, child_w):
+                if fd is not None:
+                    os.close(fd)