util/gem5art/run/gem5art/run.py - public/gem5 - Git at Google

 # Copyright (c) 2019, 2021 The Regents of the University of California
 # All Rights Reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are
 # met: redistributions of source code must retain the above copyright
 # notice, this list of conditions and the following disclaimer;
 # redistributions in binary form must reproduce the above copyright
 # notice, this list of conditions and the following disclaimer in the
 # documentation and/or other materials provided with the distribution;
 # neither the name of the copyright holders nor the names of its
 # contributors may be used to endorse or promote products derived from
 # this software without specific prior written permission.
 #
 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 """
 This file defines a gem5Run object which contains all information needed to
 run a single gem5 test.

 This class works closely with the artifact module to ensure that the gem5
 experiment is reproducible and the output is saved to the database.
 """

 import hashlib
 import json
 import os
 from pathlib import Path
 import signal
 import subprocess
 import time
 from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
 from uuid import UUID, uuid4
 import zipfile

 from gem5art import artifact
 from gem5art.artifact import Artifact
 from gem5art.artifact._artifactdb import ArtifactDB


 class gem5Run:
     """
     This class holds all of the info required to run gem5.
     """

     _id: UUID
     hash: str
     type: str
     name: str
     gem5_binary_path: Path
     run_script: Path
     gem5_artifact: Artifact
     gem5_git_artifact: Artifact
     run_script_git_artifact: Artifact
     params: Tuple[str, ...]
     timeout: int
     check_failure: Callable[["gem5Run"], bool]

     gem5_name: str
     script_name: str
     linux_name: str
     disk_name: str
     string: str

     outdir: Path

     linux_binary_path: Path
     disk_image_path: Path
     linux_binary_artifact: Artifact
     disk_image_artifact: Artifact

     command: List[str]

     running: bool
     enqueue_time: float
     start_time: float
     end_time: float
     return_code: int
     kill_reason: str
     status: str
     pid: int
     task_id: Any

     results: Optional[Artifact]
     artifacts: List[Artifact]

     rerunnable: bool

     @classmethod
     def _create(
         cls,
         name: str,
         run_script: Path,
         outdir: Path,
         gem5_artifact: Artifact,
         gem5_git_artifact: Artifact,
         run_script_git_artifact: Artifact,
         params: Tuple[str, ...],
         timeout: int,
         check_failure: Callable[["gem5Run"], bool],
     ) -> "gem5Run":
         """
         Shared code between SE and FS when creating a run object.
         """
         run = cls()
         run.name = name
         run.gem5_binary_path = gem5_artifact.path
         run.run_script = run_script
         run.gem5_artifact = gem5_artifact
         run.gem5_git_artifact = gem5_git_artifact
         run.run_script_git_artifact = run_script_git_artifact
         run.params = params
         run.timeout = timeout

         # Note: Mypy doesn't support monkey patching like this
         run.check_failure = check_failure  # type: ignore

         run._id = uuid4()

         run.outdir = outdir.resolve()  # ensure this is absolute

         # Assumes **/<gem5_name>/gem5.<anything>
         run.gem5_name = run.gem5_binary_path.parent.name
         # Assumes **/<script_name>.py
         run.script_name = run.run_script.stem

         # Info about the actual run
         run.running = False
         run.enqueue_time = time.time()
         run.start_time = 0.0
         run.end_time = 0.0
         run.return_code = 0
         run.kill_reason = ""
         run.status = "Created"
         run.pid = 0
         run.task_id = None

         # Initially, there are no results
         run.results = None

         run.rerunnable = False

         return run

     @classmethod
     def createSERun(
         cls,
         name: str,
         run_script: str,
         outdir: str,
         gem5_artifact: Artifact,
         gem5_git_artifact: Artifact,
         run_script_git_artifact: Artifact,
         *params: str,
         timeout: int = 60 * 15,
         check_failure: Callable[["gem5Run"], bool] = lambda run: False,
     ) -> "gem5Run":
         """
         name is the name of the run. The name is not necessarily unique. The
         name could be used to query the results of the run.

         run_script is the path to the run script to pass to gem5.

         The artifact parameters (gem5_artifact, gem5_git_artifact, and
         run_script_git_artifact) are used to ensure this is reproducible run.

         Further parameters can be passed via extra arguments. These
         parameters will be passed in order to the gem5 run script.
         timeout is the time in seconds to run the subprocess before killing it.

         Note: When instantiating this class for the first time, it will create
         a file `info.json` in the outdir which contains a serialized version
         of this class.
         """

         run = cls._create(
             name,
             Path(run_script),
             Path(outdir),
             gem5_artifact,
             gem5_git_artifact,
             run_script_git_artifact,
             params,
             timeout,
             check_failure,
         )

         run.artifacts = [
             gem5_artifact,
             gem5_git_artifact,
             run_script_git_artifact,
         ]

         run.string = f"{run.gem5_name} {run.script_name}"
         run.string += " ".join(run.params)

         run.command = [
             str(run.gem5_binary_path),
             "-re",
             f"--outdir={run.outdir}",
             str(run.run_script),
         ]
         run.command += list(params)

         run.hash = run._getHash()
         run.type = "gem5 run"

         # Make the directory if it doesn't exist
         os.makedirs(run.outdir, exist_ok=True)
         run.dumpJson("info.json")

         return run

     @classmethod
     def createFSRun(
         cls,
         name: str,
         run_script: str,
         outdir: str,
         gem5_artifact: Artifact,
         gem5_git_artifact: Artifact,
         run_script_git_artifact: Artifact,
         linux_binary_artifact: Artifact,
         disk_image_artifact: Artifact,
         *params: str,
         timeout: int = 60 * 15,
         check_failure: Callable[["gem5Run"], bool] = lambda run: False,
     ) -> "gem5Run":
         """
         name is the name of the run. The name is not necessarily unique. The
         name could be used to query the results of the run.

         run_script is the path to the run script to pass to gem5.

         Further parameters can be passed via extra arguments. These
         parameters will be passed in order to the gem5 run script.

         check_failure is a user-defined function that will be executed
         periodically (e.g., every 10 seconds) to check the health of the
         simulation. When it returns True, the simulation will be killed

         Note: When instantiating this class for the first time, it will create
         a file `info.json` in the outdir which contains a serialized version
         of this class.
         """
         run = cls._create(
             name,
             Path(run_script),
             Path(outdir),
             gem5_artifact,
             gem5_git_artifact,
             run_script_git_artifact,
             params,
             timeout,
             check_failure,
         )
         run.linux_binary_path = Path(linux_binary_artifact.path)
         run.disk_image_path = Path(disk_image_artifact.path)
         run.linux_binary_artifact = linux_binary_artifact
         run.disk_image_artifact = disk_image_artifact

         # Assumes **/<linux_name>
         run.linux_name = run.linux_binary_path.name
         # Assumes **/<disk_name>
         run.disk_name = disk_image_artifact.name

         run.artifacts = [
             gem5_artifact,
             gem5_git_artifact,
             run_script_git_artifact,
             linux_binary_artifact,
             disk_image_artifact,
         ]

         run.string = f"{run.gem5_name} {run.script_name} "
         run.string += f"{run.linux_name} {run.disk_name} "
         run.string += " ".join(run.params)
         run.command = [
             str(run.gem5_binary_path),
             "-re",
             f"--outdir={run.outdir}",
             str(run.run_script),
             str(run.linux_binary_path),
             str(run.disk_image_path),
         ]
         run.command += list(params)

         run.hash = run._getHash()
         run.type = "gem5 run fs"

         # Make the directory if it doesn't exist
         os.makedirs(run.outdir, exist_ok=True)
         run.dumpJson("info.json")

         return run

     @classmethod
     def loadJson(cls, filename: str) -> "gem5Run":
         with open(filename) as f:
             d = json.load(f)
             # Convert string version of UUID to UUID object
             for k, v in d.iteritems():
                 if k.endswith("_artifact"):
                     d[k] = UUID(v)
             d["_id"] = UUID(d["_id"])
         try:
             return cls.loadFromDict(d)
         except KeyError:
             print("Incompatible json file: {}!".format(filename))
             raise

     @classmethod
     def loadFromDict(cls, d: Dict[str, Union[str, UUID]]) -> "gem5Run":
         """Returns new gem5Run instance from the dictionary of values in d"""
         run = cls()
         run.artifacts = []
         for k, v in d.items():
             if isinstance(v, UUID) and k != "_id":
                 a = Artifact(v)
                 setattr(run, k, a)
                 run.artifacts.append(a)
             else:
                 setattr(run, k, v)
         return run

     def checkArtifacts(self, cwd: str) -> bool:
         """Checks to make sure all of the artifacts are up to date

         This should happen just before running gem5. This function will return
         False if the artifacts don't check and true if they are all the same.
         For the git repos, this checks the git hash, for binary artifacts this
         checks the md5 hash.
         """
         for v in self.artifacts:
             if v.type == "git repo":
                 new = artifact.artifact.getGit(cwd / v.path)["hash"]
                 old = v.git["hash"]
             else:
                 new = artifact.artifact.getHash(cwd / v.path)
                 old = v.hash

             if new != old:
                 self.status = f"Failed artifact check for {cwd / v.path}"
                 return False

         return True

     def __repr__(self) -> str:
         return str(self._getSerializable())

     def checkKernelPanic(self) -> bool:
         """
         Returns true if the gem5 instance specified in args has a kernel panic
         Note: this gets around the problem that gem5 doesn't exit on panics.
         """
         term_path = self.outdir / "system.pc.com_1.device"
         if not term_path.exists():
             return False

         with open(term_path, "rb") as f:
             try:
                 f.seek(-1000, os.SEEK_END)
             except OSError:
                 return False
             try:
                 # There was a case where reading `term_path` resulted in a
                 # UnicodeDecodeError. It is known that the terminal output
                 # (content of 'system.pc.com_1.device') is written from a
                 # buffer from gem5, and when gem5 stops, the content of the
                 # buffer is stopped being copied to the file. The buffer is
                 # not flushed as well. So, it might be a case that the content
                 # of the `term_path` is corrupted as a Unicode character could
                 # be longer than a byte.
                 last = f.readlines()[-1].decode()
                 if "Kernel panic" in last:
                     return True
                 else:
                     return False
             except UnicodeDecodeError:
                 return False

     def _getSerializable(self) -> Dict[str, Union[str, UUID]]:
         """Returns a dictionary that can be used to recreate this object

         Note: All artifacts are converted to a UUID instead of an Artifact.
         """
         # Grab all of the member variables
         d = vars(self).copy()

         # Remove list of artifacts
         del d["artifacts"]

         # Doesn't make sense to serialize the user-specified fail function
         if "check_failure" in d.keys():
             del d["check_failure"]

         # Replace the artifacts with their UUIDs
         for k, v in d.items():
             if isinstance(v, Artifact):
                 d[k] = v._id
             if isinstance(v, Path):
                 d[k] = str(v)

         return d

     def _getHash(self) -> str:
         """Return a single value that uniquely identifies this run

         To uniquely identify this run, the gem5 binary, gem5 scripts, and
         parameters should all match. Thus, let's make a single hash out of the
         artifacts + the runscript + parameters
         """
         to_hash = [art._id.bytes for art in self.artifacts]
         to_hash.append(str(self.run_script).encode())
         to_hash.append(" ".join(self.params).encode())

         return hashlib.md5(b"".join(to_hash)).hexdigest()

     @classmethod
     def _convertForJson(cls, d: Dict[str, Any]) -> Dict[str, str]:
         """Converts UUID objects to strings for json compatibility"""
         for k, v in d.items():
             if isinstance(v, UUID):
                 d[k] = str(v)
         return d

     def dumpJson(self, filename: str) -> None:
         """Dump all info into a json file"""
         d = self._convertForJson(self._getSerializable())
         with open(self.outdir / filename, "w") as f:
             json.dump(d, f)

     def dumpsJson(self) -> str:
         """Like dumpJson except returns string"""
         d = self._convertForJson(self._getSerializable())
         return json.dumps(d)

     def _run(self, task: Any = None, cwd: str = ".") -> None:
         """Actually run the test.

         Calls Popen with the command to fork a new process.
         Then, this function polls the process every 5 seconds to check if it
         has finished or not. Each time it checks, it dumps the json info so
         other applications can poll those files.

         task is the celery task that is running this gem5 instance.

         cwd is the directory to change to before running. This allows a server
         process to run in a different directory than the running process. Note
         that only the spawned process runs in the new directory.
         """
         # Connect to the database
         db = artifact.getDBConnection()

         self.status = "Begin run"
         self.dumpJson("info.json")

         if not self.checkArtifacts(cwd):
             self.dumpJson("info.json")
             return

         self.status = "Spawning"

         self.start_time = time.time()
         self.task_id = task.request.id if task else None
         self.dumpJson("info.json")

         # Start running the gem5 command
         proc = subprocess.Popen(self.command, cwd=cwd)

         # Register handler in case this process is killed while the gem5
         # instance is running. Note: there's a bit of a race condition here,
         # but hopefully it's not a big deal
         def handler(signum, frame):
             proc.kill()
             self.kill_reason = "sigterm"
             self.dumpJson("info.json")
             # Note: We'll fall out of the while loop after this.

         # This makes it so if you term *this* process, it will actually kill
         # the subprocess and then this process will die.
         signal.signal(signal.SIGTERM, handler)

         # Do this until the subprocess is done (successfully or not)
         while proc.poll() is None:
             self.status = "Running"
             # Still running
             self.current_time = time.time()
             self.pid = proc.pid
             self.running = True

             if self.current_time - self.start_time > self.timeout:
                 proc.kill()
                 self.kill_reason = "timeout"

             if self.checkKernelPanic():
                 proc.kill()
                 self.kill_reason = "kernel panic"

             # Assigning a function/lambda to an object variable does not make
             # the function/lambda become a bound one. Therefore, the
             # user-defined function must pass `self` in.
             # Here, mypy classifies self.check_failure() as a bound function,
             # so we tell mypy to ignore it./
             if self.check_failure(self):  # type: ignore
                 proc.kill()
                 self.kill_reason = "User defined kill"

             self.dumpJson("info.json")

             # Check again in five seconds
             time.sleep(5)

         print("Done running {}".format(" ".join(self.command)))

         # Done executing
         self.running = False
         self.end_time = time.time()
         self.return_code = proc.returncode

         if self.return_code == 0:
             self.status = "Finished"
         else:
             self.status = "Failed"

         self.dumpJson("info.json")

         self.saveResults()

         # Store current gem5 run in the database
         db.put(self._id, self._getSerializable())

         print("Done storing the results of {}".format(" ".join(self.command)))

     def run(self, task: Any = None, cwd: str = ".") -> None:
         """Actually run the test.

         Calls Popen with the command to fork a new process.
         Then, this function polls the process every 5 seconds to check if it
         has finished or not. Each time it checks, it dumps the json info so
         other applications can poll those files.

         task is the celery task that is running this gem5 instance.

         cwd is the directory to change to before running. This allows a server
         process to run in a different directory than the running process. Note
         that only the spawned process runs in the new directory.
         """
         # Check if the run is already in the database
         db = artifact.getDBConnection()
         if self.hash in db:
             print(f"Error: Have already run {self.command}. Exiting!")
             return
         self._run(task, cwd)

     def rerun(self, task: Any = None, cwd: str = ".") -> None:
         """Rerun the test.

         Calls Popen with the command to fork a new process.
         Then, this function polls the process every 5 seconds to check if it
         has finished or not. Each time it checks, it dumps the json info so
         other applications can poll those files.

         task is the celery task that is running this gem5 instance.

         cwd is the directory to change to before running. This allows a server
         process to run in a different directory than the running process. Note
         that only the spawned process runs in the new directory.
         """
         # TODO: remove the old runs?
         self._run(task, cwd)

     def saveResults(self) -> None:
         """Zip up the output directory and store the results in the
         database."""

         with zipfile.ZipFile(
             self.outdir / "results.zip", "w", zipfile.ZIP_DEFLATED
         ) as zipf:
             for path in self.outdir.glob("**/*"):
                 if path.name == "results.zip":
                     continue
                 zipf.write(path, path.relative_to(self.outdir.parent))

         self.results = Artifact.registerArtifact(
             command=f"zip results.zip -r {self.outdir}",
             name=self.name,
             typ="directory",
             path=self.outdir / "results.zip",
             cwd="./",
             documentation="Compressed version of the results directory",
         )

     def __str__(self) -> str:
         return self.string + " -> " + self.status


 def getRuns(
     db: ArtifactDB, fs_only: bool = False, limit: int = 0
 ) -> Iterable[gem5Run]:
     """Returns a generator of gem5Run objects.

     If fs_only is True, then only full system runs will be returned.
     Limit specifies the maximum number of runs to return.
     """

     if not fs_only:
         runs = db.searchByType("gem5 run", limit=limit)
         for run in runs:
             yield gem5Run.loadFromDict(run)

     fsruns = db.searchByType("gem5 run fs", limit=limit)
     for run in fsruns:
         yield gem5Run.loadFromDict(run)


 def getRunsByName(
     db: ArtifactDB, name: str, fs_only: bool = False, limit: int = 0
 ) -> Iterable[gem5Run]:
     """Returns a generator of gem5Run objects, which have the field "name"
     **exactly** the same as the name parameter. The name used in this query
     is case sensitive.

     If fs_only is True, then only full system runs will be returned.
     Limit specifies the maximum number of runs to return.
     """

     if not fs_only:
         seruns = db.searchByNameType(name, "gem5 run", limit=limit)
         for run in seruns:
             yield gem5Run.loadFromDict(run)

     fsruns = db.searchByNameType(name, "gem5 run fs", limit=limit)

     for run in fsruns:
         yield gem5Run.loadFromDict(run)


 def getRunsByNameLike(
     db: ArtifactDB, name: str, fs_only: bool = False, limit: int = 0
 ) -> Iterable[gem5Run]:
     """Return a generator of gem5Run objects, which have the field "name"
     containing the name parameter as a substring. The name used in this
     query is case sensitive.

     If fs_only is True, then only full system runs will be returned.
     Limit specifies the maximum number of runs to return.
     """

     if not fs_only:
         seruns = db.searchByLikeNameType(name, "gem5 run", limit=limit)

         for run in seruns:
             yield gem5Run.loadFromDict(run)

     fsruns = db.searchByLikeNameType(name, "gem5 run fs", limit=limit)

     for run in fsruns:
         yield gem5Run.loadFromDict(run)


 def getRerunnableRunsByNameLike(
     db: ArtifactDB, name: str, fs_only: bool = False, limit: int = 0
 ) -> Iterable[gem5Run]:

     """Returns a generator of gem5Run objects having rerunnable as true
     and the object "name" containing the name parameter as a substring. The
     parameter is case sensitive.

     If fs_only is True, then only full system runs will be returned.
     Limit specifies the maximum number of runs to return.
     """

     for run in getRunsByNameLike(db, name, fs_only, limit):
         if run.rerunnable:
             yield run
	# Copyright (c) 2019, 2021 The Regents of the University of California
	# All Rights Reserved.
	#
	# Redistribution and use in source and binary forms, with or without
	# modification, are permitted provided that the following conditions are
	# met: redistributions of source code must retain the above copyright
	# notice, this list of conditions and the following disclaimer;
	# redistributions in binary form must reproduce the above copyright
	# notice, this list of conditions and the following disclaimer in the
	# documentation and/or other materials provided with the distribution;
	# neither the name of the copyright holders nor the names of its
	# contributors may be used to endorse or promote products derived from
	# this software without specific prior written permission.
	#
	# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	"""
	This file defines a gem5Run object which contains all information needed to
	run a single gem5 test.

	This class works closely with the artifact module to ensure that the gem5
	experiment is reproducible and the output is saved to the database.
	"""

	import hashlib
	import json
	import os
	from pathlib import Path
	import signal
	import subprocess
	import time
	from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
	from uuid import UUID, uuid4
	import zipfile

	from gem5art import artifact
	from gem5art.artifact import Artifact
	from gem5art.artifact._artifactdb import ArtifactDB


	class gem5Run:
	"""
	This class holds all of the info required to run gem5.
	"""

	_id: UUID
	hash: str
	type: str
	name: str
	gem5_binary_path: Path
	run_script: Path
	gem5_artifact: Artifact
	gem5_git_artifact: Artifact
	run_script_git_artifact: Artifact
	params: Tuple[str, ...]
	timeout: int
	check_failure: Callable[["gem5Run"], bool]

	gem5_name: str
	script_name: str
	linux_name: str
	disk_name: str
	string: str

	outdir: Path

	linux_binary_path: Path
	disk_image_path: Path
	linux_binary_artifact: Artifact
	disk_image_artifact: Artifact

	command: List[str]

	running: bool
	enqueue_time: float
	start_time: float
	end_time: float
	return_code: int
	kill_reason: str
	status: str
	pid: int
	task_id: Any

	results: Optional[Artifact]
	artifacts: List[Artifact]

	rerunnable: bool

	@classmethod
	def _create(
	cls,
	name: str,
	run_script: Path,
	outdir: Path,
	gem5_artifact: Artifact,
	gem5_git_artifact: Artifact,
	run_script_git_artifact: Artifact,
	params: Tuple[str, ...],
	timeout: int,
	check_failure: Callable[["gem5Run"], bool],
	) -> "gem5Run":
	"""
	Shared code between SE and FS when creating a run object.
	"""
	run = cls()
	run.name = name
	run.gem5_binary_path = gem5_artifact.path
	run.run_script = run_script
	run.gem5_artifact = gem5_artifact
	run.gem5_git_artifact = gem5_git_artifact
	run.run_script_git_artifact = run_script_git_artifact
	run.params = params
	run.timeout = timeout

	# Note: Mypy doesn't support monkey patching like this
	run.check_failure = check_failure # type: ignore

	run._id = uuid4()

	run.outdir = outdir.resolve() # ensure this is absolute

	# Assumes **/<gem5_name>/gem5.<anything>
	run.gem5_name = run.gem5_binary_path.parent.name
	# Assumes **/<script_name>.py
	run.script_name = run.run_script.stem

	# Info about the actual run
	run.running = False
	run.enqueue_time = time.time()
	run.start_time = 0.0
	run.end_time = 0.0
	run.return_code = 0
	run.kill_reason = ""
	run.status = "Created"
	run.pid = 0
	run.task_id = None

	# Initially, there are no results
	run.results = None

	run.rerunnable = False

	return run

	@classmethod
	def createSERun(
	cls,
	name: str,
	run_script: str,
	outdir: str,
	gem5_artifact: Artifact,
	gem5_git_artifact: Artifact,
	run_script_git_artifact: Artifact,
	*params: str,
	timeout: int = 60 * 15,
	check_failure: Callable[["gem5Run"], bool] = lambda run: False,
	) -> "gem5Run":
	"""
	name is the name of the run. The name is not necessarily unique. The
	name could be used to query the results of the run.

	run_script is the path to the run script to pass to gem5.

	The artifact parameters (gem5_artifact, gem5_git_artifact, and
	run_script_git_artifact) are used to ensure this is reproducible run.

	Further parameters can be passed via extra arguments. These
	parameters will be passed in order to the gem5 run script.
	timeout is the time in seconds to run the subprocess before killing it.

	Note: When instantiating this class for the first time, it will create
	a file `info.json` in the outdir which contains a serialized version
	of this class.
	"""

	run = cls._create(
	name,
	Path(run_script),
	Path(outdir),
	gem5_artifact,
	gem5_git_artifact,
	run_script_git_artifact,
	params,
	timeout,
	check_failure,
	)

	run.artifacts = [
	gem5_artifact,
	gem5_git_artifact,
	run_script_git_artifact,
	]

	run.string = f"{run.gem5_name} {run.script_name}"
	run.string += " ".join(run.params)

	run.command = [
	str(run.gem5_binary_path),
	"-re",
	f"--outdir={run.outdir}",
	str(run.run_script),
	]
	run.command += list(params)

	run.hash = run._getHash()
	run.type = "gem5 run"

	# Make the directory if it doesn't exist
	os.makedirs(run.outdir, exist_ok=True)
	run.dumpJson("info.json")

	return run

	@classmethod
	def createFSRun(
	cls,
	name: str,
	run_script: str,
	outdir: str,
	gem5_artifact: Artifact,
	gem5_git_artifact: Artifact,
	run_script_git_artifact: Artifact,
	linux_binary_artifact: Artifact,
	disk_image_artifact: Artifact,
	*params: str,
	timeout: int = 60 * 15,
	check_failure: Callable[["gem5Run"], bool] = lambda run: False,
	) -> "gem5Run":
	"""
	name is the name of the run. The name is not necessarily unique. The
	name could be used to query the results of the run.

	run_script is the path to the run script to pass to gem5.

	Further parameters can be passed via extra arguments. These
	parameters will be passed in order to the gem5 run script.

	check_failure is a user-defined function that will be executed
	periodically (e.g., every 10 seconds) to check the health of the
	simulation. When it returns True, the simulation will be killed

	Note: When instantiating this class for the first time, it will create
	a file `info.json` in the outdir which contains a serialized version
	of this class.
	"""
	run = cls._create(
	name,
	Path(run_script),
	Path(outdir),
	gem5_artifact,
	gem5_git_artifact,
	run_script_git_artifact,
	params,
	timeout,
	check_failure,
	)
	run.linux_binary_path = Path(linux_binary_artifact.path)
	run.disk_image_path = Path(disk_image_artifact.path)
	run.linux_binary_artifact = linux_binary_artifact
	run.disk_image_artifact = disk_image_artifact

	# Assumes **/<linux_name>
	run.linux_name = run.linux_binary_path.name
	# Assumes **/<disk_name>
	run.disk_name = disk_image_artifact.name

	run.artifacts = [
	gem5_artifact,
	gem5_git_artifact,
	run_script_git_artifact,
	linux_binary_artifact,
	disk_image_artifact,
	]

	run.string = f"{run.gem5_name} {run.script_name} "
	run.string += f"{run.linux_name} {run.disk_name} "
	run.string += " ".join(run.params)
	run.command = [
	str(run.gem5_binary_path),
	"-re",
	f"--outdir={run.outdir}",
	str(run.run_script),
	str(run.linux_binary_path),
	str(run.disk_image_path),
	]
	run.command += list(params)

	run.hash = run._getHash()
	run.type = "gem5 run fs"

	# Make the directory if it doesn't exist
	os.makedirs(run.outdir, exist_ok=True)
	run.dumpJson("info.json")

	return run

	@classmethod
	def loadJson(cls, filename: str) -> "gem5Run":
	with open(filename) as f:
	d = json.load(f)
	# Convert string version of UUID to UUID object
	for k, v in d.iteritems():
	if k.endswith("_artifact"):
	d[k] = UUID(v)
	d["_id"] = UUID(d["_id"])
	try:
	return cls.loadFromDict(d)
	except KeyError:
	print("Incompatible json file: {}!".format(filename))
	raise

	@classmethod
	def loadFromDict(cls, d: Dict[str, Union[str, UUID]]) -> "gem5Run":
	"""Returns new gem5Run instance from the dictionary of values in d"""
	run = cls()
	run.artifacts = []
	for k, v in d.items():
	if isinstance(v, UUID) and k != "_id":
	a = Artifact(v)
	setattr(run, k, a)
	run.artifacts.append(a)
	else:
	setattr(run, k, v)
	return run

	def checkArtifacts(self, cwd: str) -> bool:
	"""Checks to make sure all of the artifacts are up to date

	This should happen just before running gem5. This function will return
	False if the artifacts don't check and true if they are all the same.
	For the git repos, this checks the git hash, for binary artifacts this
	checks the md5 hash.
	"""
	for v in self.artifacts:
	if v.type == "git repo":
	new = artifact.artifact.getGit(cwd / v.path)["hash"]
	old = v.git["hash"]
	else:
	new = artifact.artifact.getHash(cwd / v.path)
	old = v.hash

	if new != old:
	self.status = f"Failed artifact check for {cwd / v.path}"
	return False

	return True

	def __repr__(self) -> str:
	return str(self._getSerializable())

	def checkKernelPanic(self) -> bool:
	"""
	Returns true if the gem5 instance specified in args has a kernel panic
	Note: this gets around the problem that gem5 doesn't exit on panics.
	"""
	term_path = self.outdir / "system.pc.com_1.device"
	if not term_path.exists():
	return False

	with open(term_path, "rb") as f:
	try:
	f.seek(-1000, os.SEEK_END)
	except OSError:
	return False
	try:
	# There was a case where reading `term_path` resulted in a
	# UnicodeDecodeError. It is known that the terminal output
	# (content of 'system.pc.com_1.device') is written from a
	# buffer from gem5, and when gem5 stops, the content of the
	# buffer is stopped being copied to the file. The buffer is
	# not flushed as well. So, it might be a case that the content
	# of the `term_path` is corrupted as a Unicode character could
	# be longer than a byte.
	last = f.readlines()[-1].decode()
	if "Kernel panic" in last:
	return True
	else:
	return False
	except UnicodeDecodeError:
	return False

	def _getSerializable(self) -> Dict[str, Union[str, UUID]]:
	"""Returns a dictionary that can be used to recreate this object

	Note: All artifacts are converted to a UUID instead of an Artifact.
	"""
	# Grab all of the member variables
	d = vars(self).copy()

	# Remove list of artifacts
	del d["artifacts"]

	# Doesn't make sense to serialize the user-specified fail function
	if "check_failure" in d.keys():
	del d["check_failure"]

	# Replace the artifacts with their UUIDs
	for k, v in d.items():
	if isinstance(v, Artifact):
	d[k] = v._id
	if isinstance(v, Path):
	d[k] = str(v)

	return d

	def _getHash(self) -> str:
	"""Return a single value that uniquely identifies this run

	To uniquely identify this run, the gem5 binary, gem5 scripts, and
	parameters should all match. Thus, let's make a single hash out of the
	artifacts + the runscript + parameters
	"""
	to_hash = [art._id.bytes for art in self.artifacts]
	to_hash.append(str(self.run_script).encode())
	to_hash.append(" ".join(self.params).encode())

	return hashlib.md5(b"".join(to_hash)).hexdigest()

	@classmethod
	def _convertForJson(cls, d: Dict[str, Any]) -> Dict[str, str]:
	"""Converts UUID objects to strings for json compatibility"""
	for k, v in d.items():
	if isinstance(v, UUID):
	d[k] = str(v)
	return d

	def dumpJson(self, filename: str) -> None:
	"""Dump all info into a json file"""
	d = self._convertForJson(self._getSerializable())
	with open(self.outdir / filename, "w") as f:
	json.dump(d, f)

	def dumpsJson(self) -> str:
	"""Like dumpJson except returns string"""
	d = self._convertForJson(self._getSerializable())
	return json.dumps(d)

	def _run(self, task: Any = None, cwd: str = ".") -> None:
	"""Actually run the test.

	Calls Popen with the command to fork a new process.
	Then, this function polls the process every 5 seconds to check if it
	has finished or not. Each time it checks, it dumps the json info so
	other applications can poll those files.

	task is the celery task that is running this gem5 instance.

	cwd is the directory to change to before running. This allows a server
	process to run in a different directory than the running process. Note
	that only the spawned process runs in the new directory.
	"""
	# Connect to the database
	db = artifact.getDBConnection()

	self.status = "Begin run"
	self.dumpJson("info.json")

	if not self.checkArtifacts(cwd):
	self.dumpJson("info.json")
	return

	self.status = "Spawning"

	self.start_time = time.time()
	self.task_id = task.request.id if task else None
	self.dumpJson("info.json")

	# Start running the gem5 command
	proc = subprocess.Popen(self.command, cwd=cwd)

	# Register handler in case this process is killed while the gem5
	# instance is running. Note: there's a bit of a race condition here,
	# but hopefully it's not a big deal
	def handler(signum, frame):
	proc.kill()
	self.kill_reason = "sigterm"
	self.dumpJson("info.json")
	# Note: We'll fall out of the while loop after this.

	# This makes it so if you term this process, it will actually kill
	# the subprocess and then this process will die.
	signal.signal(signal.SIGTERM, handler)

	# Do this until the subprocess is done (successfully or not)
	while proc.poll() is None:
	self.status = "Running"
	# Still running
	self.current_time = time.time()
	self.pid = proc.pid
	self.running = True

	if self.current_time - self.start_time > self.timeout:
	proc.kill()
	self.kill_reason = "timeout"

	if self.checkKernelPanic():
	proc.kill()
	self.kill_reason = "kernel panic"

	# Assigning a function/lambda to an object variable does not make
	# the function/lambda become a bound one. Therefore, the
	# user-defined function must pass `self` in.
	# Here, mypy classifies self.check_failure() as a bound function,
	# so we tell mypy to ignore it./
	if self.check_failure(self): # type: ignore
	proc.kill()
	self.kill_reason = "User defined kill"

	self.dumpJson("info.json")

	# Check again in five seconds
	time.sleep(5)

	print("Done running {}".format(" ".join(self.command)))

	# Done executing
	self.running = False
	self.end_time = time.time()
	self.return_code = proc.returncode

	if self.return_code == 0:
	self.status = "Finished"
	else:
	self.status = "Failed"

	self.dumpJson("info.json")

	self.saveResults()

	# Store current gem5 run in the database
	db.put(self._id, self._getSerializable())

	print("Done storing the results of {}".format(" ".join(self.command)))

	def run(self, task: Any = None, cwd: str = ".") -> None:
	"""Actually run the test.

	Calls Popen with the command to fork a new process.
	Then, this function polls the process every 5 seconds to check if it
	has finished or not. Each time it checks, it dumps the json info so
	other applications can poll those files.

	task is the celery task that is running this gem5 instance.

	cwd is the directory to change to before running. This allows a server
	process to run in a different directory than the running process. Note
	that only the spawned process runs in the new directory.
	"""
	# Check if the run is already in the database
	db = artifact.getDBConnection()
	if self.hash in db:
	print(f"Error: Have already run {self.command}. Exiting!")
	return
	self._run(task, cwd)

	def rerun(self, task: Any = None, cwd: str = ".") -> None:
	"""Rerun the test.

	Calls Popen with the command to fork a new process.
	Then, this function polls the process every 5 seconds to check if it
	has finished or not. Each time it checks, it dumps the json info so
	other applications can poll those files.

	task is the celery task that is running this gem5 instance.

	cwd is the directory to change to before running. This allows a server
	process to run in a different directory than the running process. Note
	that only the spawned process runs in the new directory.
	"""
	# TODO: remove the old runs?
	self._run(task, cwd)

	def saveResults(self) -> None:
	"""Zip up the output directory and store the results in the
	database."""

	with zipfile.ZipFile(
	self.outdir / "results.zip", "w", zipfile.ZIP_DEFLATED
	) as zipf:
	for path in self.outdir.glob("*/"):
	if path.name == "results.zip":
	continue
	zipf.write(path, path.relative_to(self.outdir.parent))

	self.results = Artifact.registerArtifact(
	command=f"zip results.zip -r {self.outdir}",
	name=self.name,
	typ="directory",
	path=self.outdir / "results.zip",
	cwd="./",
	documentation="Compressed version of the results directory",
	)

	def __str__(self) -> str:
	return self.string + " -> " + self.status


	def getRuns(
	db: ArtifactDB, fs_only: bool = False, limit: int = 0
	) -> Iterable[gem5Run]:
	"""Returns a generator of gem5Run objects.

	If fs_only is True, then only full system runs will be returned.
	Limit specifies the maximum number of runs to return.
	"""

	if not fs_only:
	runs = db.searchByType("gem5 run", limit=limit)
	for run in runs:
	yield gem5Run.loadFromDict(run)

	fsruns = db.searchByType("gem5 run fs", limit=limit)
	for run in fsruns:
	yield gem5Run.loadFromDict(run)


	def getRunsByName(
	db: ArtifactDB, name: str, fs_only: bool = False, limit: int = 0
	) -> Iterable[gem5Run]:
	"""Returns a generator of gem5Run objects, which have the field "name"
	exactly the same as the name parameter. The name used in this query
	is case sensitive.

	If fs_only is True, then only full system runs will be returned.
	Limit specifies the maximum number of runs to return.
	"""

	if not fs_only:
	seruns = db.searchByNameType(name, "gem5 run", limit=limit)
	for run in seruns:
	yield gem5Run.loadFromDict(run)

	fsruns = db.searchByNameType(name, "gem5 run fs", limit=limit)

	for run in fsruns:
	yield gem5Run.loadFromDict(run)


	def getRunsByNameLike(
	db: ArtifactDB, name: str, fs_only: bool = False, limit: int = 0
	) -> Iterable[gem5Run]:
	"""Return a generator of gem5Run objects, which have the field "name"
	containing the name parameter as a substring. The name used in this
	query is case sensitive.

	If fs_only is True, then only full system runs will be returned.
	Limit specifies the maximum number of runs to return.
	"""

	if not fs_only:
	seruns = db.searchByLikeNameType(name, "gem5 run", limit=limit)

	for run in seruns:
	yield gem5Run.loadFromDict(run)

	fsruns = db.searchByLikeNameType(name, "gem5 run fs", limit=limit)

	for run in fsruns:
	yield gem5Run.loadFromDict(run)


	def getRerunnableRunsByNameLike(
	db: ArtifactDB, name: str, fs_only: bool = False, limit: int = 0
	) -> Iterable[gem5Run]:

	"""Returns a generator of gem5Run objects having rerunnable as true
	and the object "name" containing the name parameter as a substring. The
	parameter is case sensitive.

	If fs_only is True, then only full system runs will be returned.
	Limit specifies the maximum number of runs to return.
	"""

	for run in getRunsByNameLike(db, name, fs_only, limit):
	if run.rerunnable:
	yield run