blob: 8d406a9ad5d64f6254b98d721d1461e97ad324e6 [file] [log] [blame]
# Copyright (c) 2023 The Regents of the University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import json
import requests
import base64
import os
from jsonschema import validate
class ResourceJsonCreator:
    """
    This class generates the JSON which is pushed onto MongoDB.

    On a high-level, it does the following:
        - Downloads the legacy resources JSON for a given version key.
        - Adds certain fields to every resource and populates them.
        - Makes sure every resulting resource follows the schema.
    """

    # gem5 GitHub URL; local gem5 paths of code examples are rewritten to
    # point below this URL.
    base_url = "https://github.com/gem5/gem5/tree/develop"

    # Maps a version key to the location of its legacy resources JSON.
    # NOTE(review): the "22.0" entry points at resources-21-2.json and the
    # "21.2" entry at resources-22-0.json, which looks swapped -- confirm
    # against the files on the server before changing either URL.
    resource_url_map = {
        "dev": (
            "https://gem5.googlesource.com/public/gem5-resources/+/refs/heads/"
            "develop/resources.json?format=TEXT"
        ),
        "22.1": (
            "https://gem5.googlesource.com/public/gem5-resources/+/refs/heads/"
            "stable/resources.json?format=TEXT"
        ),
        "22.0": (
            "http://resources.gem5.org/prev-resources-json/"
            "resources-21-2.json"
        ),
        "21.2": (
            "http://resources.gem5.org/prev-resources-json/"
            "resources-22-0.json"
        ),
    }

    def __init__(self):
        """
        Load the JSON schema used to validate the generated resources.

        The schema is read from "schema/schema.json", relative to the
        current working directory.
        """
        with open("schema/schema.json", "r") as f:
            self.schema = json.load(f)

    def _get_file_data(self, url):
        """
        Download a resources JSON file and return it parsed.

        The body is first treated as base64-encoded JSON (presumably what
        the googlesource "?format=TEXT" URLs return -- confirm). If that
        fails, the same response is parsed as plain JSON.

        :param url: URL of the resources JSON file
        :returns: The parsed JSON document
        """
        response = requests.get(url)
        try:
            decoded = base64.b64decode(response.text).decode("utf-8")
            return json.loads(decoded)
        except ValueError:
            # binascii.Error, UnicodeDecodeError and json.JSONDecodeError
            # are all ValueError subclasses: the body was not base64 JSON.
            return response.json()

    def _get_size(self, url):
        """
        Helper function to return the size of a download through its URL.
        Returns 0 if the request fails or the reported size is not a number.

        :param url: Download URL
        :returns: Value of the "content-length" header (0 when missing)
        """
        try:
            response = requests.head(url)
            return int(response.headers.get("content-length", 0))
        except (requests.RequestException, ValueError):
            return 0

    def _search_folder(self, folder_path, id):
        """
        Helper function to find the instance of a string in a folder.
        This is recursive, i.e., subfolders will also be searched.

        Entries are visited in sorted order so that the result is
        reproducible across runs and file systems.

        :param folder_path: Path to the folder to begin searching
        :param id: Phrase to search in the folder
        :returns matching_files: List of file paths (using forward
            slashes) to the files containing id
        """
        matching_files = []
        for filename in sorted(os.listdir(folder_path)):
            file_path = os.path.join(folder_path, filename)
            if os.path.isfile(file_path):
                # Undecodable bytes are skipped so that binary files do not
                # abort the search.
                with open(
                    file_path, "r", encoding="utf-8", errors="ignore"
                ) as f:
                    contents = f.read()
                if id in contents:
                    matching_files.append(file_path.replace("\\", "/"))
            elif os.path.isdir(file_path):
                matching_files.extend(self._search_folder(file_path, id))
        return matching_files

    @staticmethod
    def _root_partition(resource):
        """
        Return the "root_partition" stored in the "additional_metadata" of
        a resource, or "" when it is missing or None.

        :param resource: Resource dictionary
        """
        metadata = resource.get("additional_metadata") or {}
        root_partition = metadata.get("root_partition")
        return "" if root_partition is None else root_partition

    def _change_type(self, resource):
        """
        Derive the type of a resource from its name, documentation and URL.

        Workloads keep their type and get an "architecture" field taken
        from the first dash-separated part of their name (with "64" removed
        and upper-cased). For every other resource the first matching rule
        sets "type"; disk images additionally get a "root_partition" field
        and benchmarks a "benchmark" tag. The resource is modified in place.

        :param resource: Resource dictionary
        :returns resource: The same dictionary, updated
        """
        if resource["type"] == "workload":
            # get the architecture from the name and remove 64 from it
            resource["architecture"] = (
                resource["name"].split("-")[0].replace("64", "").upper()
            )
            return resource

        name = resource["name"]
        documentation = resource.get("documentation") or ""
        # Not every resource carries a download URL.
        url = resource.get("url")

        if "kernel" in name:
            resource["type"] = "kernel"
        elif "bootloader" in name:
            resource["type"] = "bootloader"
        elif "benchmark" in documentation:
            resource["type"] = "disk-image"
            resource.setdefault("tags", []).append("benchmark")
            resource["root_partition"] = self._root_partition(resource)
        elif url is not None and ".img.gz" in url:
            resource["type"] = "disk-image"
            resource["root_partition"] = self._root_partition(resource)
        elif "binary" in documentation:
            resource["type"] = "binary"
        elif "checkpoint" in documentation:
            resource["type"] = "checkpoint"
        elif "simpoint" in documentation:
            resource["type"] = "simpoint"
        return resource

    def _extract_code_examples(self, resource, source):
        """
        Find the code examples of a resource in a local gem5 checkout.

        Every file below "<source>/configs" which contains the quoted ID of
        the resource is a code example. Its path is rewritten to the gem5
        GitHub URL and stored under "example". "tested" is True if the name
        of the file appears in any file below "<source>/tests/gem5".

        :param resource: Resource dictionary; only its "id" is read
        :param source: Path to gem5
        :returns code_examples: List of {"example": ..., "tested": ...}
            dictionaries, one per code example
        """
        quoted_id = '"' + resource["id"] + '"'
        matching_files = self._search_folder(source + "/configs", quoted_id)

        tests_folder = source + "/tests/gem5"
        # _search_folder reports paths with forward slashes only.
        source_prefix = source.replace("\\", "/")

        code_examples = []
        for path in matching_files:
            tested = bool(
                self._search_folder(tests_folder, os.path.basename(path))
            )
            # Only swap the leading checkout path; a plain str.replace would
            # also rewrite later occurrences (e.g. "gem5" in "gem5_library").
            if path.startswith(source_prefix):
                example = self.base_url + path[len(source_prefix):]
            else:
                example = path
            code_examples.append({"example": example, "tested": tested})
        return code_examples

    def unwrap_resources(self, ver):
        """
        Download the resources of a version and flatten its groups.

        Resources of type "group" are replaced by the entries of their
        "contents" (one level deep); all other resources are kept as is.

        :param ver: Key of resource_url_map
        :returns new_resources: Flat list of resource dictionaries
        """
        data = self._get_file_data(self.resource_url_map[ver])
        new_resources = []
        for resource in data["resources"]:
            if resource["type"] == "group":
                new_resources.extend(resource["contents"])
            else:
                new_resources.append(resource)
        return new_resources

    def _get_example_usage(self, resource):
        """
        Return the line of Python which obtains the resource in gem5.

        :param resource: Resource dictionary with "category" and "id"
        """
        if resource["category"] == "workload":
            return f"Workload(\"{resource['id']}\")"
        return f"obtain_resource(resource_id=\"{resource['id']}\")"

    @staticmethod
    def _parse_front_matter(content):
        """
        Extract tags, authors and license from the text of a README.

        Only the text between the first two "---" markers is inspected.
        Fields which cannot be found keep their empty default.

        :param content: Full text of the README
        :returns metadata: Dictionary with "tags" (list), "author" (list)
            and "license" (string)
        """
        metadata = {
            "tags": [],
            "author": [],
            "license": "",
        }
        sections = content.split("---")
        if len(sections) < 2:
            return metadata
        front_matter = sections[1]

        if "tags:\n" in front_matter:
            # The tag list ends where the next "key:" begins; the last line
            # of that slice is the next key itself and is dropped.
            tag_block = front_matter.split("tags:\n")[1].split(":")[0]
            tags = [
                line.strip().replace("- ", "")
                for line in tag_block.split("\n")[:-1]
            ]
            metadata["tags"] = [] if tags == [""] else tags

        if "author:" in front_matter:
            author_line = front_matter.split("author:")[1].split("\n")[0]
            for char in '[]"':
                author_line = author_line.replace(char, "")
            metadata["author"] = [
                name.strip()
                for name in author_line.split(",")
                if name.strip()
            ]

        if "license:" in front_matter:
            license_line = front_matter.split("license:")[1].split("\n")[0]
            metadata["license"] = license_line.strip()

        return metadata

    def _parse_readme(self, url):
        """
        Download a README and extract its tags, authors and license.

        This is best effort: an empty URL or a failed download yields the
        empty defaults instead of an error.

        :param url: URL of the raw README file ("" if there is none)
        :returns metadata: Dictionary with "tags" (list), "author" (list)
            and "license" (string)
        """
        if not url:
            return self._parse_front_matter("")
        try:
            content = requests.get(url).text
        except requests.RequestException:
            return self._parse_front_matter("")
        return self._parse_front_matter(content)

    def _add_fields(self, resources, source):
        """
        Convert legacy resources into the new resource format.

        For every resource this derives the category, renames "name" and
        "documentation" to "id" and "description", lifts the entries of
        "additional_metadata" to the top level and adds the version,
        example usage, source URL, README metadata, code examples and
        download size. Fields whose value is None are dropped.

        The dictionaries in resources are modified in place along the way.

        :param resources: List of legacy resource dictionaries
        :param source: Path to gem5
        :returns new_resources: List of converted resource dictionaries
        """
        new_resources = []
        for resource in resources:
            res = self._change_type(resource)
            res["gem5_versions"] = ["23.0"]
            res["resource_version"] = "1.0.0"
            res["category"] = res.pop("type")
            res["id"] = res.pop("name")
            res["description"] = res.pop("documentation")
            if "additional_metadata" in res:
                res.update(res.pop("additional_metadata") or {})
            res["example_usage"] = self._get_example_usage(res)

            if "source" in res:
                url = (
                    "https://raw.githubusercontent.com/gem5/"
                    "gem5-resources/develop/"
                    + str(res["source"])
                    + "/README.md"
                )
                res["source_url"] = (
                    "https://github.com/gem5/gem5-resources/tree/develop/"
                    + str(res["source"])
                )
            else:
                url = ""
                res["source_url"] = ""

            metadata = self._parse_readme(url)
            res["tags"] = list(res.get("tags") or []) + metadata["tags"]
            res["author"] = metadata["author"]
            res["license"] = metadata["license"]
            res["code_examples"] = self._extract_code_examples(res, source)

            # "url" may be absent or explicitly None.
            download_url = res.get("url")
            if download_url is not None:
                download_url = download_url.replace(
                    "{url_base}", "http://dist.gem5.org/dist/develop"
                )
                res["url"] = download_url
                res["size"] = self._get_size(download_url)
            else:
                res["size"] = 0

            res = {k: v for k, v in res.items() if v is not None}
            new_resources.append(res)
        return new_resources

    def _validate_schema(self, resources):
        """
        Validate every resource against the loaded schema.

        The first resource which fails is printed before the error of the
        validator is re-raised.

        :param resources: List of resource dictionaries
        """
        for resource in resources:
            try:
                validate(resource, schema=self.schema)
            except Exception:
                print(resource)
                raise

    def create_json(self, version, source, output):
        """
        Create the resources JSON file of a version.

        :param version: Key of resource_url_map
        :param source: Path to gem5
        :param output: Path of the JSON file to write
        """
        resources = self.unwrap_resources(version)
        resources = self._add_fields(resources, source)
        self._validate_schema(resources)
        with open(output, "w") as f:
            json.dump(resources, f, indent=4)