resources: add Pannotia to gem5-resources
This commit adds hipify'd versions of Pannotia to gem5-resouces. All
applications were tested in gem5 and on a Vega-class GPU to ensure
correctness. The original porting work was done by Gaurav Jain, I just
updated and tested them.
Note that Pannotia, from prior work by Joel Hestness, already had
support for m5ops. I also updated this support to work with the current
support for m5ops. All benchmark's Makefiles assuming GEM5_ROOT is
either already specificed as an environment variable or must be set
properly in the Makefile for m5ops to work correctly.
Change-Id: I562951130e840dc44b40270c57d8948a1cde459b
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5-resources/+/51508
Reviewed-by: Matthew Poremba <matthew.poremba@amd.com>
Reviewed-by: Bobby R. Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby R. Bruce <bbruce@ucdavis.edu>
Tested-by: Bobby R. Bruce <bbruce@ucdavis.edu>
diff --git a/src/gpu/pannotia/README.md b/src/gpu/pannotia/README.md
new file mode 100644
index 0000000..dca4bed
--- /dev/null
+++ b/src/gpu/pannotia/README.md
@@ -0,0 +1,12 @@
+---
+title: Pannotia Tests
+tags:
+ - x86
+ - amdgpu
+layout: default
+permalink: resources/pannotia/
+shortdoc: >
+ Resources to build a disk image for each of the GCN3 Pannotia workloads.
+---
+
+This folder and its subfolders contain each of the 9 Pannotia benchmarks (there are 6 folders because Color, and PageRank, SSSP each have 2 versions). All of these benchmarks have been ported from the prior CUDA and OpenCL variants to HIP, and validated on a Vega-class AMD GPU. See each application's README for details on how to compile and run them in gem5 using the GCN3 GPU model.
diff --git a/src/gpu/pannotia/bc/BC.cpp b/src/gpu/pannotia/bc/BC.cpp
new file mode 100644
index 0000000..df676d9
--- /dev/null
+++ b/src/gpu/pannotia/bc/BC.cpp
@@ -0,0 +1,322 @@
+/************************************************************************************\
+ * *
+ * Copyright � 2014 Advanced Micro Devices, Inc. *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair *
+ * All rights reserved. *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted provided that the following are met: *
+ * *
+ * You must reproduce the above copyright notice. *
+ * *
+ * Neither the name of the copyright holder nor the names of its contributors *
+ * may be used to endorse or promote products derived from this software *
+ * without specific, prior, written permission from at least the copyright holder. *
+ * *
+ * You must include the following terms in your license and/or other materials *
+ * provided with the software. *
+ * *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY *
+ * OF SUCH DAMAGE. *
+ * *
+ * Without limiting the foregoing, the software may implement third party *
+ * technologies for which you must obtain licenses from parties other than AMD. *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall *
+ * be responsible for obtaining the rights to use and/or distribute the applicable *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder. *
+ * *
+ * If you use the software (in whole or in part), you shall adhere to all *
+ * applicable U.S., European, and other export laws, including but not limited to *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774), *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009. Further, pursuant *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a *
+ * license granted by the United States Department of Commerce Bureau of Industry *
+ * and Security or as otherwise permitted pursuant to a License Exception under *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export, *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or *
+ * E:2 any restricted technology, software, or source code you receive hereunder, *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
+ * technology or software, if such foreign produced direct product is subject to *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR). For the most current Country Group *
+ * listings, or for additional information about the EAR or your obligations under *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's *
+ * website at http://www.bis.doc.gov/. *
+ * *
+\************************************************************************************/
+
+#include "hip/hip_runtime.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+//#include <sys/time.h>
+#include <algorithm>
+#include "BC.h"
+#include "../graph_parser/util.h"
+#include "kernel.h"
+
+#ifdef GEM5_FUSION
+#include <stdint.h>
+#include <gem5/m5ops.h>
+#endif
+
+#ifdef GEM5_FUSION
+#define MAX_ITERS 150
+#else
+#include <stdint.h>
+#define MAX_ITERS INT32_MAX
+#endif
+
+void print_vector(int *vector, int num);
+void print_vectorf(float *vector, int num);
+
+int main(int argc, char **argv)
+{
+ char *tmpchar;
+
+ int num_nodes;
+ int num_edges;
+ bool directed = 1;
+
+ hipError_t err;
+
+ if (argc == 2) {
+ tmpchar = argv[1]; //graph inputfile
+ } else {
+ fprintf(stderr, "You did something wrong!\n");
+ exit(1);
+ }
+
+ // Parse graph and store it in a CSR format
+ csr_array *csr = parseCOO(tmpchar, &num_nodes, &num_edges, directed);
+
+ // Allocate the bc host array
+ float *bc_h = (float *)malloc(num_nodes * sizeof(float));
+ if (!bc_h) fprintf(stderr, "malloc failed bc_h\n");
+
+ // Create device-side buffers
+ float *bc_d, *sigma_d, *rho_d;
+ int *dist_d, *stop_d;
+ int *row_d, *col_d, *row_trans_d, *col_trans_d;
+
+ // Create betweenness centrality buffers
+ err = hipMalloc(&bc_d, num_nodes * sizeof(float));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc bc_d %s\n", hipGetErrorString(err));
+ return -1;
+ }
+ err = hipMalloc(&dist_d, num_nodes * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc dist_d %s\n", hipGetErrorString(err));
+ return -1;
+ }
+ err = hipMalloc(&sigma_d, num_nodes * sizeof(float));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc sigma_d %s\n", hipGetErrorString(err));
+ return -1;
+ }
+ err = hipMalloc(&rho_d, num_nodes * sizeof(float));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc rho_d %s\n", hipGetErrorString(err));
+ return -1;
+ }
+
+ // Create termination variable buffer
+ err = hipMalloc(&stop_d, sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc stop_d %s\n", hipGetErrorString(err));
+ return -1;
+ }
+
+ // Create graph buffers
+ err = hipMalloc(&row_d, (num_nodes + 1) * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc row_d %s\n", hipGetErrorString(err));
+ return -1;
+ }
+ err = hipMalloc(&col_d, num_edges * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc col_d %s\n", hipGetErrorString(err));
+ return -1;
+ }
+ err = hipMalloc(&row_trans_d, (num_nodes + 1) * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc row_trans_d %s\n", hipGetErrorString(err));
+ return -1;
+ }
+ err = hipMalloc(&col_trans_d, num_edges * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc col_trans_d %s\n", hipGetErrorString(err));
+ return -1;
+ }
+
+ //double timer1, timer2;
+ //double timer3, timer4;
+
+ //timer1 = gettime();
+
+#ifdef GEM5_FUSION
+ m5_work_begin(0, 0);
+#endif
+
+ // Copy data to device-side buffers
+ err = hipMemcpy(row_d, csr->row_array, (num_nodes + 1) * sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMemcpy row_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+
+ err = hipMemcpy(col_d, csr->col_array, num_edges * sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMemcpy col_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+
+ // Copy data to device-side buffers
+ err = hipMemcpy(row_trans_d, csr->row_array_t, (num_nodes + 1) * sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMemcpy row_trans_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+
+ err = hipMemcpy(col_trans_d, csr->col_array_t, num_edges * sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMemcpy col_trans_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+
+ //timer3 = gettime();
+
+ // Set up kernel dimensions
+ int local_worksize = 128;
+ dim3 threads(local_worksize, 1, 1);
+ int num_blocks = (num_nodes + local_worksize - 1) / local_worksize;
+ dim3 grid(num_blocks, 1, 1);
+
+ // Initialization
+ hipLaunchKernelGGL(HIP_KERNEL_NAME(clean_bc), dim3(grid), dim3(threads ), 0, 0, bc_d, num_nodes);
+
+ // Main computation loop
+ for (int i = 0; i < num_nodes && i < MAX_ITERS; i++) {
+ hipLaunchKernelGGL(HIP_KERNEL_NAME(clean_1d_array), dim3(grid), dim3(threads ), 0, 0, i, dist_d, sigma_d, rho_d,
+ num_nodes);
+
+ // Depth of the traversal
+ int dist = 0;
+ // Termination variable
+ int stop = 1;
+
+ // Traverse the graph from the source node i
+ do {
+ stop = 0;
+
+ // Copy the termination variable to the device
+ hipMemcpy(stop_d, &stop, sizeof(int), hipMemcpyHostToDevice);
+
+ hipLaunchKernelGGL(HIP_KERNEL_NAME(bfs_kernel), dim3(grid), dim3(threads ), 0, 0, row_d, col_d, dist_d, rho_d, stop_d,
+ num_nodes, num_edges, dist);
+
+ // Copy back the termination variable from the device
+ hipMemcpy(&stop, stop_d, sizeof(int), hipMemcpyDeviceToHost);
+
+ // Another level
+ dist++;
+
+ } while (stop);
+
+ hipDeviceSynchronize();
+
+ // Traverse back from the deepest part of the tree
+ while (dist) {
+ hipLaunchKernelGGL(HIP_KERNEL_NAME(backtrack_kernel), dim3(grid), dim3(threads ), 0, 0, row_trans_d, col_trans_d,
+ dist_d, rho_d, sigma_d,
+ num_nodes, num_edges, dist, i,
+ bc_d);
+
+ // Back one level
+ dist--;
+ }
+ hipDeviceSynchronize();
+ fprintf(stdout, "Completed iteration %d\n", i);
+ }
+ hipDeviceSynchronize();
+ //timer4 = gettime();
+
+ // Copy back the results for the bc array
+ err = hipMemcpy(bc_h, bc_d, num_nodes * sizeof(float), hipMemcpyDeviceToHost);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: read buffer bc_d (%s)\n", hipGetErrorString(err));
+ return -1;
+ }
+
+#ifdef GEM5_FUSION
+ m5_work_end(0, 0);
+#endif
+
+ //timer2 = gettime();
+
+ //printf("kernel + memcopy time = %lf ms\n", (timer4 - timer3) * 1000);
+ //printf("kernel execution time = %lf ms\n", (timer2 - timer1) * 1000);
+
+#if 1
+ //dump the results to the file
+ print_vectorf(bc_h, num_nodes);
+#endif
+
+ // Clean up the host-side buffers
+ free(bc_h);
+ free(csr->row_array);
+ free(csr->col_array);
+ free(csr->data_array);
+ free(csr->row_array_t);
+ free(csr->col_array_t);
+ free(csr->data_array_t);
+ free(csr);
+
+ // Clean up the device-side buffers
+ hipFree(bc_d);
+ hipFree(dist_d);
+ hipFree(sigma_d);
+ hipFree(rho_d);
+ hipFree(stop_d);
+ hipFree(row_d);
+ hipFree(col_d);
+ hipFree(row_trans_d);
+ hipFree(col_trans_d);
+
+ return 0;
+}
+
+void print_vector(int *vector, int num)
+{
+ for (int i = 0; i < num; i++)
+ printf("%d: %d \n", i + 1, vector[i]);
+ printf("\n");
+}
+
+void print_vectorf(float *vector, int num)
+{
+
+ FILE * fp = fopen("result.out", "w");
+ if (!fp) {
+ printf("ERROR: unable to open result.txt\n");
+ }
+
+ for (int i = 0; i < num; i++) {
+ fprintf(fp, "%f\n", vector[i]);
+ }
+
+ fclose(fp);
+
+}
diff --git a/src/gpu/pannotia/bc/BC.h b/src/gpu/pannotia/bc/BC.h
new file mode 100644
index 0000000..0abdb12
--- /dev/null
+++ b/src/gpu/pannotia/bc/BC.h
@@ -0,0 +1,230 @@
+/************************************************************************************\
+ * *
+ * Copyright � 2014 Advanced Micro Devices, Inc. *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair *
+ * All rights reserved. *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted provided that the following are met: *
+ * *
+ * You must reproduce the above copyright notice. *
+ * *
+ * Neither the name of the copyright holder nor the names of its contributors *
+ * may be used to endorse or promote products derived from this software *
+ * without specific, prior, written permission from at least the copyright holder. *
+ * *
+ * You must include the following terms in your license and/or other materials *
+ * provided with the software. *
+ * *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY *
+ * OF SUCH DAMAGE. *
+ * *
+ * Without limiting the foregoing, the software may implement third party *
+ * technologies for which you must obtain licenses from parties other than AMD. *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall *
+ * be responsible for obtaining the rights to use and/or distribute the applicable *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder. *
+ * *
+ * If you use the software (in whole or in part), you shall adhere to all *
+ * applicable U.S., European, and other export laws, including but not limited to *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774), *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009. Further, pursuant *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a *
+ * license granted by the United States Department of Commerce Bureau of Industry *
+ * and Security or as otherwise permitted pursuant to a License Exception under *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export, *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or *
+ * E:2 any restricted technology, software, or source code you receive hereunder, *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
+ * technology or software, if such foreign produced direct product is subject to *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR). For the most current Country Group *
+ * listings, or for additional information about the EAR or your obligations under *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's *
+ * website at http://www.bis.doc.gov/. *
+ * *
+\************************************************************************************/
+
+typedef struct csr_array_t {
+
+ int *row_array;
+ int *col_array;
+ int *data_array;
+
+ int *row_array_t;
+ int *col_array_t;
+ int *data_array_t;
+
+} csr_array;
+
+
+typedef struct cooedgetuple {
+ int row;
+ int col;
+ int val;
+} CooTuple;
+
+bool compare(
+ CooTuple elem1,
+ CooTuple elem2)
+{
+ if (elem1.row < elem2.row)
+ return true;
+ return false;
+}
+
+void transform(CooTuple *tuple_array, int num_edges, int *row_array, int *col_array, int *data_array)
+{
+
+ int row_cnt = 0;
+ int prev = -1;
+ int idx;
+
+ for (idx = 0; idx < num_edges; idx++) {
+ int curr = tuple_array[idx].row;
+ if (curr != prev) {
+ row_array[row_cnt++] = idx;
+ prev = curr;
+ }
+
+ col_array[idx] = tuple_array[idx].col;
+ data_array[idx] = tuple_array[idx].val;
+
+ }
+ row_array[row_cnt] = idx;
+}
+
+csr_array * parseCOO(char* tmpchar, int *p_num_nodes, int *p_num_edges, bool directed)
+{
+ int cnt = 0;
+ int cnt1 = 0;
+ unsigned int lineno = 0;
+ char sp[2], a, p;
+ char * line = (char *)malloc(8192 * sizeof(char));
+ int num_nodes = 0, num_edges = 0;
+
+ FILE *fptr;
+ CooTuple *tuple_array = NULL;
+ CooTuple *tuple_array_t = NULL;
+
+ fptr = fopen(tmpchar, "r");
+ if (!fptr) {
+ fprintf(stderr, "Error when opennning file: %s\n", tmpchar);
+ perror("ERROR: ");
+ exit(1);
+ }
+
+ printf("Opening file: %s\n", tmpchar);
+
+ while (fgets(line, 8192, fptr)) {
+ int head, tail, weight;
+ switch (line[0]) {
+ case 'c':
+ break;
+ case 'p':
+ sscanf(line, "%c %s %d %d", &p, sp, p_num_nodes, p_num_edges);
+
+ if (!directed) {
+ *p_num_edges = *p_num_edges * 2;
+ printf("This is an undirected graph\n");
+ } else {
+ printf("This is a directed graph\n");
+ }
+
+ num_nodes = *p_num_nodes;
+ num_edges = *p_num_edges;
+
+ printf("Read from file: num_nodes = %d, num_edges = %d\n", num_nodes, num_edges);
+
+ tuple_array = (CooTuple *)malloc(sizeof(CooTuple) * num_edges);
+ if (!tuple_array) printf("malloc failed\n");
+ tuple_array_t = (CooTuple *)malloc(sizeof(CooTuple) * num_edges);
+ if (!tuple_array_t) printf("malloc failed\n");
+
+ break;
+
+ case 'a':
+ sscanf(line, "%c %d %d %d", &a, &head, &tail, &weight);
+
+ if (tail == head) printf("reporting self loop\n");
+
+ CooTuple temp, temp1;
+
+ temp.row = head - 1;
+ temp.col = tail - 1;
+ temp.val = weight;
+
+ temp1.row = tail - 1;
+ temp1.col = head - 1;
+ temp1.val = weight;
+
+ tuple_array[cnt++] = temp;
+ tuple_array_t[cnt1++] = temp1;
+
+ if (!directed) {
+
+ temp.row = tail - 1;
+ temp.col = head - 1;
+ temp.val = weight;
+
+ temp1.row = head - 1;
+ temp1.col = tail - 1;
+ temp1.val = weight;
+
+ tuple_array[cnt++] = temp;
+ tuple_array_t[cnt1++] = temp1;
+
+ }
+
+ break;
+ default:
+ fprintf(stderr, "exiting loop\n");
+ break;
+ }
+ lineno++;
+ }
+
+ std::stable_sort(tuple_array, tuple_array + num_edges, compare);
+ std::stable_sort(tuple_array_t, tuple_array_t + num_edges, compare);
+
+ int *row_array = (int *)malloc((num_nodes + 1) * sizeof(int));
+ int *col_array = (int *)malloc(num_edges * sizeof(int));
+ int *data_array = (int *)malloc(num_edges * sizeof(int));
+
+ int *row_array_t = (int *)malloc((num_nodes + 1) * sizeof(int));
+ int *col_array_t = (int *)malloc(num_edges * sizeof(int));
+ int *data_array_t = (int *)malloc(num_edges * sizeof(int));
+
+ transform(tuple_array, num_edges, row_array, col_array, data_array);
+ transform(tuple_array_t, num_edges, row_array_t, col_array_t, data_array_t);
+
+ fclose(fptr);
+ free(tuple_array);
+ free(tuple_array_t);
+
+ csr_array *csr = (csr_array *)malloc(sizeof(csr_array));
+
+ csr -> row_array = row_array;
+ csr -> col_array = col_array;
+ csr -> data_array = data_array;
+
+ csr -> row_array_t = row_array_t;
+ csr -> col_array_t = col_array_t;
+ csr -> data_array_t = data_array_t;
+
+ free(line);
+
+ return csr;
+}
+
diff --git a/src/gpu/pannotia/bc/Makefile b/src/gpu/pannotia/bc/Makefile
new file mode 100644
index 0000000..6158bac
--- /dev/null
+++ b/src/gpu/pannotia/bc/Makefile
@@ -0,0 +1,11 @@
+default:
+ make -f Makefile.default
+
+clean:
+ make -f Makefile.default clean
+
+gem5-fusion:
+ make -f Makefile.gem5-fusion
+
+clean-gem5-fusion:
+ make -f Makefile.gem5-fusion clean
diff --git a/src/gpu/pannotia/bc/Makefile.default b/src/gpu/pannotia/bc/Makefile.default
new file mode 100644
index 0000000..2147843
--- /dev/null
+++ b/src/gpu/pannotia/bc/Makefile.default
@@ -0,0 +1,24 @@
+CPPSRC = ../graph_parser/util.cpp
+CPPSRC = BC.cpp
+EXECUTABLE = bc_hip
+# BC has a compilation error in host code with ROCm 4.0, so O2 and O3 do not
+# pass even on real GPUs
+OPTS = -O1
+
+HIP_PATH ?= /opt/rocm/hip
+HIPCC = $(HIP_PATH)/bin/hipcc
+
+BIN_DIR ?= ./bin
+
+all: $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR)/$(EXECUTABLE): $(CPPSRC) ../graph_parser/util.cpp $(BIN_DIR)
+ $(HIPCC) $(OPTS) --amdgpu-target=gfx801,gfx803,gfx906 $(CXXFLAGS) ../graph_parser/util.cpp $(CPPSRC) -o $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR):
+ mkdir -p $(BIN_DIR)
+
+clean:
+ rm -rf $(BIN_DIR)
+
+.PHONY: square clean
diff --git a/src/gpu/pannotia/bc/Makefile.gem5-fusion b/src/gpu/pannotia/bc/Makefile.gem5-fusion
new file mode 100644
index 0000000..6226539
--- /dev/null
+++ b/src/gpu/pannotia/bc/Makefile.gem5-fusion
@@ -0,0 +1,28 @@
+CPPSRC = BC.cpp
+EXECUTABLE = bc.gem5
+# BC has a compilation error in host code with ROCm 4.0, so O2 and O3 do not
+# pass even on real GPUs
+OPTS = -O1
+
+HIP_PATH ?= /opt/rocm/hip
+HIPCC = $(HIP_PATH)/bin/hipcc
+
+# these are needed for m5ops
+GEM5_PATH ?= /path/to/gem5
+CFLAGS += -I$(GEM5_PATH)/include -I../graph_parser
+LDFLAGS += -L$(GEM5_PATH)/util/m5/build/x86/out -lm5
+
+BIN_DIR ?= ./bin
+
+all: $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR)/$(EXECUTABLE): $(CPPSRC) ../graph_parser/util.cpp $(BIN_DIR)
+ $(HIPCC) $(OPTS) --amdgpu-target=gfx801,gfx803 $(CXXFLAGS) ../graph_parser/util.cpp $(CPPSRC) -DGEM5_FUSION -o $(BIN_DIR)/$(EXECUTABLE) $(CFLAGS) $(LDFLAGS)
+
+$(BIN_DIR):
+ mkdir -p $(BIN_DIR)
+
+clean:
+ rm -rf $(BIN_DIR)
+
+.PHONY: square clean
diff --git a/src/gpu/pannotia/bc/README.md b/src/gpu/pannotia/bc/README.md
new file mode 100644
index 0000000..60499b3
--- /dev/null
+++ b/src/gpu/pannotia/bc/README.md
@@ -0,0 +1,49 @@
+---
+title: Pannotia BC Test
+tags:
+ - x86
+ - amdgpu
+layout: default
+permalink: resources/pannotia/bc
+shortdoc: >
+ Resources to build a disk image with the GCN3 Pannotia BC workload.
+---
+
+Betweenness Centrality (BC) is a graph analytics application that is part of the Pannotia benchmark suite. It is used to calculate betweenness centrality scores for all the vertices in a graph. The provided version is for use with the gpu-compute model of gem5. Thus, it has been ported from the prior CUDA and OpenCL variants to HIP, and validated on a Vega-class AMD GPU.
+
+Compiling BC, compiling the GCN3_X86/Vega_X86 versions of gem5, and running BC on gem5 is dependent on the gcn-gpu docker image, `util/dockerfiles/gcn-gpu/Dockerfile` on the [gem5 stable branch](https://gem5.googlesource.com/public/gem5/+/refs/heads/stable).
+
+## Compilation and Running
+
+To compile BC:
+
+```
+cd src/gpu/pannotia/bc
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu make gem5-fusion
+```
+
+If you use the Makefile.default file instead, the Makefile will generate code designed to run on the real GPU instead. Moreover, note that Makefile.gem5-fusion requires you to set the GEM5_ROOT variable (either on the command line or by modifying the Makefile), because the Pannotia applications have been updated to use [m5ops](https://www.gem5.org/documentation/general_docs/m5ops/). By default, the Makefile builds for gfx801 and gfx803, and is placed in the src/gpu/pannotia/bc/bin folder.
+
+## Compiling GCN3_X86/gem5.opt
+
+BC is a GPU application, which requires that gem5 is built with the GCN3_X86 (or Vega_X86, although this has been less heavily tested) architecture. The test is run with the GCN3_X86 gem5 variant, compiled using the gcn-gpu docker image:
+
+```
+git clone https://gem5.googlesource.com/public/gem5
+cd gem5
+docker run -u $UID:$GID --volume $(pwd):$(pwd) -w $(pwd) gcr.io/gem5-test/gcn-gpu:latest scons build/GCN3_X86/gem5.opt -j <num cores>
+```
+
+## Running BC on GCN3_X86/gem5.opt
+
+# Assuming gem5 and gem5-resources are in your working directory
+```
+wget http://dist.gem5.org/dist/develop/datasets/pannotia/bc/1k_128k.gr
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu gem5/build/GCN3_X86/gem5.opt gem5/configs/example/apu_se.py -n3 --mem-size=8GB --benchmark-root=gem5-resources/src/gpu/pannotia/bc/bin -c bc.gem5 --options="1k_128k.gr"
+```
+
+Note that the datasets from the original Pannotia suite have been uploaded to: <http://dist.gem5.org/dist/develop/datasets/pannotia>. We recommend you start with the 1k_128k.gr input (<http://dist.gem5.org/dist/develop/datasets/pannotia/bc/1k_128k.gr>), as this is the smallest input designed to run with BC.
+
+## Pre-built binary
+
+A pre-built binary will be added soon.
diff --git a/src/gpu/pannotia/bc/kernel.h b/src/gpu/pannotia/bc/kernel.h
new file mode 100644
index 0000000..442f007
--- /dev/null
+++ b/src/gpu/pannotia/bc/kernel.h
@@ -0,0 +1,241 @@
+/************************************************************************************\
+ * *
+ * Copyright � 2014 Advanced Micro Devices, Inc. *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair *
+ * All rights reserved. *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted provided that the following are met: *
+ * *
+ * You must reproduce the above copyright notice. *
+ * *
+ * Neither the name of the copyright holder nor the names of its contributors *
+ * may be used to endorse or promote products derived from this software *
+ * without specific, prior, written permission from at least the copyright holder. *
+ * *
+ * You must include the following terms in your license and/or other materials *
+ * provided with the software. *
+ * *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY *
+ * OF SUCH DAMAGE. *
+ * *
+ * Without limiting the foregoing, the software may implement third party *
+ * technologies for which you must obtain licenses from parties other than AMD. *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall *
+ * be responsible for obtaining the rights to use and/or distribute the applicable *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder. *
+ * *
+ * If you use the software (in whole or in part), you shall adhere to all *
+ * applicable U.S., European, and other export laws, including but not limited to *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774), *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009. Further, pursuant *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a *
+ * license granted by the United States Department of Commerce Bureau of Industry *
+ * and Security or as otherwise permitted pursuant to a License Exception under *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export, *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or *
+ * E:2 any restricted technology, software, or source code you receive hereunder, *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
+ * technology or software, if such foreign produced direct product is subject to *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR). For the most current Country Group *
+ * listings, or for additional information about the EAR or your obligations under *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's *
+ * website at http://www.bis.doc.gov/. *
+ * *
+\************************************************************************************/
+
+#ifndef KERNEL_H_
+#define KERNEL_H_
+
+#include "hip/hip_runtime.h"
+
+/**
+ * @brief Breadth-first traversal
+ * @param row CSR pointer array
+ * @param col CSR column array
+ * @param d Distance array
+ * @param rho Rho array
+ * @param p Dependency array
+ * @param cont Termination variable
+ * @param num_nodes Termination variable
+ * @param num_edges Termination variable
+ * @param dist Current traversal layer
+ */
+
+__global__ void
+bfs_kernel(int *row, int *col, int *d, float *rho, int *cont,
+ const int num_nodes, const int num_edges, const int dist)
+{
+ int tid = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
+
+ //navigate the current layer
+ if (tid < num_nodes && d[tid] == dist) {
+
+ //get the starting and ending pointers
+ //of the neighbor list
+
+ int start = row[tid];
+ int end;
+ if (tid + 1 < num_nodes)
+ end = row[tid + 1];
+ else
+ end = num_edges;
+
+ //navigate through the neighbor list
+ for (int edge = start; edge < end; edge++) {
+ int w = col[edge];
+ if (d[w] < 0) {
+ *cont = 1;
+ //traverse another layer
+ d[w] = dist + 1;
+ }
+ //transfer the rho value to the neighbor
+ if (d[w] == (dist + 1)) {
+ atomicAdd(&rho[w], rho[tid]);
+ }
+ }
+ }
+}
+
+/**
+ * @brief Back traversal
+ * @param row CSR pointer array
+ * @param col CSR column array
+ * @param d Distance array
+ * @param rho Rho array
+ * @param sigma Sigma array
+ * @param p Dependency array
+ * @param cont Termination variable
+ * @param num_nodes Termination variable
+ * @param num_edges Termination variable
+ * @param dist Current traversal layer
+ * @param s Source vertex
+ * @param bc Betweeness Centrality array
+ */
+
+__global__ void
+backtrack_kernel(int *row, int *col, int *d, float *rho, float *sigma,
+ const int num_nodes, const int num_edges, const int dist,
+ const int s, float* bc)
+{
+ int tid = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
+
+ // Navigate the current layer
+ if (tid < num_nodes && d[tid] == dist - 1) {
+
+ int start = row[tid];
+ int end;
+ if (tid + 1 < num_nodes)
+ end = row[tid + 1];
+ else
+ end = num_edges;
+
+ // Get the starting and ending pointers
+ // of the neighbor list in the reverse graph
+ for (int edge = start; edge < end; edge++) {
+ int w = col[edge];
+ // Update the sigma value traversing back
+ if (d[w] == dist - 2)
+ atomicAdd(&sigma[w], rho[w] / rho[tid] * (1 + sigma[tid]));
+ }
+
+ // Update the BC value
+ if (tid != s)
+ bc[tid] = bc[tid] + sigma[tid];
+ }
+
+}
+
+/**
+ * @brief back_sum_kernel (not used)
+ * @param s Source vertex
+ * @param dist Current traversal layer
+ * @param d Distance array
+ * @param sigma Sigma array
+ * @param bc Betweeness Centrality array
+ * @param num_nodes Termination variable
+ * @param num_edges Termination variable
+ */
+__global__ void
+back_sum_kernel(const int s, const int dist, int *d, float *sigma, float *bc,
+ const int num_nodes)
+{
+ int tid = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
+
+ if (tid < num_nodes) {
+ // If it is not the source
+ if (s != tid && d[tid] == dist - 1) {
+ bc[tid] = bc[tid] + sigma[tid];
+ }
+ }
+}
+
+/**
+ * @brief array set 1D
+ * @param s Source vertex
+ * @param dist_array Distance array
+ * @param sigma Sigma array
+ * @param rho Rho array
+ * @param num_nodes Termination variable
+ */
+__global__ void
+clean_1d_array(const int source, int *dist_array, float *sigma, float *rho,
+ const int num_nodes)
+{
+ int tid = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
+
+ if (tid < num_nodes) {
+
+ sigma[tid] = 0;
+
+ if (tid == source) {
+ // If source vertex rho = 1, dist = 0
+ rho[tid] = 1;
+ dist_array[tid] = 0;
+ } else {
+ // If other vertices rho = 0, dist = -1
+ rho[tid] = 0;
+ dist_array[tid] = -1;
+ }
+ }
+}
+
+/**
+ * @brief array set 2D
+ * @param p Dependency array
+ * @param num_nodes Number of vertices
+ */
+__global__ void clean_2d_array(int *p, const int num_nodes)
+{
+ int tid = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
+
+ if (tid < num_nodes * num_nodes)
+ p[tid] = 0;
+}
+
+/**
+ * @brief clean BC
+ * @param bc_d Betweeness Centrality array
+ * @param num_nodes Number of vertices
+ */
+__global__ void clean_bc(float *bc_d, const int num_nodes)
+{
+ int tid = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
+
+ if (tid < num_nodes)
+ bc_d[tid] = 0;
+}
+
+#endif // KERNEL_H_
diff --git a/src/gpu/pannotia/buildall.sh b/src/gpu/pannotia/buildall.sh
new file mode 100644
index 0000000..78192d2
--- /dev/null
+++ b/src/gpu/pannotia/buildall.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+extraparams=""
+buildparams=""
+if [ ! -z "$1" ]
+then
+ extraparams="$1"
+ buildparams="gem5-fusion"
+fi
+
+function savebin {
+ if [ "$2" == "" ]
+ then
+ backupbench="$1"
+ else
+ backupbench="gem5_fusion_$1"
+ fi
+ if [ -e "$backupbench" ]
+ then
+ echo "Saving bin $backupbench"
+ mv $backupbench $backupbench.bak
+ fi
+}
+
+function restorebin {
+ if [ "$2" == "" ]
+ then
+ backupbench="$1"
+ else
+ backupbench="gem5_fusion_$1"
+ fi
+ if [ -e "$backupbench.bak" ]
+ then
+ echo "Restoring bin $backupbench"
+ mv $backupbench.bak $backupbench
+ fi
+}
+
+for bench in bc color fw mis pagerank sssp
+do
+ echo $bench
+ pushd . >& /dev/null
+ cd $bench
+ make clean; make clean-gem5-fusion
+ make $buildparams $extraparams
+ if [ "$bench" == "color" ]
+ then
+ savebin "color_max" $buildparams
+ make clean; make clean-gem5-fusion
+ make $buildparams VARIANT=MAXMIN $extraparams
+ restorebin "color_max" $buildparams
+ elif [ "$bench" == "fw" ]
+ then
+ savebin "fw" $buildparams
+ make clean; make clean-gem5-fusion
+ make $buildparams VARIANT=BLOCK $extraparams
+ restorebin "fw" $buildparams
+ elif [ "$bench" == "pagerank" ]
+ then
+ savebin "pagerank" $buildparams
+ make clean; make clean-gem5-fusion
+ make $buildparams VARIANT=SPMV $extraparams
+ restorebin "pagerank" $buildparams
+ elif [ "$bench" == "sssp" ]
+ then
+ savebin "sssp" $buildparams
+ make clean; make clean-gem5-fusion
+ make $buildparams VARIANT=ELL $extraparams
+ restorebin "sssp" $buildparams
+ fi
+ popd >& /dev/null
+done
diff --git a/src/gpu/pannotia/cleanall.sh b/src/gpu/pannotia/cleanall.sh
new file mode 100644
index 0000000..256c4d3
--- /dev/null
+++ b/src/gpu/pannotia/cleanall.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+for bench in bc color fw mis pagerank sssp
+do
+ echo $bench
+ pushd . >& /dev/null
+ cd $bench
+ make clean; make clean-gem5-fusion
+ popd >& /dev/null
+done
diff --git a/src/gpu/pannotia/color/Makefile b/src/gpu/pannotia/color/Makefile
new file mode 100644
index 0000000..6158bac
--- /dev/null
+++ b/src/gpu/pannotia/color/Makefile
@@ -0,0 +1,11 @@
+default:
+ make -f Makefile.default
+
+clean:
+ make -f Makefile.default clean
+
+gem5-fusion:
+ make -f Makefile.gem5-fusion
+
+clean-gem5-fusion:
+ make -f Makefile.gem5-fusion clean
diff --git a/src/gpu/pannotia/color/Makefile.default b/src/gpu/pannotia/color/Makefile.default
new file mode 100644
index 0000000..6cada0d
--- /dev/null
+++ b/src/gpu/pannotia/color/Makefile.default
@@ -0,0 +1,27 @@
+HIP_PATH ?= /opt/rocm/hip
+HIPCC = $(HIP_PATH)/bin/hipcc
+
+BASEEXE = color
+VARIANT ?= MAX
+ifeq ($(VARIANT),MAX)
+ EXECUTABLE = $(BASEEXE)_max
+ CPPFILES += coloring_max.cpp
+else ifeq ($(VARIANT),MAXMIN)
+ EXECUTABLE = $(BASEEXE)_maxmin
+ CPPFILES += coloring_maxmin.cpp
+endif
+
+BIN_DIR ?= ./bin
+
+all: $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR)/$(EXECUTABLE): $(CPPFILES) ../graph_parser/parse.cpp ../graph_parser/util.cpp $(BIN_DIR)
+ $(HIPCC) -O3 --amdgpu-target=gfx801,gfx803,gfx906 $(CXXFLAGS) ../graph_parser/parse.cpp ../graph_parser/util.cpp $(CPPFILES) -o $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR):
+ mkdir -p $(BIN_DIR)
+
+clean:
+ rm -rf $(BIN_DIR)
+
+.PHONY: square clean
diff --git a/src/gpu/pannotia/color/Makefile.gem5-fusion b/src/gpu/pannotia/color/Makefile.gem5-fusion
new file mode 100644
index 0000000..703bf44
--- /dev/null
+++ b/src/gpu/pannotia/color/Makefile.gem5-fusion
@@ -0,0 +1,32 @@
+HIP_PATH ?= /opt/rocm/hip
+HIPCC = $(HIP_PATH)/bin/hipcc
+
+# these are needed for m5ops
+GEM5_PATH ?= /path/to/gem5
+CFLAGS += -I$(GEM5_PATH)/include -I/../graph_parser
+LDFLAGS += -L$(GEM5_PATH)/util/m5/build/x86/out -lm5
+
+BASEEXE = color
+VARIANT ?= MAX
+ifeq ($(VARIANT),MAX)
+ EXECUTABLE = $(BASEEXE)_max.gem5
+ CPPFILES += coloring_max.cpp
+else ifeq ($(VARIANT),MAXMIN)
+ EXECUTABLE = $(BASEEXE)_maxmin.gem5
+ CPPFILES += coloring_maxmin.cpp
+endif
+
+BIN_DIR ?= ./bin
+
+all: $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR)/$(EXECUTABLE): $(CPPFILES) ../graph_parser/parse.cpp ../graph_parser/util.cpp $(BIN_DIR)
+ $(HIPCC) -O3 --amdgpu-target=gfx801,gfx803 $(CXXFLAGS) ../graph_parser/parse.cpp ../graph_parser/util.cpp $(CPPFILES) -DGEM5_FUSION -o $(BIN_DIR)/$(EXECUTABLE) $(CFLAGS) $(LDFLAGS)
+
+$(BIN_DIR):
+ mkdir -p $(BIN_DIR)
+
+clean:
+ rm -rf $(BIN_DIR)
+
+.PHONY: square clean
diff --git a/src/gpu/pannotia/color/README.md b/src/gpu/pannotia/color/README.md
new file mode 100644
index 0000000..bc425bf
--- /dev/null
+++ b/src/gpu/pannotia/color/README.md
@@ -0,0 +1,68 @@
+---
+title: Pannotia Color Test
+tags:
+ - x86
+ - amdgpu
+layout: default
+permalink: resources/pannotia/bc
+shortdoc: >
+ Resources to build a disk image with the GCN3 Pannotia Color workload.
+---
+
+Graph Coloring (CLR) is a graph analytics application that is part of the Pannotia benchmark suite. It is used to label the vertices of a graph with colors such that no two adjacent vertices share the same color. The provided version is for use with the gpu-compute model of gem5. Thus, it has been ported from the prior CUDA and OpenCL variants to HIP, and validated on a Vega-class AMD GPU.
+
+Compiling both CLR variants, compiling the GCN3_X86/Vega_X86 versions of gem5, and running both CLR variants on gem5 is dependent on the gcn-gpu docker image, `util/dockerfiles/gcn-gpu/Dockerfile` on the [gem5 stable branch](https://gem5.googlesource.com/public/gem5/+/refs/heads/stable).
+
+## Compilation and Running
+
+To compile Color:
+
+Color has two variants: max and maxmin. To compile the "max" variant:
+
+```
+cd src/gpu/pannotia/clr
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu make gem5-fusion
+```
+
+To compile the "maxmin" variant:
+
+```
+cd src/gpu/pannotia/clr
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu bash -c "export VARIANT=MAXMIN ; make gem5-fusion"
+```
+
+If you use the Makefile.default file instead, the Makefile will generate code designed to run on the real GPU instead. Moreover, note that Makefile.gem5-fusion requires you to set the GEM5_ROOT variable (either on the command line or by modifying the Makefile), because the Pannotia applications have been updated to use [m5ops](https://www.gem5.org/documentation/general_docs/m5ops/). By default, for both variants the Makefile builds for gfx801 and gfx803, and the binaries are placed in the src/gpu/pannotia/clr/bin folder. Moreover, by default the VARIANT variable Color's Makefile assumes the max variant is being used, hence why this variable does not need to be set for compiling it.
+
+## Compiling GCN3_X86/gem5.opt
+
+Color is a GPU application, which requires that gem5 is built with the GCN3_X86 (or Vega_X86, although this has been less heavily tested) architecture. The test is run with the GCN3_X86 gem5 variant, compiled using the gcn-gpu docker image:
+
+```
+git clone https://gem5.googlesource.com/public/gem5
+cd gem5
+docker run -u $UID:$GID --volume $(pwd):$(pwd) -w $(pwd) gcr.io/gem5-test/gcn-gpu:latest scons build/GCN3_X86/gem5.opt -j <num cores>
+```
+
+## Running Color on GCN3_X86/gem5.opt
+
+The following command shows how to run the CLR max version:
+
+# Assuming gem5 and gem5-resources are in your working directory
+```
+wget http://dist.gem5.org/dist/develop/datasets/pannotia/bc/1k_128k.gr
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu gem5/build/GCN3_X86/gem5.opt gem5/configs/example/apu_se.py -n3 --mem-size=8GB --benchmark-root=gem5-resources/src/gpu/pannotia/clr/bin -c color_max.gem5 --options="1k_128k.gr 0"
+```
+
+To run the CLR maxmin version:
+
+# Assuming gem5, pannotia (input graphs, see below), and gem5-resources are in your working directory
+```
+wget http://dist.gem5.org/dist/develop/datasets/pannotia/bc/1k_128k.gr
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu gem5/build/GCN3_X86/gem5.opt gem5/configs/example/apu_se.py -n3 --mem-size=8GB --benchmark-root=gem5-resources/src/gpu/pannotia/clr/bin -c color_maxmin.gem5 --options="1k_128k.gr 0"
+```
+
+Note that the datasets from the original Pannotia suite have been uploaded to: <http://dist.gem5.org/dist/develop/datasets/pannotia>. We recommend you start with the 1k_128k.gr input (<http://dist.gem5.org/dist/develop/datasets/pannotia/bc/1k_128k.gr>), as this is the smallest input that can be run with CLR. Note that 1k_128k is not designed for Color specifically though -- the above link has larger graphs designed to run with Color that you should consider using for larger experiments.
+
+## Pre-built binary
+
+A pre-built binary will be added soon.
diff --git a/src/gpu/pannotia/color/coloring_max.cpp b/src/gpu/pannotia/color/coloring_max.cpp
new file mode 100644
index 0000000..b85245e
--- /dev/null
+++ b/src/gpu/pannotia/color/coloring_max.cpp
@@ -0,0 +1,303 @@
+/************************************************************************************\
+ * *
+ * Copyright © 2014 Advanced Micro Devices, Inc. *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair *
+ * All rights reserved. *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted provided that the following are met: *
+ * *
+ * You must reproduce the above copyright notice. *
+ * *
+ * Neither the name of the copyright holder nor the names of its contributors *
+ * may be used to endorse or promote products derived from this software *
+ * without specific, prior, written permission from at least the copyright holder. *
+ * *
+ * You must include the following terms in your license and/or other materials *
+ * provided with the software. *
+ * *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY *
+ * OF SUCH DAMAGE. *
+ * *
+ * Without limiting the foregoing, the software may implement third party *
+ * technologies for which you must obtain licenses from parties other than AMD. *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall *
+ * be responsible for obtaining the rights to use and/or distribute the applicable *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder. *
+ * *
+ * If you use the software (in whole or in part), you shall adhere to all *
+ * applicable U.S., European, and other export laws, including but not limited to *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774), *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009. Further, pursuant *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a *
+ * license granted by the United States Department of Commerce Bureau of Industry *
+ * and Security or as otherwise permitted pursuant to a License Exception under *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export, *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or *
+ * E:2 any restricted technology, software, or source code you receive hereunder, *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
+ * technology or software, if such foreign produced direct product is subject to *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR). For the most current Country Group *
+ * listings, or for additional information about the EAR or your obligations under *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's *
+ * website at http://www.bis.doc.gov/. *
+ * *
+\************************************************************************************/
+
+#include "hip/hip_runtime.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+//#include <sys/time.h>
+#include "../graph_parser/parse.h"
+#include "../graph_parser/util.h"
+#include "kernel_max.h"
+
+#ifdef GEM5_FUSION
+#include <stdint.h>
+#include <gem5/m5ops.h>
+#endif
+
+#define RANGE 2048
+
+void print_vector(int *vector, int num);
+
+int main(int argc, char **argv)
+{
+ char *tmpchar;
+
+ int num_nodes;
+ int num_edges;
+ int file_format = 1;
+ bool directed = 0;
+
+ hipError_t err = hipSuccess;
+
+ if (argc == 3) {
+ tmpchar = argv[1]; //graph inputfile
+ file_format = atoi(argv[2]); //graph format
+ } else {
+ fprintf(stderr, "You did something wrong!\n");
+ exit(1);
+ }
+
+ srand(7);
+
+ // Allocate the CSR structure
+ csr_array *csr;
+
+ // Parse graph file and store into a CSR format
+ if (file_format == 1)
+ csr = parseMetis(tmpchar, &num_nodes, &num_edges, directed);
+ else if (file_format == 0)
+ csr = parseCOO(tmpchar, &num_nodes, &num_edges, directed);
+ else {
+ printf("reserve for future");
+ exit(1);
+ }
+
+ // Allocate the vertex value array
+ int *node_value = (int *)malloc(num_nodes * sizeof(int));
+ if (!node_value) fprintf(stderr, "node_value malloc failed\n");
+ // Allocate the color array
+ int *color = (int *)malloc(num_nodes * sizeof(int));
+ if (!color) fprintf(stderr, "color malloc failed\n");
+
+ // Initialize all the colors to -1
+ // Randomize the value for each vertex
+ for (int i = 0; i < num_nodes; i++) {
+ color[i] = -1;
+ node_value[i] = rand() % RANGE;
+ }
+
+ int *row_d;
+ int *col_d;
+ int *max_d;
+
+ int *color_d;
+ int *node_value_d;
+ int *stop_d;
+
+ // Create device-side buffers for the graph
+ err = hipMalloc(&row_d, num_nodes * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc row_d (size:%d) => %s\n", num_nodes , hipGetErrorString(err));
+ return -1;
+ }
+ err = hipMalloc(&col_d, num_edges * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc col_d (size:%d): %s\n", num_edges , hipGetErrorString(err));
+ return -1;
+ }
+
+ // Termination variable
+ err = hipMalloc(&stop_d, sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc stop_d (size:%d) => %s\n", 1 , hipGetErrorString(err));
+ return -1;
+ }
+
+ // Create device-side buffers for color
+ err = hipMalloc(&color_d, num_nodes * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc color_d (size:%d) => %s\n", num_nodes , hipGetErrorString(err));
+ return -1;
+ }
+ err = hipMalloc(&node_value_d, num_nodes * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc node_value_d (size:%d) => %s\n", num_nodes , hipGetErrorString(err));
+ return -1;
+ }
+ err = hipMalloc(&max_d, num_nodes * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc max_d (size:%d) => %s\n", num_nodes , hipGetErrorString(err));
+ return -1;
+ }
+
+ // Copy data to device-side buffers
+// double timer1 = gettime();
+
+#ifdef GEM5_FUSION
+ m5_work_begin(0, 0);
+#endif
+
+ err = hipMemcpy(color_d, color, num_nodes * sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMemcpy color_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+
+ err = hipMemcpy(max_d, color, num_nodes * sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMemcpy max_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+
+ err = hipMemcpy(row_d, csr->row_array, num_nodes * sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMemcpy row_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+
+ err = hipMemcpy(col_d, csr->col_array, num_edges * sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMemcpy col_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+
+ err = hipMemcpy(node_value_d, node_value, num_nodes * sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMemcpy node_value_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+
+ int block_size = 256;
+ int num_blocks = (num_nodes + block_size - 1) / block_size;
+
+ // Set up kernel dimensions
+ dim3 threads(block_size, 1, 1);
+ dim3 grid(num_blocks, 1, 1);
+
+ int stop = 1;
+ int graph_color = 1;
+
+ // Main computation loop
+// double timer3 = gettime();
+
+ while (stop) {
+
+ stop = 0;
+
+ // Copy the termination variable to the device
+ err = hipMemcpy(stop_d, &stop, sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: write stop_d: %s\n", hipGetErrorString(err));
+ }
+
+ // Launch the color kernel 1
+ hipLaunchKernelGGL(HIP_KERNEL_NAME(color1), dim3(grid), dim3(threads ), 0, 0, row_d, col_d, node_value_d, color_d,
+ stop_d, max_d, graph_color, num_nodes,
+ num_edges);
+
+ // Launch the color kernel 2
+ hipLaunchKernelGGL(HIP_KERNEL_NAME(color2), dim3(grid), dim3(threads ), 0, 0, node_value_d, color_d, max_d, graph_color,
+ num_nodes, num_edges);
+
+ err = hipMemcpy(&stop, stop_d, sizeof(int), hipMemcpyDeviceToHost);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: read stop_d: %s\n", hipGetErrorString(err));
+ }
+
+ // Increment the color for the next iter
+ graph_color++;
+
+ }
+ hipDeviceSynchronize();
+
+// double timer4 = gettime();
+
+ // Copy back the color array
+ err = hipMemcpy(color, color_d, num_nodes * sizeof(int), hipMemcpyDeviceToHost);
+ if (err != hipSuccess) {
+ printf("ERROR: hipMemcpy(): %s\n", hipGetErrorString(err));
+ return -1;
+ }
+
+#ifdef GEM5_FUSION
+ m5_work_end(0, 0);
+#endif
+
+// double timer2 = gettime();
+
+ // Print out color and timing statistics
+ printf("total number of colors used: %d\n", graph_color);
+// printf("kernel time = %lf ms\n", (timer4 - timer3) * 1000);
+// printf("kernel + memcpy time = %lf ms\n", (timer2 - timer1) * 1000);
+
+#if 1
+ // Dump the color array into an output file
+ print_vector(color, num_nodes);
+#endif
+
+ // Free host-side buffers
+ free(node_value);
+ free(color);
+ csr->freeArrays();
+ free(csr);
+
+ // Free CUDA buffers
+ hipFree(row_d);
+ hipFree(col_d);
+ hipFree(max_d);
+ hipFree(color_d);
+ hipFree(node_value_d);
+ hipFree(stop_d);
+
+ return 0;
+
+}
+
+void print_vector(int *vector, int num)
+{
+ FILE * fp = fopen("result.out", "w");
+ if (!fp) {
+ printf("ERROR: unable to open result.txt\n");
+ }
+
+ for (int i = 0; i < num; i++)
+ fprintf(fp, "%d: %d\n", i + 1, vector[i]);
+
+ fclose(fp);
+}
diff --git a/src/gpu/pannotia/color/coloring_maxmin.cpp b/src/gpu/pannotia/color/coloring_maxmin.cpp
new file mode 100644
index 0000000..01e173d
--- /dev/null
+++ b/src/gpu/pannotia/color/coloring_maxmin.cpp
@@ -0,0 +1,306 @@
+/************************************************************************************\
+ * *
+ * Copyright © 2014 Advanced Micro Devices, Inc. *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair *
+ * All rights reserved. *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted provided that the following are met: *
+ * *
+ * You must reproduce the above copyright notice. *
+ * *
+ * Neither the name of the copyright holder nor the names of its contributors *
+ * may be used to endorse or promote products derived from this software *
+ * without specific, prior, written permission from at least the copyright holder. *
+ * *
+ * You must include the following terms in your license and/or other materials *
+ * provided with the software. *
+ * *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY *
+ * OF SUCH DAMAGE. *
+ * *
+ * Without limiting the foregoing, the software may implement third party *
+ * technologies for which you must obtain licenses from parties other than AMD. *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall *
+ * be responsible for obtaining the rights to use and/or distribute the applicable *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder. *
+ * *
+ * If you use the software (in whole or in part), you shall adhere to all *
+ * applicable U.S., European, and other export laws, including but not limited to *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774), *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009. Further, pursuant *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a *
+ * license granted by the United States Department of Commerce Bureau of Industry *
+ * and Security or as otherwise permitted pursuant to a License Exception under *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export, *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or *
+ * E:2 any restricted technology, software, or source code you receive hereunder, *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
+ * technology or software, if such foreign produced direct product is subject to *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR). For the most current Country Group *
+ * listings, or for additional information about the EAR or your obligations under *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's *
+ * website at http://www.bis.doc.gov/. *
+ * *
+\************************************************************************************/
+
+#include "hip/hip_runtime.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include "../graph_parser/parse.h"
+#include "../graph_parser/util.h"
+#include "kernel_maxmin.cu"
+
+#ifdef GEM5_FUSION
+#include <stdint.h>
+#include <gem5/m5ops.h>
+#endif
+
+#define RANGE 2048
+
+void print_vector(int *vector, int num);
+
+int main(int argc, char **argv)
+{
+ char *tmpchar;
+
+ int num_nodes;
+ int num_edges;
+ int file_format = 1;
+ bool directed = 0;
+
+ hipError_t err = hipSuccess;
+
+ if (argc == 3) {
+ tmpchar = argv[1]; //graph inputfile
+ file_format = atoi(argv[2]); //graph format
+ } else {
+ fprintf(stderr, "You did something wrong!\n");
+ exit(1);
+ }
+
+ srand(7);
+
+ // Allocate the CSR structure
+ csr_array *csr;
+
+ // Parse graph file and store into a CSR format
+ if (file_format == 1)
+ csr = parseMetis(tmpchar, &num_nodes, &num_edges, directed);
+ else if (file_format == 0)
+ csr = parseCOO(tmpchar, &num_nodes, &num_edges, directed);
+ else {
+ printf("reserve for future");
+ exit(1);
+ }
+
+ // Allocate the vertex value array
+ int *node_value = (int *)malloc(num_nodes * sizeof(int));
+ if (!node_value) fprintf(stderr, "node_value malloc failed\n");
+ // Allocate the color array
+ int *color = (int *)malloc(num_nodes * sizeof(int));
+ if (!color) fprintf(stderr, "color malloc failed\n");
+
+ // Initialize all the colors to -1
+ // Randomize the value for each vertex
+ for (int i = 0; i < num_nodes; i++) {
+ color[i] = -1;
+ node_value[i] = rand() % RANGE;
+ }
+
+ int *row_d;
+ int *col_d;
+ int *max_d;
+ int *min_d;
+
+ int *color_d;
+ int *node_value_d;
+ int *stop_d;
+
+ // Create device-side buffers for the graph
+ err = hipMalloc(&row_d, num_nodes * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc row_d (size:%d) => %s\n", num_nodes , hipGetErrorString(err));
+ return -1;
+ }
+ err = hipMalloc(&col_d, num_edges * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc col_d (size:%d): %s\n", num_edges , hipGetErrorString(err));
+ return -1;
+ }
+
+ // Termination variable
+ err = hipMalloc(&stop_d, sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc stop_d (size:%d) => %s\n", 1 , hipGetErrorString(err));
+ return -1;
+ }
+
+ // Create device-side buffers for color
+ err = hipMalloc(&color_d, num_nodes * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc color_d (size:%d) => %s\n", num_nodes , hipGetErrorString(err));
+ return -1;
+ }
+ err = hipMalloc(&node_value_d, num_nodes * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc node_value_d (size:%d) => %s\n", num_nodes , hipGetErrorString(err));
+ return -1;
+ }
+ err = hipMalloc(&max_d, num_nodes * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc max_d (size:%d) => %s\n", num_nodes , hipGetErrorString(err));
+ return -1;
+ }
+ err = hipMalloc(&min_d, num_nodes * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc min_d (size:%d) => %s\n", num_nodes , hipGetErrorString(err));
+ return -1;
+ }
+
+ // Copy data to device-side buffers
+ double timer1 = gettime();
+
+#ifdef GEM5_FUSION
+ m5_work_begin(0, 0);
+#endif
+
+ err = hipMemcpy(color_d, color, num_nodes * sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMemcpy color_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+
+ err = hipMemcpy(row_d, csr->row_array, num_nodes * sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMemcpy row_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+
+ err = hipMemcpy(col_d, csr->col_array, num_edges * sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMemcpy col_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+
+ err = hipMemcpy(node_value_d, node_value, num_nodes * sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMemcpy node_value_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+
+ int block_size = 256;
+ int num_blocks = (num_nodes + block_size - 1) / block_size;
+
+ // Set up kernel dimensions
+ dim3 threads(block_size, 1, 1);
+ dim3 grid(num_blocks, 1, 1);
+
+ int stop = 1;
+ int graph_color = 1;
+
+ // Initialize arrays
+ hipLaunchKernelGGL(ini, dim3(grid), dim3(threads ), 0, 0, max_d, min_d, num_nodes);
+
+ // Main computation loop
+ double timer3 = gettime();
+
+ while (stop) {
+
+ stop = 0;
+
+ // Copy the termination variable to the device
+ err = hipMemcpy(stop_d, &stop, sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: write stop_d: %s\n", hipGetErrorString(err));
+ }
+
+ // Launch the color kernel 1
+ hipLaunchKernelGGL(color1, dim3(grid), dim3(threads ), 0, 0, row_d, col_d, node_value_d, color_d,
+ stop_d, max_d, min_d, graph_color,
+ num_nodes, num_edges);
+
+ // Launch the color kernel 2
+ hipLaunchKernelGGL(color2, dim3(grid), dim3(threads ), 0, 0, node_value_d, color_d, max_d, min_d,
+ graph_color, num_nodes, num_edges);
+
+ err = hipMemcpy(&stop, stop_d, sizeof(int), hipMemcpyDeviceToHost);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: read stop_d: %s\n", hipGetErrorString(err));
+ }
+
+ // Update the color label for the next iter
+ graph_color = graph_color + 2;
+
+ }
+ hipDeviceSynchronize();
+
+ double timer4 = gettime();
+
+ // Copy back the color array
+ err = hipMemcpy(color, color_d, num_nodes * sizeof(int), hipMemcpyDeviceToHost);
+ if (err != hipSuccess) {
+ printf("ERROR: hipMemcpy(): %s\n", hipGetErrorString(err));
+ return -1;
+ }
+
+#ifdef GEM5_FUSION
+ m5_work_end(0, 0);
+#endif
+
+ double timer2 = gettime();
+
+ // Print out color and timing statistics
+ printf("total number of colors used: %d\n", graph_color);
+ printf("kernel time = %lf ms\n", (timer4 - timer3) * 1000);
+ printf("kernel + memcpy time = %lf ms\n", (timer2 - timer1) * 1000);
+
+#if 1
+ // Dump the color array into an output file
+ print_vector(color, num_nodes);
+#endif
+
+ // Free host-side buffers
+ free(node_value);
+ free(color);
+ csr->freeArrays();
+ free(csr);
+
+ // Free CUDA buffers
+ hipFree(row_d);
+ hipFree(col_d);
+ hipFree(max_d);
+ hipFree(color_d);
+ hipFree(node_value_d);
+ hipFree(stop_d);
+
+ return 0;
+
+}
+
+void print_vector(int *vector, int num)
+{
+ FILE * fp = fopen("result.out", "w");
+ if (!fp) {
+ printf("ERROR: unable to open result.txt\n");
+ }
+
+ for (int i = 0; i < num; i++)
+ fprintf(fp, "%d: %d\n", i + 1, vector[i]);
+
+ fclose(fp);
+}
diff --git a/src/gpu/pannotia/color/kernel_max.h b/src/gpu/pannotia/color/kernel_max.h
new file mode 100644
index 0000000..f560a29
--- /dev/null
+++ b/src/gpu/pannotia/color/kernel_max.h
@@ -0,0 +1,139 @@
+/************************************************************************************\
+ * *
+ * Copyright � 2014 Advanced Micro Devices, Inc. *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair *
+ * All rights reserved. *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted provided that the following are met: *
+ * *
+ * You must reproduce the above copyright notice. *
+ * *
+ * Neither the name of the copyright holder nor the names of its contributors *
+ * may be used to endorse or promote products derived from this software *
+ * without specific, prior, written permission from at least the copyright holder. *
+ * *
+ * You must include the following terms in your license and/or other materials *
+ * provided with the software. *
+ * *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY *
+ * OF SUCH DAMAGE. *
+ * *
+ * Without limiting the foregoing, the software may implement third party *
+ * technologies for which you must obtain licenses from parties other than AMD. *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall *
+ * be responsible for obtaining the rights to use and/or distribute the applicable *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder. *
+ * *
+ * If you use the software (in whole or in part), you shall adhere to all *
+ * applicable U.S., European, and other export laws, including but not limited to *
+ * the U.S. Export Administration Regulations ("EAR"�) (15 C.F.R Sections 730-774), *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009. Further, pursuant *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a *
+ * license granted by the United States Department of Commerce Bureau of Industry *
+ * and Security or as otherwise permitted pursuant to a License Exception under *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export, *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or *
+ * E:2 any restricted technology, software, or source code you receive hereunder, *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
+ * technology or software, if such foreign produced direct product is subject to *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR). For the most current Country Group *
+ * listings, or for additional information about the EAR or your obligations under *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's *
+ * website at http://www.bis.doc.gov/. *
+ * *
+\************************************************************************************/
+
+#ifndef KERNEL_MAX_H
+#define KERNEL_MAX_H
+
+#include "hip/hip_runtime.h"
+
+/**
+ * @brief color kernel 1
+ * @param row CSR pointer array
+ * @param col CSR column array
+ * @param node_value Vertex value array
+ * @param color_array Color value array
+ * @param stop Termination variable
+ * @param max_d Max array
+ * @param color Current color label
+ * @param num_nodes Number of vertices
+ * @param num_edges Number of edges
+ */
+__global__ void color1(int *row, int *col, int *node_value, int *color_array,
+ int *stop, int *max_d, const int color,
+ const int num_nodes, const int num_edges)
+{
+ // Get my thread workitem id
+ int tid = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
+
+ if (tid < num_nodes) {
+ // If the vertex is still not colored
+ if (color_array[tid] == -1) {
+
+ // Get the start and end pointer of the neighbor list
+ int start = row[tid];
+ int end;
+ if (tid + 1 < num_nodes)
+ end = row[tid + 1];
+ else
+ end = num_edges;
+
+ int maximum = -1;
+ // Navigate the neighbor list
+ for (int edge = start; edge < end; edge++) {
+ // Determine if the vertex value is the maximum in the neighborhood
+ if (color_array[col[edge]] == -1 && start != end - 1) {
+ *stop = 1;
+ if (node_value[col[edge]] > maximum)
+ maximum = node_value[col[edge]];
+ }
+ }
+ // Assign maximum the max array
+ max_d[tid] = maximum;
+ }
+ }
+}
+
+
+/**
+ * @brief color kernel 2
+ * @param node_value Vertex value array
+ * @param color_array Color value array
+ * @param max_d Max array
+ * @param color Current color label
+ * @param num_nodes Number of vertices
+ * @param num_edges Number of edges
+ */
+__global__ void color2(int *node_value, int *color_array, int *max_d,
+ const int color, const int num_nodes,
+ const int num_edges)
+{
+ // Get my workitem id
+ int tid = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
+
+ if (tid < num_nodes) {
+ // If the vertex is still not colored
+ if (color_array[tid] == -1) {
+ if (node_value[tid] >= max_d[tid])
+ // Assign a color
+ color_array[tid] = color;
+ }
+ }
+
+}
+
+#endif // KERNEL_MAX_H
diff --git a/src/gpu/pannotia/color/kernel_maxmin.h b/src/gpu/pannotia/color/kernel_maxmin.h
new file mode 100644
index 0000000..eb93f80
--- /dev/null
+++ b/src/gpu/pannotia/color/kernel_maxmin.h
@@ -0,0 +1,167 @@
+/************************************************************************************\
+ * *
+ * Copyright � 2014 Advanced Micro Devices, Inc. *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair *
+ * All rights reserved. *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted provided that the following are met: *
+ * *
+ * You must reproduce the above copyright notice. *
+ * *
+ * Neither the name of the copyright holder nor the names of its contributors *
+ * may be used to endorse or promote products derived from this software *
+ * without specific, prior, written permission from at least the copyright holder. *
+ * *
+ * You must include the following terms in your license and/or other materials *
+ * provided with the software. *
+ * *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY *
+ * OF SUCH DAMAGE. *
+ * *
+ * Without limiting the foregoing, the software may implement third party *
+ * technologies for which you must obtain licenses from parties other than AMD. *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall *
+ * be responsible for obtaining the rights to use and/or distribute the applicable *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder. *
+ * *
+ * If you use the software (in whole or in part), you shall adhere to all *
+ * applicable U.S., European, and other export laws, including but not limited to *
+ * the U.S. Export Administration Regulations ("EAR"�) (15 C.F.R Sections 730-774), *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009. Further, pursuant *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a *
+ * license granted by the United States Department of Commerce Bureau of Industry *
+ * and Security or as otherwise permitted pursuant to a License Exception under *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export, *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or *
+ * E:2 any restricted technology, software, or source code you receive hereunder, *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
+ * technology or software, if such foreign produced direct product is subject to *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR). For the most current Country Group *
+ * listings, or for additional information about the EAR or your obligations under *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's *
+ * website at http://www.bis.doc.gov/. *
+ * *
+\************************************************************************************/
+
+#ifndef KERNEL_MAXMIN_H
+#define KERNEL_MAXMIN_H
+
+#include "hip/hip_runtime.h"
+
+#define BIG_NUM 999999
+
+/**
+ * @brief color kernel 1
+ * @param row CSR pointer array
+ * @param col CSR column array
+ * @param node_value Vertex value array
+ * @param color_array Color value array
+ * @param stop Termination variable
+ * @param max_d Max array
+ * @param max_d Min array
+ * @param color Current color label
+ * @param num_nodes Number of vertices
+ * @param num_edges Number of edges
+ */
+__global__ void color1(int *row, int *col, int *node_value, int *color_array,
+ int *stop, int *max_d, int *min_d, const int color,
+ const int num_nodes, const int num_edges)
+{
+ // Get my workitem id
+ int tid = blockIdx.x * blockDim.x + threadIdx.x;
+
+ if (tid < num_nodes) {
+ // If the vertex is not colored
+ if (color_array[tid] == -1) {
+
+ // Get the start and end pointers for the neighbor list
+ int start = row[tid];
+ int end;
+ if (tid + 1 < num_nodes)
+ end = row[tid + 1];
+ else
+ end = num_edges;
+
+ int maximum = -1;
+ int minimum = BIG_NUM;
+ // Navigate the neighborlist
+ for (int edge = start; edge < end; edge++) {
+ if (color_array[col[edge]] == -1 && start != end - 1) {
+ *stop = 1;
+ // Determine if the vertex value is the maximum/minimum in the neighborhood
+ if (node_value[col[edge]] > maximum)
+ maximum = node_value[col[edge]];
+ if (node_value[col[edge]] < minimum)
+ minimum = node_value[col[edge]];
+ }
+ }
+ // Assign the maximum/miminum value to max/min array
+ max_d[tid] = maximum;
+ min_d[tid] = minimum;
+ }
+ }
+}
+
+/**
+ * @brief color kernel 2
+ * @param node_value Vertex value array
+ * @param color_array Color value array
+ * @param max_d Max array
+ * @param min_d Min array
+ * @param color Current color label
+ * @param num_nodes Number of vertices
+ * @param num_edges Number of edges
+ */
+__global__ void color2(int *node_value, int *color_array, int *max_d,
+ int *min_d, const int color, const int num_nodes,
+ const int num_edges)
+{
+ // Get my workitem id
+ int tid = blockIdx.x * blockDim.x + threadIdx.x;
+
+ if (tid < num_nodes) {
+ // If the vertex is still not colored
+ if (color_array[tid] == -1) {
+ // Assign a color
+ if (node_value[tid] >= max_d[tid])
+ color_array[tid] = color;
+ if (node_value[tid] <= min_d[tid])
+ color_array[tid] = color + 1;
+ }
+ }
+
+}
+
+/**
+ * @brief init kernel
+ * @param max_d Max array
+ * @param min_d Min array
+ * @param num_nodes Number of vertices
+ */
+__global__ void ini(int *max_d, int *min_d, const int num_nodes)
+{
+ // Get my workitem id
+ int tid = blockIdx.x * blockDim.x + threadIdx.x;
+
+ // Initialize max: -1 and min: Big_num
+ if (tid < num_nodes) {
+ max_d[tid] = -1;
+ min_d[tid] = BIG_NUM;
+ }
+
+}
+
+#endif
diff --git a/src/gpu/pannotia/fw/Floyd-Warshall.cpp b/src/gpu/pannotia/fw/Floyd-Warshall.cpp
new file mode 100644
index 0000000..75befaa
--- /dev/null
+++ b/src/gpu/pannotia/fw/Floyd-Warshall.cpp
@@ -0,0 +1,237 @@
+/************************************************************************************\
+ * *
+ * Copyright © 2014 Advanced Micro Devices, Inc. *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair *
+ * All rights reserved. *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted provided that the following are met: *
+ * *
+ * You must reproduce the above copyright notice. *
+ * *
+ * Neither the name of the copyright holder nor the names of its contributors *
+ * may be used to endorse or promote products derived from this software *
+ * without specific, prior, written permission from at least the copyright holder. *
+ * *
+ * You must include the following terms in your license and/or other materials *
+ * provided with the software. *
+ * *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY *
+ * OF SUCH DAMAGE. *
+ * *
+ * Without limiting the foregoing, the software may implement third party *
+ * technologies for which you must obtain licenses from parties other than AMD. *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall *
+ * be responsible for obtaining the rights to use and/or distribute the applicable *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder. *
+ * *
+ * If you use the software (in whole or in part), you shall adhere to all *
+ * applicable U.S., European, and other export laws, including but not limited to *
+ * the U.S. Export Administration Regulations ("EAR"�) (15 C.F.R Sections 730-774), *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009. Further, pursuant *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a *
+ * license granted by the United States Department of Commerce Bureau of Industry *
+ * and Security or as otherwise permitted pursuant to a License Exception under *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export, *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or *
+ * E:2 any restricted technology, software, or source code you receive hereunder, *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
+ * technology or software, if such foreign produced direct product is subject to *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR). For the most current Country Group *
+ * listings, or for additional information about the EAR or your obligations under *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's *
+ * website at http://www.bis.doc.gov/. *
+ * *
+\************************************************************************************/
+
+#include "hip/hip_runtime.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+//#include <sys/time.h>
+//#include <omp.h>
+#include "../graph_parser/util.h"
+#include "kernel.h"
+#include "parse.h"
+
+#ifdef GEM5_FUSION
+#include <stdint.h>
+#include <gem5/m5ops.h>
+#endif
+
+#ifdef GEM5_FUSION
+#define MAX_ITERS 192
+#else
+#include <stdint.h>
+#define MAX_ITERS INT32_MAX
+#endif
+
+#define BIGNUM 999999
+#define TRUE 1
+#define FALSE 0
+
+int main(int argc, char **argv)
+{
+ char *tmpchar;
+ bool verify_results = false;
+
+ int num_nodes;
+ int num_edges;
+
+ hipError_t err = hipSuccess;
+
+ // Get program input
+ if (argc >= 2) {
+ tmpchar = argv[1]; // Graph input file
+ } else {
+ fprintf(stderr, "You did something wrong!\n");
+ exit(1);
+ }
+
+ if (argc >= 3) {
+ if (atoi(argv[2]) == 1) {
+ verify_results = true;
+ }
+ }
+
+ // Parse the adjacency matrix
+ int *adjmatrix = parse_graph_file(&num_nodes, &num_edges, tmpchar);
+ int dim = num_nodes;
+
+ // Initialize the distance matrix
+ int *distmatrix = (int *)malloc(dim * dim * sizeof(int));
+ if (!distmatrix) fprintf(stderr, "malloc failed - distmatrix\n");
+
+ // Initialize the result matrix
+ int *result = (int *)malloc(dim * dim * sizeof(int));
+ if (!result) fprintf(stderr, "malloc failed - result\n");
+
+ // TODO: Now only supports integer weights
+ // Setup the input matrix
+ for (int i = 0 ; i < dim; i++) {
+ for (int j = 0 ; j < dim; j++) {
+ if (i == j) {
+ // Diagonal
+ distmatrix[i * dim + j] = 0;
+ } else if (adjmatrix[i * dim + j] == -1) {
+ // Without edge
+ distmatrix[i * dim + j] = BIGNUM;
+ } else {
+ // With edge
+ distmatrix[i * dim + j] = adjmatrix[i * dim + j];
+ }
+ }
+ }
+
+ int *dist_d;
+ int *next_d;
+
+ // Create device-side FW buffers
+ err = hipMalloc(&dist_d, dim * dim * sizeof(int));
+ if (err != hipSuccess) {
+ printf("ERROR: hipMalloc dist_d (size:%d) => %d\n", dim * dim , err);
+ return -1;
+ }
+ err = hipMalloc(&next_d, dim * dim * sizeof(int));
+ if (err != hipSuccess) {
+ printf("ERROR: hipMalloc next_d (size:%d) => %d\n", dim * dim , err);
+ return -1;
+ }
+
+ //double timer1 = gettime();
+
+#ifdef GEM5_FUSION
+ m5_work_begin(0, 0);
+#endif
+
+ // Copy the dist matrix to the device
+ err = hipMemcpy(dist_d, distmatrix, dim * dim * sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMemcpy feature_d (size:%d) => %d\n", dim * dim, err);
+ return -1;
+ }
+
+ // Work dimension
+ dim3 threads(16, 16, 1);
+ dim3 grid(num_nodes / 16, num_nodes / 16, 1);
+
+ //double timer3 = gettime();
+ // Main computation loop
+ for (int k = 1; k < dim && k < MAX_ITERS; k++) {
+ hipLaunchKernelGGL(HIP_KERNEL_NAME(floydwarshall), dim3(grid), dim3(threads), 0, 0, dist_d, next_d, dim, k);
+ }
+ hipDeviceSynchronize();
+
+ //double timer4 = gettime();
+ err = hipMemcpy(result, dist_d, dim * dim * sizeof(int), hipMemcpyDeviceToHost);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: read back dist_d %d failed\n", err);
+ return -1;
+ }
+
+#ifdef GEM5_FUSION
+ m5_work_end(0, 0);
+#endif
+
+ //double timer2 = gettime();
+
+ //printf("kernel time = %lf ms\n", (timer4 - timer3) * 1000);
+ //printf("kernel + memcpy time = %lf ms\n", (timer2 - timer1) * 1000);
+
+ if (verify_results) {
+ // Below is the verification part
+ // Calculate on the CPU
+ int *dist = distmatrix;
+ for (int k = 0; k < dim; k++) {
+ for (int i = 0; i < dim; i++) {
+ for (int j = 0; j < dim; j++) {
+ if (dist[i * dim + k] + dist[k * dim + j] < dist[i * dim + j]) {
+ dist[i * dim + j] = dist[i * dim + k] + dist[k * dim + j];
+ }
+ }
+ }
+ }
+
+ // Compare results
+ bool check_flag = 0;
+ for (int i = 0; i < dim; i++) {
+ for (int j = 0; j < dim; j++) {
+ if (dist[i * dim + j] != result[i * dim + j]) {
+ fprintf(stderr, "mismatch at (%d, %d)\n", i, j);
+ check_flag = 1;
+ }
+ }
+ }
+ // If there is mismatch, report
+ if (check_flag) {
+ fprintf(stdout, "WARNING: Produced incorrect results!\n");
+ } else {
+ printf("Results are correct!\n");
+ }
+ }
+
+ printf("Finishing Floyd-Warshall\n");
+
+ // Free host-side buffers
+ free(adjmatrix);
+ free(result);
+ free(distmatrix);
+
+ // Free CUDA buffers
+ hipFree(dist_d);
+ hipFree(next_d);
+
+ return 0;
+}
diff --git a/src/gpu/pannotia/fw/Makefile b/src/gpu/pannotia/fw/Makefile
new file mode 100644
index 0000000..6158bac
--- /dev/null
+++ b/src/gpu/pannotia/fw/Makefile
@@ -0,0 +1,11 @@
+default:
+ make -f Makefile.default
+
+clean:
+ make -f Makefile.default clean
+
+gem5-fusion:
+ make -f Makefile.gem5-fusion
+
+clean-gem5-fusion:
+ make -f Makefile.gem5-fusion clean
diff --git a/src/gpu/pannotia/fw/Makefile.default b/src/gpu/pannotia/fw/Makefile.default
new file mode 100644
index 0000000..afdfcb5
--- /dev/null
+++ b/src/gpu/pannotia/fw/Makefile.default
@@ -0,0 +1,19 @@
+HIP_PATH ?= /opt/rocm/hip
+HIPCC = $(HIP_PATH)/bin/hipcc
+EXECUTABLE = fw_hip
+OPTS = -O3
+
+BIN_DIR ?= ./bin
+
+all: $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR)/$(EXECUTABLE): Floyd-Warshall.cpp parse.cpp ../graph_parser/util.cpp $(BIN_DIR)
+ $(HIPCC) $(OPTS) --amdgpu-target=gfx801,gfx803,gfx906 $(CXXFLAGS) parse.cpp ../graph_parser/util.cpp Floyd-Warshall.cpp -o $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR):
+ mkdir -p $(BIN_DIR)
+
+clean:
+ rm -rf $(BIN_DIR)
+
+.PHONY: square clean
diff --git a/src/gpu/pannotia/fw/Makefile.gem5-fusion b/src/gpu/pannotia/fw/Makefile.gem5-fusion
new file mode 100644
index 0000000..e9fbd79
--- /dev/null
+++ b/src/gpu/pannotia/fw/Makefile.gem5-fusion
@@ -0,0 +1,23 @@
+HIP_PATH ?= /opt/rocm/hip
+HIPCC = $(HIP_PATH)/bin/hipcc
+
+# these are needed for m5ops
+# TODO: Need some sort of explicit PATH? Read in?
+GEM5_PATH ?= /nobackup/sinclair/gem5
+CFLAGS += -I$(GEM5_PATH)/include
+LDFLAGS += -L$(GEM5_PATH)/util/m5/build/x86/out -lm5
+
+BIN_DIR ?= ./bin
+
+all: $(BIN_DIR)/fw_hip.gem5
+
+$(BIN_DIR)/fw_hip.gem5: Floyd-Warshall.cpp parse.cpp ../graph_parser/util.cpp $(BIN_DIR)
+ $(HIPCC) -O3 --amdgpu-target=gfx801,gfx803 $(CXXFLAGS) parse.cpp ../graph_parser/util.cpp Floyd-Warshall.cpp -DGEM5_FUSION -o $(BIN_DIR)/fw_hip.gem5 $(CFLAGS) $(LDFLAGS)
+
+$(BIN_DIR):
+ mkdir -p $(BIN_DIR)
+
+clean:
+ rm -rf $(BIN_DIR)
+
+.PHONY: square clean
diff --git a/src/gpu/pannotia/fw/README.md b/src/gpu/pannotia/fw/README.md
new file mode 100644
index 0000000..39424bb
--- /dev/null
+++ b/src/gpu/pannotia/fw/README.md
@@ -0,0 +1,49 @@
+---
+title: Pannotia FW Test
+tags:
+ - x86
+ - amdgpu
+layout: default
+permalink: resources/pannotia/fw
+shortdoc: >
+ Resources to build a disk image with the GCN3 Pannotia FW workload.
+---
+
+Floyd-Warshall (FW) is a graph analytics application that is part of the Pannotia benchmark suite. It is a classical dynamic-programming algorithm designed to solve the all-pairs shortest path (APSP) problem. The provided version is for use with the gpu-compute model of gem5. Thus, it has been ported from the prior CUDA and OpenCL variants to HIP, and validated on a Vega-class AMD GPU.
+
+Compiling FW, compiling the GCN3_X86/Vega_X86 versions of gem5, and running FW on gem5 is dependent on the gcn-gpu docker image, `util/dockerfiles/gcn-gpu/Dockerfile` on the [gem5 stable branch](https://gem5.googlesource.com/public/gem5/+/refs/heads/stable).
+
+## Compilation and Running
+
+To compile FW:
+
+```
+cd src/gpu/pannotia/fw
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu make gem5-fusion
+```
+
+If you use the Makefile.default file instead, the Makefile will generate code designed to run on the real GPU instead. Moreover, note that Makefile.gem5-fusion requires you to set the GEM5_ROOT variable (either on the command line or by modifying the Makefile), because the Pannotia applications have been updated to use [m5ops](https://www.gem5.org/documentation/general_docs/m5ops/). By default, the Makefile builds for gfx801 and gfx803, and is placed in the src/gpu/pannotia/fw/bin folder.
+
+## Compiling GCN3_X86/gem5.opt
+
+FW is a GPU application, which requires that gem5 is built with the GCN3_X86 (or Vega_X86, although this has been less heavily tested) architecture. The test is run with the GCN3_X86 gem5 variant, compiled using the gcn-gpu docker image:
+
+```
+git clone https://gem5.googlesource.com/public/gem5
+cd gem5
+docker run -u $UID:$GID --volume $(pwd):$(pwd) -w $(pwd) gcr.io/gem5-test/gcn-gpu:latest scons build/GCN3_X86/gem5.opt -j <num cores>
+```
+
+## Running FW on GCN3_X86/gem5.opt
+
+# Assuming gem5 and gem5-resources are in your working directory
+```
+wget http://dist.gem5.org/dist/develop/datasets/pannotia/bc/1k_128k.gr
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu gem5/build/GCN3_X86/gem5.opt gem5/configs/example/apu_se.py -n3 --mem-size=8GB --benchmark-root=gem5-resources/src/gpu/pannotia/fw/bin -c fw_hip.gem5 --options="1k_128k.gr"
+```
+
+Note that the datasets from the original Pannotia suite have been uploaded to: <http://dist.gem5.org/dist/develop/datasets/pannotia>. We recommend you start with the 1k_128k.gr input (<http://dist.gem5.org/dist/develop/datasets/pannotia/fw/1k_128k.gr>), as this is the smallest input that can be run with FW. Note that 1k_128k is not designed for FW specifically though -- the above link has larger graphs designed to run with FW that you should consider using for larger experiments.
+
+## Pre-built binary
+
+A pre-built binary will be added soon.
diff --git a/src/gpu/pannotia/fw/kernel.h b/src/gpu/pannotia/fw/kernel.h
new file mode 100644
index 0000000..3474a2f
--- /dev/null
+++ b/src/gpu/pannotia/fw/kernel.h
@@ -0,0 +1,88 @@
+/************************************************************************************\
+ * *
+ * Copyright © 2014 Advanced Micro Devices, Inc. *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair *
+ * All rights reserved. *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted provided that the following are met: *
+ * *
+ * You must reproduce the above copyright notice. *
+ * *
+ * Neither the name of the copyright holder nor the names of its contributors *
+ * may be used to endorse or promote products derived from this software *
+ * without specific, prior, written permission from at least the copyright holder. *
+ * *
+ * You must include the following terms in your license and/or other materials *
+ * provided with the software. *
+ * *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY *
+ * OF SUCH DAMAGE. *
+ * *
+ * Without limiting the foregoing, the software may implement third party *
+ * technologies for which you must obtain licenses from parties other than AMD. *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall *
+ * be responsible for obtaining the rights to use and/or distribute the applicable *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder. *
+ * *
+ * If you use the software (in whole or in part), you shall adhere to all *
+ * applicable U.S., European, and other export laws, including but not limited to *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774), *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009. Further, pursuant *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a *
+ * license granted by the United States Department of Commerce Bureau of Industry *
+ * and Security or as otherwise permitted pursuant to a License Exception under *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export, *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or *
+ * E:2 any restricted technology, software, or source code you receive hereunder, *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
+ * technology or software, if such foreign produced direct product is subject to *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR). For the most current Country Group *
+ * listings, or for additional information about the EAR or your obligations under *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's *
+ * website at http://www.bis.doc.gov/. *
+ * *
+\************************************************************************************/
+
+#ifndef KERNEL_H
+#define KERNEL_H
+
+#include "hip/hip_runtime.h"
+
+/**
+ * @brief naive floyd warshal kernel
+ * @param dist Distance array
+ * @param next Next array
+ * @param dim Dimension of the 2-D matrix
+ * @param k Current iteration number
+ */
+__global__ void
+floydwarshall(int *dist, int *next, int dim, int k)
+{
+ // Get my workitem id x_dim
+ int i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+ // Get my workitem id y_dim
+ int j = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
+
+ if (i < dim && j < dim) {
+ // if (dist i -> k + k -> j) update the dist i-> j
+ if (dist[i * dim + k] + dist[k * dim + j] < dist[i * dim + j]) {
+ dist[i * dim + j] = dist[i * dim + k] + dist[k * dim + j];
+ next[i * dim + j] = k;
+ }
+ }
+}
+
+#endif // KERNEL_H
diff --git a/src/gpu/pannotia/fw/parse.cpp b/src/gpu/pannotia/fw/parse.cpp
new file mode 100644
index 0000000..eab11ea
--- /dev/null
+++ b/src/gpu/pannotia/fw/parse.cpp
@@ -0,0 +1,89 @@
+
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Test value
+bool test_value(int* array, int dim, int i, int j)
+{
+
+ // TODO: Current does not support multiple edges between two vertices
+ if (array[i * dim + j] != -1) {
+ // fprintf(stderr, "Possibly duplicate records at (%d, %d)\n", i, j);
+ return 0;
+ } else
+ return 1;
+}
+
+// Set value (i, j) = value
+void set_value(int* array, int dim, int i, int j, int value)
+{
+ array[i * dim + j] = value;
+}
+
+int* parse_graph_file(int *num_nodes, int *num_edges, char* tmpchar)
+{
+
+ int *adjmatrix;
+ int cnt = 0;
+ unsigned int lineno = 0;
+ char line[128], sp[2], a, p;
+
+ FILE *fptr;
+
+ fptr = fopen(tmpchar, "r");
+
+ if (!fptr) {
+ fprintf(stderr, "Error when opening file: %s\n", tmpchar);
+ perror("fopen Error:");
+ exit(1);
+ }
+
+ printf("Opening file: %s\n", tmpchar);
+
+ while (fgets(line, 100, fptr)) {
+ int head, tail, weight;
+ long long unsigned size;
+ switch (line[0]) {
+ case 'c':
+ break;
+ case 'p':
+ sscanf(line, "%c %s %d %d", &p, sp, num_nodes, num_edges);
+ printf("Read from file: num_nodes = %d, num_edges = %d\n", *num_nodes, *num_edges);
+ size = (long long unsigned)(*num_nodes + 1) * (long long unsigned)(*num_nodes + 1);
+ if (size > UINT_MAX) {
+ fclose(fptr);
+ fprintf(stderr, "ERROR: Too many nodes, huge adjacency matrix\n");
+ exit(0);
+ }
+ adjmatrix = (int *)malloc(size * sizeof(int));
+ memset(adjmatrix, -1 , size * sizeof(int));
+ break;
+ case 'a':
+ sscanf(line, "%c %d %d %d", &a, &head, &tail, &weight);
+ if (tail == head) printf("reporting self loop\n");
+ if (test_value(adjmatrix, *num_nodes + 1, head, tail)) {
+ set_value(adjmatrix, *num_nodes + 1, head, tail, weight);
+ cnt++;
+ }
+
+#ifdef VERBOSE
+ printf("Adding edge: %d ==> %d ( %d )\n", head, tail, weight);
+#endif
+ break;
+ default:
+ fprintf(stderr, "exiting loop\n");
+ break;
+ }
+ lineno++;
+ }
+
+ *num_edges = cnt;
+ printf("Actual added edges: %d\n", cnt);
+
+ fclose(fptr);
+
+ return adjmatrix;
+
+}
diff --git a/src/gpu/pannotia/fw/parse.h b/src/gpu/pannotia/fw/parse.h
new file mode 100644
index 0000000..01001e2
--- /dev/null
+++ b/src/gpu/pannotia/fw/parse.h
@@ -0,0 +1,6 @@
+#ifndef __FW_PARSE_H__
+#define __FW_PARSE_H__
+
+int* parse_graph_file(int *num_nodes, int *num_edges, char* tmpchar);
+
+#endif
diff --git a/src/gpu/pannotia/graph_parser/parse.cpp b/src/gpu/pannotia/graph_parser/parse.cpp
new file mode 100644
index 0000000..80fb6f4
--- /dev/null
+++ b/src/gpu/pannotia/graph_parser/parse.cpp
@@ -0,0 +1,882 @@
+/************************************************************************************\
+ * *
+ * Copyright � 2014 Advanced Micro Devices, Inc. *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood *
+ * All rights reserved. *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted provided that the following are met: *
+ * *
+ * You must reproduce the above copyright notice. *
+ * *
+ * Neither the name of the copyright holder nor the names of its contributors *
+ * may be used to endorse or promote products derived from this software *
+ * without specific, prior, written permission from at least the copyright holder. *
+ * *
+ * You must include the following terms in your license and/or other materials *
+ * provided with the software. *
+ * *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY *
+ * OF SUCH DAMAGE. *
+ * *
+ * Without limiting the foregoing, the software may implement third party *
+ * technologies for which you must obtain licenses from parties other than AMD. *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall *
+ * be responsible for obtaining the rights to use and/or distribute the applicable *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder. *
+ * *
+ * If you use the software (in whole or in part), you shall adhere to all *
+ * applicable U.S., European, and other export laws, including but not limited to *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774), *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009. Further, pursuant *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a *
+ * license granted by the United States Department of Commerce Bureau of Industry *
+ * and Security or as otherwise permitted pursuant to a License Exception under *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export, *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or *
+ * E:2 any restricted technology, software, or source code you receive hereunder, *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
+ * technology or software, if such foreign produced direct product is subject to *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR). For the most current Country Group *
+ * listings, or for additional information about the EAR or your obligations under *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's *
+ * website at http://www.bis.doc.gov/. *
+ * *
+\************************************************************************************/
+
+#include "parse.h"
+#include "stdlib.h"
+#include "stdio.h"
+#include <string.h>
+#include <algorithm>
+#include <sys/time.h>
+#include "util.h"
+
+bool doCompare(CooTuple elem1, CooTuple elem2)
+{
+ if (elem1.row < elem2.row) {
+ return true;
+ }
+ return false;
+}
+
+ell_array *csr2ell(csr_array *csr, int num_nodes, int num_edges, int fill)
+{
+ int size, maxheight = 0;
+ for (int i = 0; i < num_nodes; i++) {
+ size = csr->row_array[i + 1] - csr->row_array[i];
+ if (size > maxheight)
+ maxheight = size;
+ }
+
+ ell_array *ell = (ell_array *)malloc(sizeof(ell_array));
+ if (!ell) printf("malloc failed");
+
+ ell->max_height = maxheight;
+ ell->num_nodes = num_nodes;
+
+ ell->col_array = (int*)malloc(sizeof(int) * maxheight * num_nodes);
+ ell->data_array = (int*)malloc(sizeof(int) * maxheight * num_nodes);
+
+
+ for (int i = 0; i < maxheight * num_nodes; i++) {
+ ell->col_array[i] = 0;
+ ell->data_array[i] = fill;
+ }
+
+ for (int i = 0; i < num_nodes; i++) {
+ int start = csr->row_array[i];
+ int end = csr->row_array[i + 1];
+ int lastcolid = 0;
+ for (int j = start; j < end; j++) {
+ int colid = csr->col_array[j];
+ int data = csr->data_array[j];
+ ell->col_array[i + (j - start) * num_nodes] = colid;
+ ell->data_array[i + (j - start) * num_nodes] = data;
+ lastcolid = colid;
+ }
+ for (int j = end; j < start + maxheight; j++) {
+ ell->col_array[i + (j - start) * num_nodes] = lastcolid;
+ ell->data_array[i + (j - start) * num_nodes] = fill;
+ }
+ }
+
+ return ell;
+
+}
+
+csr_array *parseMetis(char* tmpchar, int *p_num_nodes, int *p_num_edges, bool directed)
+{
+
+ int cnt = 0;
+ unsigned int lineno = 0;
+ char *line = (char *)malloc(8192);
+ int num_edges = 0, num_nodes = 0;
+
+ FILE *fptr;
+ CooTuple *tuple_array = NULL;
+
+ fptr = fopen(tmpchar, "r");
+ if (!fptr) {
+ fprintf(stderr, "Error when opening file: %s\n", tmpchar);
+ exit(1);
+ }
+
+ printf("Opening file: %s\n", tmpchar);
+
+ while (fgets(line, 8192, fptr)) {
+ int head, tail, weight = 0;
+ CooTuple temp;
+
+ if (line[0] == '%') continue; // skip comment lines
+
+ if (lineno == 0) { //the first line
+
+ sscanf(line, "%d %d", p_num_nodes, p_num_edges);
+ if (!directed) {
+ *p_num_edges = *p_num_edges * 2;
+ printf("This is an undirected graph\n");
+ } else {
+ printf("This is a directed graph\n");
+ }
+ num_nodes = *p_num_nodes;
+ num_edges = *p_num_edges;
+
+
+ printf("Read from file: num_nodes = %d, num_edges = %d\n", num_nodes, num_edges);
+ tuple_array = (CooTuple *)malloc(sizeof(CooTuple) * num_edges);
+ } else if (lineno > 0) { //from the second line
+
+ char *pch;
+ pch = strtok(line , " ,.-");
+ while (pch != NULL) {
+ head = lineno;
+ tail = atoi(pch);
+ if (tail <= 0) break;
+
+ if (tail == head) printf("reporting self loop: %d, %d\n", lineno + 1, lineno);
+
+ temp.row = head - 1;
+ temp.col = tail - 1;
+ temp.val = weight;
+
+ tuple_array[cnt++] = temp;
+
+ pch = strtok(NULL, " ,.-");
+
+ }
+ }
+
+#ifdef VERBOSE
+ printf("Adding edge: %d ==> %d ( %d )\n", head, tail, weight);
+#endif
+
+ lineno++;
+
+ }
+
+ // Metis files are stored in row-order, so sorting is unnecessary
+ // std::stable_sort(tuple_array, tuple_array + num_edges, doCompare);
+
+#ifdef VERBOSE
+ for (int i = 0 ; i < num_edges; i++) {
+ printf("%d: %d, %d, %d\n", i, tuple_array[i].row, tuple_array[i].col, tuple_array[i].val);
+ }
+#endif
+
+ int *row_array = (int *)malloc((num_nodes + 1) * sizeof(int));
+ int *col_array = (int *)malloc(num_edges * sizeof(int));
+ int *data_array = (int *)malloc(num_edges * sizeof(int));
+
+ int row_cnt = 0;
+ int prev = -1;
+ int idx;
+ for (idx = 0; idx < num_edges; idx++) {
+ int curr = tuple_array[idx].row;
+ if (curr != prev) {
+ row_array[row_cnt++] = idx;
+ prev = curr;
+ }
+ col_array[idx] = tuple_array[idx].col;
+ data_array[idx] = tuple_array[idx].val;
+
+ }
+ row_array[row_cnt] = idx;
+
+ csr_array *csr = (csr_array *)malloc(sizeof(csr_array));
+ memset(csr, 0, sizeof(csr_array));
+ csr->row_array = row_array;
+ csr->col_array = col_array;
+ csr->data_array = data_array;
+
+ fclose(fptr);
+ free(tuple_array);
+ free(line);
+
+ return csr;
+
+}
+
+
+csr_array *parseCOO(char* tmpchar, int *p_num_nodes, int *p_num_edges, bool directed)
+{
+ int cnt = 0;
+ unsigned int lineno = 0;
+ char line[128], sp[2], a, p;
+ int num_nodes = 0, num_edges = 0;
+
+ FILE *fptr;
+ CooTuple *tuple_array = NULL;
+
+ fptr = fopen(tmpchar, "r");
+ if (!fptr) {
+ fprintf(stderr, "Error when opening file: %s\n", tmpchar);
+ exit(1);
+ }
+
+ printf("Opening file: %s\n", tmpchar);
+
+ while (fgets(line, 100, fptr)) {
+ int head, tail, weight;
+ switch (line[0]) {
+ case 'c':
+ break;
+ case 'p':
+ sscanf(line, "%c %s %d %d", &p, sp, p_num_nodes, p_num_edges);
+
+ if (!directed) {
+ *p_num_edges = *p_num_edges * 2;
+ printf("This is an undirected graph\n");
+ } else {
+ printf("This is a directed graph\n");
+ }
+
+ num_nodes = *p_num_nodes;
+ num_edges = *p_num_edges;
+
+ printf("Read from file: num_nodes = %d, num_edges = %d\n", num_nodes, num_edges);
+ tuple_array = (CooTuple *)malloc(sizeof(CooTuple) * num_edges);
+ break;
+
+ case 'a':
+ sscanf(line, "%c %d %d %d", &a, &head, &tail, &weight);
+ if (tail == head) printf("reporting self loop\n");
+ CooTuple temp;
+ temp.row = head - 1;
+ temp.col = tail - 1;
+ temp.val = weight;
+ tuple_array[cnt++] = temp;
+ if (!directed) {
+ temp.row = tail - 1;
+ temp.col = head - 1;
+ temp.val = weight;
+ tuple_array[cnt++] = temp;
+ }
+
+#ifdef VERBOSE
+ printf("Adding edge: %d ==> %d ( %d )\n", head, tail, weight);
+#endif
+ break;
+ default:
+ fprintf(stderr, "exiting loop\n");
+ break;
+ }
+ lineno++;
+ }
+
+ std::stable_sort(tuple_array, tuple_array + num_edges, doCompare);
+
+#ifdef VERBOSE
+ for (int i = 0 ; i < num_edges; i++) {
+ printf("%d: %d, %d, %d\n", i, tuple_array[i].row, tuple_array[i].col, tuple_array[i].val);
+ }
+#endif
+
+ int *row_array = (int *)malloc((num_nodes + 1) * sizeof(int));
+ int *col_array = (int *)malloc(num_edges * sizeof(int));
+ int *data_array = (int *)malloc(num_edges * sizeof(int));
+
+ int row_cnt = 0;
+ int prev = -1;
+ int idx;
+ for (idx = 0; idx < num_edges; idx++) {
+ int curr = tuple_array[idx].row;
+ if (curr != prev) {
+ row_array[row_cnt++] = idx;
+ prev = curr;
+ }
+
+ col_array[idx] = tuple_array[idx].col;
+ data_array[idx] = tuple_array[idx].val;
+ }
+
+ row_array[row_cnt] = idx;
+
+ fclose(fptr);
+ free(tuple_array);
+
+ csr_array *csr = (csr_array *)malloc(sizeof(csr_array));
+ memset(csr, 0, sizeof(csr_array));
+ csr->row_array = row_array;
+ csr->col_array = col_array;
+ csr->data_array = data_array;
+
+ return csr;
+
+}
+
+// Parse Metis file with double edges
+double_edges *parseMetis_doubleEdge(char* tmpchar, int *p_num_nodes, int *p_num_edges, bool directed)
+{
+ int cnt = 0;
+ unsigned int lineno = 0;
+ char line[4096];
+ int num_edges = 0, num_nodes = 0;
+ FILE *fptr;
+ CooTuple *tuple_array = NULL;
+
+ fptr = fopen(tmpchar, "r");
+ if (!fptr) {
+ fprintf(stderr, "Error when opening file: %s\n", tmpchar);
+ exit(1);
+ }
+
+ printf("Opening file: %s\n", tmpchar);
+
+ while (fgets(line, 4096, fptr)) {
+ int head, tail, weight = 0;
+ CooTuple temp;
+
+ if (line[0] == '%') continue; // skip comment lines
+
+ if (lineno == 0) { //the first line
+
+ sscanf(line, "%d %d", p_num_nodes, p_num_edges);
+ if (!directed) {
+ *p_num_edges = *p_num_edges * 2;
+ printf("This is an undirected graph\n");
+ } else {
+ printf("This is a directed graph\n");
+ }
+
+ num_nodes = *p_num_nodes;
+ num_edges = *p_num_edges;
+
+ printf("Read from file: num_nodes = %d, num_edges = %d\n", num_nodes, num_edges);
+ tuple_array = (CooTuple *)malloc(sizeof(CooTuple) * num_edges);
+ if (!tuple_array) printf("xxxxxxxx\n");
+
+ } else if (lineno > 0) { //from the second line
+ char *pch;
+ pch = strtok(line , " ,.-");
+ while (pch != NULL) {
+ head = lineno;
+ tail = atoi(pch);
+ if (tail <= 0) break;
+
+ if (tail == head) printf("reporting self loop: %d, %d\n", lineno + 1, lineno);
+
+ temp.row = head - 1;
+ temp.col = tail - 1;
+ temp.val = weight;
+
+ tuple_array[cnt++] = temp;
+
+ pch = strtok(NULL, " ,.-");
+ }
+ }
+
+#ifdef VERBOSE
+ printf("Adding edge: %d ==> %d ( %d )\n", head, tail, weight);
+#endif
+
+ lineno++;
+ }
+
+ // Metis files are stored in row-order, so sorting is unnecessary
+ // std::stable_sort(tuple_array, tuple_array + num_edges, doCompare);
+
+#ifdef VERBOSE
+ for (int i = 0 ; i < num_edges; i++) {
+ printf("%d: %d, %d, %d\n", i, tuple_array[i].row, tuple_array[i].col, tuple_array[i].val);
+ }
+#endif
+
+ int *edge_array1 = (int *)malloc(num_edges * sizeof(int));
+ int *edge_array2 = (int *)malloc(num_edges * sizeof(int));
+
+ for (int i = 0; i < num_edges; i++) {
+ edge_array1[i] = tuple_array[i].row;
+ edge_array2[i] = tuple_array[i].col;
+ }
+
+ fclose(fptr);
+ free(tuple_array);
+
+ double_edges *de = (double_edges *)malloc(sizeof(double_edges));
+ de->edge_array1 = edge_array1;
+ de->edge_array2 = edge_array2;
+
+ return de;
+
+}
+
+// Parse COO file with double edges
+double_edges *parseCOO_doubleEdge(char* tmpchar, int *p_num_nodes, int *p_num_edges, bool directed)
+{
+ int cnt = 0;
+ unsigned int lineno = 0;
+ char line[128], sp[2], a, p;
+ int num_nodes = 0, num_edges = 0;
+
+ FILE *fptr;
+ CooTuple *tuple_array = NULL;
+
+ fptr = fopen(tmpchar, "r");
+ if (!fptr) {
+ fprintf(stderr, "Error when opening file: %s\n", tmpchar);
+ exit(1);
+ }
+
+ printf("Opening file: %s\n", tmpchar);
+
+ while (fgets(line, 100, fptr)) {
+ int head, tail, weight;
+ switch (line[0]) {
+ case 'c':
+ break;
+ case 'p':
+ sscanf(line, "%c %s %d %d", &p, sp, p_num_nodes, p_num_edges);
+
+ if (!directed) {
+ *p_num_edges = *p_num_edges * 2;
+ printf("This is an undirected graph\n");
+ } else {
+ printf("This is a directed graph\n");
+ }
+
+ num_nodes = *p_num_nodes;
+ num_edges = *p_num_edges;
+
+ printf("Read from file: num_nodes = %d, num_edges = %d\n", num_nodes, num_edges);
+ tuple_array = (CooTuple *)malloc(sizeof(CooTuple) * num_edges);
+ break;
+ case 'a':
+ sscanf(line, "%c %d %d %d", &a, &head, &tail, &weight);
+ if (tail == head) printf("reporting self loop\n");
+ CooTuple temp;
+ temp.row = head - 1;
+ temp.col = tail - 1;
+ temp.val = weight;
+ tuple_array[cnt++] = temp;
+ if (!directed) {
+ temp.row = tail - 1;
+ temp.col = head - 1;
+ temp.val = weight;
+ tuple_array[cnt++] = temp;
+ }
+
+#ifdef VERBOSE
+ printf("Adding edge: %d ==> %d ( %d )\n", head, tail, weight);
+#endif
+ break;
+ default:
+ fprintf(stderr, "exiting loop\n");
+ break;
+
+ }
+ lineno++;
+ }
+
+ std::stable_sort(tuple_array, tuple_array + num_edges, doCompare);
+
+#ifdef VERBOSE
+ for (int i = 0 ; i < num_edges; i++) {
+ printf("%d: %d, %d, %d\n", i, tuple_array[i].row, tuple_array[i].col, tuple_array[i].val);
+ }
+#endif
+
+ int *edge_array1 = (int *)malloc(num_edges * sizeof(int));
+ int *edge_array2 = (int *)malloc(num_edges * sizeof(int));
+
+ for (int i = 0; i < num_edges; i++) {
+ edge_array1[i] = tuple_array[i].row;
+ edge_array2[i] = tuple_array[i].col;
+ }
+
+ fclose(fptr);
+ free(tuple_array);
+
+ double_edges *de = (double_edges *)malloc(sizeof(double_edges));
+ de->edge_array1 = edge_array1;
+ de->edge_array2 = edge_array2;
+
+ return de;
+}
+
+// Parse matrix market file
+csr_array *parseMM(char* tmpchar, int *p_num_nodes, int *p_num_edges, bool directed, bool weight_flag)
+{
+ int cnt = 0;
+ unsigned int lineno = 0;
+ char line[128];
+ int num_nodes = 0, num_edges = 0, num_nodes2 = 0;
+
+ FILE *fptr;
+ CooTuple *tuple_array = NULL;
+
+ fptr = fopen(tmpchar, "r");
+ if (!fptr) {
+ fprintf(stderr, "Error when opening file: %s\n", tmpchar);
+ exit(1);
+ }
+
+ printf("Opening file: %s\n", tmpchar);
+
+ while (fgets(line, 100, fptr)) {
+ int head, tail, weight;
+ if (line[0] == '%') continue;
+ if (lineno == 0) {
+ sscanf(line, "%d %d %d", p_num_nodes, &num_nodes2, p_num_edges);
+ if (!directed) {
+ *p_num_edges = *p_num_edges * 2;
+ printf("This is an undirected graph\n");
+ } else {
+ printf("This is a directed graph\n");
+ }
+
+ num_nodes = *p_num_nodes;
+ num_edges = *p_num_edges;
+
+ printf("Read from file: num_nodes = %d, num_edges = %d\n", num_nodes, num_edges);
+ tuple_array = (CooTuple *)malloc(sizeof(CooTuple) * num_edges);
+ if (!tuple_array) {
+ printf("tuple array not allocated succesfully\n");
+ exit(1);
+ }
+
+ }
+ if (lineno > 0) {
+
+ if (weight_flag) {
+ sscanf(line, "%d %d %d", &head, &tail, &weight);
+ } else {
+ sscanf(line, "%d %d", &head, &tail);
+ printf("(%d, %d)\n", head, tail);
+ weight = 0;
+ }
+
+ if (tail == head) {
+ printf("reporting self loop\n");
+ continue;
+ };
+
+ CooTuple temp;
+ temp.row = head - 1;
+ temp.col = tail - 1;
+ temp.val = weight;
+ tuple_array[cnt++] = temp;
+
+ if (!directed) {
+ temp.row = tail - 1;
+ temp.col = head - 1;
+ temp.val = weight;
+ tuple_array[cnt++] = temp;
+ }
+
+#ifdef VERBOSE
+ printf("Adding edge: %d ==> %d ( %d )\n", head, tail, weight);
+#endif
+ }
+ lineno++;
+ }
+
+ std::stable_sort(tuple_array, tuple_array + num_edges, doCompare);
+
+#ifdef VERBOSE
+ for (int i = 0 ; i < num_edges; i++) {
+ printf("%d: %d, %d, %d\n", i, tuple_array[i].row, tuple_array[i].col, tuple_array[i].val);
+ }
+#endif
+
+ int *row_array = (int *)malloc((num_nodes + 1) * sizeof(int));
+ int *col_array = (int *)malloc(num_edges * sizeof(int));
+ int *data_array = (int *)malloc(num_edges * sizeof(int));
+
+ int row_cnt = 0;
+ int prev = -1;
+ int idx;
+ for (idx = 0; idx < num_edges; idx++) {
+ int curr = tuple_array[idx].row;
+ if (curr != prev) {
+ row_array[row_cnt++] = idx;
+ prev = curr;
+ }
+
+ col_array[idx] = tuple_array[idx].col;
+ data_array[idx] = tuple_array[idx].val;
+ }
+ row_array[row_cnt] = idx;
+
+ fclose(fptr);
+ free(tuple_array);
+
+ csr_array *csr = (csr_array *)malloc(sizeof(csr_array));
+ memset(csr, 0, sizeof(csr_array));
+ csr->row_array = row_array;
+ csr->col_array = col_array;
+ csr->data_array = data_array;
+
+ return csr;
+}
+
+// Parse Metis file with transpose
+csr_array *parseMetis_transpose(char* tmpchar, int *p_num_nodes, int *p_num_edges, bool directed)
+{
+ int cnt = 0;
+ unsigned int lineno = 0;
+ char *line = (char *)malloc(8192);
+ int num_edges = 0, num_nodes = 0;
+ int *col_cnt = NULL;
+
+ FILE *fptr;
+ CooTuple *tuple_array = NULL;
+
+ fptr = fopen(tmpchar, "r");
+ if (!fptr) {
+ fprintf(stderr, "Error when opening file: %s\n", tmpchar);
+ exit(1);
+ }
+
+ printf("Opening file: %s\n", tmpchar);
+ while (fgets(line, 8192, fptr)) {
+ int head, tail, weight = 0;
+ CooTuple temp;
+
+ if (line[0] == '%') continue; // skip comment lines
+
+ if (lineno == 0) { //the first line
+
+ sscanf(line, "%d %d", p_num_nodes, p_num_edges);
+
+ col_cnt = (int *)malloc(*p_num_nodes * sizeof(int));
+ if (!col_cnt) {
+ printf("memory allocation failed for col_cnt\n");
+ exit(1);
+ }
+ memset(col_cnt, 0, *p_num_nodes * sizeof(int));
+
+ if (!directed) {
+ *p_num_edges = *p_num_edges * 2;
+ printf("This is an undirected graph\n");
+ } else {
+ printf("This is a directed graph\n");
+ }
+ num_nodes = *p_num_nodes;
+ num_edges = *p_num_edges;
+
+ printf("Read from file: num_nodes = %d, num_edges = %d\n", num_nodes, num_edges);
+ tuple_array = (CooTuple *)malloc(sizeof(CooTuple) * num_edges);
+ } else if (lineno > 0) { //from the second line
+ char *pch;
+ pch = strtok(line , " ,.-");
+ while (pch != NULL) {
+ head = lineno;
+ tail = atoi(pch);
+ if (tail <= 0) {
+ break;
+ }
+
+ if (tail == head) printf("reporting self loop: %d, %d\n", lineno + 1, lineno);
+
+ if (directed) {
+ temp.row = tail - 1;
+ temp.col = head - 1;
+ } else {
+ // Undirected matrices are symmetric, so there is no need
+ // to transpose and then re-sort the edges
+ temp.row = head - 1;
+ temp.col = tail - 1;
+ }
+ temp.val = weight;
+
+ col_cnt[head - 1]++;
+ if (cnt >= num_edges) {
+ fprintf(stderr, "Error when opening file: %s.\n" \
+ " Check if graph is undirected Metis format\n", tmpchar);
+ exit(1);
+ }
+ tuple_array[cnt++] = temp;
+
+ pch = strtok(NULL, " ,.-");
+ }
+ }
+#ifdef VERBOSE
+ printf("Adding edge: %d ==> %d ( %d )\n", head, tail, weight);
+#endif
+ lineno++;
+ }
+
+ if (directed) {
+ // Metis files are stored in row-order, so transposed, directed
+ // matrices must be re-sorted!
+ std::stable_sort(tuple_array, tuple_array + num_edges, doCompare);
+ }
+
+#ifdef VERBOSE
+ for (int i = 0 ; i < num_edges; i++) {
+ printf("%d: %d, %d, %d\n", i, tuple_array[i].row, tuple_array[i].col, tuple_array[i].val);
+ }
+#endif
+
+ int *row_array = (int *)malloc((num_nodes + 1) * sizeof(int));
+ int *col_array = (int *)malloc(num_edges * sizeof(int));
+ int *data_array = (int *)malloc(num_edges * sizeof(int));
+
+ int row_cnt = 0;
+ int prev = -1;
+ int idx;
+ for (idx = 0; idx < num_edges; idx++) {
+ int curr = tuple_array[idx].row;
+ if (curr != prev) {
+ row_array[row_cnt++] = idx;
+ prev = curr;
+ }
+ col_array[idx] = tuple_array[idx].col;
+ data_array[idx] = tuple_array[idx].val;
+ }
+ row_array[row_cnt] = idx;
+
+ csr_array *csr = (csr_array *)malloc(sizeof(csr_array));
+ memset(csr, 0, sizeof(csr_array));
+ csr->row_array = row_array;
+ csr->col_array = col_array;
+ csr->data_array = data_array;
+ csr->col_cnt = col_cnt;
+
+ fclose(fptr);
+ free(tuple_array);
+
+ return csr;
+}
+
+// Parse COO file with transpose
+csr_array *parseCOO_transpose(char* tmpchar, int *p_num_nodes, int *p_num_edges, bool directed)
+{
+ int cnt = 0;
+ unsigned int lineno = 0;
+ char line[128], sp[2], a, p;
+ int num_nodes = 0, num_edges = 0;
+
+ FILE *fptr;
+ CooTuple *tuple_array = NULL;
+
+ fptr = fopen(tmpchar, "r");
+ if (!fptr) {
+ fprintf(stderr, "Error when opening file: %s\n", tmpchar);
+ exit(1);
+ }
+
+ printf("Opening file: %s\n", tmpchar);
+
+ while (fgets(line, 100, fptr)) {
+ int head, tail, weight;
+ switch (line[0]) {
+ case 'c':
+ break;
+ case 'p':
+ fflush(stdout);
+
+ sscanf(line, "%c %s %d %d", &p, sp, p_num_nodes, p_num_edges);
+
+ if (!directed) {
+ *p_num_edges = *p_num_edges * 2;
+ printf("This is an undirected graph\n");
+ } else {
+ printf("This is a directed graph\n");
+ }
+
+ num_nodes = *p_num_nodes;
+ num_edges = *p_num_edges;
+
+ printf("Read from file: num_nodes = %d, num_edges = %d\n", num_nodes, num_edges);
+ tuple_array = (CooTuple *)malloc(sizeof(CooTuple) * num_edges);
+ break;
+
+ case 'a':
+ sscanf(line, "%c %d %d %d", &a, &head, &tail, &weight);
+ if (tail == head) printf("reporting self loop\n");
+ CooTuple temp;
+ temp.val = weight;
+ temp.row = tail - 1;
+ temp.col = head - 1;
+ tuple_array[cnt++] = temp;
+ if (!directed) {
+ temp.val = weight;
+ temp.row = tail - 1;
+ temp.col = head - 1;
+ tuple_array[cnt++] = temp;
+ }
+
+#ifdef VERBOSE
+ printf("Adding edge: %d ==> %d ( %d )\n", head, tail, weight);
+#endif
+ break;
+ default:
+ fprintf(stderr, "exiting loop\n");
+ break;
+ }
+ lineno++;
+ }
+
+ std::stable_sort(tuple_array, tuple_array + num_edges, doCompare);
+
+#ifdef VERBOSE
+ for (int i = 0 ; i < num_edges; i++) {
+ printf("%d: %d, %d, %d\n", i, tuple_array[i].row, tuple_array[i].col, tuple_array[i].val);
+ }
+#endif
+
+ int *row_array = (int *)malloc((num_nodes + 1) * sizeof(int));
+ int *col_array = (int *)malloc(num_edges * sizeof(int));
+ int *data_array = (int *)malloc(num_edges * sizeof(int));
+
+ int row_cnt = 0;
+ int prev = -1;
+ int idx;
+ for (idx = 0; idx < num_edges; idx++) {
+ int curr = tuple_array[idx].row;
+ if (curr != prev) {
+ row_array[row_cnt++] = idx;
+ prev = curr;
+ }
+ col_array[idx] = tuple_array[idx].col;
+ data_array[idx] = tuple_array[idx].val;
+ }
+ while (row_cnt <= num_nodes) {
+ row_array[row_cnt++] = idx;
+ }
+
+ csr_array *csr = (csr_array *)malloc(sizeof(csr_array));
+ memset(csr, 0, sizeof(csr_array));
+ csr->row_array = row_array;
+ csr->col_array = col_array;
+ csr->data_array = data_array;
+
+ fclose(fptr);
+ free(tuple_array);
+
+ return csr;
+}
+
diff --git a/src/gpu/pannotia/graph_parser/parse.h b/src/gpu/pannotia/graph_parser/parse.h
new file mode 100644
index 0000000..e6273ac
--- /dev/null
+++ b/src/gpu/pannotia/graph_parser/parse.h
@@ -0,0 +1,114 @@
+/************************************************************************************\
+ * *
+ * Copyright � 2014 Advanced Micro Devices, Inc. *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood *
+ * All rights reserved. *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted provided that the following are met: *
+ * *
+ * You must reproduce the above copyright notice. *
+ * *
+ * Neither the name of the copyright holder nor the names of its contributors *
+ * may be used to endorse or promote products derived from this software *
+ * without specific, prior, written permission from at least the copyright holder. *
+ * *
+ * You must include the following terms in your license and/or other materials *
+ * provided with the software. *
+ * *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY *
+ * OF SUCH DAMAGE. *
+ * *
+ * Without limiting the foregoing, the software may implement third party *
+ * technologies for which you must obtain licenses from parties other than AMD. *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall *
+ * be responsible for obtaining the rights to use and/or distribute the applicable *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder. *
+ * *
+ * If you use the software (in whole or in part), you shall adhere to all *
+ * applicable U.S., European, and other export laws, including but not limited to *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774), *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009. Further, pursuant *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a *
+ * license granted by the United States Department of Commerce Bureau of Industry *
+ * and Security or as otherwise permitted pursuant to a License Exception under *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export, *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or *
+ * E:2 any restricted technology, software, or source code you receive hereunder, *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
+ * technology or software, if such foreign produced direct product is subject to *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR). For the most current Country Group *
+ * listings, or for additional information about the EAR or your obligations under *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's *
+ * website at http://www.bis.doc.gov/. *
+ * *
+\************************************************************************************/
+
+#include <stdlib.h>
+
+typedef struct csr_arrays_t {
+ int *row_array;
+ int *col_array;
+ int *data_array;
+ int *col_cnt;
+
+ void freeArrays() {
+ if (row_array) {
+ free(row_array);
+ row_array = NULL;
+ }
+ if (col_array) {
+ free(col_array);
+ col_array = NULL;
+ }
+ if (data_array) {
+ free(data_array);
+ data_array = NULL;
+ }
+ if (col_cnt) {
+ free(col_cnt);
+ col_cnt = NULL;
+ }
+ }
+} csr_array;
+
+typedef struct ell_arrays_t {
+ int max_height;
+ int num_nodes;
+ int *col_array;
+ int *data_array;
+ int *col_cnt;
+} ell_array;
+
+typedef struct double_edges_t {
+ int *edge_array1;
+ int *edge_array2;
+} double_edges;
+
+typedef struct cooedgetuple {
+ int row;
+ int col;
+ int val;
+} CooTuple;
+
+csr_array *parseCOO(char* tmpchar, int *p_num_nodes, int *p_num_edges, bool directed);
+csr_array *parseMetis(char* tmpchar, int *p_num_nodes, int *p_num_edges, bool directed);
+csr_array *parseMM(char* tmpchar, int *p_num_nodes, int *p_num_edges, bool directed, bool weight_flag);
+ell_array *csr2ell(csr_array *csr, int num_nodes, int num_edges, int fill);
+
+double_edges *parseCOO_doubleEdge(char* tmpchar, int *p_num_nodes, int *p_num_edges, bool directed);
+double_edges *parseMetis_doubleEdge(char* tmpchar, int *p_num_nodes, int *p_num_edges, bool directed);
+
+csr_array *parseCOO_transpose(char* tmpchar, int *p_num_nodes, int *p_num_edges, bool directed);
+csr_array *parseMetis_transpose(char* tmpchar, int *p_num_nodes, int *p_num_edges, bool directed);
diff --git a/src/gpu/pannotia/graph_parser/util.cpp b/src/gpu/pannotia/graph_parser/util.cpp
new file mode 100644
index 0000000..93105ad
--- /dev/null
+++ b/src/gpu/pannotia/graph_parser/util.cpp
@@ -0,0 +1,67 @@
+/************************************************************************************\
+ * *
+ * Copyright � 2014 Advanced Micro Devices, Inc. *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood *
+ * All rights reserved. *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted provided that the following are met: *
+ * *
+ * You must reproduce the above copyright notice. *
+ * *
+ * Neither the name of the copyright holder nor the names of its contributors *
+ * may be used to endorse or promote products derived from this software *
+ * without specific, prior, written permission from at least the copyright holder. *
+ * *
+ * You must include the following terms in your license and/or other materials *
+ * provided with the software. *
+ * *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY *
+ * OF SUCH DAMAGE. *
+ * *
+ * Without limiting the foregoing, the software may implement third party *
+ * technologies for which you must obtain licenses from parties other than AMD. *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall *
+ * be responsible for obtaining the rights to use and/or distribute the applicable *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder. *
+ * *
+ * If you use the software (in whole or in part), you shall adhere to all *
+ * applicable U.S., European, and other export laws, including but not limited to *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774), *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009. Further, pursuant *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a *
+ * license granted by the United States Department of Commerce Bureau of Industry *
+ * and Security or as otherwise permitted pursuant to a License Exception under *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export, *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or *
+ * E:2 any restricted technology, software, or source code you receive hereunder, *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
+ * technology or software, if such foreign produced direct product is subject to *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR). For the most current Country Group *
+ * listings, or for additional information about the EAR or your obligations under *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's *
+ * website at http://www.bis.doc.gov/. *
+ * *
+\************************************************************************************/
+
+#include <sys/time.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+double gettime() {
+ struct timeval t;
+ gettimeofday(&t,NULL);
+ return t.tv_sec+t.tv_usec*1e-6;
+}
diff --git a/src/gpu/pannotia/graph_parser/util.h b/src/gpu/pannotia/graph_parser/util.h
new file mode 100644
index 0000000..b05ae2f
--- /dev/null
+++ b/src/gpu/pannotia/graph_parser/util.h
@@ -0,0 +1,58 @@
+/************************************************************************************\
+ * *
+ * Copyright � 2014 Advanced Micro Devices, Inc. *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood *
+ * All rights reserved. *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted provided that the following are met: *
+ * *
+ * You must reproduce the above copyright notice. *
+ * *
+ * Neither the name of the copyright holder nor the names of its contributors *
+ * may be used to endorse or promote products derived from this software *
+ * without specific, prior, written permission from at least the copyright holder. *
+ * *
+ * You must include the following terms in your license and/or other materials *
+ * provided with the software. *
+ * *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY *
+ * OF SUCH DAMAGE. *
+ * *
+ * Without limiting the foregoing, the software may implement third party *
+ * technologies for which you must obtain licenses from parties other than AMD. *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall *
+ * be responsible for obtaining the rights to use and/or distribute the applicable *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder. *
+ * *
+ * If you use the software (in whole or in part), you shall adhere to all *
+ * applicable U.S., European, and other export laws, including but not limited to *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774), *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009. Further, pursuant *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a *
+ * license granted by the United States Department of Commerce Bureau of Industry *
+ * and Security or as otherwise permitted pursuant to a License Exception under *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export, *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or *
+ * E:2 any restricted technology, software, or source code you receive hereunder, *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
+ * technology or software, if such foreign produced direct product is subject to *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR). For the most current Country Group *
+ * listings, or for additional information about the EAR or your obligations under *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's *
+ * website at http://www.bis.doc.gov/. *
+ * *
+\************************************************************************************/
+
+double gettime();
diff --git a/src/gpu/pannotia/mis/Makefile b/src/gpu/pannotia/mis/Makefile
new file mode 100644
index 0000000..6158bac
--- /dev/null
+++ b/src/gpu/pannotia/mis/Makefile
@@ -0,0 +1,11 @@
+default:
+ make -f Makefile.default
+
+clean:
+ make -f Makefile.default clean
+
+gem5-fusion:
+ make -f Makefile.gem5-fusion
+
+clean-gem5-fusion:
+ make -f Makefile.gem5-fusion clean
diff --git a/src/gpu/pannotia/mis/Makefile.default b/src/gpu/pannotia/mis/Makefile.default
new file mode 100644
index 0000000..9211ba6
--- /dev/null
+++ b/src/gpu/pannotia/mis/Makefile.default
@@ -0,0 +1,20 @@
+EXECUTABLE = mis_hip
+OPTS = -O3
+
+HIP_PATH ?= /opt/rocm/hip
+HIPCC = $(HIP_PATH)/bin/hipcc
+
+BIN_DIR ?= ./bin
+
+all: $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR)/$(EXECUTABLE): mis.cpp ../graph_parser/parse.cpp ../graph_parser/util.cpp $(BIN_DIR)
+ $(HIPCC) $(OPTS) --amdgpu-target=gfx801,gfx803,gfx906 $(CXXFLAGS) mis.cpp ../graph_parser/parse.cpp ../graph_parser/util.cpp -o $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR):
+ mkdir -p $(BIN_DIR)
+
+clean:
+ rm -rf $(BIN_DIR)
+
+.PHONY: square clean
diff --git a/src/gpu/pannotia/mis/Makefile.gem5-fusion b/src/gpu/pannotia/mis/Makefile.gem5-fusion
new file mode 100644
index 0000000..b5678a5
--- /dev/null
+++ b/src/gpu/pannotia/mis/Makefile.gem5-fusion
@@ -0,0 +1,26 @@
+EXECUTABLE = mis_hip.gem5
+OPTS = -O3
+
+HIP_PATH ?= /opt/rocm/hip
+HIPCC = $(HIP_PATH)/bin/hipcc
+
+# these are needed for m5ops
+# TODO: Need some sort of explicit PATH? Read in?
+GEM5_PATH ?= /nobackup/sinclair/gem5
+CFLAGS += -I$(GEM5_PATH)/include -I../graph_parser
+LDFLAGS += -L$(GEM5_PATH)/util/m5/build/x86/out -lm5
+
+BIN_DIR ?= ./bin
+
+all: $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR)/$(EXECUTABLE): mis.cpp ../graph_parser/parse.cpp ../graph_parser/util.cpp $(BIN_DIR)
+ $(HIPCC) $(OPTS) --amdgpu-target=gfx801,gfx803 $(CXXFLAGS) mis.cpp ../graph_parser/parse.cpp ../graph_parser/util.cpp -DGEM5_FUSION -o $(BIN_DIR)/$(EXECUTABLE) $(CFLAGS) $(LDFLAGS)
+
+$(BIN_DIR):
+ mkdir -p $(BIN_DIR)
+
+clean:
+ rm -rf $(BIN_DIR)
+
+.PHONY: square clean
diff --git a/src/gpu/pannotia/mis/README.md b/src/gpu/pannotia/mis/README.md
new file mode 100644
index 0000000..e587055
--- /dev/null
+++ b/src/gpu/pannotia/mis/README.md
@@ -0,0 +1,49 @@
+---
+title: Pannotia MIS Test
+tags:
+ - x86
+ - amdgpu
+layout: default
+permalink: resources/pannotia/mis
+shortdoc: >
+ Resources to build a disk image with the GCN3 Pannotia MIS workload.
+---
+
+Maximal Independent Set (mis) is a graph analytics application that is part of the Pannotia benchmark suite. It is designed to find a maximal subset of vertices in a graph such that no two are adjacent. The provided version is for use with the gpu-compute model of gem5. Thus, it has been ported from the prior CUDA and OpenCL variants to HIP, and validated on a Vega-class AMD GPU.
+
+Compiling MIS, compiling the GCN3_X86/Vega_X86 versions of gem5, and running MIS on gem5 is dependent on the gcn-gpu docker image, `util/dockerfiles/gcn-gpu/Dockerfile` on the [gem5 stable branch](https://gem5.googlesource.com/public/gem5/+/refs/heads/stable).
+
+## Compilation and Running
+
+To compile MIS:
+
+```
+cd src/gpu/pannotia/mis
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu make gem5-fusion
+```
+
+If you use the Makefile.default file instead, the Makefile will generate code designed to run on the real GPU instead. Moreover, note that Makefile.gem5-fusion requires you to set the GEM5_ROOT variable (either on the command line or by modifying the Makefile), because the Pannotia applications have been updated to use [m5ops](https://www.gem5.org/documentation/general_docs/m5ops/). By default, the Makefile builds for gfx801 and gfx803, and is placed in the src/gpu/pannotia/mis/bin folder.
+
+## Compiling GCN3_X86/gem5.opt
+
+MIS is a GPU application, which requires that gem5 is built with the GCN3_X86 (or Vega_X86, although this has been less heavily tested) architecture. The test is run with the GCN3_X86 gem5 variant, compiled using the gcn-gpu docker image:
+
+```
+git clone https://gem5.googlesource.com/public/gem5
+cd gem5
+docker run -u $UID:$GID --volume $(pwd):$(pwd) -w $(pwd) gcr.io/gem5-test/gcn-gpu:latest scons build/GCN3_X86/gem5.opt -j <num cores>
+```
+
+## Running MIS on GCN3_X86/gem5.opt
+
+# Assuming gem5 and gem5-resources are in your working directory
+```
+wget http://dist.gem5.org/dist/develop/datasets/pannotia/bc/1k_128k.gr
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu gem5/build/GCN3_X86/gem5.opt gem5/configs/example/apu_se.py -n3 --mem-size=8GB --benchmark-root=gem5-resources/src/gpu/pannotia/mis/bin -c mis.gem5 --options="1k_128k.gr 0"
+```
+
+Note that the datasets from the original Pannotia suite have been uploaded to: <http://dist.gem5.org/dist/develop/datasets/pannotia>. We recommend you start with the 1k_128k.gr input (<http://dist.gem5.org/dist/develop/datasets/pannotia/mis/1k_128k.gr>), as this is the smallest input that can be run with MIS. Note that 1k_128k is not designed for MIS specifically though -- the above link has larger graphs designed to run with MIS that you should consider using for larger experiments.
+
+## Pre-built binary
+
+A pre-built binary will be added soon.
diff --git a/src/gpu/pannotia/mis/kernel.h b/src/gpu/pannotia/mis/kernel.h
new file mode 100644
index 0000000..19d0eae
--- /dev/null
+++ b/src/gpu/pannotia/mis/kernel.h
@@ -0,0 +1,199 @@
+/************************************************************************************\
+ * *
+ * Copyright � 2014 Advanced Micro Devices, Inc. *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair *
+ * All rights reserved. *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted provided that the following are met: *
+ * *
+ * You must reproduce the above copyright notice. *
+ * *
+ * Neither the name of the copyright holder nor the names of its contributors *
+ * may be used to endorse or promote products derived from this software *
+ * without specific, prior, written permission from at least the copyright holder. *
+ * *
+ * You must include the following terms in your license and/or other materials *
+ * provided with the software. *
+ * *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY *
+ * OF SUCH DAMAGE. *
+ * *
+ * Without limiting the foregoing, the software may implement third party *
+ * technologies for which you must obtain licenses from parties other than AMD. *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall *
+ * be responsible for obtaining the rights to use and/or distribute the applicable *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder. *
+ * *
+ * If you use the software (in whole or in part), you shall adhere to all *
+ * applicable U.S., European, and other export laws, including but not limited to *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774), *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009. Further, pursuant *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a *
+ * license granted by the United States Department of Commerce Bureau of Industry *
+ * and Security or as otherwise permitted pursuant to a License Exception under *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export, *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or *
+ * E:2 any restricted technology, software, or source code you receive hereunder, *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
+ * technology or software, if such foreign produced direct product is subject to *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR). For the most current Country Group *
+ * listings, or for additional information about the EAR or your obligations under *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's *
+ * website at http://www.bis.doc.gov/. *
+ * *
+\************************************************************************************/
+
+#ifndef KERNEL_H
+#define KERNEL_H
+
+#include "hip/hip_runtime.h"
+
+#define BIGNUM 99999999
+
+/**
+* init kernel
+* @param s_array set array
+* @param c_array status array
+* @param cu_array status update array
+* @param num_nodes number of vertices
+* @param num_edges number of edges
+*/
+__global__ void
+init(int *s_array, int *c_array, int *cu_array, int num_nodes, int num_edges)
+{
+ // Get my workitem id
+ int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+ if (tid < num_nodes) {
+ // Set the status array: not processed
+ c_array[tid] = -1;
+ cu_array[tid] = -1;
+ s_array[tid] = 0;
+ }
+}
+
+/**
+* mis1 kernel
+* @param row csr pointer array
+* @param col csr column index array
+* @param node_value node value array
+* @param s_array set array
+* @param c_array node status array
+* @param min_array node value array
+* @param stop node value array
+* @param num_nodes number of vertices
+* @param num_edges number of edges
+*/
+__global__ void
+mis1(int *row, int *col, int *node_value, int *s_array, int *c_array,
+ int *min_array, int *stop, int num_nodes, int num_edges)
+{
+ // Get workitem id
+ int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+ if (tid < num_nodes) {
+ // If the vertex is not processed
+ if (c_array[tid] == -1) {
+ *stop = 1;
+ // Get the start and end pointers
+ int start = row[tid];
+ int end;
+ if (tid + 1 < num_nodes) {
+ end = row[tid + 1];
+ } else {
+ end = num_edges;
+ }
+
+ // Navigate the neighbor list and find the min
+ int min = BIGNUM;
+ for (int edge = start; edge < end; edge++) {
+ if (c_array[col[edge]] == -1) {
+ if (node_value[col[edge]] < min) {
+ min = node_value[col[edge]];
+ }
+ }
+ }
+ min_array[tid] = min;
+ }
+ }
+}
+
+/**
+* mis2 kernel
+* @param row csr pointer array
+* @param col csr column index array
+* @param node_value node value array
+* @param s_array set array
+* @param c_array status array
+* @param cu_array status update array
+* @param min_array node value array
+* @param num_nodes number of vertices
+* @param num_edges number of edges
+*/
+__global__ void
+mis2(int *row, int *col, int *node_value, int *s_array, int *c_array,
+ int *cu_array, int *min_array, int num_nodes, int num_edges)
+{
+ // Get my workitem id
+ int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+
+ if (tid < num_nodes) {
+ if (node_value[tid] <= min_array[tid] && c_array[tid] == -1) {
+ // -1: Not processed -2: Inactive 2: Independent set
+ // Put the item into the independent set
+ s_array[tid] = 2;
+
+ // Get the start and end pointers
+ int start = row[tid];
+ int end;
+
+ if (tid + 1 < num_nodes) {
+ end = row[tid + 1];
+ } else {
+ end = num_edges;
+ }
+
+ // Set the status to inactive
+ c_array[tid] = -2;
+
+ // Mark all the neighbors inactive
+ for (int edge = start; edge < end; edge++) {
+ if (c_array[col[edge]] == -1) {
+ //use status update array to avoid race
+ cu_array[col[edge]] = -2;
+ }
+ }
+ }
+ }
+}
+
+/**
+* mis3 kernel
+* @param cu_array status update array
+* @param c_array status array
+* @param num_nodes number of vertices
+*/
+__global__ void
+mis3(int *cu_array, int *c_array, int num_nodes)
+{
+ //get my workitem id
+ int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+
+ //set the status array
+ if (tid < num_nodes && cu_array[tid] == -2) {
+ c_array[tid] = cu_array[tid];
+ }
+}
+
+#endif // KERNEL_H
diff --git a/src/gpu/pannotia/mis/mis.cpp b/src/gpu/pannotia/mis/mis.cpp
new file mode 100644
index 0000000..a6a04fb
--- /dev/null
+++ b/src/gpu/pannotia/mis/mis.cpp
@@ -0,0 +1,334 @@
+/************************************************************************************\
+ * *
+ * Copyright � 2014 Advanced Micro Devices, Inc. *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair *
+ * All rights reserved. *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted provided that the following are met: *
+ * *
+ * You must reproduce the above copyright notice. *
+ * *
+ * Neither the name of the copyright holder nor the names of its contributors *
+ * may be used to endorse or promote products derived from this software *
+ * without specific, prior, written permission from at least the copyright holder. *
+ * *
+ * You must include the following terms in your license and/or other materials *
+ * provided with the software. *
+ * *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY *
+ * OF SUCH DAMAGE. *
+ * *
+ * Without limiting the foregoing, the software may implement third party *
+ * technologies for which you must obtain licenses from parties other than AMD. *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall *
+ * be responsible for obtaining the rights to use and/or distribute the applicable *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder. *
+ * *
+ * If you use the software (in whole or in part), you shall adhere to all *
+ * applicable U.S., European, and other export laws, including but not limited to *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774), *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009. Further, pursuant *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a *
+ * license granted by the United States Department of Commerce Bureau of Industry *
+ * and Security or as otherwise permitted pursuant to a License Exception under *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export, *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or *
+ * E:2 any restricted technology, software, or source code you receive hereunder, *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
+ * technology or software, if such foreign produced direct product is subject to *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR). For the most current Country Group *
+ * listings, or for additional information about the EAR or your obligations under *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's *
+ * website at http://www.bis.doc.gov/. *
+ * *
+\************************************************************************************/
+
+#include "hip/hip_runtime.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+//#include <sys/time.h>
+#include <algorithm>
+#include "../graph_parser/parse.h"
+#include "../graph_parser/util.h"
+#include "kernel.h"
+
+#ifdef GEM5_FUSION
+#include <stdint.h>
+#include <gem5/m5ops.h>
+#endif
+
+#define RANGE 2048
+
+void dump2file(int *adjmatrix, int num_nodes);
+void print_vector(int *vector, int num);
+void print_vectorf(float *vector, int num);
+
+int main(int argc, char **argv)
+{
+ char *tmpchar;
+
+ int num_nodes;
+ int num_edges;
+ int file_format = 1;
+ bool directed = 0;
+
+ hipError_t err = hipSuccess;
+
+ // Input arguments
+ if (argc == 3) {
+ tmpchar = argv[1]; // Graph inputfile
+ file_format = atoi(argv[2]); // Choose file format
+ } else {
+ fprintf(stderr, "You did something wrong!\n");
+ exit(1);
+ }
+
+ srand(7);
+
+ // Allocate the csr array
+ csr_array *csr;
+
+ // Parse the graph into the csr structure
+ if (file_format == 1) {
+ csr = parseMetis(tmpchar, &num_nodes, &num_edges, directed);
+ } else if (file_format == 0) {
+ csr = parseCOO(tmpchar, &num_nodes, &num_edges, directed);
+ } else {
+ fprintf(stderr, "reserve for future");
+ exit(1);
+ }
+
+ // Allocate the node value array
+ int *node_value = (int *)malloc(num_nodes * sizeof(int));
+ if (!node_value) fprintf(stderr, "malloc failed node_value\n");
+
+ // Allocate the set array
+ int *s_array = (int *)malloc(num_nodes * sizeof(int));
+ if (!s_array) fprintf(stderr, "malloc failed node_value\n");
+
+ // Randomize the node values
+ for (int i = 0; i < num_nodes; i++) {
+ node_value[i] = rand() % RANGE;
+ }
+
+ // Create device side buffers
+ int *row_d;
+ int *col_d;
+
+ int *c_array_d;
+ int *c_array_u_d;
+ int *s_array_d;
+ int *node_value_d;
+ int *min_array_d;
+ int *stop_d;
+
+ // Allocate the device-side buffers for the graph
+ err = hipMalloc(&row_d, num_nodes * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc row_d (size:%d) => %s\n", num_nodes , hipGetErrorString(err));
+ return -1;
+ }
+ err = hipMalloc(&col_d, num_edges * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc col_d (size:%d) => %s\n", num_edges , hipGetErrorString(err));
+ return -1;
+ }
+
+ // Termination variable
+ err = hipMalloc(&stop_d, sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc stop_d (size:%d) => %s\n", 1, hipGetErrorString(err));
+ return -1;
+ }
+
+ // Allocate the device-side buffers for mis
+ err = hipMalloc(&min_array_d, num_nodes * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc min_array_d (size:%d) => %s\n", num_nodes , hipGetErrorString(err));
+ return -1;
+ }
+ err = hipMalloc(&c_array_d, num_nodes * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc c_array_d (size:%d) => %s\n", num_nodes , hipGetErrorString(err));
+ return -1;
+ }
+ err = hipMalloc(&c_array_u_d, num_nodes * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc c_array_d (size:%d) => %s\n", num_nodes , hipGetErrorString(err));
+ return -1;
+ }
+ err = hipMalloc(&s_array_d, num_nodes * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc s_array_d (size:%d) => %s\n", num_nodes , hipGetErrorString(err));
+ return -1;
+ }
+ err = hipMalloc(&node_value_d, num_nodes * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc node_value_d (size:%d) => %s\n", num_nodes , hipGetErrorString(err));
+ return -1;
+ }
+
+// double time1 = gettime();
+
+#ifdef GEM5_FUSION
+ m5_work_begin(0, 0);
+#endif
+
+ // Copy data to device-side buffers
+ err = hipMemcpy(row_d, csr->row_array, num_nodes * sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMemcpy row_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+
+ err = hipMemcpy(col_d, csr->col_array, num_edges * sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMemcpy col_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+
+ err = hipMemcpy(node_value_d, node_value, num_nodes * sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMemcpy feature_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+
+ // Work dimensions
+ int block_size = 128;
+ int num_blocks = (num_nodes + block_size - 1) / block_size;
+
+ dim3 threads(block_size, 1, 1);
+ dim3 grid(num_blocks, 1, 1);
+
+ // Launch the initialization kernel
+ hipLaunchKernelGGL(HIP_KERNEL_NAME(init), dim3(grid), dim3(threads), 0, 0, s_array_d, c_array_d, c_array_u_d,
+ num_nodes, num_edges);
+ hipDeviceSynchronize();
+ err = hipGetLastError();
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: init kernel (%s)\n", hipGetErrorString(err));
+ return -1;
+ }
+
+ // Termination variable
+ int stop = 1;
+ int iterations = 0;
+ while (stop) {
+ stop = 0;
+
+ // Copy the termination variable to the device
+ err = hipMemcpy(stop_d, &stop, sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: write stop_d variable (%s)\n", hipGetErrorString(err));
+ return -1;
+ }
+
+ // Launch mis1
+ hipLaunchKernelGGL(HIP_KERNEL_NAME(mis1), dim3(grid), dim3(threads), 0, 0, row_d, col_d, node_value_d, s_array_d,
+ c_array_d, min_array_d, stop_d, num_nodes,
+ num_edges);
+
+ // Launch mis2
+ hipLaunchKernelGGL(HIP_KERNEL_NAME(mis2), dim3(grid), dim3(threads), 0, 0, row_d, col_d, node_value_d, s_array_d,
+ c_array_d, c_array_u_d, min_array_d, num_nodes,
+ num_edges);
+
+ // Launch mis3
+ hipLaunchKernelGGL(HIP_KERNEL_NAME(mis3), dim3(grid), dim3(threads), 0, 0, c_array_u_d, c_array_d, num_nodes);
+
+ // Copy the termination variable back
+ err = hipMemcpy(&stop, stop_d, sizeof(int), hipMemcpyDeviceToHost);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: read stop_d variable (%s)\n", hipGetErrorString(err));
+ return -1;
+ }
+
+ iterations++;
+ }
+
+ hipDeviceSynchronize();
+
+ err = hipMemcpy(s_array, s_array_d, num_nodes * sizeof(int), hipMemcpyDeviceToHost);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMemcpy s_array_d failed (%s)\n", hipGetErrorString(err));
+ return -1;
+ }
+
+#ifdef GEM5_FUSION
+ m5_work_end(0, 0);
+#endif
+
+// double time2 = gettime();
+
+ // Print out the timing characterisitics
+ printf("number of iterations: %d\n", iterations);
+// printf("kernel + memcpy time %f ms\n", (time2 - time1) * 1000);
+
+#if 1
+ // Print the set array
+ print_vector(s_array, num_nodes);
+#endif
+
+ // Clean up the host-side arrays
+ free(node_value);
+ free(s_array);
+ csr->freeArrays();
+ free(csr);
+
+ // Clean up the device-side arrays
+ hipFree(row_d);
+ hipFree(col_d);
+ hipFree(c_array_d);
+ hipFree(s_array_d);
+ hipFree(node_value_d);
+ hipFree(min_array_d);
+ hipFree(stop_d);
+
+ return 0;
+}
+
+void print_vector(int *vector, int num)
+{
+
+ FILE * fp = fopen("result.out", "w");
+ if (!fp) {
+ printf("ERROR: unable to open result.txt\n");
+ }
+
+ for (int i = 0; i < num; i++) {
+ fprintf(fp, "%d\n", vector[i]);
+ }
+
+ fclose(fp);
+
+}
+
+void print_vectorf(float *vector, int num)
+{
+
+ FILE * fp = fopen("result.out", "w");
+ if (!fp) {
+ printf("ERROR: unable to open result.txt\n");
+ }
+
+ for (int i = 0; i < num; i++) {
+ fprintf(fp, "%f\n", vector[i]);
+ }
+
+ fclose(fp);
+
+}
diff --git a/src/gpu/pannotia/pagerank/Makefile b/src/gpu/pannotia/pagerank/Makefile
new file mode 100644
index 0000000..6158bac
--- /dev/null
+++ b/src/gpu/pannotia/pagerank/Makefile
@@ -0,0 +1,11 @@
+default:
+ make -f Makefile.default
+
+clean:
+ make -f Makefile.default clean
+
+gem5-fusion:
+ make -f Makefile.gem5-fusion
+
+clean-gem5-fusion:
+ make -f Makefile.gem5-fusion clean
diff --git a/src/gpu/pannotia/pagerank/Makefile.default b/src/gpu/pannotia/pagerank/Makefile.default
new file mode 100644
index 0000000..db96b44
--- /dev/null
+++ b/src/gpu/pannotia/pagerank/Makefile.default
@@ -0,0 +1,28 @@
+HIP_PATH ?= /opt/rocm/hip
+HIPCC = $(HIP_PATH)/bin/hipcc
+OPTS = -O3
+
+BASEEXE = pagerank
+VARIANT ?= DEFAULT
+ifeq ($(VARIANT),DEFAULT)
+ EXECUTABLE = $(BASEEXE)
+ CPPFILES += pagerank.cpp
+else ifeq ($(VARIANT),SPMV)
+ EXECUTABLE = $(BASEEXE)_spmv
+ CPPFILES += pagerank_spmv.cpp
+endif
+
+BIN_DIR ?= ./bin
+
+all: $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR)/$(EXECUTABLE): $(CPPFILES) ../graph_parser/parse.cpp ../graph_parser/util.cpp $(BIN_DIR)
+ $(HIPCC) $(OPTS) --amdgpu-target=gfx801,gfx803,gfx906 $(CXXFLAGS) ../graph_parser/parse.cpp ../graph_parser/util.cpp $(CPPFILES) -o $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR):
+ mkdir -p $(BIN_DIR)
+
+clean:
+ rm -rf $(BIN_DIR)
+
+.PHONY: square clean
diff --git a/src/gpu/pannotia/pagerank/Makefile.gem5-fusion b/src/gpu/pannotia/pagerank/Makefile.gem5-fusion
new file mode 100644
index 0000000..06bd88b
--- /dev/null
+++ b/src/gpu/pannotia/pagerank/Makefile.gem5-fusion
@@ -0,0 +1,34 @@
+HIP_PATH ?= /opt/rocm/hip
+HIPCC = $(HIP_PATH)/bin/hipcc
+OPTS = -O3
+
+# these are needed for m5ops
+# TODO: Need some sort of explicit PATH? Read in?
+GEM5_PATH ?= /nobackup/sinclair/gem5
+CFLAGS += -I$(GEM5_PATH)/include -I/../graph_parser
+LDFLAGS += -L$(GEM5_PATH)/util/m5/build/x86/out -lm5
+
+BASEEXE = pagerank
+VARIANT ?= DEFAULT
+ifeq ($(VARIANT),DEFAULT)
+ EXECUTABLE = $(BASEEXE).gem5
+ CPPFILES += pagerank.cpp
+else ifeq ($(VARIANT),SPMV)
+ EXECUTABLE = $(BASEEXE)_spmv.gem5
+ CPPFILES += pagerank_spmv.cpp
+endif
+
+BIN_DIR ?= ./bin
+
+all: $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR)/$(EXECUTABLE): $(CPPFILES) ../graph_parser/parse.cpp ../graph_parser/util.cpp $(BIN_DIR)
+ $(HIPCC) $(OPTS) --amdgpu-target=gfx801,gfx803 $(CXXFLAGS) ../graph_parser/parse.cpp ../graph_parser/util.cpp $(CPPFILES) -DGEM5_FUSION -o $(BIN_DIR)/$(EXECUTABLE) $(CFLAGS) $(LDFLAGS)
+
+$(BIN_DIR):
+ mkdir -p $(BIN_DIR)
+
+clean:
+ rm -rf $(BIN_DIR)
+
+.PHONY: square clean
diff --git a/src/gpu/pannotia/pagerank/README.md b/src/gpu/pannotia/pagerank/README.md
new file mode 100644
index 0000000..726272a
--- /dev/null
+++ b/src/gpu/pannotia/pagerank/README.md
@@ -0,0 +1,66 @@
+---
+title: Pannotia PageRank Test
+tags:
+ - x86
+ - amdgpu
+layout: default
+permalink: resources/pannotia/pagerank
+shortdoc: >
+ Resources to build a disk image with the GCN3 Pannotia PageRank workload.
+---
+
+PageRank (PR) is a graph analytics application that is part of the Pannotia benchmark suite. It is an algorithm designed to calculate probability distributions representing the likelihood that a person randomly clicking on links arrives at any particular page. The provided version is for use with the gpu-compute model of gem5. Thus, it has been ported from the prior CUDA and OpenCL variants to HIP, and validated on a Vega-class AMD GPU.
+
+Compiling both PageRank variants, compiling the GCN3_X86/Vega_X86 versions of gem5, and running both PageRank variants on gem5 is dependent on the gcn-gpu docker image, `util/dockerfiles/gcn-gpu/Dockerfile` on the [gem5 stable branch](https://gem5.googlesource.com/public/gem5/+/refs/heads/stable).
+
+## Compilation and Running
+
+PR has two variants: default and spmv. To compile the "default" variant:
+
+```
+cd src/gpu/pannotia/pagerank
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu make gem5-fusion
+```
+
+To compile the "spmv" variant:
+
+```
+cd src/gpu/pannotia/pagerank
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu bash -c "export VARIANT=SPMV ; make gem5-fusion"
+```
+
+If you use the Makefile.default file instead, the Makefile will generate code designed to run on the real GPU instead. Moreover, note that Makefile.gem5-fusion requires you to set the GEM5_ROOT variable (either on the command line or by modifying the Makefile), because the Pannotia applications have been updated to use [m5ops](https://www.gem5.org/documentation/general_docs/m5ops/). By default, for both variants the Makefile builds for gfx801 and gfx803, and the binaries are placed in the src/gpu/pannotia/pagerank/bin folder. Moreover, by default the VARIANT variable PageRank's Makefile assumes the csr variant is being used, hence why this variable does not need to be set for compiling it.
+
+## Compiling GCN3_X86/gem5.opt
+
+PageRank is a GPU application, which requires that gem5 is built with the GCN3_X86 (or Vega_X86, although this has been less heavily tested) architecture. The test is run with the GCN3_X86 gem5 variant, compiled using the gcn-gpu docker image:
+
+```
+git clone https://gem5.googlesource.com/public/gem5
+cd gem5
+docker run -u $UID:$GID --volume $(pwd):$(pwd) -w $(pwd) gcr.io/gem5-test/gcn-gpu:latest scons build/GCN3_X86/gem5.opt -j <num cores>
+```
+
+## Running PageRank on GCN3_X86/gem5.opt
+
+The following command shows how to run the PageRank default version:
+
+# Assuming gem5 and gem5-resources are in your working directory
+```
+wget http://dist.gem5.org/dist/develop/datasets/pannotia/pagerank/coAuthorsDBLP.graph
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu gem5/build/GCN3_X86/gem5.opt gem5/configs/example/apu_se.py -n3 --mem-size=8GB --benchmark-root=gem5-resources/src/gpu/pannotia/pagerank/bin -c pagerank.gem5 --options="coAuthorsDBLP.graph 0"
+```
+
+To run the PageRank spmv version:
+
+# Assuming gem5, pannotia (input graphs, see below), and gem5-resources are in your working directory
+```
+wget http://dist.gem5.org/dist/develop/datasets/pannotia/pagerank/coAuthorsDBLP.graph
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu gem5/build/GCN3_X86/gem5.opt gem5/configs/example/apu_se.py -n3 --mem-size=8GB --benchmark-root=gem5-resources/src/gpu/pannotia/pagerank/bin -c pagerank_spmv.gem5 --options="coAuthorsDBLP.graph 1"
+```
+
+Note that the datasets from the original Pannotia suite have been uploaded to: <http://dist.gem5.org/dist/develop/datasets/pannotia>. We recommend you start with the coAuthorsDBLP input for PR.
+
+## Pre-built binary
+
+A pre-built binary will be added soon.
diff --git a/src/gpu/pannotia/pagerank/kernel.h b/src/gpu/pannotia/pagerank/kernel.h
new file mode 100644
index 0000000..f2407aa
--- /dev/null
+++ b/src/gpu/pannotia/pagerank/kernel.h
@@ -0,0 +1,145 @@
+/************************************************************************************\
+ * *
+ * Copyright � 2014 Advanced Micro Devices, Inc. *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair *
+ * All rights reserved. *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted provided that the following are met: *
+ * *
+ * You must reproduce the above copyright notice. *
+ * *
+ * Neither the name of the copyright holder nor the names of its contributors *
+ * may be used to endorse or promote products derived from this software *
+ * without specific, prior, written permission from at least the copyright holder. *
+ * *
+ * You must include the following terms in your license and/or other materials *
+ * provided with the software. *
+ * *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY *
+ * OF SUCH DAMAGE. *
+ * *
+ * Without limiting the foregoing, the software may implement third party *
+ * technologies for which you must obtain licenses from parties other than AMD. *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall *
+ * be responsible for obtaining the rights to use and/or distribute the applicable *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder. *
+ * *
+ * If you use the software (in whole or in part), you shall adhere to all *
+ * applicable U.S., European, and other export laws, including but not limited to *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774), *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009. Further, pursuant *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a *
+ * license granted by the United States Department of Commerce Bureau of Industry *
+ * and Security or as otherwise permitted pursuant to a License Exception under *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export, *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or *
+ * E:2 any restricted technology, software, or source code you receive hereunder, *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
+ * technology or software, if such foreign produced direct product is subject to *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR). For the most current Country Group *
+ * listings, or for additional information about the EAR or your obligations under *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's *
+ * website at http://www.bis.doc.gov/. *
+ * *
+\************************************************************************************/
+
+#ifndef KERNEL_H
+#define KERNEL_H
+
+#include "hip/hip_runtime.h"
+
+/**
+ * @brief pagerank 1
+ * @param row csr pointer array
+ * @param col csr column array
+ * @param data weight array
+ * @param page_rank1 pagerank array 1
+ * @param page_rank2 pagerank array 2
+ * @param num_nodes number of vertices
+ * @param num_edges number of edges
+ */
+__global__ void
+pagerank1(int *row, int *col, int *data, float *page_rank1, float *page_rank2,
+ const int num_nodes, const int num_edges)
+{
+ // Get my workitem id
+ int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+
+ if (tid < num_nodes) {
+ // Get the starting and ending pointers of the neighborlist
+ int start = row[tid];
+ int end;
+ if (tid + 1 < num_nodes) {
+ end = row[tid + 1];
+ } else {
+ end = num_edges;
+ }
+
+ int nid;
+ // Navigate the neighbor list
+ for (int edge = start; edge < end; edge++) {
+ nid = col[edge];
+ // Transfer the PageRank value to neighbors
+ atomicAdd(&page_rank2[nid], page_rank1[tid] / (float)(end - start));
+ }
+ }
+}
+
+/**
+ * @brief pagerank 2
+ * @param row csr pointer array
+ * @param col csr column array
+ * @param data weight array
+ * @param page_rank1 pagerank array 1
+ * @param page_rank2 pagerank array 2
+ * @param num_nodes number of vertices
+ * @param num_edges number of edges
+ */
+__global__ void
+pagerank2(int *row, int *col, int *data, float *page_rank1, float *page_rank2,
+ const int num_nodes, const int num_edges)
+{
+ // Get my workitem id
+ int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+
+ // Update pagerank value with the damping factor
+ if (tid < num_nodes) {
+ page_rank1[tid] = 0.15 / (float)num_nodes + 0.85 * page_rank2[tid];
+ page_rank2[tid] = 0.0f;
+ }
+}
+
+/**
+ * @brief inibuffer
+ * @param row csr pointer array
+ * @param page_rank1 pagerank array 1
+ * @param page_rank2 pagerank array 2
+ * @param num_nodes number of vertices
+ */
+__global__ void
+inibuffer(int *row, float *page_rank1, float *page_rank2, const int num_nodes,
+ const int num_edges)
+{
+ // Get my thread id
+ int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+
+ if (tid < num_nodes) {
+ page_rank1[tid] = 1 / (float)num_nodes;
+ page_rank2[tid] = 0.0f;
+ }
+}
+
+#endif // KERNEL_H
diff --git a/src/gpu/pannotia/pagerank/kernel_spmv.h b/src/gpu/pannotia/pagerank/kernel_spmv.h
new file mode 100644
index 0000000..af956c6
--- /dev/null
+++ b/src/gpu/pannotia/pagerank/kernel_spmv.h
@@ -0,0 +1,163 @@
+/************************************************************************************\
+ * *
+ * Copyright � 2014 Advanced Micro Devices, Inc. *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair *
+ * All rights reserved. *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted provided that the following are met: *
+ * *
+ * You must reproduce the above copyright notice. *
+ * *
+ * Neither the name of the copyright holder nor the names of its contributors *
+ * may be used to endorse or promote products derived from this software *
+ * without specific, prior, written permission from at least the copyright holder. *
+ * *
+ * You must include the following terms in your license and/or other materials *
+ * provided with the software. *
+ * *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY *
+ * OF SUCH DAMAGE. *
+ * *
+ * Without limiting the foregoing, the software may implement third party *
+ * technologies for which you must obtain licenses from parties other than AMD. *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall *
+ * be responsible for obtaining the rights to use and/or distribute the applicable *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder. *
+ * *
+ * If you use the software (in whole or in part), you shall adhere to all *
+ * applicable U.S., European, and other export laws, including but not limited to *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774), *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009. Further, pursuant *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a *
+ * license granted by the United States Department of Commerce Bureau of Industry *
+ * and Security or as otherwise permitted pursuant to a License Exception under *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export, *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or *
+ * E:2 any restricted technology, software, or source code you receive hereunder, *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
+ * technology or software, if such foreign produced direct product is subject to *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR). For the most current Country Group *
+ * listings, or for additional information about the EAR or your obligations under *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's *
+ * website at http://www.bis.doc.gov/. *
+ * *
+\************************************************************************************/
+
+#ifndef KERNEL_SPMV_H
+#define KERNEL_SPMV_H
+
+#include "hip/hip_runtime.h"
+
+/**
+ * @brief inibuffer
+ * @param page_rank1 PageRank array 1
+ * @param page_rank2 PageRank array 2
+ * @param num_nodes number of vertices
+ */
+__global__ void
+inibuffer(float *page_rank1, float *page_rank2, const int num_nodes)
+{
+ // Get my workitem id
+ int tid = blockDim.x * blockIdx.x + threadIdx.x;
+ // Initialize two pagerank arrays
+ if (tid < num_nodes) {
+ page_rank1[tid] = 1 / (float)num_nodes;
+ page_rank2[tid] = 0.0f;
+ }
+}
+
+/**
+ * @brief inicsr
+ * @param row csr pointer array
+ * @param col csr col array
+ * @param data csr weigh array
+ * @param col_cnt array for #. out-going edges
+ * @param num_nodes number of vertices
+ * @param num_edges number of edges
+ */
+__global__ void
+inicsr(int *row, int *col, float *data, int *col_cnt, int num_nodes,
+ int num_edges)
+{
+ // Get my workitem id
+ int tid = blockDim.x * blockIdx.x + threadIdx.x;
+ if (tid < num_nodes) {
+ // Get the starting and ending pointers
+ int start = row[tid];
+ int end;
+ if (tid + 1 < num_nodes) {
+ end = row[tid + 1] ;
+ } else {
+ end = num_edges;
+ }
+
+ int nid;
+ // Navigate one row of data
+ for (int edge = start; edge < end; edge++) {
+ nid = col[edge];
+ // Each neighbor will get equal amount of pagerank
+ data[edge] = 1.0 / (float)col_cnt[nid];
+ }
+ }
+}
+
+/**
+ * @brief spmv_csr_scalar_kernel (simple spmv)
+ * @param num_nodes number of vertices
+ * @param row csr pointer array
+ * @param col csr col array
+ * @param data csr weigh array
+ * @param x input vector
+ * @param y output vector
+ */
+__global__ void
+spmv_csr_scalar_kernel(const int num_nodes, int *row, int *col, float *data,
+ float *x, float *y)
+{
+ // Get my workitem id
+ int tid = blockDim.x * blockIdx.x + threadIdx.x;
+ if (tid < num_nodes) {
+ // Get the start and end pointers
+ int row_start = row[tid];
+ int row_end = row[tid + 1];
+ float sum = 0;
+ //navigate one row and sum all the elements
+ for (int j = row_start; j < row_end; j++) {
+ sum += data[j] * x[col[j]];
+ }
+ y[tid] += sum;
+ }
+}
+
+/**
+ * @brief pagerank2
+ * @param page_rank1 PageRank array 1
+ * @param page_rank2 PageRank array 2
+ * @param num_nodes number of vertices
+ */
+__global__ void
+pagerank2(float *page_rank1, float *page_rank2, const int num_nodes)
+{
+ // Get my workitem id
+ int tid = blockDim.x * blockIdx.x + threadIdx.x;
+ // Update pagerank value with damping factor
+ if (tid < num_nodes) {
+ page_rank1[tid] = 0.15f / (float)num_nodes + 0.85f * page_rank2[tid];
+ page_rank2[tid] = 0.0f;
+ }
+}
+
+#endif // KERNEL_SPMV_H
diff --git a/src/gpu/pannotia/pagerank/pagerank.cpp b/src/gpu/pannotia/pagerank/pagerank.cpp
new file mode 100644
index 0000000..3d88882
--- /dev/null
+++ b/src/gpu/pannotia/pagerank/pagerank.cpp
@@ -0,0 +1,263 @@
+/************************************************************************************\
+ * *
+ * Copyright � 2014 Advanced Micro Devices, Inc. *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair *
+ * All rights reserved. *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted provided that the following are met: *
+ * *
+ * You must reproduce the above copyright notice. *
+ * *
+ * Neither the name of the copyright holder nor the names of its contributors *
+ * may be used to endorse or promote products derived from this software *
+ * without specific, prior, written permission from at least the copyright holder. *
+ * *
+ * You must include the following terms in your license and/or other materials *
+ * provided with the software. *
+ * *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY *
+ * OF SUCH DAMAGE. *
+ * *
+ * Without limiting the foregoing, the software may implement third party *
+ * technologies for which you must obtain licenses from parties other than AMD. *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall *
+ * be responsible for obtaining the rights to use and/or distribute the applicable *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder. *
+ * *
+ * If you use the software (in whole or in part), you shall adhere to all *
+ * applicable U.S., European, and other export laws, including but not limited to *
+ * the U.S. Export Administration Regulations ("EAR"�) (15 C.F.R Sections 730-774), *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009. Further, pursuant *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a *
+ * license granted by the United States Department of Commerce Bureau of Industry *
+ * and Security or as otherwise permitted pursuant to a License Exception under *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export, *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or *
+ * E:2 any restricted technology, software, or source code you receive hereunder, *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
+ * technology or software, if such foreign produced direct product is subject to *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR). For the most current Country Group *
+ * listings, or for additional information about the EAR or your obligations under *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's *
+ * website at http://www.bis.doc.gov/. *
+ * *
+\************************************************************************************/
+
+#include "hip/hip_runtime.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+//#include <sys/time.h>
+#include "../graph_parser/parse.h"
+#include "../graph_parser/util.h"
+#include "kernel.h"
+
+#ifdef GEM5_FUSION
+#include <stdint.h>
+#include <gem5/m5ops.h>
+#endif
+
+// Iteration count
+#define ITER 20
+
+void print_vectorf(float *vector, int num);
+
+int main(int argc, char **argv)
+{
+ char *tmpchar;
+
+ int num_nodes;
+ int num_edges;
+ int file_format = 1;
+ bool directed = 0;
+
+ hipError_t err = hipSuccess;
+
+ if (argc == 3) {
+ tmpchar = argv[1]; // Graph inputfile
+ file_format = atoi(argv[2]); // File format
+ } else {
+ fprintf(stderr, "You did something wrong!\n");
+ exit(1);
+ }
+
+ // Allocate the csr structure
+ csr_array *csr;
+
+ // Parse graph files into csr structure
+ if (file_format == 1) {
+ // Metis
+ csr = parseMetis(tmpchar, &num_nodes, &num_edges, directed);
+ } else if (file_format == 0) {
+ // Dimacs9
+ csr = parseCOO(tmpchar, &num_nodes, &num_edges, 1);
+ } else if (file_format == 2) {
+ // Matrix market
+ csr = parseMM(tmpchar, &num_nodes, &num_edges, directed, 0);
+ } else {
+ printf("reserve for future");
+ exit(1);
+ }
+
+ // Allocate rank_array
+ float *rank_array = (float *)malloc(num_nodes * sizeof(float));
+ if (!rank_array) {
+ fprintf(stderr, "rank array not allocated successfully\n");
+ return -1;
+ }
+
+ int *row_d;
+ int *col_d;
+ int *data_d;
+
+ float *pagerank1_d;
+ float *pagerank2_d;
+
+ // Create device-side buffers for the graph
+ err = hipMalloc(&row_d, num_nodes * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc row_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+ err = hipMalloc(&col_d, num_edges * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc col_d (size:%d) => %s\n", num_edges, hipGetErrorString(err));
+ return -1;
+ }
+ err = hipMalloc(&data_d, num_edges * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc data_d (size:%d) => %s\n", num_edges, hipGetErrorString(err));
+ return -1;
+ }
+
+ // Create buffers for pagerank
+ err = hipMalloc(&pagerank1_d, num_nodes * sizeof(float));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc pagerank1_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+ err = hipMalloc(&pagerank2_d, num_nodes * sizeof(float));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc pagerank2_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+
+// double timer1 = gettime();
+
+#ifdef GEM5_FUSION
+ m5_work_begin(0, 0);
+#endif
+
+ // Copy the data to the device-side buffers
+ err = hipMemcpy(row_d, csr->row_array, num_nodes * sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR:#endif hipMemcpy row_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+
+ err = hipMemcpy(col_d, csr->col_array, num_edges * sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMemcpy col_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+
+ // Set up work dimensions
+ int block_size = 256;
+ int num_blocks = (num_nodes + block_size - 1) / block_size;
+
+ dim3 threads(block_size, 1, 1);
+ dim3 grid(num_blocks, 1, 1);
+
+// double timer3 = gettime();
+
+ // Launch the initialization kernel
+ hipLaunchKernelGGL(HIP_KERNEL_NAME(inibuffer), dim3(grid), dim3(threads), 0, 0, row_d, pagerank1_d, pagerank2_d, num_nodes,
+ num_edges);
+ hipDeviceSynchronize();
+ err = hipGetLastError();
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: cudaLaunch failed (%s)\n", hipGetErrorString(err));
+ return -1;
+ }
+
+ // Run PageRank for some iter. TO: convergence determination
+ for (int i = 0; i < ITER; i++) {
+ // Launch pagerank kernel 1
+ hipLaunchKernelGGL(HIP_KERNEL_NAME(pagerank1), dim3(grid), dim3(threads), 0, 0, row_d, col_d, data_d, pagerank1_d,
+ pagerank2_d, num_nodes, num_edges);
+
+ // Launch pagerank kernel 2
+ hipLaunchKernelGGL(HIP_KERNEL_NAME(pagerank2), dim3(grid), dim3(threads), 0, 0, row_d, col_d, data_d, pagerank1_d,
+ pagerank2_d, num_nodes, num_edges);
+ }
+ hipDeviceSynchronize();
+
+// double timer4 = gettime();
+
+ // Copy the rank buffer back
+ err = hipMemcpy(rank_array, pagerank1_d, num_nodes * sizeof(float), hipMemcpyDeviceToHost);
+
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMemcpy() failed (%s)\n", hipGetErrorString(err));
+ return -1;
+ }
+
+#ifdef GEM5_FUSION
+ m5_work_end(0, 0);
+#endif
+
+// double timer2 = gettime();
+
+ // Report timing characteristics
+// printf("kernel time = %lf ms\n", (timer4 - timer3) * 1000);
+// printf("kernel + memcpy time = %lf ms\n", (timer2 - timer1) * 1000);
+
+#if 1
+ // Print rank array
+ print_vectorf(rank_array, num_nodes);
+#endif
+
+ // Free the host-side arrays
+ free(rank_array);
+ csr->freeArrays();
+ free(csr);
+
+ // Free the device buffers
+ hipFree(row_d);
+ hipFree(col_d);
+ hipFree(data_d);
+
+ hipFree(pagerank1_d);
+ hipFree(pagerank2_d);
+
+ return 0;
+
+}
+
+void print_vectorf(float *vector, int num)
+{
+ FILE * fp = fopen("result.out", "w");
+ if (!fp) {
+ printf("ERROR: unable to open result.txt\n");
+ }
+
+ for (int i = 0; i < num; i++) {
+ fprintf(fp, "%f\n", vector[i]);
+ }
+
+ fclose(fp);
+}
+
diff --git a/src/gpu/pannotia/pagerank/pagerank_spmv.cpp b/src/gpu/pannotia/pagerank/pagerank_spmv.cpp
new file mode 100644
index 0000000..4f650c6
--- /dev/null
+++ b/src/gpu/pannotia/pagerank/pagerank_spmv.cpp
@@ -0,0 +1,278 @@
+/************************************************************************************\
+ * *
+ * Copyright � 2014 Advanced Micro Devices, Inc. *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair *
+ * All rights reserved. *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted provided that the following are met: *
+ * *
+ * You must reproduce the above copyright notice. *
+ * *
+ * Neither the name of the copyright holder nor the names of its contributors *
+ * may be used to endorse or promote products derived from this software *
+ * without specific, prior, written permission from at least the copyright holder. *
+ * *
+ * You must include the following terms in your license and/or other materials *
+ * provided with the software. *
+ * *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY *
+ * OF SUCH DAMAGE. *
+ * *
+ * Without limiting the foregoing, the software may implement third party *
+ * technologies for which you must obtain licenses from parties other than AMD. *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall *
+ * be responsible for obtaining the rights to use and/or distribute the applicable *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder. *
+ * *
+ * If you use the software (in whole or in part), you shall adhere to all *
+ * applicable U.S., European, and other export laws, including but not limited to *
+ * the U.S. Export Administration Regulations ("EAR"�) (15 C.F.R Sections 730-774), *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009. Further, pursuant *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a *
+ * license granted by the United States Department of Commerce Bureau of Industry *
+ * and Security or as otherwise permitted pursuant to a License Exception under *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export, *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or *
+ * E:2 any restricted technology, software, or source code you receive hereunder, *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
+ * technology or software, if such foreign produced direct product is subject to *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR). For the most current Country Group *
+ * listings, or for additional information about the EAR or your obligations under *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's *
+ * website at http://www.bis.doc.gov/. *
+ * *
+\************************************************************************************/
+
+#include "hip/hip_runtime.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include "../graph_parser/parse.h"
+#include "../graph_parser/util.h"
+#include "kernel_spmv.h"
+
+#ifdef GEM5_FUSION
+#include <stdint.h>
+#include <gem5/m5ops.h>
+#endif
+
+// Iteration count
+#define ITER 20
+
+void print_vectorf(float *vector, int num);
+
+int main(int argc, char **argv)
+{
+ char *tmpchar;
+
+ int num_nodes;
+ int num_edges;
+ int file_format = 1;
+ bool directed = 0;
+
+ hipError_t err = hipSuccess;
+
+ if (argc == 3) {
+ tmpchar = argv[1]; // Graph inputfile
+ file_format = atoi(argv[2]);
+ } else {
+ fprintf(stderr, "You did something wrong!\n");
+ exit(1);
+ }
+
+ // Allocate the csr structure
+ csr_array *csr;
+
+ // Parse graph files into csr structure
+ if (file_format == 1) {
+ csr = parseMetis_transpose(tmpchar, &num_nodes, &num_edges, directed);
+ } else if (file_format == 0) {
+ csr = parseCOO_transpose(tmpchar, &num_nodes, &num_edges, directed);
+ } else {
+ printf("reserve for future");
+ exit(1);
+ }
+
+ // Allocate rank_arrays
+ float *pagerank_array = (float *)malloc(num_nodes * sizeof(float));
+ if (!pagerank_array) fprintf(stderr, "malloc failed page_rank_array\n");
+ float *pagerank_array2 = (float *)malloc(num_nodes * sizeof(float));
+ if (!pagerank_array2) fprintf(stderr, "malloc failed page_rank_array2\n");
+
+ int *row_d;
+ int *col_d;
+ float *data_d;
+
+ float *pagerank1_d;
+ float *pagerank2_d;
+ int *col_cnt_d;
+
+ // Create device-side buffers for the graph
+ err = hipMalloc(&row_d, (num_nodes + 1) * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc row_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+ err = hipMalloc(&col_d, num_edges * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc col_d (size:%d) => %s\n", num_edges, hipGetErrorString(err));
+ return -1;
+ }
+ err = hipMalloc(&data_d, num_edges * sizeof(float));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc data_d (size:%d) => %s\n", num_edges, hipGetErrorString(err));
+ return -1;
+ }
+
+ // Create buffers for pagerank
+ err = hipMalloc(&pagerank1_d, num_nodes * sizeof(float));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc pagerank1_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+ err = hipMalloc(&pagerank2_d, num_nodes * sizeof(float));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc pagerank2_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+ err = hipMalloc(&col_cnt_d, num_nodes * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc col_cnt_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+
+ double timer1 = gettime();
+
+#ifdef GEM5_FUSION
+ m5_work_begin(0, 0);
+#endif
+
+ // Copy the data to the device-side buffers
+ err = hipMemcpy(row_d, csr->row_array, (num_nodes + 1) * sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR:#endif hipMemcpy row_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+
+ err = hipMemcpy(col_d, csr->col_array, num_edges * sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMemcpy col_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+
+ err = hipMemcpy(col_cnt_d, csr->col_cnt, num_nodes * sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMemcpy col_cnt_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+
+ // Set up work dimensions
+ int block_size = 64;
+ int num_blocks = (num_nodes + block_size - 1) / block_size;
+
+ dim3 threads(block_size, 1, 1);
+ dim3 grid(num_blocks, 1, 1);
+
+ double timer3 = gettime();
+
+ // Launch the initialization kernel
+ hipLaunchKernelGGL(inibuffer, dim3(grid), dim3(threads), 0, 0, pagerank1_d, pagerank2_d, num_nodes);
+ hipDeviceSynchronize();
+ err = hipGetLastError();
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipLaunchByPtr failed (%s)\n", hipGetErrorString(err));
+ return -1;
+ }
+
+ // Initialize the CSR
+ hipLaunchKernelGGL(inicsr, dim3(grid), dim3(threads), 0, 0, row_d, col_d, data_d, col_cnt_d, num_nodes,
+ num_edges);
+ hipDeviceSynchronize();
+ err = hipGetLastError();
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipLaunchByPtr failed (%s)\n", hipGetErrorString(err));
+ return -1;
+ }
+
+ // Run PageRank for some iter. TO: convergence determination
+ for (int i = 0; i < ITER; i++) {
+ // Launch pagerank kernel 1
+ hipLaunchKernelGGL(spmv_csr_scalar_kernel, dim3(grid), dim3(threads), 0, 0, num_nodes, row_d, col_d,
+ data_d, pagerank1_d,
+ pagerank2_d);
+
+ // Launch pagerank kernel 2
+ hipLaunchKernelGGL(pagerank2, dim3(grid), dim3(threads), 0, 0, pagerank1_d, pagerank2_d, num_nodes);
+ }
+ hipDeviceSynchronize();
+
+ double timer4 = gettime();
+
+ // Copy the rank buffer back
+ err = hipMemcpy(pagerank_array, pagerank1_d, num_nodes * sizeof(float), hipMemcpyDeviceToHost);
+
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMemcpy() failed (%s)\n", hipGetErrorString(err));
+ return -1;
+ }
+
+#ifdef GEM5_FUSION
+ m5_work_end(0, 0);
+#endif
+
+ double timer2 = gettime();
+
+ // Report timing characteristics
+ printf("kernel time = %lf ms\n", (timer4 - timer3) * 1000);
+ printf("kernel + memcpy time = %lf ms\n", (timer2 - timer1) * 1000);
+
+#if 1
+ // Print rank array
+ print_vectorf(pagerank_array, num_nodes);
+#endif
+
+ // Free the host-side arrays
+ free(pagerank_array);
+ free(pagerank_array2);
+ csr->freeArrays();
+ free(csr);
+
+ // Free the device buffers
+ hipFree(row_d);
+ hipFree(col_d);
+ hipFree(data_d);
+
+ hipFree(pagerank1_d);
+ hipFree(pagerank2_d);
+
+ return 0;
+}
+
+void print_vectorf(float *vector, int num)
+{
+ FILE * fp = fopen("result.out", "w");
+ if (!fp) {
+ printf("ERROR: unable to open result.txt\n");
+ }
+
+ for (int i = 0; i < num; i++) {
+ fprintf(fp, "%f\n", vector[i]);
+ }
+
+ fclose(fp);
+}
+
diff --git a/src/gpu/pannotia/sssp/Makefile b/src/gpu/pannotia/sssp/Makefile
new file mode 100644
index 0000000..6158bac
--- /dev/null
+++ b/src/gpu/pannotia/sssp/Makefile
@@ -0,0 +1,11 @@
+default:
+ make -f Makefile.default
+
+clean:
+ make -f Makefile.default clean
+
+gem5-fusion:
+ make -f Makefile.gem5-fusion
+
+clean-gem5-fusion:
+ make -f Makefile.gem5-fusion clean
diff --git a/src/gpu/pannotia/sssp/Makefile.default b/src/gpu/pannotia/sssp/Makefile.default
new file mode 100644
index 0000000..a0dfab1
--- /dev/null
+++ b/src/gpu/pannotia/sssp/Makefile.default
@@ -0,0 +1,29 @@
+HIP_PATH ?= /opt/rocm/hip
+HIPCC = $(HIP_PATH)/bin/hipcc
+
+BASEEXE = sssp
+VARIANT ?= CSR
+ifeq ($(VARIANT),CSR)
+ EXECUTABLE = $(BASEEXE)
+ CPPFILES += sssp_csr.cpp
+else ifeq ($(VARIANT),ELL)
+ EXECUTABLE = $(BASEEXE)_ell
+ CPPFILES += sssp_ell.cpp
+endif
+
+OPTS = -O3
+
+BIN_DIR ?= ./bin
+
+all: $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR)/$(EXECUTABLE): $(CPPFILES) ../graph_parser/parse.cpp ../graph_parser/util.cpp $(BIN_DIR)
+ $(HIPCC) -O3 --amdgpu-target=gfx801,gfx803,gfx906 $(CXXFLAGS) ../graph_parser/parse.cpp ../graph_parser/util.cpp $(CPPFILES) -o $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR):
+ mkdir -p $(BIN_DIR)
+
+clean:
+ rm -rf $(BIN_DIR)
+
+.PHONY: square clean
diff --git a/src/gpu/pannotia/sssp/Makefile.gem5-fusion b/src/gpu/pannotia/sssp/Makefile.gem5-fusion
new file mode 100644
index 0000000..cb58ff0
--- /dev/null
+++ b/src/gpu/pannotia/sssp/Makefile.gem5-fusion
@@ -0,0 +1,35 @@
+HIP_PATH ?= /opt/rocm/hip
+HIPCC = $(HIP_PATH)/bin/hipcc
+
+# these are needed for m5ops
+# TODO: Need some sort of explicit PATH? Read in?
+GEM5_PATH ?= /nobackup/sinclair/gem5
+CFLAGS += -I$(GEM5_PATH)/include -I../graph_parser
+LDFLAGS += -L$(GEM5_PATH)/util/m5/build/x86/out -lm5
+
+BASEEXE = sssp
+VARIANT ?= CSR
+ifeq ($(VARIANT),CSR)
+ EXECUTABLE = $(BASEEXE).gem5
+ CPPFILES += sssp_csr.cpp
+else ifeq ($(VARIANT),ELL)
+ EXECUTABLE = $(BASEEXE)_ell.gem5
+ CPPFILES += sssp_ell.cpp
+endif
+
+OPTS = -O3
+
+BIN_DIR ?= ./bin
+
+all: $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR)/$(EXECUTABLE): $(CPPFILES) ../graph_parser/parse.cpp ../graph_parser/util.cpp $(BIN_DIR)
+ $(HIPCC) -O3 --amdgpu-target=gfx801,gfx803 $(CXXFLAGS) ../graph_parser/parse.cpp ../graph_parser/util.cpp $(CPPFILES) -DGEM5_FUSION -o $(BIN_DIR)/$(EXECUTABLE) $(CFLAGS) $(LDFLAGS)
+
+$(BIN_DIR):
+ mkdir -p $(BIN_DIR)
+
+clean:
+ rm -rf $(BIN_DIR)
+
+.PHONY: square clean
diff --git a/src/gpu/pannotia/sssp/README.md b/src/gpu/pannotia/sssp/README.md
new file mode 100644
index 0000000..43cfe0a
--- /dev/null
+++ b/src/gpu/pannotia/sssp/README.md
@@ -0,0 +1,66 @@
+---
+title: Pannotia SSSP Test
+tags:
+ - x86
+ - amdgpu
+layout: default
+permalink: resources/pannotia/sssp
+shortdoc: >
+ Resources to build a disk image with the GCN3 Pannotia SSSP workload.
+---
+
+Single-Source Shortest Path (sssp) is a graph analytics application that is part of the Pannotia benchmark suite. It is designed to calculate the shortest paths between the source vertex and all the other vertices in a graph. The provided version is for use with the gpu-compute model of gem5. Thus, it has been ported from the prior CUDA and OpenCL variants to HIP, and validated on a Vega-class AMD GPU.
+
+Compiling both SSSP variants, compiling the GCN3_X86/Vega_X86 versions of gem5, and running both SSSP variants on gem5 is dependent on the gcn-gpu docker image, `util/dockerfiles/gcn-gpu/Dockerfile` on the [gem5 stable branch](https://gem5.googlesource.com/public/gem5/+/refs/heads/stable).
+
+## Compilation and Running
+
+SSSP has two variants: csr and ell. To compile the "csr" variant:
+
+```
+cd src/gpu/pannotia/sssp
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu make gem5-fusion
+```
+
+To compile the "ell" variant:
+
+```
+cd src/gpu/pannotia/sssp
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu bash -c "export VARIANT=ELL ; make gem5-fusion"
+```
+
+If you use the Makefile.default file instead, the Makefile will generate code designed to run on the real GPU instead. Moreover, note that Makefile.gem5-fusion requires you to set the GEM5_ROOT variable (either on the command line or by modifying the Makefile), because the Pannotia applications have been updated to use [m5ops](https://www.gem5.org/documentation/general_docs/m5ops/). By default, for both variants the Makefile builds for gfx801 and gfx803, and the binaries are placed in the src/gpu/pannotia/sssp/bin folder. Moreover, by default the VARIANT variable SSSP's Makefile assumes the csr variant is being used, hence why this variable does not need to be set for compiling it.
+
+## Compiling GCN3_X86/gem5.opt
+
+SSSP is a GPU application, which requires that gem5 is built with the GCN3_X86 (or Vega_X86, although this has been less heavily tested) architecture. The test is run with the GCN3_X86 gem5 variant, compiled using the gcn-gpu docker image:
+
+```
+git clone https://gem5.googlesource.com/public/gem5
+cd gem5
+docker run -u $UID:$GID --volume $(pwd):$(pwd) -w $(pwd) gcr.io/gem5-test/gcn-gpu:latest scons build/GCN3_X86/gem5.opt -j <num cores>
+```
+
+## Running SSSP on GCN3_X86/gem5.opt
+
+The following command shows how to run the SSSP csr version:
+
+# Assuming gem5 and gem5-resources are in your working directory
+```
+wget http://dist.gem5.org/dist/develop/datasets/pannotia/bc/1k_128k.gr
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu gem5/build/GCN3_X86/gem5.opt gem5/configs/example/apu_se.py -n3 --mem-size=8GB --benchmark-root=gem5-resources/src/gpu/pannotia/sssp/bin -c sssp_csr.gem5 --options="1k_128k.gr 0"
+```
+
+To run the SSSP ell version:
+
+# Assuming gem5, pannotia (input graphs, see below), and gem5-resources are in your working directory
+```
+wget http://dist.gem5.org/dist/develop/datasets/pannotia/bc/1k_128k.gr
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu gem5/build/GCN3_X86/gem5.opt gem5/configs/example/apu_se.py -n3 --mem-size=8GB --benchmark-root=gem5-resources/src/gpu/pannotia/sssp/bin -c sssp_ell.gem5 --options="1k_128k.gr 0"
+```
+
+Note that the datasets from the original Pannotia suite have been uploaded to: <http://dist.gem5.org/dist/develop/datasets/pannotia>. We recommend you start with the 1k_128k.gr input (<http://dist.gem5.org/dist/develop/datasets/pannotia/bc/1k_128k.gr>), as this is the smallest input that can be run with SSSP. Note that 1k_128k is not designed for SSSP specifically though -- the above link has larger graphs designed to run with SSSP that you should consider using for larger experiments.
+
+## Pre-built binary
+
+A pre-built binary will be added soon.
diff --git a/src/gpu/pannotia/sssp/kernel.h b/src/gpu/pannotia/sssp/kernel.h
new file mode 100644
index 0000000..e56140e
--- /dev/null
+++ b/src/gpu/pannotia/sssp/kernel.h
@@ -0,0 +1,190 @@
+/************************************************************************************\
+ * *
+ * Copyright � 2014 Advanced Micro Devices, Inc. *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair *
+ * All rights reserved. *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted provided that the following are met: *
+ * *
+ * You must reproduce the above copyright notice. *
+ * *
+ * Neither the name of the copyright holder nor the names of its contributors *
+ * may be used to endorse or promote products derived from this software *
+ * without specific, prior, written permission from at least the copyright holder. *
+ * *
+ * You must include the following terms in your license and/or other materials *
+ * provided with the software. *
+ * *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY *
+ * OF SUCH DAMAGE. *
+ * *
+ * Without limiting the foregoing, the software may implement third party *
+ * technologies for which you must obtain licenses from parties other than AMD. *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall *
+ * be responsible for obtaining the rights to use and/or distribute the applicable *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder. *
+ * *
+ * If you use the software (in whole or in part), you shall adhere to all *
+ * applicable U.S., European, and other export laws, including but not limited to *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774), *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009. Further, pursuant *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a *
+ * license granted by the United States Department of Commerce Bureau of Industry *
+ * and Security or as otherwise permitted pursuant to a License Exception under *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export, *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or *
+ * E:2 any restricted technology, software, or source code you receive hereunder, *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
+ * technology or software, if such foreign produced direct product is subject to *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR). For the most current Country Group *
+ * listings, or for additional information about the EAR or your obligations under *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's *
+ * website at http://www.bis.doc.gov/. *
+ * *
+\************************************************************************************/
+
+#ifndef KERNEL_H
+#define KERNEL_H
+
+#include "hip/hip_runtime.h"
+#define BIG_NUM 99999999
+
+/**
+ * @brief min.+
+ * @param num_nodes Number of vertices
+ * @param row CSR pointer array
+ * @param col CSR column array
+ * @param data Weight array
+ * @param x Input vector
+ * @param y Output vector
+ */
+__global__ void
+spmv_min_dot_plus_kernel(const int num_rows, int *row, int *col, int *data,
+ int *x, int *y)
+{
+ // Get my workitem id
+ int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+
+ if (tid < num_rows) {
+ // Get the start and end pointers
+ int row_start = row[tid];
+ int row_end = row[tid + 1];
+
+ // Perform + for each pair of elements and a reduction with min
+ int min = x[tid];
+ for (int i = row_start; i < row_end; i++) {
+ if (data[i] + x[col[i]] < min) {
+ min = data[i] + x[col[i]];
+ }
+ }
+ y[tid] = min;
+ }
+}
+
+/**
+ * @brief min.+
+ * @param num_nodes number of vertices
+ * @param height the height of the adjacency matrix (col-major)
+ * @param col the col array
+ * @param data the data array
+ * @param x the input vector
+ * @param y the output vector
+ */
+__global__ void
+ell_min_dot_plus_kernel(const int num_nodes, const int height, int *col,
+ int *data, int *x, int *y)
+{
+ // Get workitem id
+ int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+
+ if (tid < num_nodes) {
+ int mat_offset = tid;
+ int min = x[tid];
+
+ // The vertices process a row of matrix (col-major)
+ for (int i = 0; i < height; i++) {
+ int mat_elem = data[mat_offset];
+ int vec_elem = x[col[mat_offset]];
+ if (mat_elem + vec_elem < min) {
+ min = mat_elem + vec_elem;
+ }
+ mat_offset += num_nodes;
+ }
+ y[tid] = min;
+ }
+}
+
+/**
+ * @brief vector_init
+ * @param vector1 vector1
+ * @param vector2 vector2
+ * @param i source vertex id
+ * @param num_nodes number of vertices
+ */
+__global__ void
+vector_init(int *vector1, int *vector2, const int i, const int num_nodes)
+{
+ int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+
+ if (tid < num_nodes) {
+ if (tid == i) {
+ // If it is the source vertex
+ vector1[tid] = 0;
+ vector2[tid] = 0;
+ } else {
+ // If it a non-source vertex
+ vector1[tid] = BIG_NUM;
+ vector2[tid] = BIG_NUM;
+ }
+ }
+}
+
+/**
+ * @brief vector_assign
+ * @param vector1 vector1
+ * @param vector2 vector2
+ * @param num_nodes number of vertices
+ */
+__global__ void
+vector_assign(int *vector1, int *vector2, const int num_nodes)
+{
+ int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+
+ if (tid < num_nodes) {
+ vector1[tid] = vector2[tid];
+ }
+}
+
+/**
+ * @brief vector_diff
+ * @param vector1 vector1
+ * @param vector2 vector2
+ * @param stop termination variable
+ * @param num_nodes number of vertices
+ */
+__global__ void
+vector_diff(int *vector1, int *vector2, int *stop, const int num_nodes)
+{
+ int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+
+ if (tid < num_nodes) {
+ if (vector2[tid] != vector1[tid]) {
+ *stop = 1;
+ }
+ }
+}
+
+#endif // KERNEL_H
diff --git a/src/gpu/pannotia/sssp/sssp_csr.cpp b/src/gpu/pannotia/sssp/sssp_csr.cpp
new file mode 100644
index 0000000..f971d17
--- /dev/null
+++ b/src/gpu/pannotia/sssp/sssp_csr.cpp
@@ -0,0 +1,300 @@
+/************************************************************************************\
+ * *
+ * Copyright � 2014 Advanced Micro Devices, Inc. *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair *
+ * All rights reserved. *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted provided that the following are met: *
+ * *
+ * You must reproduce the above copyright notice. *
+ * *
+ * Neither the name of the copyright holder nor the names of its contributors *
+ * may be used to endorse or promote products derived from this software *
+ * without specific, prior, written permission from at least the copyright holder. *
+ * *
+ * You must include the following terms in your license and/or other materials *
+ * provided with the software. *
+ * *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY *
+ * OF SUCH DAMAGE. *
+ * *
+ * Without limiting the foregoing, the software may implement third party *
+ * technologies for which you must obtain licenses from parties other than AMD. *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall *
+ * be responsible for obtaining the rights to use and/or distribute the applicable *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder. *
+ * *
+ * If you use the software (in whole or in part), you shall adhere to all *
+ * applicable U.S., European, and other export laws, including but not limited to *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774), *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009. Further, pursuant *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a *
+ * license granted by the United States Department of Commerce Bureau of Industry *
+ * and Security or as otherwise permitted pursuant to a License Exception under *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export, *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or *
+ * E:2 any restricted technology, software, or source code you receive hereunder, *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
+ * technology or software, if such foreign produced direct product is subject to *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR). For the most current Country Group *
+ * listings, or for additional information about the EAR or your obligations under *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's *
+ * website at http://www.bis.doc.gov/. *
+ * *
+\************************************************************************************/
+
+#include "hip/hip_runtime.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+//#include <sys/time.h>
+#include <algorithm>
+#include "../graph_parser/parse.h"
+#include "../graph_parser/util.h"
+#include "kernel.h"
+
+#ifdef GEM5_FUSION
+#include <stdint.h>
+#include <gem5/m5ops.h>
+#endif
+
+void print_vector(int *vector, int num);
+
+int main(int argc, char **argv)
+{
+ char *tmpchar;
+ bool directed = 1;
+
+ int num_nodes;
+ int num_edges;
+ int file_format = 1;
+
+ hipError_t err = hipSuccess;
+
+ if (argc == 3) {
+ tmpchar = argv[1]; // Graph inputfile
+ file_format = atoi(argv[2]);
+ } else {
+ fprintf(stderr, "You did something wrong!\n");
+ exit(1);
+ }
+
+ // Allocate the csr structure
+ csr_array *csr;
+
+ // Parse the graph and store it into the CSR structure
+ if (file_format == 1) {
+ csr = parseMetis_transpose(tmpchar, &num_nodes, &num_edges, directed);
+ } else if (file_format == 0) {
+ csr = parseCOO_transpose(tmpchar, &num_nodes, &num_edges, directed);
+ } else {
+ printf("reserve for future");
+ exit(1);
+ }
+
+ // Allocate the cost array
+ int *cost_array = (int *)malloc(num_nodes * sizeof(int));
+ if (!cost_array) fprintf(stderr, "malloc failed cost_array\n");
+
+ // Set the cost array to zero
+ for (int i = 0; i < num_nodes; i++) {
+ cost_array[i] = 0;
+ }
+
+ // Create device-side buffers
+ int *row_d;
+ int *col_d;
+ int *data_d;
+ int *vector_d1;
+ int *vector_d2;
+ int *stop_d;
+
+ // Create the device-side graph structure
+ err = hipMalloc(&row_d, (num_nodes + 1) * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc row_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+ err = hipMalloc(&col_d, num_edges * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc col_d (size:%d) => %s\n", num_edges, hipGetErrorString(err));
+ return -1;
+ }
+ err = hipMalloc(&data_d, num_edges * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc data_d (size:%d) => %s\n", num_edges, hipGetErrorString(err));
+ return -1;
+ }
+
+ // Termination variable
+ err = hipMalloc(&stop_d, sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc stop_d (size:%d) => %s\n", 1, hipGetErrorString(err));
+ return -1;
+ }
+
+ // Create the device-side buffers for sssp
+ err = hipMalloc(&vector_d1, num_nodes * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc vector_d1 (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+ err = hipMalloc(&vector_d2, num_nodes * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc vector_d2 (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+
+ //double timer1 = gettime();
+
+#ifdef GEM5_FUSION
+ m5_work_begin(0, 0);
+#endif
+
+ // Copy data to device side buffers
+ err = hipMemcpy(row_d, csr->row_array, (num_nodes + 1) * sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMemcpy row_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+
+ err = hipMemcpy(col_d, csr->col_array, num_edges * sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMemcpy col_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+
+ err = hipMemcpy(data_d, csr->data_array, num_edges * sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMemcpy data_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+
+ //double timer3 = gettime();
+
+ // Work dimensions
+ int block_size = 64;
+ int num_blocks = (num_nodes + block_size - 1) / block_size;
+
+ dim3 threads(block_size, 1, 1);
+ dim3 grid(num_blocks, 1, 1);
+
+ // Source vertex 0
+ int sourceVertex = 0;
+
+ // Launch the initialization kernel
+ hipLaunchKernelGGL(HIP_KERNEL_NAME(vector_init), dim3(grid), dim3(threads), 0, 0, vector_d1, vector_d2, sourceVertex, num_nodes);
+ hipDeviceSynchronize();
+ err = hipGetLastError();
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: vector_init failed (%s)\n", hipGetErrorString(err));
+ return -1;
+ }
+
+ int stop = 1;
+ int cnt = 0;
+ // Main computation loop
+ for (int i = 1; i < num_nodes; i++) {
+ // Reset the termination variable
+ stop = 0;
+
+ // Copy the termination variable to the device
+ err = hipMemcpy(stop_d, &stop, sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: write stop_d (%s)\n", hipGetErrorString(err));
+ return -1;
+ }
+
+ // Launch the assignment kernel
+ hipLaunchKernelGGL(HIP_KERNEL_NAME(vector_assign), dim3(grid), dim3(threads), 0, 0, vector_d1, vector_d2, num_nodes);
+
+ // Launch the min.+ kernel
+ hipLaunchKernelGGL(HIP_KERNEL_NAME(spmv_min_dot_plus_kernel), dim3(grid), dim3(threads), 0, 0, num_nodes, row_d, col_d,
+ data_d, vector_d1,
+ vector_d2);
+
+ // Launch the check kernel
+ hipLaunchKernelGGL(HIP_KERNEL_NAME(vector_diff), dim3(grid), dim3(threads), 0, 0, vector_d1, vector_d2,
+ stop_d, num_nodes);
+
+ // Read the termination variable back
+ err = hipMemcpy(&stop, stop_d, sizeof(int), hipMemcpyDeviceToHost);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: read stop_d (%s)\n", hipGetErrorString(err));
+ return -1;
+ }
+
+ // Exit the loop
+ if (stop == 0) {
+ break;
+ }
+ cnt++;
+ }
+ hipDeviceSynchronize();
+ //double timer4 = gettime();
+
+ // Read the cost_array back
+ err = hipMemcpy(cost_array, vector_d1, num_nodes * sizeof(int), hipMemcpyDeviceToHost);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: read vector_d1 (%s)\n", hipGetErrorString(err));
+ return -1;
+ }
+
+#ifdef GEM5_FUSION
+ m5_work_end(0, 0);
+#endif
+
+ //double timer2 = gettime();
+
+ // Print the timing statistics
+ //printf("kernel + memcpy time = %lf ms\n", (timer2 - timer1) * 1000);
+ //printf("kernel time = %lf ms\n", (timer4 - timer3) * 1000);
+ printf("number iterations = %d\n", cnt);
+
+#if 1
+ // Print cost_array
+ print_vector(cost_array, num_nodes);
+#endif
+
+ // Clean up the host arrays
+ free(cost_array);
+ csr->freeArrays();
+ free(csr);
+
+ // Clean up the device-side buffers
+ hipFree(row_d);
+ hipFree(col_d);
+ hipFree(data_d);
+ hipFree(stop_d);
+ hipFree(vector_d1);
+ hipFree(vector_d2);
+
+ return 0;
+}
+
+void print_vector(int *vector, int num)
+{
+
+ FILE * fp = fopen("result.out", "w");
+ if (!fp) {
+ printf("ERROR: unable to open result.txt\n");
+ }
+
+ for (int i = 0; i < num; i++)
+ fprintf(fp, "%d: %d\n", i + 1, vector[i]);
+
+ fclose(fp);
+}
diff --git a/src/gpu/pannotia/sssp/sssp_ell.cpp b/src/gpu/pannotia/sssp/sssp_ell.cpp
new file mode 100644
index 0000000..a621b17
--- /dev/null
+++ b/src/gpu/pannotia/sssp/sssp_ell.cpp
@@ -0,0 +1,300 @@
+/************************************************************************************\
+ * *
+ * Copyright � 2014 Advanced Micro Devices, Inc. *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair *
+ * All rights reserved. *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted provided that the following are met: *
+ * *
+ * You must reproduce the above copyright notice. *
+ * *
+ * Neither the name of the copyright holder nor the names of its contributors *
+ * may be used to endorse or promote products derived from this software *
+ * without specific, prior, written permission from at least the copyright holder. *
+ * *
+ * You must include the following terms in your license and/or other materials *
+ * provided with the software. *
+ * *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY *
+ * OF SUCH DAMAGE. *
+ * *
+ * Without limiting the foregoing, the software may implement third party *
+ * technologies for which you must obtain licenses from parties other than AMD. *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall *
+ * be responsible for obtaining the rights to use and/or distribute the applicable *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder. *
+ * *
+ * If you use the software (in whole or in part), you shall adhere to all *
+ * applicable U.S., European, and other export laws, including but not limited to *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774), *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009. Further, pursuant *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a *
+ * license granted by the United States Department of Commerce Bureau of Industry *
+ * and Security or as otherwise permitted pursuant to a License Exception under *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export, *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or *
+ * E:2 any restricted technology, software, or source code you receive hereunder, *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
+ * technology or software, if such foreign produced direct product is subject to *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR). For the most current Country Group *
+ * listings, or for additional information about the EAR or your obligations under *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's *
+ * website at http://www.bis.doc.gov/. *
+ * *
+\************************************************************************************/
+
+#include "hip/hip_runtime.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <algorithm>
+#include "../graph_parser/parse.h"
+#include "../graph_parser/util.h"
+#include "kernel.h"
+
+#ifdef GEM5_FUSION
+#include <stdint.h>
+extern "C" {
+void m5_work_begin(uint64_t workid, uint64_t threadid);
+void m5_work_end(uint64_t workid, uint64_t threadid);
+}
+#endif
+
+#define BIGNUM 99999999
+
+void print_vector(int *vector, int num);
+
+int main(int argc, char **argv)
+{
+ char *tmpchar;
+ bool directed = 1;
+
+ int num_nodes;
+ int num_edges;
+ int file_format = 1;
+
+ hipError_t err = hipSuccess;
+
+ if (argc == 3) {
+ tmpchar = argv[1]; // Graph inputfile
+ file_format = atoi(argv[2]);
+ } else {
+ fprintf(stderr, "You did something wrong!\n");
+ exit(1);
+ }
+
+ // Allocate the csr structure
+ csr_array *csr;
+
+ // Parse the graph and store it into the CSR structure
+ if (file_format == 1) {
+ csr = parseMetis_transpose(tmpchar, &num_nodes, &num_edges, directed);
+ } else if (file_format == 0) {
+ csr = parseCOO_transpose(tmpchar, &num_nodes, &num_edges, directed);
+ } else {
+ printf("reserve for future");
+ exit(1);
+ }
+
+ // Allocate ell and transform from csr
+ ell_array *ell = csr2ell(csr, num_nodes, num_edges, BIGNUM);
+ int height = ell->max_height;
+
+ // Allocate the cost array
+ int *cost_array = (int *)malloc(num_nodes * sizeof(int));
+ if (!cost_array) fprintf(stderr, "malloc failed cost_array\n");
+
+ // Set the cost array to zero
+ for (int i = 0; i < num_nodes; i++) {
+ cost_array[i] = 0;
+ }
+
+ // Create device-side buffers
+ int *ell_col_d;
+ int *ell_data_d;
+ int *vector_d1;
+ int *vector_d2;
+ int *stop_d;
+
+ // Create the device-side graph structure
+ err = hipMalloc(&ell_col_d, height * num_nodes * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc ell_col_d (size:%d) => %s\n", height * num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+ err = hipMalloc(&ell_data_d, height * num_nodes * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc ell_data_d (size:%d) => %s\n", height * num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+
+ // Termination variable
+ err = hipMalloc(&stop_d, sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc stop_d (size:%d) => %s\n", 1, hipGetErrorString(err));
+ return -1;
+ }
+
+ // Create the device-side buffers for sssp
+ err = hipMalloc(&vector_d1, num_nodes * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc vector_d1 (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+ err = hipMalloc(&vector_d2, num_nodes * sizeof(int));
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMalloc vector_d2 (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+
+ double timer1 = gettime();
+
+#ifdef GEM5_FUSION
+ m5_work_begin(0, 0);
+#endif
+
+ // Copy data to device side buffers
+ err = hipMemcpy(ell_col_d, ell->col_array, height * num_nodes * sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMemcpy ell_col_d (size:%d) => %s\n", height * num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+
+ err = hipMemcpy(ell_data_d, ell->data_array, height * num_nodes * sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: hipMemcpy ell_data_d (size:%d) => %s\n", height * num_nodes, hipGetErrorString(err));
+ return -1;
+ }
+
+ double timer3 = gettime();
+
+ // Work dimensions
+ int block_size = 64;
+ int num_blocks = (num_nodes + block_size - 1) / block_size;
+
+ dim3 threads(block_size, 1, 1);
+ dim3 grid(num_blocks, 1, 1);
+
+ // Source vertex 0
+ int sourceVertex = 0;
+
+ // Launch the initialization kernel
+ hipLaunchKernelGGL(vector_init, dim3(grid), dim3(threads), 0, 0, vector_d1, vector_d2, sourceVertex, num_nodes);
+ hipDeviceSynchronize();
+ err = hipGetLastError();
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: vector_init failed (%s)\n", hipGetErrorString(err));
+ return -1;
+ }
+
+ int stop = 1;
+ int cnt = 0;
+ // Main computation loop
+ for (int i = 1; i < num_nodes; i++) {
+ // Reset the termination variable
+ stop = 0;
+
+ // Copy the termination variable to the device
+ err = hipMemcpy(stop_d, &stop, sizeof(int), hipMemcpyHostToDevice);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: write stop_d (%s)\n", hipGetErrorString(err));
+ return -1;
+ }
+
+ // Launch the assignment kernel
+ hipLaunchKernelGGL(vector_assign, dim3(grid), dim3(threads), 0, 0, vector_d1, vector_d2, num_nodes);
+
+ // Launch the min.+ kernel
+ hipLaunchKernelGGL(ell_min_dot_plus_kernel, dim3(grid), dim3(threads), 0, 0, num_nodes, height,
+ ell_col_d, ell_data_d,
+ vector_d1, vector_d2);
+
+ // Launch the check kernel
+ hipLaunchKernelGGL(vector_diff, dim3(grid), dim3(threads), 0, 0, vector_d1, vector_d2,
+ stop_d, num_nodes);
+
+ // Read the termination variable back
+ err = hipMemcpy(&stop, stop_d, sizeof(int), hipMemcpyDeviceToHost);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: read stop_d (%s)\n", hipGetErrorString(err));
+ return -1;
+ }
+
+ // Exit the loop
+ if (stop == 0) {
+ break;
+ }
+ cnt++;
+ }
+ hipDeviceSynchronize();
+ double timer4 = gettime();
+
+ // Read the cost_array back
+ err = hipMemcpy(cost_array, vector_d1, num_nodes * sizeof(int), hipMemcpyDeviceToHost);
+ if (err != hipSuccess) {
+ fprintf(stderr, "ERROR: read vector_d1 (%s)\n", hipGetErrorString(err));
+ return -1;
+ }
+
+#ifdef GEM5_FUSION
+ m5_work_end(0, 0);
+#endif
+
+ double timer2 = gettime();
+
+ // Print the timing statistics
+ printf("kernel + memcpy time = %lf ms\n", (timer2 - timer1) * 1000);
+ printf("kernel time = %lf ms\n", (timer4 - timer3) * 1000);
+ printf("number iterations = %d\n", cnt);
+
+#if 1
+ // Print cost_array
+ print_vector(cost_array, num_nodes);
+#endif
+
+ // Clean up the host arrays
+ free(cost_array);
+ csr->freeArrays();
+ free(csr);
+
+ free(ell->col_array);
+ free(ell->data_array);
+ free(ell);
+
+ // Clean up the device-side buffers
+ hipFree(ell_col_d);
+ hipFree(ell_data_d);
+ hipFree(stop_d);
+ hipFree(vector_d1);
+ hipFree(vector_d2);
+
+ return 0;
+}
+
+void print_vector(int *vector, int num)
+{
+
+ FILE * fp = fopen("result.out", "w");
+ if (!fp) {
+ printf("ERROR: unable to open result.txt\n");
+ }
+
+ for (int i = 0; i < num; i++)
+ fprintf(fp, "%d: %d\n", i + 1, vector[i]);
+
+ fclose(fp);
+}