resources: add Pannotia to gem5-resources This commit adds hipify'd versions of Pannotia to gem5-resouces. All applications were tested in gem5 and on a Vega-class GPU to ensure correctness. The original porting work was done by Gaurav Jain, I just updated and tested them. Note that Pannotia, from prior work by Joel Hestness, already had support for m5ops. I also updated this support to work with the current support for m5ops. All benchmark's Makefiles assuming GEM5_ROOT is either already specificed as an environment variable or must be set properly in the Makefile for m5ops to work correctly. Change-Id: I562951130e840dc44b40270c57d8948a1cde459b Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5-resources/+/51508 Reviewed-by: Matthew Poremba <matthew.poremba@amd.com> Reviewed-by: Bobby R. Bruce <bbruce@ucdavis.edu> Maintainer: Bobby R. Bruce <bbruce@ucdavis.edu> Tested-by: Bobby R. Bruce <bbruce@ucdavis.edu>

commit: 9ace72a49010a45c3ca2ddc80006a96b65cfe0e1 [log] [tgz]
author: Matt Sinclair <mattdsinclair@gmail.com> Tue Oct 12 21:42:59 2021 -0500
committer: Matt Sinclair <mattdsinclair@gmail.com> Thu Oct 21 18:55:28 2021 +0000
tree: 0793d39aa515edb0476bd0ed000961c59429a8bf
parent: 303e5fe39ffb6d44b5ca394a2db71dff2a035468 [diff]
diff --git a/src/gpu/pannotia/README.md b/src/gpu/pannotia/README.md
new file mode 100644
index 0000000..dca4bed
--- /dev/null
+++ b/src/gpu/pannotia/README.md

@@ -0,0 +1,12 @@
+---
+title: Pannotia Tests
+tags:
+    - x86
+    - amdgpu
+layout: default
+permalink: resources/pannotia/
+shortdoc: >
+    Resources to build a disk image for each of the GCN3 Pannotia workloads.
+---
+
+This folder and its subfolders contain each of the 9 Pannotia benchmarks (there are 6 folders because Color, and PageRank, SSSP each have 2 versions).  All of these benchmarks have been ported from the prior CUDA and OpenCL variants to HIP, and validated on a Vega-class AMD GPU.  See each application's README for details on how to compile and run them in gem5 using the GCN3 GPU model.

diff --git a/src/gpu/pannotia/bc/BC.cpp b/src/gpu/pannotia/bc/BC.cpp
new file mode 100644
index 0000000..df676d9
--- /dev/null
+++ b/src/gpu/pannotia/bc/BC.cpp

@@ -0,0 +1,322 @@
+/************************************************************************************\ 
+ *                                                                                  *
+ * Copyright � 2014 Advanced Micro Devices, Inc.                                    *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood                                *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair                           *
+ * All rights reserved.                                                             *
+ *                                                                                  *
+ * Redistribution and use in source and binary forms, with or without               *
+ * modification, are permitted provided that the following are met:                 *
+ *                                                                                  *
+ * You must reproduce the above copyright notice.                                   *
+ *                                                                                  *
+ * Neither the name of the copyright holder nor the names of its contributors       *
+ * may be used to endorse or promote products derived from this software            *
+ * without specific, prior, written permission from at least the copyright holder.  *
+ *                                                                                  *
+ * You must include the following terms in your license and/or other materials      *
+ * provided with the software.                                                      *
+ *                                                                                  *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"      *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE        *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A       *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER        *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,         *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS      *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN          *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING  *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY   *
+ * OF SUCH DAMAGE.                                                                  *
+ *                                                                                  *
+ * Without limiting the foregoing, the software may implement third party           *
+ * technologies for which you must obtain licenses from parties other than AMD.     *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall       *
+ * be responsible for obtaining the rights to use and/or distribute the applicable  *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder.                       *
+ *                                                                                  *
+ * If you use the software (in whole or in part), you shall adhere to all           *
+ * applicable U.S., European, and other export laws, including but not limited to   *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774),  *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009.  Further, pursuant   *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a       *
+ * license granted by the United States Department of Commerce Bureau of Industry   *
+ * and Security or as otherwise permitted pursuant to a License Exception under     *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export,     *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or    *
+ * E:2 any restricted technology, software, or source code you receive hereunder,   *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such       *
+ * technology or software, if such foreign produced direct product is subject to    *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR).  For the most current Country Group   *
+ * listings, or for additional information about the EAR or your obligations under  *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's    *
+ * website at http://www.bis.doc.gov/.                                              *
+ *                                                                                  *
+\************************************************************************************/
+
+#include "hip/hip_runtime.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+//#include <sys/time.h>
+#include <algorithm>
+#include "BC.h"
+#include "../graph_parser/util.h"
+#include "kernel.h"
+
+#ifdef GEM5_FUSION
+#include <stdint.h>
+#include <gem5/m5ops.h>
+#endif
+
+#ifdef GEM5_FUSION
+#define MAX_ITERS 150
+#else
+#include <stdint.h>
+#define MAX_ITERS INT32_MAX
+#endif
+
+void print_vector(int *vector, int num);
+void print_vectorf(float *vector, int num);
+
+int main(int argc, char **argv)
+{
+    char *tmpchar;
+
+    int num_nodes;
+    int num_edges;
+    bool directed = 1;
+
+    hipError_t err;
+
+    if (argc == 2) {
+        tmpchar     = argv[1];       //graph inputfile
+    } else {
+        fprintf(stderr, "You did something wrong!\n");
+        exit(1);
+    }
+
+    // Parse graph and store it in a CSR format
+    csr_array *csr = parseCOO(tmpchar, &num_nodes, &num_edges, directed);
+
+    // Allocate the bc host array
+    float *bc_h = (float *)malloc(num_nodes * sizeof(float));
+    if (!bc_h) fprintf(stderr, "malloc failed bc_h\n");
+
+    // Create device-side buffers
+    float *bc_d, *sigma_d, *rho_d;
+    int *dist_d, *stop_d;
+    int *row_d, *col_d, *row_trans_d, *col_trans_d;
+
+    // Create betweenness centrality buffers
+    err = hipMalloc(&bc_d, num_nodes * sizeof(float));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc bc_d %s\n", hipGetErrorString(err));
+        return -1;
+    }
+    err = hipMalloc(&dist_d, num_nodes * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc dist_d %s\n", hipGetErrorString(err));
+        return -1;
+    }
+    err = hipMalloc(&sigma_d, num_nodes * sizeof(float));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc sigma_d %s\n", hipGetErrorString(err));
+        return -1;
+    }
+    err = hipMalloc(&rho_d, num_nodes * sizeof(float));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc rho_d %s\n", hipGetErrorString(err));
+        return -1;
+    }
+
+    // Create termination variable buffer
+    err = hipMalloc(&stop_d, sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc stop_d %s\n", hipGetErrorString(err));
+        return -1;
+    }
+
+    // Create graph buffers
+    err = hipMalloc(&row_d, (num_nodes + 1) * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc row_d %s\n", hipGetErrorString(err));
+        return -1;
+    }
+    err = hipMalloc(&col_d, num_edges * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc col_d %s\n", hipGetErrorString(err));
+        return -1;
+    }
+    err = hipMalloc(&row_trans_d, (num_nodes + 1) * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc row_trans_d %s\n", hipGetErrorString(err));
+        return -1;
+    }
+    err = hipMalloc(&col_trans_d, num_edges * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc col_trans_d %s\n", hipGetErrorString(err));
+        return -1;
+    }
+
+    //double timer1, timer2;
+    //double timer3, timer4;
+
+    //timer1 = gettime();
+
+#ifdef GEM5_FUSION
+    m5_work_begin(0, 0);
+#endif
+
+    // Copy data to device-side buffers
+    err = hipMemcpy(row_d, csr->row_array, (num_nodes + 1) * sizeof(int), hipMemcpyHostToDevice);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMemcpy row_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+
+    err = hipMemcpy(col_d, csr->col_array, num_edges * sizeof(int), hipMemcpyHostToDevice);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMemcpy col_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+
+    // Copy data to device-side buffers
+    err = hipMemcpy(row_trans_d, csr->row_array_t, (num_nodes + 1) * sizeof(int), hipMemcpyHostToDevice);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMemcpy row_trans_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+
+    err = hipMemcpy(col_trans_d, csr->col_array_t, num_edges * sizeof(int), hipMemcpyHostToDevice);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMemcpy col_trans_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+
+    //timer3 = gettime();
+
+    // Set up kernel dimensions
+    int local_worksize = 128;
+    dim3 threads(local_worksize, 1, 1);
+    int num_blocks = (num_nodes + local_worksize - 1) / local_worksize;
+    dim3 grid(num_blocks, 1, 1);
+
+    // Initialization
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(clean_bc), dim3(grid), dim3(threads ), 0, 0, bc_d, num_nodes);
+
+    // Main computation loop
+    for (int i = 0; i < num_nodes && i < MAX_ITERS; i++) {
+        hipLaunchKernelGGL(HIP_KERNEL_NAME(clean_1d_array), dim3(grid), dim3(threads ), 0, 0, i, dist_d, sigma_d, rho_d,
+                                            num_nodes);
+
+        // Depth of the traversal
+        int dist = 0;
+        // Termination variable
+        int stop = 1;
+
+        // Traverse the graph from the source node i
+        do {
+            stop = 0;
+
+            // Copy the termination variable to the device
+            hipMemcpy(stop_d, &stop, sizeof(int), hipMemcpyHostToDevice);
+
+            hipLaunchKernelGGL(HIP_KERNEL_NAME(bfs_kernel), dim3(grid), dim3(threads ), 0, 0, row_d, col_d, dist_d, rho_d, stop_d,
+                                            num_nodes, num_edges, dist);
+
+            // Copy back the termination variable from the device
+            hipMemcpy(&stop, stop_d, sizeof(int), hipMemcpyDeviceToHost);
+
+            // Another level
+            dist++;
+
+        } while (stop);
+
+        hipDeviceSynchronize();
+
+        // Traverse back from the deepest part of the tree
+        while (dist) {
+            hipLaunchKernelGGL(HIP_KERNEL_NAME(backtrack_kernel), dim3(grid), dim3(threads ), 0, 0, row_trans_d, col_trans_d,
+                                                dist_d, rho_d, sigma_d,
+                                                num_nodes, num_edges, dist, i,
+                                                bc_d);
+
+            // Back one level
+            dist--;
+        }
+        hipDeviceSynchronize();
+	fprintf(stdout, "Completed iteration %d\n", i);
+    }
+    hipDeviceSynchronize();
+    //timer4 = gettime();
+
+    // Copy back the results for the bc array
+    err = hipMemcpy(bc_h, bc_d, num_nodes * sizeof(float), hipMemcpyDeviceToHost);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: read buffer bc_d (%s)\n", hipGetErrorString(err));
+        return -1;
+    }
+
+#ifdef GEM5_FUSION
+    m5_work_end(0, 0);
+#endif
+
+    //timer2 = gettime();
+
+    //printf("kernel + memcopy time = %lf ms\n", (timer4 - timer3) * 1000);
+    //printf("kernel execution time = %lf ms\n", (timer2 - timer1) * 1000);
+
+#if 1
+    //dump the results to the file
+    print_vectorf(bc_h, num_nodes);
+#endif
+
+    // Clean up the host-side buffers
+    free(bc_h);
+    free(csr->row_array);
+    free(csr->col_array);
+    free(csr->data_array);
+    free(csr->row_array_t);
+    free(csr->col_array_t);
+    free(csr->data_array_t);
+    free(csr);
+
+    // Clean up the device-side buffers
+    hipFree(bc_d);
+    hipFree(dist_d);
+    hipFree(sigma_d);
+    hipFree(rho_d);
+    hipFree(stop_d);
+    hipFree(row_d);
+    hipFree(col_d);
+    hipFree(row_trans_d);
+    hipFree(col_trans_d);
+
+    return 0;
+}
+
+void print_vector(int *vector, int num)
+{
+    for (int i = 0; i < num; i++)
+        printf("%d: %d \n", i + 1, vector[i]);
+    printf("\n");
+}
+
+void print_vectorf(float *vector, int num)
+{
+
+    FILE * fp = fopen("result.out", "w");
+    if (!fp) {
+        printf("ERROR: unable to open result.txt\n");
+    }
+
+    for (int i = 0; i < num; i++) {
+        fprintf(fp, "%f\n", vector[i]);
+    }
+
+    fclose(fp);
+
+}

diff --git a/src/gpu/pannotia/bc/BC.h b/src/gpu/pannotia/bc/BC.h
new file mode 100644
index 0000000..0abdb12
--- /dev/null
+++ b/src/gpu/pannotia/bc/BC.h

@@ -0,0 +1,230 @@
+/************************************************************************************\ 
+ *                                                                                  *
+ * Copyright � 2014 Advanced Micro Devices, Inc.                                    *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood                                *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair                           *
+ * All rights reserved.                                                             *
+ *                                                                                  *
+ * Redistribution and use in source and binary forms, with or without               *
+ * modification, are permitted provided that the following are met:                 *
+ *                                                                                  *
+ * You must reproduce the above copyright notice.                                   *
+ *                                                                                  *
+ * Neither the name of the copyright holder nor the names of its contributors       *
+ * may be used to endorse or promote products derived from this software            *
+ * without specific, prior, written permission from at least the copyright holder.  *
+ *                                                                                  *
+ * You must include the following terms in your license and/or other materials      *
+ * provided with the software.                                                      *
+ *                                                                                  *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"      *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE        *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A       *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER        *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,         *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS      *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN          *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING  *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY   *
+ * OF SUCH DAMAGE.                                                                  *
+ *                                                                                  *
+ * Without limiting the foregoing, the software may implement third party           *
+ * technologies for which you must obtain licenses from parties other than AMD.     *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall       *
+ * be responsible for obtaining the rights to use and/or distribute the applicable  *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder.                       *
+ *                                                                                  *
+ * If you use the software (in whole or in part), you shall adhere to all           *
+ * applicable U.S., European, and other export laws, including but not limited to   *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774),  *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009.  Further, pursuant   *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a       *
+ * license granted by the United States Department of Commerce Bureau of Industry   *
+ * and Security or as otherwise permitted pursuant to a License Exception under     *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export,     *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or    *
+ * E:2 any restricted technology, software, or source code you receive hereunder,   *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such       *
+ * technology or software, if such foreign produced direct product is subject to    *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR).  For the most current Country Group   *
+ * listings, or for additional information about the EAR or your obligations under  *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's    *
+ * website at http://www.bis.doc.gov/.                                              *
+ *                                                                                  *
+\************************************************************************************/
+
+typedef struct csr_array_t {
+
+    int *row_array;
+    int *col_array;
+    int *data_array;
+
+    int *row_array_t;
+    int *col_array_t;
+    int *data_array_t;
+
+} csr_array;
+
+
+typedef struct cooedgetuple {
+    int row;
+    int col;
+    int val;
+} CooTuple;
+
+bool compare(
+    CooTuple elem1,
+    CooTuple elem2)
+{
+    if (elem1.row < elem2.row)
+        return true;
+    return false;
+}
+
+void transform(CooTuple *tuple_array, int num_edges, int *row_array, int *col_array, int *data_array)
+{
+
+    int row_cnt = 0;
+    int prev = -1;
+    int idx;
+
+    for (idx = 0; idx < num_edges; idx++) {
+        int curr = tuple_array[idx].row;
+        if (curr != prev) {
+            row_array[row_cnt++] = idx;
+            prev = curr;
+        }
+
+        col_array[idx]  = tuple_array[idx].col;
+        data_array[idx] = tuple_array[idx].val;
+
+    }
+    row_array[row_cnt] = idx;
+}
+
+csr_array * parseCOO(char* tmpchar, int *p_num_nodes, int *p_num_edges, bool directed)
+{
+    int cnt = 0;
+    int cnt1 = 0;
+    unsigned int lineno = 0;
+    char sp[2], a, p;
+    char * line = (char *)malloc(8192 * sizeof(char));
+    int num_nodes = 0, num_edges = 0;
+
+    FILE *fptr;
+    CooTuple *tuple_array   = NULL;
+    CooTuple *tuple_array_t = NULL;
+
+    fptr = fopen(tmpchar, "r");
+    if (!fptr) {
+        fprintf(stderr, "Error when opennning file: %s\n", tmpchar);
+        perror("ERROR: ");
+        exit(1);
+    }
+
+    printf("Opening file: %s\n", tmpchar);
+
+    while (fgets(line, 8192, fptr)) {
+        int head, tail, weight;
+        switch (line[0]) {
+        case 'c':
+            break;
+        case 'p':
+            sscanf(line, "%c %s %d %d", &p, sp, p_num_nodes, p_num_edges);
+
+            if (!directed) {
+                *p_num_edges = *p_num_edges * 2;
+                printf("This is an undirected graph\n");
+            } else {
+                printf("This is a directed graph\n");
+            }
+
+            num_nodes  =  *p_num_nodes;
+            num_edges =   *p_num_edges;
+
+            printf("Read from file: num_nodes = %d, num_edges = %d\n", num_nodes, num_edges);
+
+            tuple_array       = (CooTuple *)malloc(sizeof(CooTuple) * num_edges);
+            if (!tuple_array) printf("malloc failed\n");
+            tuple_array_t     = (CooTuple *)malloc(sizeof(CooTuple) * num_edges);
+            if (!tuple_array_t) printf("malloc failed\n");
+
+            break;
+
+        case 'a':
+            sscanf(line, "%c %d %d %d", &a, &head, &tail, &weight);
+
+            if (tail == head) printf("reporting self loop\n");
+
+            CooTuple temp, temp1;
+
+            temp.row = head - 1;
+            temp.col = tail - 1;
+            temp.val = weight;
+
+            temp1.row = tail - 1;
+            temp1.col = head - 1;
+            temp1.val = weight;
+
+            tuple_array[cnt++]   = temp;
+            tuple_array_t[cnt1++] = temp1;
+
+            if (!directed) {
+
+                temp.row = tail - 1;
+                temp.col = head - 1;
+                temp.val = weight;
+
+                temp1.row = head - 1;
+                temp1.col = tail - 1;
+                temp1.val = weight;
+
+                tuple_array[cnt++]   = temp;
+                tuple_array_t[cnt1++] = temp1;
+
+            }
+
+            break;
+        default:
+            fprintf(stderr, "exiting loop\n");
+            break;
+        }
+        lineno++;
+    }
+
+    std::stable_sort(tuple_array,   tuple_array   + num_edges, compare);
+    std::stable_sort(tuple_array_t, tuple_array_t + num_edges, compare);
+
+    int *row_array = (int *)malloc((num_nodes + 1) * sizeof(int));
+    int *col_array = (int *)malloc(num_edges * sizeof(int));
+    int *data_array = (int *)malloc(num_edges * sizeof(int));
+
+    int *row_array_t = (int *)malloc((num_nodes + 1) * sizeof(int));
+    int *col_array_t = (int *)malloc(num_edges * sizeof(int));
+    int *data_array_t = (int *)malloc(num_edges * sizeof(int));
+
+    transform(tuple_array,   num_edges, row_array,   col_array, data_array);
+    transform(tuple_array_t, num_edges, row_array_t, col_array_t, data_array_t);
+
+    fclose(fptr);
+    free(tuple_array);
+    free(tuple_array_t);
+
+    csr_array *csr = (csr_array *)malloc(sizeof(csr_array));
+
+    csr -> row_array = row_array;
+    csr -> col_array = col_array;
+    csr -> data_array = data_array;
+
+    csr -> row_array_t  = row_array_t;
+    csr -> col_array_t  = col_array_t;
+    csr -> data_array_t = data_array_t;
+
+    free(line);
+
+    return csr;
+}
+

diff --git a/src/gpu/pannotia/bc/Makefile b/src/gpu/pannotia/bc/Makefile
new file mode 100644
index 0000000..6158bac
--- /dev/null
+++ b/src/gpu/pannotia/bc/Makefile

@@ -0,0 +1,11 @@
+default:
+	make -f Makefile.default
+
+clean:
+	make -f Makefile.default clean
+
+gem5-fusion:
+	make -f Makefile.gem5-fusion
+
+clean-gem5-fusion:
+	make -f Makefile.gem5-fusion clean

diff --git a/src/gpu/pannotia/bc/Makefile.default b/src/gpu/pannotia/bc/Makefile.default
new file mode 100644
index 0000000..2147843
--- /dev/null
+++ b/src/gpu/pannotia/bc/Makefile.default

@@ -0,0 +1,24 @@
+CPPSRC = ../graph_parser/util.cpp
+CPPSRC = BC.cpp
+EXECUTABLE = bc_hip
+# BC has a compilation error in host code with ROCm 4.0, so O2 and O3 do not
+# pass even on real GPUs
+OPTS = -O1
+
+HIP_PATH ?= /opt/rocm/hip
+HIPCC = $(HIP_PATH)/bin/hipcc
+
+BIN_DIR ?= ./bin
+
+all: $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR)/$(EXECUTABLE): $(CPPSRC) ../graph_parser/util.cpp $(BIN_DIR)
+	$(HIPCC) $(OPTS) --amdgpu-target=gfx801,gfx803,gfx906 $(CXXFLAGS) ../graph_parser/util.cpp $(CPPSRC) -o $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR):
+	mkdir -p $(BIN_DIR)
+
+clean:
+	rm -rf $(BIN_DIR)
+
+.PHONY: square clean

diff --git a/src/gpu/pannotia/bc/Makefile.gem5-fusion b/src/gpu/pannotia/bc/Makefile.gem5-fusion
new file mode 100644
index 0000000..6226539
--- /dev/null
+++ b/src/gpu/pannotia/bc/Makefile.gem5-fusion

@@ -0,0 +1,28 @@
+CPPSRC = BC.cpp
+EXECUTABLE = bc.gem5
+# BC has a compilation error in host code with ROCm 4.0, so O2 and O3 do not
+# pass even on real GPUs
+OPTS = -O1
+
+HIP_PATH ?= /opt/rocm/hip
+HIPCC = $(HIP_PATH)/bin/hipcc
+
+# these are needed for m5ops
+GEM5_PATH ?= /path/to/gem5
+CFLAGS += -I$(GEM5_PATH)/include -I../graph_parser
+LDFLAGS += -L$(GEM5_PATH)/util/m5/build/x86/out -lm5
+
+BIN_DIR ?= ./bin
+
+all: $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR)/$(EXECUTABLE): $(CPPSRC) ../graph_parser/util.cpp $(BIN_DIR)
+	$(HIPCC) $(OPTS) --amdgpu-target=gfx801,gfx803 $(CXXFLAGS) ../graph_parser/util.cpp $(CPPSRC) -DGEM5_FUSION -o $(BIN_DIR)/$(EXECUTABLE) $(CFLAGS) $(LDFLAGS)
+
+$(BIN_DIR):
+	mkdir -p $(BIN_DIR)
+
+clean:
+	rm -rf $(BIN_DIR)
+
+.PHONY: square clean

diff --git a/src/gpu/pannotia/bc/README.md b/src/gpu/pannotia/bc/README.md
new file mode 100644
index 0000000..60499b3
--- /dev/null
+++ b/src/gpu/pannotia/bc/README.md

@@ -0,0 +1,49 @@
+---
+title: Pannotia BC Test
+tags:
+    - x86
+    - amdgpu
+layout: default
+permalink: resources/pannotia/bc
+shortdoc: >
+    Resources to build a disk image with the GCN3 Pannotia BC workload.
+---
+
+Betweenness Centrality (BC) is a graph analytics application that is part of the Pannotia benchmark suite.  It is used to calculate betweenness centrality scores for all the vertices in a graph.  The provided version is for use with the gpu-compute model of gem5.  Thus, it has been ported from the prior CUDA and OpenCL variants to HIP, and validated on a Vega-class AMD GPU.
+
+Compiling BC, compiling the GCN3_X86/Vega_X86 versions of gem5, and running BC on gem5 is dependent on the gcn-gpu docker image, `util/dockerfiles/gcn-gpu/Dockerfile` on the [gem5 stable branch](https://gem5.googlesource.com/public/gem5/+/refs/heads/stable).
+
+## Compilation and Running
+
+To compile BC:
+
+```
+cd src/gpu/pannotia/bc
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu make gem5-fusion
+```
+
+If you use the Makefile.default file instead, the Makefile will generate code designed to run on the real GPU instead.  Moreover, note that Makefile.gem5-fusion requires you to set the GEM5_ROOT variable (either on the command line or by modifying the Makefile), because the Pannotia applications have been updated to use [m5ops](https://www.gem5.org/documentation/general_docs/m5ops/).  By default, the Makefile builds for gfx801 and gfx803, and is placed in the src/gpu/pannotia/bc/bin folder.
+
+## Compiling GCN3_X86/gem5.opt
+
+BC is a GPU application, which requires that gem5 is built with the GCN3_X86 (or Vega_X86, although this has been less heavily tested) architecture.  The test is run with the GCN3_X86 gem5 variant, compiled using the gcn-gpu docker image:
+
+```
+git clone https://gem5.googlesource.com/public/gem5
+cd gem5
+docker run -u $UID:$GID --volume $(pwd):$(pwd) -w $(pwd) gcr.io/gem5-test/gcn-gpu:latest scons build/GCN3_X86/gem5.opt -j <num cores>
+```
+
+## Running BC on GCN3_X86/gem5.opt
+
+# Assuming gem5 and gem5-resources are in your working directory
+```
+wget http://dist.gem5.org/dist/develop/datasets/pannotia/bc/1k_128k.gr
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu gem5/build/GCN3_X86/gem5.opt gem5/configs/example/apu_se.py -n3 --mem-size=8GB --benchmark-root=gem5-resources/src/gpu/pannotia/bc/bin -c bc.gem5 --options="1k_128k.gr"
+```
+
+Note that the datasets from the original Pannotia suite have been uploaded to: <http://dist.gem5.org/dist/develop/datasets/pannotia>.  We recommend you start with the 1k_128k.gr input (<http://dist.gem5.org/dist/develop/datasets/pannotia/bc/1k_128k.gr>), as this is the smallest input designed to run with BC.
+
+## Pre-built binary
+
+A pre-built binary will be added soon.

diff --git a/src/gpu/pannotia/bc/kernel.h b/src/gpu/pannotia/bc/kernel.h
new file mode 100644
index 0000000..442f007
--- /dev/null
+++ b/src/gpu/pannotia/bc/kernel.h

@@ -0,0 +1,241 @@
+/************************************************************************************\
+ *                                                                                  *
+ * Copyright � 2014 Advanced Micro Devices, Inc.                                    *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood                                *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair                           *
+ * All rights reserved.                                                             *
+ *                                                                                  *
+ * Redistribution and use in source and binary forms, with or without               *
+ * modification, are permitted provided that the following are met:                 *
+ *                                                                                  *
+ * You must reproduce the above copyright notice.                                   *
+ *                                                                                  *
+ * Neither the name of the copyright holder nor the names of its contributors       *
+ * may be used to endorse or promote products derived from this software            *
+ * without specific, prior, written permission from at least the copyright holder.  *
+ *                                                                                  *
+ * You must include the following terms in your license and/or other materials      *
+ * provided with the software.                                                      *
+ *                                                                                  *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"      *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE        *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A       *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER        *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,         *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS      *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN          *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING  *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY   *
+ * OF SUCH DAMAGE.                                                                  *
+ *                                                                                  *
+ * Without limiting the foregoing, the software may implement third party           *
+ * technologies for which you must obtain licenses from parties other than AMD.     *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall       *
+ * be responsible for obtaining the rights to use and/or distribute the applicable  *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder.                       *
+ *                                                                                  *
+ * If you use the software (in whole or in part), you shall adhere to all           *
+ * applicable U.S., European, and other export laws, including but not limited to   *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774),  *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009.  Further, pursuant   *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a       *
+ * license granted by the United States Department of Commerce Bureau of Industry   *
+ * and Security or as otherwise permitted pursuant to a License Exception under     *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export,     *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or    *
+ * E:2 any restricted technology, software, or source code you receive hereunder,   *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such       *
+ * technology or software, if such foreign produced direct product is subject to    *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR).  For the most current Country Group   *
+ * listings, or for additional information about the EAR or your obligations under  *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's    *
+ * website at http://www.bis.doc.gov/.                                              *
+ *                                                                                  *
+\************************************************************************************/
+
+#ifndef KERNEL_H_
+#define KERNEL_H_
+
+#include "hip/hip_runtime.h"
+
+/**
+ * @brief   Breadth-first traversal
+ * @param   row       CSR pointer array
+ * @param   col       CSR column  array
+ * @param   d         Distance array
+ * @param   rho       Rho array
+ * @param   p         Dependency array
+ * @param   cont      Termination variable
+ * @param   num_nodes Termination variable
+ * @param   num_edges Termination variable
+ * @param   dist      Current traversal layer
+ */
+
+__global__ void
+bfs_kernel(int *row, int *col, int *d, float *rho, int *cont,
+           const int num_nodes, const int num_edges, const int dist)
+{
+    int tid = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
+
+    //navigate the current layer
+    if (tid < num_nodes && d[tid] == dist) {
+
+        //get the starting and ending pointers
+        //of the neighbor list
+
+        int start = row[tid];
+        int end;
+        if (tid + 1 < num_nodes)
+            end = row[tid + 1];
+        else
+            end = num_edges;
+
+        //navigate through the neighbor list
+        for (int edge = start; edge < end; edge++) {
+            int w = col[edge];
+            if (d[w] < 0) {
+                *cont = 1;
+                //traverse another layer
+                d[w] = dist + 1;
+            }
+            //transfer the rho value to the neighbor
+            if (d[w] == (dist + 1)) {
+                atomicAdd(&rho[w], rho[tid]);
+            }
+        }
+    }
+}
+
+/**
+ * @brief   Back traversal
+ * @param   row       CSR pointer array
+ * @param   col       CSR column  array
+ * @param   d         Distance array
+ * @param   rho       Rho array
+ * @param   sigma     Sigma array
+ * @param   p         Dependency array
+ * @param   cont      Termination variable
+ * @param   num_nodes Termination variable
+ * @param   num_edges Termination variable
+ * @param   dist      Current traversal layer
+ * @param   s         Source vertex
+ * @param   bc        Betweeness Centrality array
+ */
+
+__global__ void
+backtrack_kernel(int *row, int *col, int *d, float *rho, float *sigma,
+                 const int num_nodes, const int num_edges, const int dist,
+                 const int s, float* bc)
+{
+    int tid = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
+
+    // Navigate the current layer
+    if (tid < num_nodes && d[tid] == dist - 1) {
+
+        int start = row[tid];
+        int end;
+        if (tid + 1 < num_nodes)
+            end = row[tid + 1];
+        else
+            end = num_edges;
+
+        // Get the starting and ending pointers
+        // of the neighbor list in the reverse graph
+        for (int edge = start; edge < end; edge++) {
+            int w = col[edge];
+            // Update the sigma value traversing back
+            if (d[w] == dist - 2)
+                atomicAdd(&sigma[w], rho[w] / rho[tid] * (1 + sigma[tid]));
+        }
+
+        // Update the BC value
+        if (tid != s)
+            bc[tid] = bc[tid] + sigma[tid];
+    }
+
+}
+
+/**
+ * @brief   back_sum_kernel (not used)
+ * @param   s         Source vertex
+ * @param   dist      Current traversal layer
+ * @param   d         Distance array
+ * @param   sigma     Sigma array
+ * @param   bc        Betweeness Centrality array
+ * @param   num_nodes Termination variable
+ * @param   num_edges Termination variable
+ */
+__global__ void
+back_sum_kernel(const int s, const int dist, int *d, float *sigma, float *bc,
+                const int num_nodes)
+{
+    int tid = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
+
+    if (tid < num_nodes) {
+        // If it is not the source
+        if (s != tid && d[tid] == dist - 1) {
+            bc[tid] = bc[tid] + sigma[tid];
+        }
+    }
+}
+
+/**
+ * @brief   array set 1D
+ * @param   s           Source vertex
+ * @param   dist_array  Distance array
+ * @param   sigma       Sigma array
+ * @param   rho         Rho array
+ * @param   num_nodes Termination variable
+ */
+__global__ void
+clean_1d_array(const int source, int *dist_array, float *sigma, float *rho,
+               const int num_nodes)
+{
+    int tid = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
+
+    if (tid < num_nodes) {
+
+        sigma[tid] = 0;
+
+        if (tid == source) {
+            // If source vertex rho = 1, dist = 0
+            rho[tid] = 1;
+            dist_array[tid] = 0;
+        } else {
+            // If other vertices rho = 0, dist = -1
+            rho[tid] = 0;
+            dist_array[tid] = -1;
+        }
+    }
+}
+
+/**
+ * @brief   array set 2D
+ * @param   p           Dependency array
+ * @param   num_nodes   Number of vertices
+ */
+__global__ void clean_2d_array(int *p, const int num_nodes)
+{
+    int tid = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
+
+    if (tid < num_nodes * num_nodes)
+        p[tid] = 0;
+}
+
+/**
+ * @brief   clean BC
+ * @param   bc_d        Betweeness Centrality array
+ * @param   num_nodes   Number of vertices
+ */
+__global__ void clean_bc(float *bc_d, const int num_nodes)
+{
+    int tid = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
+
+    if (tid < num_nodes)
+        bc_d[tid] = 0;
+}
+
+#endif // KERNEL_H_

diff --git a/src/gpu/pannotia/buildall.sh b/src/gpu/pannotia/buildall.sh
new file mode 100644
index 0000000..78192d2
--- /dev/null
+++ b/src/gpu/pannotia/buildall.sh

@@ -0,0 +1,72 @@
+#!/bin/bash
+
+extraparams=""
+buildparams=""
+if [ ! -z "$1" ]
+then
+	extraparams="$1"
+	buildparams="gem5-fusion"
+fi
+
+function savebin {
+	if [ "$2" == "" ]
+	then
+		backupbench="$1"
+	else
+		backupbench="gem5_fusion_$1"
+	fi
+	if [ -e "$backupbench" ]
+	then
+		echo "Saving bin $backupbench"
+		mv $backupbench $backupbench.bak
+	fi
+}
+
+function restorebin {
+	if [ "$2" == "" ]
+	then
+		backupbench="$1"
+	else
+		backupbench="gem5_fusion_$1"
+	fi
+	if [ -e "$backupbench.bak" ]
+	then
+		echo "Restoring bin $backupbench"
+		mv $backupbench.bak $backupbench
+	fi
+}
+
+for bench in bc color fw mis pagerank sssp
+do
+	echo $bench
+	pushd . >& /dev/null
+	cd $bench
+	make clean; make clean-gem5-fusion
+	make $buildparams $extraparams
+	if [ "$bench" == "color" ]
+	then
+		savebin "color_max" $buildparams
+		make clean; make clean-gem5-fusion
+		make $buildparams VARIANT=MAXMIN $extraparams
+		restorebin "color_max" $buildparams
+	elif [ "$bench" == "fw" ]
+	then
+		savebin "fw" $buildparams
+		make clean; make clean-gem5-fusion
+		make $buildparams VARIANT=BLOCK $extraparams
+		restorebin "fw" $buildparams
+	elif [ "$bench" == "pagerank" ]
+	then
+		savebin "pagerank" $buildparams
+		make clean; make clean-gem5-fusion
+		make $buildparams VARIANT=SPMV $extraparams
+		restorebin "pagerank" $buildparams
+	elif [ "$bench" == "sssp" ]
+	then
+		savebin "sssp" $buildparams
+		make clean; make clean-gem5-fusion
+		make $buildparams VARIANT=ELL $extraparams
+		restorebin "sssp" $buildparams
+	fi
+	popd >& /dev/null
+done

diff --git a/src/gpu/pannotia/cleanall.sh b/src/gpu/pannotia/cleanall.sh
new file mode 100644
index 0000000..256c4d3
--- /dev/null
+++ b/src/gpu/pannotia/cleanall.sh

@@ -0,0 +1,10 @@
+#!/bin/bash
+
+for bench in bc color fw mis pagerank sssp
+do
+	echo $bench
+	pushd . >& /dev/null
+	cd $bench
+	make clean; make clean-gem5-fusion
+	popd >& /dev/null
+done

diff --git a/src/gpu/pannotia/color/Makefile b/src/gpu/pannotia/color/Makefile
new file mode 100644
index 0000000..6158bac
--- /dev/null
+++ b/src/gpu/pannotia/color/Makefile

@@ -0,0 +1,11 @@
+default:
+	make -f Makefile.default
+
+clean:
+	make -f Makefile.default clean
+
+gem5-fusion:
+	make -f Makefile.gem5-fusion
+
+clean-gem5-fusion:
+	make -f Makefile.gem5-fusion clean

diff --git a/src/gpu/pannotia/color/Makefile.default b/src/gpu/pannotia/color/Makefile.default
new file mode 100644
index 0000000..6cada0d
--- /dev/null
+++ b/src/gpu/pannotia/color/Makefile.default

@@ -0,0 +1,27 @@
+HIP_PATH ?= /opt/rocm/hip
+HIPCC = $(HIP_PATH)/bin/hipcc
+
+BASEEXE = color
+VARIANT ?= MAX
+ifeq ($(VARIANT),MAX)
+	EXECUTABLE = $(BASEEXE)_max
+	CPPFILES += coloring_max.cpp
+else ifeq ($(VARIANT),MAXMIN)
+	EXECUTABLE = $(BASEEXE)_maxmin
+	CPPFILES += coloring_maxmin.cpp
+endif
+
+BIN_DIR ?= ./bin
+
+all: $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR)/$(EXECUTABLE): $(CPPFILES) ../graph_parser/parse.cpp ../graph_parser/util.cpp $(BIN_DIR)
+	$(HIPCC) -O3 --amdgpu-target=gfx801,gfx803,gfx906 $(CXXFLAGS) ../graph_parser/parse.cpp ../graph_parser/util.cpp $(CPPFILES) -o $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR):
+	mkdir -p $(BIN_DIR)
+
+clean:
+	rm -rf $(BIN_DIR)
+
+.PHONY: square clean

diff --git a/src/gpu/pannotia/color/Makefile.gem5-fusion b/src/gpu/pannotia/color/Makefile.gem5-fusion
new file mode 100644
index 0000000..703bf44
--- /dev/null
+++ b/src/gpu/pannotia/color/Makefile.gem5-fusion

@@ -0,0 +1,32 @@
+HIP_PATH ?= /opt/rocm/hip
+HIPCC = $(HIP_PATH)/bin/hipcc
+
+# these are needed for m5ops
+GEM5_PATH ?= /path/to/gem5
+CFLAGS += -I$(GEM5_PATH)/include -I/../graph_parser
+LDFLAGS += -L$(GEM5_PATH)/util/m5/build/x86/out -lm5
+
+BASEEXE = color
+VARIANT ?= MAX
+ifeq ($(VARIANT),MAX)
+	EXECUTABLE = $(BASEEXE)_max.gem5
+	CPPFILES += coloring_max.cpp
+else ifeq ($(VARIANT),MAXMIN)
+	EXECUTABLE = $(BASEEXE)_maxmin.gem5
+	CPPFILES += coloring_maxmin.cpp
+endif
+
+BIN_DIR ?= ./bin
+
+all: $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR)/$(EXECUTABLE): $(CPPFILES) ../graph_parser/parse.cpp ../graph_parser/util.cpp $(BIN_DIR)
+	$(HIPCC) -O3 --amdgpu-target=gfx801,gfx803 $(CXXFLAGS) ../graph_parser/parse.cpp ../graph_parser/util.cpp $(CPPFILES) -DGEM5_FUSION -o $(BIN_DIR)/$(EXECUTABLE) $(CFLAGS) $(LDFLAGS)
+
+$(BIN_DIR):
+	mkdir -p $(BIN_DIR)
+
+clean:
+	rm -rf $(BIN_DIR)
+
+.PHONY: square clean

diff --git a/src/gpu/pannotia/color/README.md b/src/gpu/pannotia/color/README.md
new file mode 100644
index 0000000..bc425bf
--- /dev/null
+++ b/src/gpu/pannotia/color/README.md

@@ -0,0 +1,68 @@
+---
+title: Pannotia Color Test
+tags:
+    - x86
+    - amdgpu
+layout: default
+permalink: resources/pannotia/bc
+shortdoc: >
+    Resources to build a disk image with the GCN3 Pannotia Color workload.
+---
+
+Graph Coloring (CLR) is a graph analytics application that is part of the Pannotia benchmark suite.  It is used to label the vertices of a graph with colors such that no two adjacent vertices share the same color.  The provided version is for use with the gpu-compute model of gem5.  Thus, it has been ported from the prior CUDA and OpenCL variants to HIP, and validated on a Vega-class AMD GPU.
+
+Compiling both CLR variants, compiling the GCN3_X86/Vega_X86 versions of gem5, and running both CLR variants on gem5 is dependent on the gcn-gpu docker image, `util/dockerfiles/gcn-gpu/Dockerfile` on the [gem5 stable branch](https://gem5.googlesource.com/public/gem5/+/refs/heads/stable).
+
+## Compilation and Running
+
+To compile Color:
+
+Color has two variants: max and maxmin.  To compile the "max" variant:
+
+```
+cd src/gpu/pannotia/clr
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu make gem5-fusion
+```
+
+To compile the "maxmin" variant:
+
+```
+cd src/gpu/pannotia/clr
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu bash -c "export VARIANT=MAXMIN ; make gem5-fusion"
+```
+
+If you use the Makefile.default file instead, the Makefile will generate code designed to run on the real GPU instead.  Moreover, note that Makefile.gem5-fusion requires you to set the GEM5_ROOT variable (either on the command line or by modifying the Makefile), because the Pannotia applications have been updated to use [m5ops](https://www.gem5.org/documentation/general_docs/m5ops/).  By default, for both variants the Makefile builds for gfx801 and gfx803, and the binaries are placed in the src/gpu/pannotia/clr/bin folder.  Moreover, by default the VARIANT variable Color's Makefile assumes the max variant is being used, hence why this variable does not need to be set for compiling it.
+
+## Compiling GCN3_X86/gem5.opt
+
+Color is a GPU application, which requires that gem5 is built with the GCN3_X86 (or Vega_X86, although this has been less heavily tested) architecture.  The test is run with the GCN3_X86 gem5 variant, compiled using the gcn-gpu docker image:
+
+```
+git clone https://gem5.googlesource.com/public/gem5
+cd gem5
+docker run -u $UID:$GID --volume $(pwd):$(pwd) -w $(pwd) gcr.io/gem5-test/gcn-gpu:latest scons build/GCN3_X86/gem5.opt -j <num cores>
+```
+
+## Running Color on GCN3_X86/gem5.opt
+
+The following command shows how to run the CLR max version:
+
+# Assuming gem5 and gem5-resources are in your working directory
+```
+wget http://dist.gem5.org/dist/develop/datasets/pannotia/bc/1k_128k.gr
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu gem5/build/GCN3_X86/gem5.opt gem5/configs/example/apu_se.py -n3 --mem-size=8GB --benchmark-root=gem5-resources/src/gpu/pannotia/clr/bin -c color_max.gem5 --options="1k_128k.gr 0"
+```
+
+To run the CLR maxmin version:
+
+# Assuming gem5, pannotia (input graphs, see below), and gem5-resources are in your working directory
+```
+wget http://dist.gem5.org/dist/develop/datasets/pannotia/bc/1k_128k.gr
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu gem5/build/GCN3_X86/gem5.opt gem5/configs/example/apu_se.py -n3 --mem-size=8GB --benchmark-root=gem5-resources/src/gpu/pannotia/clr/bin -c color_maxmin.gem5 --options="1k_128k.gr 0"
+```
+
+Note that the datasets from the original Pannotia suite have been uploaded to: <http://dist.gem5.org/dist/develop/datasets/pannotia>.  We recommend you start with the 1k_128k.gr input (<http://dist.gem5.org/dist/develop/datasets/pannotia/bc/1k_128k.gr>), as this is the smallest input that can be run with CLR.  Note that 1k_128k is not designed for Color specifically though -- the above link has larger graphs designed to run with Color that you should consider using for larger experiments.
+
+## Pre-built binary
+
+A pre-built binary will be added soon.

diff --git a/src/gpu/pannotia/color/coloring_max.cpp b/src/gpu/pannotia/color/coloring_max.cpp
new file mode 100644
index 0000000..b85245e
--- /dev/null
+++ b/src/gpu/pannotia/color/coloring_max.cpp

@@ -0,0 +1,303 @@
+/************************************************************************************\ 
+ *                                                                                  *
+ * Copyright © 2014 Advanced Micro Devices, Inc.                                    *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood                                *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair                           *
+ * All rights reserved.                                                             *
+ *                                                                                  *
+ * Redistribution and use in source and binary forms, with or without               *
+ * modification, are permitted provided that the following are met:                 *
+ *                                                                                  *
+ * You must reproduce the above copyright notice.                                   *
+ *                                                                                  *
+ * Neither the name of the copyright holder nor the names of its contributors       *
+ * may be used to endorse or promote products derived from this software            *
+ * without specific, prior, written permission from at least the copyright holder.  *
+ *                                                                                  *
+ * You must include the following terms in your license and/or other materials      *
+ * provided with the software.                                                      *
+ *                                                                                  *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"      *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE        *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A       *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER        *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,         *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS      *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN          *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING  *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY   *
+ * OF SUCH DAMAGE.                                                                  *
+ *                                                                                  *
+ * Without limiting the foregoing, the software may implement third party           *
+ * technologies for which you must obtain licenses from parties other than AMD.     *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall       *
+ * be responsible for obtaining the rights to use and/or distribute the applicable  *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder.                       *
+ *                                                                                  *
+ * If you use the software (in whole or in part), you shall adhere to all           *
+ * applicable U.S., European, and other export laws, including but not limited to   *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774),  *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009.  Further, pursuant   *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a       *
+ * license granted by the United States Department of Commerce Bureau of Industry   *
+ * and Security or as otherwise permitted pursuant to a License Exception under     *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export,     *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or    *
+ * E:2 any restricted technology, software, or source code you receive hereunder,   *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such       *
+ * technology or software, if such foreign produced direct product is subject to    *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR).  For the most current Country Group   *
+ * listings, or for additional information about the EAR or your obligations under  *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's    *
+ * website at http://www.bis.doc.gov/.                                              *
+ *                                                                                  *
+\************************************************************************************/
+
+#include "hip/hip_runtime.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+//#include <sys/time.h>
+#include "../graph_parser/parse.h"
+#include "../graph_parser/util.h"
+#include "kernel_max.h"
+
+#ifdef GEM5_FUSION
+#include <stdint.h>
+#include <gem5/m5ops.h>
+#endif
+
+#define RANGE 2048
+
+void print_vector(int *vector, int num);
+
+int main(int argc, char **argv)
+{
+    char *tmpchar;
+
+    int num_nodes;
+    int num_edges;
+    int file_format = 1;
+    bool directed = 0;
+
+    hipError_t err = hipSuccess;
+
+    if (argc == 3) {
+        tmpchar = argv[1];  //graph inputfile
+        file_format = atoi(argv[2]); //graph format
+    } else {
+        fprintf(stderr, "You did something wrong!\n");
+        exit(1);
+    }
+
+    srand(7);
+
+    // Allocate the CSR structure
+    csr_array *csr;
+
+    // Parse graph file and store into a CSR format
+    if (file_format == 1)
+        csr = parseMetis(tmpchar, &num_nodes, &num_edges, directed);
+    else if (file_format == 0)
+        csr = parseCOO(tmpchar, &num_nodes, &num_edges, directed);
+    else {
+        printf("reserve for future");
+        exit(1);
+    }
+
+    // Allocate the vertex value array
+    int *node_value = (int *)malloc(num_nodes * sizeof(int));
+    if (!node_value) fprintf(stderr, "node_value malloc failed\n");
+    // Allocate the color array
+    int *color = (int *)malloc(num_nodes * sizeof(int));
+    if (!color) fprintf(stderr, "color malloc failed\n");
+
+    // Initialize all the colors to -1
+    // Randomize the value for each vertex
+    for (int i = 0; i < num_nodes; i++) {
+        color[i] = -1;
+        node_value[i] = rand() % RANGE;
+    }
+
+    int *row_d;
+    int *col_d;
+    int *max_d;
+
+    int *color_d;
+    int *node_value_d;
+    int *stop_d;
+
+    // Create device-side buffers for the graph
+    err = hipMalloc(&row_d, num_nodes * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc row_d (size:%d) => %s\n",  num_nodes , hipGetErrorString(err));
+        return -1;
+    }
+    err = hipMalloc(&col_d, num_edges * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc col_d (size:%d): %s\n",  num_edges , hipGetErrorString(err));
+        return -1;
+    }
+
+    // Termination variable
+    err = hipMalloc(&stop_d, sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc stop_d (size:%d) => %s\n",  1 , hipGetErrorString(err));
+        return -1;
+    }
+
+    // Create device-side buffers for color
+    err = hipMalloc(&color_d, num_nodes * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc color_d (size:%d) => %s\n", num_nodes , hipGetErrorString(err));
+        return -1;
+    }
+    err = hipMalloc(&node_value_d, num_nodes * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc node_value_d (size:%d) => %s\n", num_nodes , hipGetErrorString(err));
+        return -1;
+    }
+    err = hipMalloc(&max_d, num_nodes * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc max_d (size:%d) => %s\n",  num_nodes , hipGetErrorString(err));
+        return -1;
+    }
+
+    // Copy data to device-side buffers
+//    double timer1 = gettime();
+
+#ifdef GEM5_FUSION
+    m5_work_begin(0, 0);
+#endif
+
+    err = hipMemcpy(color_d, color, num_nodes * sizeof(int), hipMemcpyHostToDevice);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMemcpy color_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+
+    err = hipMemcpy(max_d, color, num_nodes * sizeof(int), hipMemcpyHostToDevice);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMemcpy max_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+
+    err = hipMemcpy(row_d, csr->row_array, num_nodes * sizeof(int), hipMemcpyHostToDevice);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMemcpy row_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+
+    err = hipMemcpy(col_d, csr->col_array, num_edges * sizeof(int), hipMemcpyHostToDevice);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMemcpy col_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+
+    err = hipMemcpy(node_value_d, node_value, num_nodes * sizeof(int), hipMemcpyHostToDevice);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMemcpy node_value_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+
+    int block_size = 256;
+    int num_blocks = (num_nodes + block_size - 1) / block_size;
+
+    // Set up kernel dimensions
+    dim3 threads(block_size,  1, 1);
+    dim3 grid(num_blocks, 1,  1);
+
+    int stop = 1;
+    int graph_color = 1;
+
+    // Main computation loop
+//    double timer3 = gettime();
+
+    while (stop) {
+
+        stop = 0;
+
+        // Copy the termination variable to the device
+        err = hipMemcpy(stop_d, &stop, sizeof(int), hipMemcpyHostToDevice);
+        if (err != hipSuccess) {
+            fprintf(stderr, "ERROR: write stop_d: %s\n", hipGetErrorString(err));
+        }
+
+        // Launch the color kernel 1
+        hipLaunchKernelGGL(HIP_KERNEL_NAME(color1), dim3(grid), dim3(threads ), 0, 0, row_d, col_d, node_value_d, color_d,
+                                     stop_d, max_d, graph_color, num_nodes,
+                                     num_edges);
+
+        // Launch the color kernel 2
+        hipLaunchKernelGGL(HIP_KERNEL_NAME(color2), dim3(grid), dim3(threads ), 0, 0, node_value_d, color_d, max_d, graph_color,
+                                     num_nodes, num_edges);
+
+        err = hipMemcpy(&stop, stop_d, sizeof(int), hipMemcpyDeviceToHost);
+        if (err != hipSuccess) {
+            fprintf(stderr, "ERROR: read stop_d: %s\n", hipGetErrorString(err));
+        }
+
+        // Increment the color for the next iter
+        graph_color++;
+
+    }
+    hipDeviceSynchronize();
+
+//    double timer4 = gettime();
+
+    // Copy back the color array
+    err = hipMemcpy(color, color_d, num_nodes * sizeof(int), hipMemcpyDeviceToHost);
+    if (err != hipSuccess) {
+        printf("ERROR: hipMemcpy(): %s\n", hipGetErrorString(err));
+        return -1;
+    }
+
+#ifdef GEM5_FUSION
+    m5_work_end(0, 0);
+#endif
+
+//    double timer2 = gettime();
+
+    // Print out color and timing statistics
+    printf("total number of colors used: %d\n", graph_color);
+//    printf("kernel time = %lf ms\n", (timer4 - timer3) * 1000);
+//    printf("kernel + memcpy time = %lf ms\n", (timer2 - timer1) * 1000);
+
+#if 1
+    // Dump the color array into an output file
+    print_vector(color, num_nodes);
+#endif
+
+    // Free host-side buffers
+    free(node_value);
+    free(color);
+    csr->freeArrays();
+    free(csr);
+
+    // Free CUDA buffers
+    hipFree(row_d);
+    hipFree(col_d);
+    hipFree(max_d);
+    hipFree(color_d);
+    hipFree(node_value_d);
+    hipFree(stop_d);
+
+    return 0;
+
+}
+
+void print_vector(int *vector, int num)
+{
+    FILE * fp = fopen("result.out", "w");
+    if (!fp) {
+        printf("ERROR: unable to open result.txt\n");
+    }
+
+    for (int i = 0; i < num; i++)
+        fprintf(fp, "%d: %d\n", i + 1, vector[i]);
+
+    fclose(fp);
+}

diff --git a/src/gpu/pannotia/color/coloring_maxmin.cpp b/src/gpu/pannotia/color/coloring_maxmin.cpp
new file mode 100644
index 0000000..01e173d
--- /dev/null
+++ b/src/gpu/pannotia/color/coloring_maxmin.cpp

@@ -0,0 +1,306 @@
+/************************************************************************************\ 
+ *                                                                                  *
+ * Copyright © 2014 Advanced Micro Devices, Inc.                                    *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood                                *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair                           *
+ * All rights reserved.                                                             *
+ *                                                                                  *
+ * Redistribution and use in source and binary forms, with or without               *
+ * modification, are permitted provided that the following are met:                 *
+ *                                                                                  *
+ * You must reproduce the above copyright notice.                                   *
+ *                                                                                  *
+ * Neither the name of the copyright holder nor the names of its contributors       *
+ * may be used to endorse or promote products derived from this software            *
+ * without specific, prior, written permission from at least the copyright holder.  *
+ *                                                                                  *
+ * You must include the following terms in your license and/or other materials      *
+ * provided with the software.                                                      *
+ *                                                                                  *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"      *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE        *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A       *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER        *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,         *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS      *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN          *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING  *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY   *
+ * OF SUCH DAMAGE.                                                                  *
+ *                                                                                  *
+ * Without limiting the foregoing, the software may implement third party           *
+ * technologies for which you must obtain licenses from parties other than AMD.     *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall       *
+ * be responsible for obtaining the rights to use and/or distribute the applicable  *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder.                       *
+ *                                                                                  *
+ * If you use the software (in whole or in part), you shall adhere to all           *
+ * applicable U.S., European, and other export laws, including but not limited to   *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774),  *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009.  Further, pursuant   *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a       *
+ * license granted by the United States Department of Commerce Bureau of Industry   *
+ * and Security or as otherwise permitted pursuant to a License Exception under     *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export,     *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or    *
+ * E:2 any restricted technology, software, or source code you receive hereunder,   *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such       *
+ * technology or software, if such foreign produced direct product is subject to    *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR).  For the most current Country Group   *
+ * listings, or for additional information about the EAR or your obligations under  *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's    *
+ * website at http://www.bis.doc.gov/.                                              *
+ *                                                                                  *
+\************************************************************************************/
+
+#include "hip/hip_runtime.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include "../graph_parser/parse.h"
+#include "../graph_parser/util.h"
+#include "kernel_maxmin.cu"
+
+#ifdef GEM5_FUSION
+#include <stdint.h>
+#include <gem5/m5ops.h>
+#endif
+
+#define RANGE 2048
+
+void print_vector(int *vector, int num);
+
+int main(int argc, char **argv)
+{
+    char *tmpchar;
+
+    int num_nodes;
+    int num_edges;
+    int file_format = 1;
+    bool directed = 0;
+
+    hipError_t err = hipSuccess;
+
+    if (argc == 3) {
+        tmpchar = argv[1];  //graph inputfile
+        file_format = atoi(argv[2]); //graph format
+    } else {
+        fprintf(stderr, "You did something wrong!\n");
+        exit(1);
+    }
+
+    srand(7);
+
+    // Allocate the CSR structure
+    csr_array *csr;
+
+    // Parse graph file and store into a CSR format
+    if (file_format == 1)
+        csr = parseMetis(tmpchar, &num_nodes, &num_edges, directed);
+    else if (file_format == 0)
+        csr = parseCOO(tmpchar, &num_nodes, &num_edges, directed);
+    else {
+        printf("reserve for future");
+        exit(1);
+    }
+
+    // Allocate the vertex value array
+    int *node_value = (int *)malloc(num_nodes * sizeof(int));
+    if (!node_value) fprintf(stderr, "node_value malloc failed\n");
+    // Allocate the color array
+    int *color = (int *)malloc(num_nodes * sizeof(int));
+    if (!color) fprintf(stderr, "color malloc failed\n");
+
+    // Initialize all the colors to -1
+    // Randomize the value for each vertex
+    for (int i = 0; i < num_nodes; i++) {
+        color[i] = -1;
+        node_value[i] = rand() % RANGE;
+    }
+
+    int *row_d;
+    int *col_d;
+    int *max_d;
+    int *min_d;
+
+    int *color_d;
+    int *node_value_d;
+    int *stop_d;
+
+    // Create device-side buffers for the graph
+    err = hipMalloc(&row_d, num_nodes * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc row_d (size:%d) => %s\n",  num_nodes , hipGetErrorString(err));
+        return -1;
+    }
+    err = hipMalloc(&col_d, num_edges * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc col_d (size:%d): %s\n",  num_edges , hipGetErrorString(err));
+        return -1;
+    }
+
+    // Termination variable
+    err = hipMalloc(&stop_d, sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc stop_d (size:%d) => %s\n",  1 , hipGetErrorString(err));
+        return -1;
+    }
+
+    // Create device-side buffers for color
+    err = hipMalloc(&color_d, num_nodes * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc color_d (size:%d) => %s\n", num_nodes , hipGetErrorString(err));
+        return -1;
+    }
+    err = hipMalloc(&node_value_d, num_nodes * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc node_value_d (size:%d) => %s\n", num_nodes , hipGetErrorString(err));
+        return -1;
+    }
+    err = hipMalloc(&max_d, num_nodes * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc max_d (size:%d) => %s\n",  num_nodes , hipGetErrorString(err));
+        return -1;
+    }
+    err = hipMalloc(&min_d, num_nodes * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc min_d (size:%d) => %s\n",  num_nodes , hipGetErrorString(err));
+        return -1;
+    }
+
+    // Copy data to device-side buffers
+    double timer1 = gettime();
+
+#ifdef GEM5_FUSION
+    m5_work_begin(0, 0);
+#endif
+
+    err = hipMemcpy(color_d, color, num_nodes * sizeof(int), hipMemcpyHostToDevice);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMemcpy color_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+
+    err = hipMemcpy(row_d, csr->row_array, num_nodes * sizeof(int), hipMemcpyHostToDevice);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMemcpy row_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+
+    err = hipMemcpy(col_d, csr->col_array, num_edges * sizeof(int), hipMemcpyHostToDevice);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMemcpy col_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+
+    err = hipMemcpy(node_value_d, node_value, num_nodes * sizeof(int), hipMemcpyHostToDevice);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMemcpy node_value_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+
+    int block_size = 256;
+    int num_blocks = (num_nodes + block_size - 1) / block_size;
+
+    // Set up kernel dimensions
+    dim3 threads(block_size,  1, 1);
+    dim3 grid(num_blocks, 1,  1);
+
+    int stop = 1;
+    int graph_color = 1;
+
+    // Initialize arrays
+    hipLaunchKernelGGL(ini, dim3(grid), dim3(threads ), 0, 0, max_d, min_d, num_nodes);
+
+    // Main computation loop
+    double timer3 = gettime();
+
+    while (stop) {
+
+        stop = 0;
+
+        // Copy the termination variable to the device
+        err = hipMemcpy(stop_d, &stop, sizeof(int), hipMemcpyHostToDevice);
+        if (err != hipSuccess) {
+            fprintf(stderr, "ERROR: write stop_d: %s\n", hipGetErrorString(err));
+        }
+
+        // Launch the color kernel 1
+        hipLaunchKernelGGL(color1, dim3(grid), dim3(threads ), 0, 0, row_d, col_d, node_value_d, color_d,
+                                     stop_d, max_d, min_d, graph_color,
+                                     num_nodes, num_edges);
+
+        // Launch the color kernel 2
+        hipLaunchKernelGGL(color2, dim3(grid), dim3(threads ), 0, 0, node_value_d, color_d, max_d, min_d,
+                                     graph_color, num_nodes, num_edges);
+
+        err = hipMemcpy(&stop, stop_d, sizeof(int), hipMemcpyDeviceToHost);
+        if (err != hipSuccess) {
+            fprintf(stderr, "ERROR: read stop_d: %s\n", hipGetErrorString(err));
+        }
+
+        // Update the color label for the next iter
+        graph_color = graph_color + 2;
+
+    }
+    hipDeviceSynchronize();
+
+    double timer4 = gettime();
+
+    // Copy back the color array
+    err = hipMemcpy(color, color_d, num_nodes * sizeof(int), hipMemcpyDeviceToHost);
+    if (err != hipSuccess) {
+        printf("ERROR: hipMemcpy(): %s\n", hipGetErrorString(err));
+        return -1;
+    }
+
+#ifdef GEM5_FUSION
+    m5_work_end(0, 0);
+#endif
+
+    double timer2 = gettime();
+
+    // Print out color and timing statistics
+    printf("total number of colors used: %d\n", graph_color);
+    printf("kernel time = %lf ms\n", (timer4 - timer3) * 1000);
+    printf("kernel + memcpy time = %lf ms\n", (timer2 - timer1) * 1000);
+
+#if 1
+    // Dump the color array into an output file
+    print_vector(color, num_nodes);
+#endif
+
+    // Free host-side buffers
+    free(node_value);
+    free(color);
+    csr->freeArrays();
+    free(csr);
+
+    // Free CUDA buffers
+    hipFree(row_d);
+    hipFree(col_d);
+    hipFree(max_d);
+    hipFree(color_d);
+    hipFree(node_value_d);
+    hipFree(stop_d);
+
+    return 0;
+
+}
+
+void print_vector(int *vector, int num)
+{
+    FILE * fp = fopen("result.out", "w");
+    if (!fp) {
+        printf("ERROR: unable to open result.txt\n");
+    }
+
+    for (int i = 0; i < num; i++)
+        fprintf(fp, "%d: %d\n", i + 1, vector[i]);
+
+    fclose(fp);
+}

diff --git a/src/gpu/pannotia/color/kernel_max.h b/src/gpu/pannotia/color/kernel_max.h
new file mode 100644
index 0000000..f560a29
--- /dev/null
+++ b/src/gpu/pannotia/color/kernel_max.h

@@ -0,0 +1,139 @@
+/************************************************************************************\
+ *                                                                                  *
+ * Copyright � 2014 Advanced Micro Devices, Inc.                                    *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood                                *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair                           *
+ * All rights reserved.                                                             *
+ *                                                                                  *
+ * Redistribution and use in source and binary forms, with or without               *
+ * modification, are permitted provided that the following are met:                 *
+ *                                                                                  *
+ * You must reproduce the above copyright notice.                                   *
+ *                                                                                  *
+ * Neither the name of the copyright holder nor the names of its contributors       *
+ * may be used to endorse or promote products derived from this software            *
+ * without specific, prior, written permission from at least the copyright holder.  *
+ *                                                                                  *
+ * You must include the following terms in your license and/or other materials      *
+ * provided with the software.                                                      *
+ *                                                                                  *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"      *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE        *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A       *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER        *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,         *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS      *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN          *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING  *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY   *
+ * OF SUCH DAMAGE.                                                                  *
+ *                                                                                  *
+ * Without limiting the foregoing, the software may implement third party           *
+ * technologies for which you must obtain licenses from parties other than AMD.     *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall       *
+ * be responsible for obtaining the rights to use and/or distribute the applicable  *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder.                       *
+ *                                                                                  *
+ * If you use the software (in whole or in part), you shall adhere to all           *
+ * applicable U.S., European, and other export laws, including but not limited to   *
+ * the U.S. Export Administration Regulations ("EAR"�) (15 C.F.R Sections 730-774),  *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009.  Further, pursuant   *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a       *
+ * license granted by the United States Department of Commerce Bureau of Industry   *
+ * and Security or as otherwise permitted pursuant to a License Exception under     *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export,     *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or    *
+ * E:2 any restricted technology, software, or source code you receive hereunder,   *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such       *
+ * technology or software, if such foreign produced direct product is subject to    *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR).  For the most current Country Group   *
+ * listings, or for additional information about the EAR or your obligations under  *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's    *
+ * website at http://www.bis.doc.gov/.                                              *
+ *                                                                                  *
+\************************************************************************************/
+
+#ifndef KERNEL_MAX_H
+#define KERNEL_MAX_H
+
+#include "hip/hip_runtime.h"
+
+/**
+ * @brief   color kernel 1
+ * @param   row         CSR pointer array
+ * @param   col         CSR column array
+ * @param   node_value  Vertex value array
+ * @param   color_array Color value array
+ * @param   stop        Termination variable
+ * @param   max_d       Max array
+ * @param   color       Current color label
+ * @param   num_nodes   Number of vertices
+ * @param   num_edges   Number of edges
+ */
+__global__ void color1(int *row, int *col, int *node_value, int *color_array,
+                       int *stop, int *max_d, const int color,
+                       const int num_nodes, const int num_edges)
+{
+    // Get my thread workitem id
+    int tid = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
+
+    if (tid < num_nodes) {
+        // If the vertex is still not colored
+        if (color_array[tid] == -1) {
+
+            // Get the start and end pointer of the neighbor list
+            int start = row[tid];
+            int end;
+            if (tid + 1 < num_nodes)
+                end = row[tid + 1];
+            else
+                end = num_edges;
+
+            int maximum = -1;
+            // Navigate the neighbor list
+            for (int edge = start; edge < end; edge++) {
+                // Determine if the vertex value is the maximum in the neighborhood
+                if (color_array[col[edge]] == -1 && start != end - 1) {
+                    *stop = 1;
+                    if (node_value[col[edge]] > maximum)
+                        maximum = node_value[col[edge]];
+                }
+            }
+            // Assign maximum the max array
+            max_d[tid] = maximum;
+        }
+    }
+}
+
+
+/**
+ * @brief   color kernel 2
+ * @param   node_value  Vertex value array
+ * @param   color_array Color value array
+ * @param   max_d       Max array
+ * @param   color       Current color label
+ * @param   num_nodes   Number of vertices
+ * @param   num_edges   Number of edges
+ */
+__global__ void color2(int *node_value, int *color_array, int *max_d,
+                       const int color, const int num_nodes,
+                       const int num_edges)
+{
+    // Get my workitem id
+    int tid = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
+
+    if (tid < num_nodes) {
+        // If the vertex is still not colored
+        if (color_array[tid] == -1) {
+            if (node_value[tid] >= max_d[tid])
+                // Assign a color
+                color_array[tid] = color;
+        }
+    }
+
+}
+
+#endif // KERNEL_MAX_H

diff --git a/src/gpu/pannotia/color/kernel_maxmin.h b/src/gpu/pannotia/color/kernel_maxmin.h
new file mode 100644
index 0000000..eb93f80
--- /dev/null
+++ b/src/gpu/pannotia/color/kernel_maxmin.h

@@ -0,0 +1,167 @@
+/************************************************************************************\
+ *                                                                                  *
+ * Copyright � 2014 Advanced Micro Devices, Inc.                                    *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood                                *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair                           *
+ * All rights reserved.                                                             *
+ *                                                                                  *
+ * Redistribution and use in source and binary forms, with or without               *
+ * modification, are permitted provided that the following are met:                 *
+ *                                                                                  *
+ * You must reproduce the above copyright notice.                                   *
+ *                                                                                  *
+ * Neither the name of the copyright holder nor the names of its contributors       *
+ * may be used to endorse or promote products derived from this software            *
+ * without specific, prior, written permission from at least the copyright holder.  *
+ *                                                                                  *
+ * You must include the following terms in your license and/or other materials      *
+ * provided with the software.                                                      *
+ *                                                                                  *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"      *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE        *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A       *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER        *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,         *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS      *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN          *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING  *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY   *
+ * OF SUCH DAMAGE.                                                                  *
+ *                                                                                  *
+ * Without limiting the foregoing, the software may implement third party           *
+ * technologies for which you must obtain licenses from parties other than AMD.     *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall       *
+ * be responsible for obtaining the rights to use and/or distribute the applicable  *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder.                       *
+ *                                                                                  *
+ * If you use the software (in whole or in part), you shall adhere to all           *
+ * applicable U.S., European, and other export laws, including but not limited to   *
+ * the U.S. Export Administration Regulations ("EAR"�) (15 C.F.R Sections 730-774),  *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009.  Further, pursuant   *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a       *
+ * license granted by the United States Department of Commerce Bureau of Industry   *
+ * and Security or as otherwise permitted pursuant to a License Exception under     *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export,     *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or    *
+ * E:2 any restricted technology, software, or source code you receive hereunder,   *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such       *
+ * technology or software, if such foreign produced direct product is subject to    *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR).  For the most current Country Group   *
+ * listings, or for additional information about the EAR or your obligations under  *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's    *
+ * website at http://www.bis.doc.gov/.                                              *
+ *                                                                                  *
+\************************************************************************************/
+
+#ifndef KERNEL_MAXMIN_H
+#define KERNEL_MAXMIN_H
+
+#include "hip/hip_runtime.h"
+
+#define BIG_NUM 999999
+
+/**
+ * @brief   color kernel 1
+ * @param   row         CSR pointer array
+ * @param   col         CSR column array
+ * @param   node_value  Vertex value array
+ * @param   color_array Color value array
+ * @param   stop        Termination variable
+ * @param   max_d       Max array
+ * @param   max_d       Min array
+ * @param   color       Current color label
+ * @param   num_nodes   Number of vertices
+ * @param   num_edges   Number of edges
+ */
+__global__ void color1(int *row, int *col, int *node_value, int *color_array,
+                       int *stop, int *max_d, int *min_d, const int color,
+                       const int num_nodes, const int num_edges)
+{
+    // Get my workitem id
+    int tid = blockIdx.x * blockDim.x + threadIdx.x;
+
+    if (tid < num_nodes) {
+        // If the vertex is not colored
+        if (color_array[tid] == -1) {
+
+            // Get the start and end pointers for the neighbor list
+            int start = row[tid];
+            int end;
+            if (tid + 1 < num_nodes)
+                end = row[tid + 1];
+            else
+                end = num_edges;
+
+            int maximum = -1;
+            int minimum = BIG_NUM;
+            // Navigate the neighborlist
+            for (int edge = start; edge < end; edge++) {
+                if (color_array[col[edge]] == -1 && start != end - 1) {
+                    *stop = 1;
+                    // Determine if the vertex value is the maximum/minimum in the neighborhood
+                    if (node_value[col[edge]] > maximum)
+                        maximum = node_value[col[edge]];
+                    if (node_value[col[edge]] < minimum)
+                        minimum = node_value[col[edge]];
+                }
+            }
+            // Assign the maximum/miminum value to max/min array
+            max_d[tid] = maximum;
+            min_d[tid] = minimum;
+        }
+    }
+}
+
+/**
+ * @brief   color kernel 2
+ * @param   node_value  Vertex value array
+ * @param   color_array Color value array
+ * @param   max_d       Max array
+ * @param   min_d       Min array
+ * @param   color       Current color label
+ * @param   num_nodes   Number of vertices
+ * @param   num_edges   Number of edges
+ */
+__global__ void color2(int *node_value, int *color_array, int *max_d,
+                       int *min_d, const int color, const int num_nodes,
+                       const int num_edges)
+{
+    // Get my workitem id
+    int tid = blockIdx.x * blockDim.x + threadIdx.x;
+
+    if (tid < num_nodes) {
+        // If the vertex is still not colored
+        if (color_array[tid] == -1) {
+            // Assign a color
+            if (node_value[tid] >= max_d[tid])
+                color_array[tid] = color;
+            if (node_value[tid] <= min_d[tid])
+                color_array[tid] = color + 1;
+        }
+    }
+
+}
+
+/**
+ * @brief   init kernel
+ * @param   max_d       Max array
+ * @param   min_d       Min array
+ * @param   num_nodes   Number of vertices
+ */
+__global__ void ini(int *max_d, int *min_d, const int num_nodes)
+{
+    // Get my workitem id
+    int tid = blockIdx.x * blockDim.x + threadIdx.x;
+
+    // Initialize max: -1 and min: Big_num
+    if (tid < num_nodes) {
+        max_d[tid] = -1;
+        min_d[tid] = BIG_NUM;
+    }
+
+}
+
+#endif

diff --git a/src/gpu/pannotia/fw/Floyd-Warshall.cpp b/src/gpu/pannotia/fw/Floyd-Warshall.cpp
new file mode 100644
index 0000000..75befaa
--- /dev/null
+++ b/src/gpu/pannotia/fw/Floyd-Warshall.cpp

@@ -0,0 +1,237 @@
+/************************************************************************************\ 
+ *                                                                                  *
+ * Copyright © 2014 Advanced Micro Devices, Inc.                                    *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood                                *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair                           *
+ * All rights reserved.                                                             *
+ *                                                                                  *
+ * Redistribution and use in source and binary forms, with or without               *
+ * modification, are permitted provided that the following are met:                 *
+ *                                                                                  *
+ * You must reproduce the above copyright notice.                                   *
+ *                                                                                  *
+ * Neither the name of the copyright holder nor the names of its contributors       *
+ * may be used to endorse or promote products derived from this software            *
+ * without specific, prior, written permission from at least the copyright holder.  *
+ *                                                                                  *
+ * You must include the following terms in your license and/or other materials      *
+ * provided with the software.                                                      *
+ *                                                                                  *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"      *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE        *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A       *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER        *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,         *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS      *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN          *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING  *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY   *
+ * OF SUCH DAMAGE.                                                                  *
+ *                                                                                  *
+ * Without limiting the foregoing, the software may implement third party           *
+ * technologies for which you must obtain licenses from parties other than AMD.     *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall       *
+ * be responsible for obtaining the rights to use and/or distribute the applicable  *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder.                       *
+ *                                                                                  *
+ * If you use the software (in whole or in part), you shall adhere to all           *
+ * applicable U.S., European, and other export laws, including but not limited to   *
+ * the U.S. Export Administration Regulations ("EAR"�) (15 C.F.R Sections 730-774),  *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009.  Further, pursuant   *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a       *
+ * license granted by the United States Department of Commerce Bureau of Industry   *
+ * and Security or as otherwise permitted pursuant to a License Exception under     *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export,     *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or    *
+ * E:2 any restricted technology, software, or source code you receive hereunder,   *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such       *
+ * technology or software, if such foreign produced direct product is subject to    *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR).  For the most current Country Group   *
+ * listings, or for additional information about the EAR or your obligations under  *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's    *
+ * website at http://www.bis.doc.gov/.                                              *
+ *                                                                                  *
+\************************************************************************************/
+
+#include "hip/hip_runtime.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+//#include <sys/time.h>
+//#include <omp.h>
+#include "../graph_parser/util.h"
+#include "kernel.h"
+#include "parse.h"
+
+#ifdef GEM5_FUSION
+#include <stdint.h>
+#include <gem5/m5ops.h>
+#endif
+
+#ifdef GEM5_FUSION
+#define MAX_ITERS 192
+#else
+#include <stdint.h>
+#define MAX_ITERS INT32_MAX
+#endif
+
+#define BIGNUM 999999
+#define TRUE 1
+#define FALSE 0
+
+int main(int argc, char **argv)
+{
+    char *tmpchar;
+    bool verify_results = false;
+
+    int num_nodes;
+    int num_edges;
+
+    hipError_t err = hipSuccess;
+
+    // Get program input
+    if (argc >= 2) {
+        tmpchar = argv[1];  // Graph input file
+    } else {
+        fprintf(stderr, "You did something wrong!\n");
+        exit(1);
+    }
+
+    if (argc >= 3) {
+        if (atoi(argv[2]) == 1) {
+            verify_results = true;
+        }
+    }
+
+    // Parse the adjacency matrix
+    int *adjmatrix = parse_graph_file(&num_nodes, &num_edges, tmpchar);
+    int dim = num_nodes;
+
+    // Initialize the distance matrix
+    int *distmatrix = (int *)malloc(dim * dim * sizeof(int));
+    if (!distmatrix) fprintf(stderr, "malloc failed - distmatrix\n");
+
+    // Initialize the result matrix
+    int *result = (int *)malloc(dim * dim * sizeof(int));
+    if (!result) fprintf(stderr, "malloc failed - result\n");
+
+    // TODO: Now only supports integer weights
+    // Setup the input matrix
+    for (int i = 0 ; i < dim; i++) {
+        for (int j = 0 ; j < dim; j++) {
+            if (i == j) {
+                // Diagonal
+                distmatrix[i * dim + j] = 0;
+            } else if (adjmatrix[i * dim + j] == -1) {
+                // Without edge
+                distmatrix[i * dim + j] = BIGNUM;
+            } else {
+                // With edge
+                distmatrix[i * dim + j] = adjmatrix[i * dim + j];
+            }
+        }
+    }
+
+    int *dist_d;
+    int *next_d;
+
+    // Create device-side FW buffers
+    err = hipMalloc(&dist_d, dim * dim * sizeof(int));
+    if (err != hipSuccess) {
+        printf("ERROR: hipMalloc dist_d (size:%d) => %d\n",  dim * dim , err);
+        return -1;
+    }
+    err = hipMalloc(&next_d, dim * dim * sizeof(int));
+    if (err != hipSuccess) {
+        printf("ERROR: hipMalloc next_d (size:%d) => %d\n",  dim * dim , err);
+        return -1;
+    }
+
+    //double timer1 = gettime();
+
+#ifdef GEM5_FUSION
+    m5_work_begin(0, 0);
+#endif
+
+    // Copy the dist matrix to the device
+    err = hipMemcpy(dist_d, distmatrix, dim * dim * sizeof(int), hipMemcpyHostToDevice);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMemcpy feature_d (size:%d) => %d\n", dim * dim, err);
+        return -1;
+    }
+
+    // Work dimension
+    dim3 threads(16, 16, 1);
+    dim3 grid(num_nodes / 16, num_nodes / 16, 1);
+
+    //double timer3 = gettime();
+    // Main computation loop
+    for (int k = 1; k < dim && k < MAX_ITERS; k++) {
+        hipLaunchKernelGGL(HIP_KERNEL_NAME(floydwarshall), dim3(grid), dim3(threads), 0, 0, dist_d, next_d, dim, k);
+    }
+    hipDeviceSynchronize();
+
+    //double timer4 = gettime();
+    err = hipMemcpy(result, dist_d, dim * dim * sizeof(int), hipMemcpyDeviceToHost);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR:  read back dist_d %d failed\n", err);
+        return -1;
+    }
+
+#ifdef GEM5_FUSION
+    m5_work_end(0, 0);
+#endif
+
+    //double timer2 = gettime();
+
+    //printf("kernel time = %lf ms\n", (timer4 - timer3) * 1000);
+    //printf("kernel + memcpy time = %lf ms\n", (timer2 - timer1) * 1000);
+
+    if (verify_results) {
+        // Below is the verification part
+        // Calculate on the CPU
+        int *dist = distmatrix;
+        for (int k = 0; k < dim; k++) {
+            for (int i = 0; i < dim; i++) {
+                for (int j = 0; j < dim; j++) {
+                    if (dist[i * dim + k] + dist[k * dim + j] < dist[i * dim + j]) {
+                        dist[i * dim + j] = dist[i * dim + k] + dist[k * dim + j];
+                    }
+                }
+            }
+        }
+
+        // Compare results
+        bool check_flag = 0;
+        for (int i = 0; i < dim; i++) {
+            for (int j = 0; j < dim; j++) {
+                if (dist[i * dim + j] !=  result[i * dim + j]) {
+                    fprintf(stderr, "mismatch at (%d, %d)\n", i, j);
+                    check_flag = 1;
+                }
+            }
+        }
+        // If there is mismatch, report
+        if (check_flag) {
+            fprintf(stdout, "WARNING: Produced incorrect results!\n");
+        } else {
+            printf("Results are correct!\n");
+        }
+    }
+
+    printf("Finishing Floyd-Warshall\n");
+
+    // Free host-side buffers
+    free(adjmatrix);
+    free(result);
+    free(distmatrix);
+
+    // Free CUDA buffers
+    hipFree(dist_d);
+    hipFree(next_d);
+
+    return 0;
+}

diff --git a/src/gpu/pannotia/fw/Makefile b/src/gpu/pannotia/fw/Makefile
new file mode 100644
index 0000000..6158bac
--- /dev/null
+++ b/src/gpu/pannotia/fw/Makefile

@@ -0,0 +1,11 @@
+default:
+	make -f Makefile.default
+
+clean:
+	make -f Makefile.default clean
+
+gem5-fusion:
+	make -f Makefile.gem5-fusion
+
+clean-gem5-fusion:
+	make -f Makefile.gem5-fusion clean

diff --git a/src/gpu/pannotia/fw/Makefile.default b/src/gpu/pannotia/fw/Makefile.default
new file mode 100644
index 0000000..afdfcb5
--- /dev/null
+++ b/src/gpu/pannotia/fw/Makefile.default

@@ -0,0 +1,19 @@
+HIP_PATH ?= /opt/rocm/hip
+HIPCC = $(HIP_PATH)/bin/hipcc
+EXECUTABLE = fw_hip
+OPTS = -O3
+
+BIN_DIR ?= ./bin
+
+all: $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR)/$(EXECUTABLE): Floyd-Warshall.cpp parse.cpp ../graph_parser/util.cpp $(BIN_DIR)
+	$(HIPCC) $(OPTS) --amdgpu-target=gfx801,gfx803,gfx906 $(CXXFLAGS) parse.cpp ../graph_parser/util.cpp Floyd-Warshall.cpp -o $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR):
+	mkdir -p $(BIN_DIR)
+
+clean:
+	rm -rf $(BIN_DIR)
+
+.PHONY: square clean

diff --git a/src/gpu/pannotia/fw/Makefile.gem5-fusion b/src/gpu/pannotia/fw/Makefile.gem5-fusion
new file mode 100644
index 0000000..e9fbd79
--- /dev/null
+++ b/src/gpu/pannotia/fw/Makefile.gem5-fusion

@@ -0,0 +1,23 @@
+HIP_PATH ?= /opt/rocm/hip
+HIPCC = $(HIP_PATH)/bin/hipcc
+
+# these are needed for m5ops
+# TODO: Need some sort of explicit PATH?  Read in?
+GEM5_PATH ?= /nobackup/sinclair/gem5
+CFLAGS += -I$(GEM5_PATH)/include
+LDFLAGS += -L$(GEM5_PATH)/util/m5/build/x86/out -lm5
+
+BIN_DIR ?= ./bin
+
+all: $(BIN_DIR)/fw_hip.gem5
+
+$(BIN_DIR)/fw_hip.gem5: Floyd-Warshall.cpp parse.cpp ../graph_parser/util.cpp $(BIN_DIR)
+	$(HIPCC) -O3 --amdgpu-target=gfx801,gfx803 $(CXXFLAGS) parse.cpp ../graph_parser/util.cpp Floyd-Warshall.cpp -DGEM5_FUSION -o $(BIN_DIR)/fw_hip.gem5 $(CFLAGS) $(LDFLAGS)
+
+$(BIN_DIR):
+	mkdir -p $(BIN_DIR)
+
+clean:
+	rm -rf $(BIN_DIR)
+
+.PHONY: square clean

diff --git a/src/gpu/pannotia/fw/README.md b/src/gpu/pannotia/fw/README.md
new file mode 100644
index 0000000..39424bb
--- /dev/null
+++ b/src/gpu/pannotia/fw/README.md

@@ -0,0 +1,49 @@
+---
+title: Pannotia FW Test
+tags:
+    - x86
+    - amdgpu
+layout: default
+permalink: resources/pannotia/fw
+shortdoc: >
+    Resources to build a disk image with the GCN3 Pannotia FW workload.
+---
+
+Floyd-Warshall (FW) is a graph analytics application that is part of the Pannotia benchmark suite.  It is a classical dynamic-programming algorithm designed to solve the all-pairs shortest path (APSP) problem.  The provided version is for use with the gpu-compute model of gem5.  Thus, it has been ported from the prior CUDA and OpenCL variants to HIP, and validated on a Vega-class AMD GPU.
+
+Compiling FW, compiling the GCN3_X86/Vega_X86 versions of gem5, and running FW on gem5 is dependent on the gcn-gpu docker image, `util/dockerfiles/gcn-gpu/Dockerfile` on the [gem5 stable branch](https://gem5.googlesource.com/public/gem5/+/refs/heads/stable).
+
+## Compilation and Running
+
+To compile FW:
+
+```
+cd src/gpu/pannotia/fw
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu make gem5-fusion
+```
+
+If you use the Makefile.default file instead, the Makefile will generate code designed to run on the real GPU instead.  Moreover, note that Makefile.gem5-fusion requires you to set the GEM5_ROOT variable (either on the command line or by modifying the Makefile), because the Pannotia applications have been updated to use [m5ops](https://www.gem5.org/documentation/general_docs/m5ops/).  By default, the Makefile builds for gfx801 and gfx803, and is placed in the src/gpu/pannotia/fw/bin folder.
+
+## Compiling GCN3_X86/gem5.opt
+
+FW is a GPU application, which requires that gem5 is built with the GCN3_X86 (or Vega_X86, although this has been less heavily tested) architecture.   The test is run with the GCN3_X86 gem5 variant, compiled using the gcn-gpu docker image:
+
+```
+git clone https://gem5.googlesource.com/public/gem5
+cd gem5
+docker run -u $UID:$GID --volume $(pwd):$(pwd) -w $(pwd) gcr.io/gem5-test/gcn-gpu:latest scons build/GCN3_X86/gem5.opt -j <num cores>
+```
+
+## Running FW on GCN3_X86/gem5.opt
+
+# Assuming gem5 and gem5-resources are in your working directory
+```
+wget http://dist.gem5.org/dist/develop/datasets/pannotia/bc/1k_128k.gr
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu gem5/build/GCN3_X86/gem5.opt gem5/configs/example/apu_se.py -n3 --mem-size=8GB --benchmark-root=gem5-resources/src/gpu/pannotia/fw/bin -c fw_hip.gem5 --options="1k_128k.gr"
+```
+
+Note that the datasets from the original Pannotia suite have been uploaded to: <http://dist.gem5.org/dist/develop/datasets/pannotia>.  We recommend you start with the 1k_128k.gr input (<http://dist.gem5.org/dist/develop/datasets/pannotia/fw/1k_128k.gr>), as this is the smallest input that can be run with FW.  Note that 1k_128k is not designed for FW specifically though -- the above link has larger graphs designed to run with FW that you should consider using for larger experiments.
+
+## Pre-built binary
+
+A pre-built binary will be added soon.

diff --git a/src/gpu/pannotia/fw/kernel.h b/src/gpu/pannotia/fw/kernel.h
new file mode 100644
index 0000000..3474a2f
--- /dev/null
+++ b/src/gpu/pannotia/fw/kernel.h

@@ -0,0 +1,88 @@
+/************************************************************************************\
+ *                                                                                  *
+ * Copyright © 2014 Advanced Micro Devices, Inc.                                    *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood                                *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair                           *
+ * All rights reserved.                                                             *
+ *                                                                                  *
+ * Redistribution and use in source and binary forms, with or without               *
+ * modification, are permitted provided that the following are met:                 *
+ *                                                                                  *
+ * You must reproduce the above copyright notice.                                   *
+ *                                                                                  *
+ * Neither the name of the copyright holder nor the names of its contributors       *
+ * may be used to endorse or promote products derived from this software            *
+ * without specific, prior, written permission from at least the copyright holder.  *
+ *                                                                                  *
+ * You must include the following terms in your license and/or other materials      *
+ * provided with the software.                                                      *
+ *                                                                                  *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"      *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE        *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A       *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER        *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,         *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS      *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN          *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING  *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY   *
+ * OF SUCH DAMAGE.                                                                  *
+ *                                                                                  *
+ * Without limiting the foregoing, the software may implement third party           *
+ * technologies for which you must obtain licenses from parties other than AMD.     *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall       *
+ * be responsible for obtaining the rights to use and/or distribute the applicable  *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder.                       *
+ *                                                                                  *
+ * If you use the software (in whole or in part), you shall adhere to all           *
+ * applicable U.S., European, and other export laws, including but not limited to   *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774),  *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009.  Further, pursuant   *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a       *
+ * license granted by the United States Department of Commerce Bureau of Industry   *
+ * and Security or as otherwise permitted pursuant to a License Exception under     *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export,     *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or    *
+ * E:2 any restricted technology, software, or source code you receive hereunder,   *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such       *
+ * technology or software, if such foreign produced direct product is subject to    *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR).  For the most current Country Group   *
+ * listings, or for additional information about the EAR or your obligations under  *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's    *
+ * website at http://www.bis.doc.gov/.                                              *
+ *                                                                                  *
+\************************************************************************************/
+
+#ifndef KERNEL_H
+#define KERNEL_H
+
+#include "hip/hip_runtime.h"
+
+/**
+ * @brief   naive floyd warshal kernel
+ * @param   dist  Distance array
+ * @param   next  Next array
+ * @param   dim   Dimension of the 2-D matrix
+ * @param   k     Current iteration number
+ */
+__global__ void
+floydwarshall(int *dist, int *next, int dim, int k)
+{
+    // Get my workitem id x_dim
+    int i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+    // Get my workitem id y_dim
+    int j = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
+
+    if (i < dim && j < dim) {
+        // if (dist i -> k + k -> j) update the dist i-> j
+        if (dist[i * dim + k] + dist[k * dim + j] < dist[i * dim + j]) {
+            dist[i * dim + j] = dist[i * dim + k] + dist[k * dim + j];
+            next[i * dim + j] = k;
+        }
+    }
+}
+
+#endif // KERNEL_H

diff --git a/src/gpu/pannotia/fw/parse.cpp b/src/gpu/pannotia/fw/parse.cpp
new file mode 100644
index 0000000..eab11ea
--- /dev/null
+++ b/src/gpu/pannotia/fw/parse.cpp

@@ -0,0 +1,89 @@
+
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Test value
+bool test_value(int* array, int dim, int i, int j)
+{
+
+    // TODO: Current does not support multiple edges between two vertices
+    if (array[i * dim + j] != -1) {
+        // fprintf(stderr, "Possibly duplicate records at (%d, %d)\n", i, j);
+        return 0;
+    } else
+        return 1;
+}
+
+// Set value (i, j) = value
+void set_value(int* array, int dim, int i, int j, int value)
+{
+    array[i * dim + j] = value;
+}
+
+int* parse_graph_file(int *num_nodes, int *num_edges, char* tmpchar)
+{
+
+    int *adjmatrix;
+    int cnt = 0;
+    unsigned int lineno = 0;
+    char line[128], sp[2], a, p;
+
+    FILE *fptr;
+
+    fptr = fopen(tmpchar, "r");
+
+    if (!fptr) {
+        fprintf(stderr, "Error when opening file: %s\n", tmpchar);
+        perror("fopen Error:");
+        exit(1);
+    }
+
+    printf("Opening file: %s\n", tmpchar);
+
+    while (fgets(line, 100, fptr)) {
+        int head, tail, weight;
+        long long unsigned size;
+        switch (line[0]) {
+        case 'c':
+            break;
+        case 'p':
+            sscanf(line, "%c %s %d %d", &p, sp, num_nodes, num_edges);
+            printf("Read from file: num_nodes = %d, num_edges = %d\n", *num_nodes, *num_edges);
+            size = (long long unsigned)(*num_nodes + 1) * (long long unsigned)(*num_nodes + 1);
+            if (size > UINT_MAX) {
+                fclose(fptr);
+                fprintf(stderr, "ERROR: Too many nodes, huge adjacency matrix\n");
+                exit(0);
+            }
+            adjmatrix = (int *)malloc(size * sizeof(int));
+            memset(adjmatrix, -1 , size * sizeof(int));
+            break;
+        case 'a':
+            sscanf(line, "%c %d %d %d", &a, &head, &tail, &weight);
+            if (tail == head) printf("reporting self loop\n");
+            if (test_value(adjmatrix, *num_nodes + 1, head, tail)) {
+                set_value(adjmatrix, *num_nodes + 1, head, tail, weight);
+                cnt++;
+            }
+
+#ifdef VERBOSE
+            printf("Adding edge: %d ==> %d ( %d )\n", head, tail, weight);
+#endif
+            break;
+        default:
+            fprintf(stderr, "exiting loop\n");
+            break;
+        }
+        lineno++;
+    }
+
+    *num_edges = cnt;
+    printf("Actual added edges: %d\n", cnt);
+
+    fclose(fptr);
+
+    return adjmatrix;
+
+}

diff --git a/src/gpu/pannotia/fw/parse.h b/src/gpu/pannotia/fw/parse.h
new file mode 100644
index 0000000..01001e2
--- /dev/null
+++ b/src/gpu/pannotia/fw/parse.h

@@ -0,0 +1,6 @@
+#ifndef __FW_PARSE_H__
+#define __FW_PARSE_H__
+
+int* parse_graph_file(int *num_nodes, int *num_edges, char* tmpchar);
+
+#endif

diff --git a/src/gpu/pannotia/graph_parser/parse.cpp b/src/gpu/pannotia/graph_parser/parse.cpp
new file mode 100644
index 0000000..80fb6f4
--- /dev/null
+++ b/src/gpu/pannotia/graph_parser/parse.cpp

@@ -0,0 +1,882 @@
+/************************************************************************************\ 
+ *                                                                                  *
+ * Copyright � 2014 Advanced Micro Devices, Inc.                                    *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood                                *
+ * All rights reserved.                                                             *
+ *                                                                                  *
+ * Redistribution and use in source and binary forms, with or without               *
+ * modification, are permitted provided that the following are met:                 *
+ *                                                                                  *
+ * You must reproduce the above copyright notice.                                   *
+ *                                                                                  *
+ * Neither the name of the copyright holder nor the names of its contributors       *
+ * may be used to endorse or promote products derived from this software            *
+ * without specific, prior, written permission from at least the copyright holder.  *
+ *                                                                                  *
+ * You must include the following terms in your license and/or other materials      *
+ * provided with the software.                                                      *
+ *                                                                                  *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"      *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE        *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A       *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER        *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,         *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS      *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN          *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING  *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY   *
+ * OF SUCH DAMAGE.                                                                  *
+ *                                                                                  *
+ * Without limiting the foregoing, the software may implement third party           *
+ * technologies for which you must obtain licenses from parties other than AMD.     *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall       *
+ * be responsible for obtaining the rights to use and/or distribute the applicable  *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder.                       *
+ *                                                                                  *
+ * If you use the software (in whole or in part), you shall adhere to all           *
+ * applicable U.S., European, and other export laws, including but not limited to   *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774),  *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009.  Further, pursuant   *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a       *
+ * license granted by the United States Department of Commerce Bureau of Industry   *
+ * and Security or as otherwise permitted pursuant to a License Exception under     *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export,     *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or    *
+ * E:2 any restricted technology, software, or source code you receive hereunder,   *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such       *
+ * technology or software, if such foreign produced direct product is subject to    *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR).  For the most current Country Group   *
+ * listings, or for additional information about the EAR or your obligations under  *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's    *
+ * website at http://www.bis.doc.gov/.                                              *
+ *                                                                                  *
+\************************************************************************************/
+
+#include "parse.h"
+#include "stdlib.h"
+#include "stdio.h"
+#include <string.h>
+#include <algorithm>
+#include <sys/time.h>
+#include "util.h"
+
+bool doCompare(CooTuple elem1, CooTuple elem2)
+{
+    if (elem1.row < elem2.row) {
+        return true;
+    }
+    return false;
+}
+
+ell_array *csr2ell(csr_array *csr, int num_nodes, int num_edges, int fill)
+{
+    int size, maxheight = 0;
+    for (int i = 0; i < num_nodes; i++) {
+        size = csr->row_array[i + 1] - csr->row_array[i];
+        if (size > maxheight)
+            maxheight = size;
+    }
+
+    ell_array *ell = (ell_array *)malloc(sizeof(ell_array));
+    if (!ell) printf("malloc failed");
+
+    ell->max_height = maxheight;
+    ell->num_nodes = num_nodes;
+
+    ell->col_array = (int*)malloc(sizeof(int) * maxheight * num_nodes);
+    ell->data_array = (int*)malloc(sizeof(int) * maxheight * num_nodes);
+
+
+    for (int i = 0; i < maxheight * num_nodes; i++) {
+        ell->col_array[i] = 0;
+        ell->data_array[i] = fill;
+    }
+
+    for (int i = 0; i < num_nodes; i++) {
+        int start = csr->row_array[i];
+        int end = csr->row_array[i + 1];
+        int lastcolid = 0;
+        for (int j = start; j < end; j++) {
+            int colid = csr->col_array[j];
+            int data = csr->data_array[j];
+            ell->col_array[i + (j - start) * num_nodes] = colid;
+            ell->data_array[i + (j - start) * num_nodes] = data;
+            lastcolid = colid;
+        }
+        for (int j = end; j < start + maxheight; j++) {
+            ell->col_array[i + (j - start) * num_nodes] = lastcolid;
+            ell->data_array[i + (j - start) * num_nodes] = fill;
+        }
+    }
+
+    return ell;
+
+}
+
+csr_array *parseMetis(char* tmpchar, int *p_num_nodes, int *p_num_edges, bool directed)
+{
+
+    int cnt = 0;
+    unsigned int lineno = 0;
+    char *line = (char *)malloc(8192);
+    int num_edges = 0, num_nodes = 0;
+
+    FILE *fptr;
+    CooTuple *tuple_array = NULL;
+
+    fptr = fopen(tmpchar, "r");
+    if (!fptr) {
+        fprintf(stderr, "Error when opening file: %s\n", tmpchar);
+        exit(1);
+    }
+
+    printf("Opening file: %s\n", tmpchar);
+
+    while (fgets(line, 8192, fptr)) {
+        int head, tail, weight = 0;
+        CooTuple temp;
+
+        if (line[0] == '%') continue; // skip comment lines
+
+        if (lineno == 0) { //the first line
+
+            sscanf(line, "%d %d", p_num_nodes, p_num_edges);
+            if (!directed) {
+                *p_num_edges = *p_num_edges * 2;
+                printf("This is an undirected graph\n");
+            } else {
+                printf("This is a directed graph\n");
+            }
+            num_nodes = *p_num_nodes;
+            num_edges = *p_num_edges;
+
+
+            printf("Read from file: num_nodes = %d, num_edges = %d\n", num_nodes, num_edges);
+            tuple_array = (CooTuple *)malloc(sizeof(CooTuple) * num_edges);
+        } else if (lineno > 0) { //from the second line
+
+            char *pch;
+            pch = strtok(line , " ,.-");
+            while (pch != NULL) {
+                head = lineno;
+                tail = atoi(pch);
+                if (tail <= 0)  break;
+
+                if (tail == head) printf("reporting self loop: %d, %d\n", lineno + 1, lineno);
+
+                temp.row = head - 1;
+                temp.col = tail - 1;
+                temp.val = weight;
+
+                tuple_array[cnt++] = temp;
+
+                pch = strtok(NULL, " ,.-");
+
+            }
+        }
+
+#ifdef VERBOSE
+        printf("Adding edge: %d ==> %d ( %d )\n", head, tail, weight);
+#endif
+
+        lineno++;
+
+    }
+
+    // Metis files are stored in row-order, so sorting is unnecessary
+    // std::stable_sort(tuple_array, tuple_array + num_edges, doCompare);
+
+#ifdef VERBOSE
+    for (int i = 0 ; i < num_edges; i++) {
+        printf("%d: %d, %d, %d\n", i, tuple_array[i].row, tuple_array[i].col, tuple_array[i].val);
+    }
+#endif
+
+    int *row_array = (int *)malloc((num_nodes + 1) * sizeof(int));
+    int *col_array = (int *)malloc(num_edges * sizeof(int));
+    int *data_array = (int *)malloc(num_edges * sizeof(int));
+
+    int row_cnt = 0;
+    int prev = -1;
+    int idx;
+    for (idx = 0; idx < num_edges; idx++) {
+        int curr = tuple_array[idx].row;
+        if (curr != prev) {
+            row_array[row_cnt++] = idx;
+            prev = curr;
+        }
+        col_array[idx] = tuple_array[idx].col;
+        data_array[idx] = tuple_array[idx].val;
+
+    }
+    row_array[row_cnt] = idx;
+
+    csr_array *csr = (csr_array *)malloc(sizeof(csr_array));
+    memset(csr, 0, sizeof(csr_array));
+    csr->row_array = row_array;
+    csr->col_array = col_array;
+    csr->data_array = data_array;
+
+    fclose(fptr);
+    free(tuple_array);
+    free(line);
+
+    return csr;
+
+}
+
+
+csr_array *parseCOO(char* tmpchar, int *p_num_nodes, int *p_num_edges, bool directed)
+{
+    int cnt = 0;
+    unsigned int lineno = 0;
+    char line[128], sp[2], a, p;
+    int num_nodes = 0, num_edges = 0;
+
+    FILE *fptr;
+    CooTuple *tuple_array = NULL;
+
+    fptr = fopen(tmpchar, "r");
+    if (!fptr) {
+        fprintf(stderr, "Error when opening file: %s\n", tmpchar);
+        exit(1);
+    }
+
+    printf("Opening file: %s\n", tmpchar);
+
+    while (fgets(line, 100, fptr)) {
+        int head, tail, weight;
+        switch (line[0]) {
+        case 'c':
+            break;
+        case 'p':
+            sscanf(line, "%c %s %d %d", &p, sp, p_num_nodes, p_num_edges);
+
+            if (!directed) {
+                *p_num_edges = *p_num_edges * 2;
+                printf("This is an undirected graph\n");
+            } else {
+                printf("This is a directed graph\n");
+            }
+
+            num_nodes = *p_num_nodes;
+            num_edges = *p_num_edges;
+
+            printf("Read from file: num_nodes = %d, num_edges = %d\n", num_nodes, num_edges);
+            tuple_array = (CooTuple *)malloc(sizeof(CooTuple) * num_edges);
+            break;
+
+        case 'a':
+            sscanf(line, "%c %d %d %d", &a, &head, &tail, &weight);
+            if (tail == head) printf("reporting self loop\n");
+            CooTuple temp;
+            temp.row = head - 1;
+            temp.col = tail - 1;
+            temp.val = weight;
+            tuple_array[cnt++] = temp;
+            if (!directed) {
+                temp.row = tail - 1;
+                temp.col = head - 1;
+                temp.val = weight;
+                tuple_array[cnt++] = temp;
+            }
+
+#ifdef VERBOSE
+            printf("Adding edge: %d ==> %d ( %d )\n", head, tail, weight);
+#endif
+            break;
+        default:
+            fprintf(stderr, "exiting loop\n");
+            break;
+        }
+        lineno++;
+    }
+
+    std::stable_sort(tuple_array, tuple_array + num_edges, doCompare);
+
+#ifdef VERBOSE
+    for (int i = 0 ; i < num_edges; i++) {
+        printf("%d: %d, %d, %d\n", i, tuple_array[i].row, tuple_array[i].col, tuple_array[i].val);
+    }
+#endif
+
+    int *row_array = (int *)malloc((num_nodes + 1) * sizeof(int));
+    int *col_array = (int *)malloc(num_edges * sizeof(int));
+    int *data_array = (int *)malloc(num_edges * sizeof(int));
+
+    int row_cnt = 0;
+    int prev = -1;
+    int idx;
+    for (idx = 0; idx < num_edges; idx++) {
+        int curr = tuple_array[idx].row;
+        if (curr != prev) {
+            row_array[row_cnt++] = idx;
+            prev = curr;
+        }
+
+        col_array[idx] = tuple_array[idx].col;
+        data_array[idx] = tuple_array[idx].val;
+    }
+
+    row_array[row_cnt] = idx;
+
+    fclose(fptr);
+    free(tuple_array);
+
+    csr_array *csr = (csr_array *)malloc(sizeof(csr_array));
+    memset(csr, 0, sizeof(csr_array));
+    csr->row_array = row_array;
+    csr->col_array = col_array;
+    csr->data_array = data_array;
+
+    return csr;
+
+}
+
+// Parse Metis file with double edges
+double_edges *parseMetis_doubleEdge(char* tmpchar, int *p_num_nodes, int *p_num_edges, bool directed)
+{
+    int cnt = 0;
+    unsigned int lineno = 0;
+    char line[4096];
+    int num_edges = 0, num_nodes = 0;
+    FILE *fptr;
+    CooTuple *tuple_array = NULL;
+
+    fptr = fopen(tmpchar, "r");
+    if (!fptr) {
+        fprintf(stderr, "Error when opening file: %s\n", tmpchar);
+        exit(1);
+    }
+
+    printf("Opening file: %s\n", tmpchar);
+
+    while (fgets(line, 4096, fptr)) {
+        int head, tail, weight = 0;
+        CooTuple temp;
+
+        if (line[0] == '%') continue; // skip comment lines
+
+        if (lineno == 0) { //the first line
+
+            sscanf(line, "%d %d", p_num_nodes, p_num_edges);
+            if (!directed) {
+                *p_num_edges = *p_num_edges * 2;
+                printf("This is an undirected graph\n");
+            } else {
+                printf("This is a directed graph\n");
+            }
+
+            num_nodes = *p_num_nodes;
+            num_edges = *p_num_edges;
+
+            printf("Read from file: num_nodes = %d, num_edges = %d\n", num_nodes, num_edges);
+            tuple_array = (CooTuple *)malloc(sizeof(CooTuple) * num_edges);
+            if (!tuple_array) printf("xxxxxxxx\n");
+
+        } else if (lineno > 0) { //from the second line
+            char *pch;
+            pch = strtok(line , " ,.-");
+            while (pch != NULL) {
+                head = lineno;
+                tail = atoi(pch);
+                if (tail <= 0) break;
+
+                if (tail == head) printf("reporting self loop: %d, %d\n", lineno + 1, lineno);
+
+                temp.row = head - 1;
+                temp.col = tail - 1;
+                temp.val = weight;
+
+                tuple_array[cnt++] = temp;
+
+                pch = strtok(NULL, " ,.-");
+            }
+        }
+
+#ifdef VERBOSE
+        printf("Adding edge: %d ==> %d ( %d )\n", head, tail, weight);
+#endif
+
+        lineno++;
+    }
+
+    // Metis files are stored in row-order, so sorting is unnecessary
+    // std::stable_sort(tuple_array, tuple_array + num_edges, doCompare);
+
+#ifdef VERBOSE
+    for (int i = 0 ; i < num_edges; i++) {
+        printf("%d: %d, %d, %d\n", i, tuple_array[i].row, tuple_array[i].col, tuple_array[i].val);
+    }
+#endif
+
+    int *edge_array1 = (int *)malloc(num_edges * sizeof(int));
+    int *edge_array2 = (int *)malloc(num_edges * sizeof(int));
+
+    for (int i = 0; i < num_edges; i++) {
+        edge_array1[i] = tuple_array[i].row;
+        edge_array2[i] = tuple_array[i].col;
+    }
+
+    fclose(fptr);
+    free(tuple_array);
+
+    double_edges *de = (double_edges *)malloc(sizeof(double_edges));
+    de->edge_array1 = edge_array1;
+    de->edge_array2 = edge_array2;
+
+    return de;
+
+}
+
+// Parse COO file with double edges
+double_edges *parseCOO_doubleEdge(char* tmpchar, int *p_num_nodes, int *p_num_edges, bool directed)
+{
+    int cnt = 0;
+    unsigned int lineno = 0;
+    char line[128], sp[2], a, p;
+    int num_nodes = 0, num_edges = 0;
+
+    FILE *fptr;
+    CooTuple *tuple_array = NULL;
+
+    fptr = fopen(tmpchar, "r");
+    if (!fptr) {
+        fprintf(stderr, "Error when opening file: %s\n", tmpchar);
+        exit(1);
+    }
+
+    printf("Opening file: %s\n", tmpchar);
+
+    while (fgets(line, 100, fptr)) {
+        int head, tail, weight;
+        switch (line[0]) {
+        case 'c':
+            break;
+        case 'p':
+            sscanf(line, "%c %s %d %d", &p, sp, p_num_nodes, p_num_edges);
+
+            if (!directed) {
+                *p_num_edges = *p_num_edges * 2;
+                printf("This is an undirected graph\n");
+            } else {
+                printf("This is a directed graph\n");
+            }
+
+            num_nodes = *p_num_nodes;
+            num_edges = *p_num_edges;
+
+            printf("Read from file: num_nodes = %d, num_edges = %d\n", num_nodes, num_edges);
+            tuple_array = (CooTuple *)malloc(sizeof(CooTuple) * num_edges);
+            break;
+        case 'a':
+            sscanf(line, "%c %d %d %d", &a, &head, &tail, &weight);
+            if (tail == head) printf("reporting self loop\n");
+            CooTuple temp;
+            temp.row = head - 1;
+            temp.col = tail - 1;
+            temp.val = weight;
+            tuple_array[cnt++] = temp;
+            if (!directed) {
+                temp.row = tail - 1;
+                temp.col = head - 1;
+                temp.val = weight;
+                tuple_array[cnt++] = temp;
+            }
+
+#ifdef VERBOSE
+            printf("Adding edge: %d ==> %d ( %d )\n", head, tail, weight);
+#endif
+            break;
+        default:
+            fprintf(stderr, "exiting loop\n");
+            break;
+
+        }
+        lineno++;
+    }
+
+    std::stable_sort(tuple_array, tuple_array + num_edges, doCompare);
+
+#ifdef VERBOSE
+    for (int i = 0 ; i < num_edges; i++) {
+        printf("%d: %d, %d, %d\n", i, tuple_array[i].row, tuple_array[i].col, tuple_array[i].val);
+    }
+#endif
+
+    int *edge_array1 = (int *)malloc(num_edges * sizeof(int));
+    int *edge_array2 = (int *)malloc(num_edges * sizeof(int));
+
+    for (int i = 0; i < num_edges; i++) {
+        edge_array1[i] = tuple_array[i].row;
+        edge_array2[i] = tuple_array[i].col;
+    }
+
+    fclose(fptr);
+    free(tuple_array);
+
+    double_edges *de = (double_edges *)malloc(sizeof(double_edges));
+    de->edge_array1 = edge_array1;
+    de->edge_array2 = edge_array2;
+
+    return de;
+}
+
+// Parse matrix market file
+csr_array *parseMM(char* tmpchar, int *p_num_nodes, int *p_num_edges, bool directed, bool weight_flag)
+{
+    int cnt = 0;
+    unsigned int lineno = 0;
+    char line[128];
+    int num_nodes = 0, num_edges = 0, num_nodes2 = 0;
+
+    FILE *fptr;
+    CooTuple *tuple_array = NULL;
+
+    fptr = fopen(tmpchar, "r");
+    if (!fptr) {
+        fprintf(stderr, "Error when opening file: %s\n", tmpchar);
+        exit(1);
+    }
+
+    printf("Opening file: %s\n", tmpchar);
+
+    while (fgets(line, 100, fptr)) {
+        int head, tail, weight;
+        if (line[0] == '%') continue;
+        if (lineno == 0) {
+            sscanf(line, "%d %d %d", p_num_nodes, &num_nodes2, p_num_edges);
+            if (!directed) {
+                *p_num_edges = *p_num_edges * 2;
+                printf("This is an undirected graph\n");
+            } else {
+                printf("This is a directed graph\n");
+            }
+
+            num_nodes = *p_num_nodes;
+            num_edges = *p_num_edges;
+
+            printf("Read from file: num_nodes = %d, num_edges = %d\n", num_nodes, num_edges);
+            tuple_array = (CooTuple *)malloc(sizeof(CooTuple) * num_edges);
+            if (!tuple_array) {
+                printf("tuple array not allocated succesfully\n");
+                exit(1);
+            }
+
+        }
+        if (lineno > 0) {
+
+            if (weight_flag) {
+                sscanf(line, "%d %d %d", &head, &tail, &weight);
+            } else {
+                sscanf(line, "%d %d",  &head, &tail);
+                printf("(%d, %d)\n", head, tail);
+                weight = 0;
+            }
+
+            if (tail == head) {
+                printf("reporting self loop\n");
+                continue;
+            };
+
+            CooTuple temp;
+            temp.row = head - 1;
+            temp.col = tail - 1;
+            temp.val = weight;
+            tuple_array[cnt++] = temp;
+
+            if (!directed) {
+                temp.row = tail - 1;
+                temp.col = head - 1;
+                temp.val = weight;
+                tuple_array[cnt++] = temp;
+            }
+
+#ifdef VERBOSE
+            printf("Adding edge: %d ==> %d ( %d )\n", head, tail, weight);
+#endif
+        }
+        lineno++;
+    }
+
+    std::stable_sort(tuple_array, tuple_array + num_edges, doCompare);
+
+#ifdef VERBOSE
+    for (int i = 0 ; i < num_edges; i++) {
+        printf("%d: %d, %d, %d\n", i, tuple_array[i].row, tuple_array[i].col, tuple_array[i].val);
+    }
+#endif
+
+    int *row_array = (int *)malloc((num_nodes + 1) * sizeof(int));
+    int *col_array = (int *)malloc(num_edges * sizeof(int));
+    int *data_array = (int *)malloc(num_edges * sizeof(int));
+
+    int row_cnt = 0;
+    int prev = -1;
+    int idx;
+    for (idx = 0; idx < num_edges; idx++) {
+        int curr = tuple_array[idx].row;
+        if (curr != prev) {
+            row_array[row_cnt++] = idx;
+            prev = curr;
+        }
+
+        col_array[idx] = tuple_array[idx].col;
+        data_array[idx] = tuple_array[idx].val;
+    }
+    row_array[row_cnt] = idx;
+
+    fclose(fptr);
+    free(tuple_array);
+
+    csr_array *csr = (csr_array *)malloc(sizeof(csr_array));
+    memset(csr, 0, sizeof(csr_array));
+    csr->row_array = row_array;
+    csr->col_array = col_array;
+    csr->data_array = data_array;
+
+    return csr;
+}
+
+// Parse Metis file with transpose
+csr_array *parseMetis_transpose(char* tmpchar, int *p_num_nodes, int *p_num_edges, bool directed)
+{
+    int cnt = 0;
+    unsigned int lineno = 0;
+    char *line = (char *)malloc(8192);
+    int num_edges = 0, num_nodes = 0;
+    int *col_cnt = NULL;
+
+    FILE *fptr;
+    CooTuple *tuple_array = NULL;
+
+    fptr = fopen(tmpchar, "r");
+    if (!fptr) {
+        fprintf(stderr, "Error when opening file: %s\n", tmpchar);
+        exit(1);
+    }
+
+    printf("Opening file: %s\n", tmpchar);
+    while (fgets(line, 8192, fptr)) {
+        int head, tail, weight = 0;
+        CooTuple temp;
+
+        if (line[0] == '%') continue; // skip comment lines
+
+        if (lineno == 0) { //the first line
+
+            sscanf(line, "%d %d", p_num_nodes, p_num_edges);
+
+            col_cnt = (int *)malloc(*p_num_nodes * sizeof(int));
+            if (!col_cnt) {
+                printf("memory allocation failed for col_cnt\n");
+                exit(1);
+            }
+            memset(col_cnt, 0, *p_num_nodes * sizeof(int));
+
+            if (!directed) {
+                *p_num_edges = *p_num_edges * 2;
+                printf("This is an undirected graph\n");
+            } else {
+                printf("This is a directed graph\n");
+            }
+            num_nodes = *p_num_nodes;
+            num_edges = *p_num_edges;
+
+            printf("Read from file: num_nodes = %d, num_edges = %d\n", num_nodes, num_edges);
+            tuple_array = (CooTuple *)malloc(sizeof(CooTuple) * num_edges);
+        } else if (lineno > 0) { //from the second line
+            char *pch;
+            pch = strtok(line , " ,.-");
+            while (pch != NULL) {
+                head = lineno;
+                tail = atoi(pch);
+                if (tail <= 0) {
+                    break;
+                }
+
+                if (tail == head) printf("reporting self loop: %d, %d\n", lineno + 1, lineno);
+
+                if (directed) {
+                    temp.row = tail - 1;
+                    temp.col = head - 1;
+                } else {
+                    // Undirected matrices are symmetric, so there is no need
+                    // to transpose and then re-sort the edges
+                    temp.row = head - 1;
+                    temp.col = tail - 1;
+                }
+                temp.val = weight;
+
+                col_cnt[head - 1]++;
+                if (cnt >= num_edges) {
+                    fprintf(stderr, "Error when opening file: %s.\n" \
+                            "    Check if graph is undirected Metis format\n", tmpchar);
+                    exit(1);
+                }
+                tuple_array[cnt++] = temp;
+
+                pch = strtok(NULL, " ,.-");
+            }
+        }
+#ifdef VERBOSE
+        printf("Adding edge: %d ==> %d ( %d )\n", head, tail, weight);
+#endif
+        lineno++;
+    }
+
+    if (directed) {
+        // Metis files are stored in row-order, so transposed, directed
+        // matrices must be re-sorted!
+        std::stable_sort(tuple_array, tuple_array + num_edges, doCompare);
+    }
+
+#ifdef VERBOSE
+    for (int i = 0 ; i < num_edges; i++) {
+        printf("%d: %d, %d, %d\n", i, tuple_array[i].row, tuple_array[i].col, tuple_array[i].val);
+    }
+#endif
+
+    int *row_array = (int *)malloc((num_nodes + 1) * sizeof(int));
+    int *col_array = (int *)malloc(num_edges * sizeof(int));
+    int *data_array = (int *)malloc(num_edges * sizeof(int));
+
+    int row_cnt = 0;
+    int prev = -1;
+    int idx;
+    for (idx = 0; idx < num_edges; idx++) {
+        int curr = tuple_array[idx].row;
+        if (curr != prev) {
+            row_array[row_cnt++] = idx;
+            prev = curr;
+        }
+        col_array[idx] = tuple_array[idx].col;
+        data_array[idx] = tuple_array[idx].val;
+    }
+    row_array[row_cnt] = idx;
+
+    csr_array *csr = (csr_array *)malloc(sizeof(csr_array));
+    memset(csr, 0, sizeof(csr_array));
+    csr->row_array = row_array;
+    csr->col_array = col_array;
+    csr->data_array = data_array;
+    csr->col_cnt = col_cnt;
+
+    fclose(fptr);
+    free(tuple_array);
+
+    return csr;
+}
+
+// Parse COO file with transpose
+csr_array *parseCOO_transpose(char* tmpchar, int *p_num_nodes, int *p_num_edges, bool directed)
+{
+    int cnt = 0;
+    unsigned int lineno = 0;
+    char line[128], sp[2], a, p;
+    int num_nodes = 0, num_edges = 0;
+
+    FILE *fptr;
+    CooTuple *tuple_array = NULL;
+
+    fptr = fopen(tmpchar, "r");
+    if (!fptr) {
+        fprintf(stderr, "Error when opening file: %s\n", tmpchar);
+        exit(1);
+    }
+
+    printf("Opening file: %s\n", tmpchar);
+
+    while (fgets(line, 100, fptr)) {
+        int head, tail, weight;
+        switch (line[0]) {
+        case 'c':
+            break;
+        case 'p':
+            fflush(stdout);
+
+            sscanf(line, "%c %s %d %d", &p, sp, p_num_nodes, p_num_edges);
+
+            if (!directed) {
+                *p_num_edges = *p_num_edges * 2;
+                printf("This is an undirected graph\n");
+            } else {
+                printf("This is a directed graph\n");
+            }
+
+            num_nodes = *p_num_nodes;
+            num_edges = *p_num_edges;
+
+            printf("Read from file: num_nodes = %d, num_edges = %d\n", num_nodes, num_edges);
+            tuple_array = (CooTuple *)malloc(sizeof(CooTuple) * num_edges);
+            break;
+
+        case 'a':
+            sscanf(line, "%c %d %d %d", &a, &head, &tail, &weight);
+            if (tail == head) printf("reporting self loop\n");
+            CooTuple temp;
+            temp.val = weight;
+            temp.row = tail - 1;
+            temp.col = head - 1;
+            tuple_array[cnt++] = temp;
+            if (!directed) {
+                temp.val = weight;
+                temp.row = tail - 1;
+                temp.col = head - 1;
+                tuple_array[cnt++] = temp;
+            }
+
+#ifdef VERBOSE
+            printf("Adding edge: %d ==> %d ( %d )\n", head, tail, weight);
+#endif
+            break;
+        default:
+            fprintf(stderr, "exiting loop\n");
+            break;
+        }
+        lineno++;
+    }
+
+    std::stable_sort(tuple_array, tuple_array + num_edges, doCompare);
+
+#ifdef VERBOSE
+    for (int i = 0 ; i < num_edges; i++) {
+        printf("%d: %d, %d, %d\n", i, tuple_array[i].row, tuple_array[i].col, tuple_array[i].val);
+    }
+#endif
+
+    int *row_array = (int *)malloc((num_nodes + 1) * sizeof(int));
+    int *col_array = (int *)malloc(num_edges * sizeof(int));
+    int *data_array = (int *)malloc(num_edges * sizeof(int));
+
+    int row_cnt = 0;
+    int prev = -1;
+    int idx;
+    for (idx = 0; idx < num_edges; idx++) {
+        int curr = tuple_array[idx].row;
+        if (curr != prev) {
+            row_array[row_cnt++] = idx;
+            prev = curr;
+        }
+        col_array[idx] = tuple_array[idx].col;
+        data_array[idx] = tuple_array[idx].val;
+    }
+    while (row_cnt <= num_nodes) {
+        row_array[row_cnt++] = idx;
+    }
+
+    csr_array *csr = (csr_array *)malloc(sizeof(csr_array));
+    memset(csr, 0, sizeof(csr_array));
+    csr->row_array = row_array;
+    csr->col_array = col_array;
+    csr->data_array = data_array;
+
+    fclose(fptr);
+    free(tuple_array);
+
+    return csr;
+}
+

diff --git a/src/gpu/pannotia/graph_parser/parse.h b/src/gpu/pannotia/graph_parser/parse.h
new file mode 100644
index 0000000..e6273ac
--- /dev/null
+++ b/src/gpu/pannotia/graph_parser/parse.h

@@ -0,0 +1,114 @@
+/************************************************************************************\ 
+ *                                                                                  *
+ * Copyright � 2014 Advanced Micro Devices, Inc.                                    *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood                                *
+ * All rights reserved.                                                             *
+ *                                                                                  *
+ * Redistribution and use in source and binary forms, with or without               *
+ * modification, are permitted provided that the following are met:                 *
+ *                                                                                  *
+ * You must reproduce the above copyright notice.                                   *
+ *                                                                                  *
+ * Neither the name of the copyright holder nor the names of its contributors       *
+ * may be used to endorse or promote products derived from this software            *
+ * without specific, prior, written permission from at least the copyright holder.  *
+ *                                                                                  *
+ * You must include the following terms in your license and/or other materials      *
+ * provided with the software.                                                      *
+ *                                                                                  *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"      *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE        *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A       *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER        *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,         *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS      *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN          *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING  *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY   *
+ * OF SUCH DAMAGE.                                                                  *
+ *                                                                                  *
+ * Without limiting the foregoing, the software may implement third party           *
+ * technologies for which you must obtain licenses from parties other than AMD.     *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall       *
+ * be responsible for obtaining the rights to use and/or distribute the applicable  *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder.                       *
+ *                                                                                  *
+ * If you use the software (in whole or in part), you shall adhere to all           *
+ * applicable U.S., European, and other export laws, including but not limited to   *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774),  *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009.  Further, pursuant   *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a       *
+ * license granted by the United States Department of Commerce Bureau of Industry   *
+ * and Security or as otherwise permitted pursuant to a License Exception under     *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export,     *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or    *
+ * E:2 any restricted technology, software, or source code you receive hereunder,   *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such       *
+ * technology or software, if such foreign produced direct product is subject to    *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR).  For the most current Country Group   *
+ * listings, or for additional information about the EAR or your obligations under  *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's    *
+ * website at http://www.bis.doc.gov/.                                              *
+ *                                                                                  *
+\************************************************************************************/
+
+#include <stdlib.h>
+
+typedef struct csr_arrays_t {
+    int *row_array;
+    int *col_array;
+    int *data_array;
+    int *col_cnt;
+
+    void freeArrays() {
+        if (row_array) {
+            free(row_array);
+            row_array = NULL;
+        }
+        if (col_array) {
+            free(col_array);
+            col_array = NULL;
+        }
+        if (data_array) {
+            free(data_array);
+            data_array = NULL;
+        }
+        if (col_cnt) {
+            free(col_cnt);
+            col_cnt = NULL;
+        }
+    }
+} csr_array;
+
+typedef struct ell_arrays_t {
+    int max_height;
+    int num_nodes;
+    int *col_array;
+    int *data_array;
+    int *col_cnt;
+} ell_array;
+
+typedef struct double_edges_t {
+    int *edge_array1;
+    int *edge_array2;
+} double_edges;
+
+typedef struct cooedgetuple {
+    int row;
+    int col;
+    int val;
+} CooTuple;
+
+csr_array *parseCOO(char* tmpchar, int *p_num_nodes, int *p_num_edges, bool directed);
+csr_array *parseMetis(char* tmpchar, int *p_num_nodes, int *p_num_edges, bool directed);
+csr_array *parseMM(char* tmpchar, int *p_num_nodes, int *p_num_edges, bool directed, bool weight_flag);
+ell_array *csr2ell(csr_array *csr, int num_nodes, int num_edges, int fill);
+
+double_edges *parseCOO_doubleEdge(char* tmpchar, int *p_num_nodes, int *p_num_edges, bool directed);
+double_edges *parseMetis_doubleEdge(char* tmpchar, int *p_num_nodes, int *p_num_edges, bool directed);
+
+csr_array *parseCOO_transpose(char* tmpchar, int *p_num_nodes, int *p_num_edges, bool directed);
+csr_array *parseMetis_transpose(char* tmpchar, int *p_num_nodes, int *p_num_edges, bool directed);

diff --git a/src/gpu/pannotia/graph_parser/util.cpp b/src/gpu/pannotia/graph_parser/util.cpp
new file mode 100644
index 0000000..93105ad
--- /dev/null
+++ b/src/gpu/pannotia/graph_parser/util.cpp

@@ -0,0 +1,67 @@
+/************************************************************************************\ 
+ *                                                                                  *
+ * Copyright � 2014 Advanced Micro Devices, Inc.                                    *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood                                *
+ * All rights reserved.                                                             *
+ *                                                                                  *
+ * Redistribution and use in source and binary forms, with or without               *
+ * modification, are permitted provided that the following are met:                 *
+ *                                                                                  *
+ * You must reproduce the above copyright notice.                                   *
+ *                                                                                  *
+ * Neither the name of the copyright holder nor the names of its contributors       *   
+ * may be used to endorse or promote products derived from this software            *
+ * without specific, prior, written permission from at least the copyright holder.  *
+ *                                                                                  *
+ * You must include the following terms in your license and/or other materials      *
+ * provided with the software.                                                      * 
+ *                                                                                  *  
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"      *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE        *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A       *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER        *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,         *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  * 
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS      *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN          *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING  *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY   *
+ * OF SUCH DAMAGE.                                                                  *
+ *                                                                                  *
+ * Without limiting the foregoing, the software may implement third party           *  
+ * technologies for which you must obtain licenses from parties other than AMD.     *  
+ * You agree that AMD has not obtained or conveyed to you, and that you shall       *
+ * be responsible for obtaining the rights to use and/or distribute the applicable  * 
+ * underlying intellectual property rights related to the third party technologies. *  
+ * These third party technologies are not licensed hereunder.                       *
+ *                                                                                  *
+ * If you use the software (in whole or in part), you shall adhere to all           *        
+ * applicable U.S., European, and other export laws, including but not limited to   *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774),  *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009.  Further, pursuant   *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a       *
+ * license granted by the United States Department of Commerce Bureau of Industry   *
+ * and Security or as otherwise permitted pursuant to a License Exception under     *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export,     *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or    * 
+ * E:2 any restricted technology, software, or source code you receive hereunder,   * 
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such       *
+ * technology or software, if such foreign produced direct product is subject to    * 
+ * national security controls as identified on the Commerce Control List (currently * 
+ * found in Supplement 1 to Part 774 of EAR).  For the most current Country Group   * 
+ * listings, or for additional information about the EAR or your obligations under  *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's    *
+ * website at http://www.bis.doc.gov/.                                              *
+ *                                                                                  *
+\************************************************************************************/
+
+#include <sys/time.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+double gettime() {
+  struct timeval t;
+  gettimeofday(&t,NULL);
+  return t.tv_sec+t.tv_usec*1e-6;
+}

diff --git a/src/gpu/pannotia/graph_parser/util.h b/src/gpu/pannotia/graph_parser/util.h
new file mode 100644
index 0000000..b05ae2f
--- /dev/null
+++ b/src/gpu/pannotia/graph_parser/util.h

@@ -0,0 +1,58 @@
+/************************************************************************************\ 
+ *                                                                                  *
+ * Copyright � 2014 Advanced Micro Devices, Inc.                                    *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood                                *
+ * All rights reserved.                                                             *
+ *                                                                                  *
+ * Redistribution and use in source and binary forms, with or without               *
+ * modification, are permitted provided that the following are met:                 *
+ *                                                                                  *
+ * You must reproduce the above copyright notice.                                   *
+ *                                                                                  *
+ * Neither the name of the copyright holder nor the names of its contributors       *   
+ * may be used to endorse or promote products derived from this software            *
+ * without specific, prior, written permission from at least the copyright holder.  *
+ *                                                                                  *
+ * You must include the following terms in your license and/or other materials      *
+ * provided with the software.                                                      * 
+ *                                                                                  *  
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"      *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE        *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A       *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER        *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,         *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  * 
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS      *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN          *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING  *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY   *
+ * OF SUCH DAMAGE.                                                                  *
+ *                                                                                  *
+ * Without limiting the foregoing, the software may implement third party           *  
+ * technologies for which you must obtain licenses from parties other than AMD.     *  
+ * You agree that AMD has not obtained or conveyed to you, and that you shall       *
+ * be responsible for obtaining the rights to use and/or distribute the applicable  * 
+ * underlying intellectual property rights related to the third party technologies. *  
+ * These third party technologies are not licensed hereunder.                       *
+ *                                                                                  *
+ * If you use the software (in whole or in part), you shall adhere to all           *        
+ * applicable U.S., European, and other export laws, including but not limited to   *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774),  *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009.  Further, pursuant   *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a       *
+ * license granted by the United States Department of Commerce Bureau of Industry   *
+ * and Security or as otherwise permitted pursuant to a License Exception under     *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export,     *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or    * 
+ * E:2 any restricted technology, software, or source code you receive hereunder,   * 
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such       *
+ * technology or software, if such foreign produced direct product is subject to    * 
+ * national security controls as identified on the Commerce Control List (currently * 
+ * found in Supplement 1 to Part 774 of EAR).  For the most current Country Group   * 
+ * listings, or for additional information about the EAR or your obligations under  *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's    *
+ * website at http://www.bis.doc.gov/.                                              *
+ *                                                                                  *
+\************************************************************************************/
+
+double gettime();

diff --git a/src/gpu/pannotia/mis/Makefile b/src/gpu/pannotia/mis/Makefile
new file mode 100644
index 0000000..6158bac
--- /dev/null
+++ b/src/gpu/pannotia/mis/Makefile

@@ -0,0 +1,11 @@
+default:
+	make -f Makefile.default
+
+clean:
+	make -f Makefile.default clean
+
+gem5-fusion:
+	make -f Makefile.gem5-fusion
+
+clean-gem5-fusion:
+	make -f Makefile.gem5-fusion clean

diff --git a/src/gpu/pannotia/mis/Makefile.default b/src/gpu/pannotia/mis/Makefile.default
new file mode 100644
index 0000000..9211ba6
--- /dev/null
+++ b/src/gpu/pannotia/mis/Makefile.default

@@ -0,0 +1,20 @@
+EXECUTABLE = mis_hip
+OPTS = -O3
+
+HIP_PATH ?= /opt/rocm/hip
+HIPCC = $(HIP_PATH)/bin/hipcc
+
+BIN_DIR ?= ./bin
+
+all: $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR)/$(EXECUTABLE): mis.cpp ../graph_parser/parse.cpp ../graph_parser/util.cpp $(BIN_DIR)
+	$(HIPCC) $(OPTS) --amdgpu-target=gfx801,gfx803,gfx906 $(CXXFLAGS) mis.cpp ../graph_parser/parse.cpp ../graph_parser/util.cpp -o $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR):
+	mkdir -p $(BIN_DIR)
+
+clean:
+	rm -rf $(BIN_DIR)
+
+.PHONY: square clean

diff --git a/src/gpu/pannotia/mis/Makefile.gem5-fusion b/src/gpu/pannotia/mis/Makefile.gem5-fusion
new file mode 100644
index 0000000..b5678a5
--- /dev/null
+++ b/src/gpu/pannotia/mis/Makefile.gem5-fusion

@@ -0,0 +1,26 @@
+EXECUTABLE = mis_hip.gem5
+OPTS = -O3
+
+HIP_PATH ?= /opt/rocm/hip
+HIPCC = $(HIP_PATH)/bin/hipcc
+
+# these are needed for m5ops
+# TODO: Need some sort of explicit PATH?  Read in?
+GEM5_PATH ?= /nobackup/sinclair/gem5
+CFLAGS += -I$(GEM5_PATH)/include -I../graph_parser
+LDFLAGS += -L$(GEM5_PATH)/util/m5/build/x86/out -lm5
+
+BIN_DIR ?= ./bin
+
+all: $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR)/$(EXECUTABLE): mis.cpp ../graph_parser/parse.cpp ../graph_parser/util.cpp $(BIN_DIR)
+	$(HIPCC) $(OPTS) --amdgpu-target=gfx801,gfx803 $(CXXFLAGS) mis.cpp ../graph_parser/parse.cpp ../graph_parser/util.cpp -DGEM5_FUSION -o $(BIN_DIR)/$(EXECUTABLE) $(CFLAGS) $(LDFLAGS)
+
+$(BIN_DIR):
+	mkdir -p $(BIN_DIR)
+
+clean:
+	rm -rf $(BIN_DIR)
+
+.PHONY: square clean

diff --git a/src/gpu/pannotia/mis/README.md b/src/gpu/pannotia/mis/README.md
new file mode 100644
index 0000000..e587055
--- /dev/null
+++ b/src/gpu/pannotia/mis/README.md

@@ -0,0 +1,49 @@
+---
+title: Pannotia MIS Test
+tags:
+    - x86
+    - amdgpu
+layout: default
+permalink: resources/pannotia/mis
+shortdoc: >
+    Resources to build a disk image with the GCN3 Pannotia MIS workload.
+---
+
+Maximal Independent Set (mis) is a graph analytics application that is part of the Pannotia benchmark suite.  It is designed to find a maximal subset of vertices in a graph such that no two are adjacent.  The provided version is for use with the gpu-compute model of gem5.  Thus, it has been ported from the prior CUDA and OpenCL variants to HIP, and validated on a Vega-class AMD GPU.
+
+Compiling MIS, compiling the GCN3_X86/Vega_X86 versions of gem5, and running MIS on gem5 is dependent on the gcn-gpu docker image, `util/dockerfiles/gcn-gpu/Dockerfile` on the [gem5 stable branch](https://gem5.googlesource.com/public/gem5/+/refs/heads/stable).
+
+## Compilation and Running
+
+To compile MIS:
+
+```
+cd src/gpu/pannotia/mis
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu make gem5-fusion
+```
+
+If you use the Makefile.default file instead, the Makefile will generate code designed to run on the real GPU instead.  Moreover, note that Makefile.gem5-fusion requires you to set the GEM5_ROOT variable (either on the command line or by modifying the Makefile), because the Pannotia applications have been updated to use [m5ops](https://www.gem5.org/documentation/general_docs/m5ops/).  By default, the Makefile builds for gfx801 and gfx803, and is placed in the src/gpu/pannotia/mis/bin folder.
+
+## Compiling GCN3_X86/gem5.opt
+
+MIS is a GPU application, which requires that gem5 is built with the GCN3_X86 (or Vega_X86, although this has been less heavily tested) architecture.   The test is run with the GCN3_X86 gem5 variant, compiled using the gcn-gpu docker image:
+
+```
+git clone https://gem5.googlesource.com/public/gem5
+cd gem5
+docker run -u $UID:$GID --volume $(pwd):$(pwd) -w $(pwd) gcr.io/gem5-test/gcn-gpu:latest scons build/GCN3_X86/gem5.opt -j <num cores>
+```
+
+## Running MIS on GCN3_X86/gem5.opt
+
+# Assuming gem5 and gem5-resources are in your working directory
+```
+wget http://dist.gem5.org/dist/develop/datasets/pannotia/bc/1k_128k.gr
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu gem5/build/GCN3_X86/gem5.opt gem5/configs/example/apu_se.py -n3 --mem-size=8GB --benchmark-root=gem5-resources/src/gpu/pannotia/mis/bin -c mis.gem5 --options="1k_128k.gr 0"
+```
+
+Note that the datasets from the original Pannotia suite have been uploaded to: <http://dist.gem5.org/dist/develop/datasets/pannotia>.  We recommend you start with the 1k_128k.gr input (<http://dist.gem5.org/dist/develop/datasets/pannotia/mis/1k_128k.gr>), as this is the smallest input that can be run with MIS.  Note that 1k_128k is not designed for MIS specifically though -- the above link has larger graphs designed to run with MIS that you should consider using for larger experiments.
+
+## Pre-built binary
+
+A pre-built binary will be added soon.

diff --git a/src/gpu/pannotia/mis/kernel.h b/src/gpu/pannotia/mis/kernel.h
new file mode 100644
index 0000000..19d0eae
--- /dev/null
+++ b/src/gpu/pannotia/mis/kernel.h

@@ -0,0 +1,199 @@
+/************************************************************************************\
+ *                                                                                  *
+ * Copyright � 2014 Advanced Micro Devices, Inc.                                    *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood                                *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair                           *
+ * All rights reserved.                                                             *
+ *                                                                                  *
+ * Redistribution and use in source and binary forms, with or without               *
+ * modification, are permitted provided that the following are met:                 *
+ *                                                                                  *
+ * You must reproduce the above copyright notice.                                   *
+ *                                                                                  *
+ * Neither the name of the copyright holder nor the names of its contributors       *
+ * may be used to endorse or promote products derived from this software            *
+ * without specific, prior, written permission from at least the copyright holder.  *
+ *                                                                                  *
+ * You must include the following terms in your license and/or other materials      *
+ * provided with the software.                                                      *
+ *                                                                                  *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"      *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE        *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A       *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER        *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,         *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS      *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN          *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING  *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY   *
+ * OF SUCH DAMAGE.                                                                  *
+ *                                                                                  *
+ * Without limiting the foregoing, the software may implement third party           *
+ * technologies for which you must obtain licenses from parties other than AMD.     *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall       *
+ * be responsible for obtaining the rights to use and/or distribute the applicable  *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder.                       *
+ *                                                                                  *
+ * If you use the software (in whole or in part), you shall adhere to all           *
+ * applicable U.S., European, and other export laws, including but not limited to   *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774),  *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009.  Further, pursuant   *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a       *
+ * license granted by the United States Department of Commerce Bureau of Industry   *
+ * and Security or as otherwise permitted pursuant to a License Exception under     *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export,     *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or    *
+ * E:2 any restricted technology, software, or source code you receive hereunder,   *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such       *
+ * technology or software, if such foreign produced direct product is subject to    *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR).  For the most current Country Group   *
+ * listings, or for additional information about the EAR or your obligations under  *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's    *
+ * website at http://www.bis.doc.gov/.                                              *
+ *                                                                                  *
+\************************************************************************************/
+
+#ifndef KERNEL_H
+#define KERNEL_H
+
+#include "hip/hip_runtime.h"
+
+#define BIGNUM 99999999
+
+/**
+* init kernel
+* @param s_array   set array
+* @param c_array   status array
+* @param cu_array  status update array
+* @param num_nodes number of vertices
+* @param num_edges number of edges
+*/
+__global__ void
+init(int *s_array, int *c_array, int *cu_array, int num_nodes, int num_edges)
+{
+    // Get my workitem id
+    int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+    if (tid < num_nodes) {
+        // Set the status array: not processed
+        c_array[tid] = -1;
+        cu_array[tid] = -1;
+        s_array[tid] = 0;
+    }
+}
+
+/**
+* mis1 kernel
+* @param row          csr pointer array
+* @param col          csr column index array
+* @param node_value   node value array
+* @param s_array      set array
+* @param c_array node status array
+* @param min_array    node value array
+* @param stop node    value array
+* @param num_nodes    number of vertices
+* @param num_edges    number of edges
+*/
+__global__ void
+mis1(int *row, int *col, int *node_value, int *s_array, int *c_array,
+     int *min_array, int *stop, int num_nodes, int num_edges)
+{
+    // Get workitem id
+    int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+    if (tid < num_nodes) {
+        // If the vertex is not processed
+        if (c_array[tid] == -1) {
+            *stop = 1;
+            // Get the start and end pointers
+            int start = row[tid];
+            int end;
+            if (tid + 1 < num_nodes) {
+                end = row[tid + 1];
+            } else {
+                end = num_edges;
+            }
+
+            // Navigate the neighbor list and find the min
+            int min = BIGNUM;
+            for (int edge = start; edge < end; edge++) {
+                if (c_array[col[edge]] == -1) {
+                    if (node_value[col[edge]] < min) {
+                        min = node_value[col[edge]];
+                    }
+                }
+            }
+            min_array[tid] = min;
+        }
+    }
+}
+
+/**
+* mis2 kernel
+* @param row          csr pointer array
+* @param col          csr column index array
+* @param node_value   node value array
+* @param s_array      set array
+* @param c_array      status array
+* @param cu_array     status update array
+* @param min_array    node value array
+* @param num_nodes    number of vertices
+* @param num_edges    number of edges
+*/
+__global__ void
+mis2(int *row, int *col, int *node_value, int *s_array, int *c_array,
+     int *cu_array, int *min_array, int num_nodes, int num_edges)
+{
+    // Get my workitem id
+    int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+
+    if (tid < num_nodes) {
+        if (node_value[tid] <= min_array[tid]  && c_array[tid] == -1) {
+            // -1: Not processed -2: Inactive 2: Independent set
+            // Put the item into the independent set
+            s_array[tid] = 2;
+
+            // Get the start and end pointers
+            int start = row[tid];
+            int end;
+
+            if (tid + 1 < num_nodes) {
+                end = row[tid + 1];
+            } else {
+                end = num_edges;
+            }
+
+            // Set the status to inactive
+            c_array[tid] = -2;
+
+            // Mark all the neighbors inactive
+            for (int edge = start; edge < end; edge++) {
+                if (c_array[col[edge]] == -1) {
+                    //use status update array to avoid race
+                    cu_array[col[edge]] = -2;
+                }
+            }
+        }
+    }
+}
+
+/**
+* mis3 kernel
+* @param cu_array     status update array
+* @param  c_array     status array
+* @param num_nodes    number of vertices
+*/
+__global__ void
+mis3(int *cu_array, int *c_array, int num_nodes)
+{
+    //get my workitem id
+    int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+
+    //set the status array
+    if (tid < num_nodes && cu_array[tid] == -2) {
+        c_array[tid] = cu_array[tid];
+    }
+}
+
+#endif // KERNEL_H

diff --git a/src/gpu/pannotia/mis/mis.cpp b/src/gpu/pannotia/mis/mis.cpp
new file mode 100644
index 0000000..a6a04fb
--- /dev/null
+++ b/src/gpu/pannotia/mis/mis.cpp

@@ -0,0 +1,334 @@
+/************************************************************************************\
+ *                                                                                  *
+ * Copyright � 2014 Advanced Micro Devices, Inc.                                    *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood                                *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair                           *
+ * All rights reserved.                                                             *
+ *                                                                                  *
+ * Redistribution and use in source and binary forms, with or without               *
+ * modification, are permitted provided that the following are met:                 *
+ *                                                                                  *
+ * You must reproduce the above copyright notice.                                   *
+ *                                                                                  *
+ * Neither the name of the copyright holder nor the names of its contributors       *
+ * may be used to endorse or promote products derived from this software            *
+ * without specific, prior, written permission from at least the copyright holder.  *
+ *                                                                                  *
+ * You must include the following terms in your license and/or other materials      *
+ * provided with the software.                                                      *
+ *                                                                                  *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"      *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE        *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A       *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER        *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,         *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS      *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN          *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING  *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY   *
+ * OF SUCH DAMAGE.                                                                  *
+ *                                                                                  *
+ * Without limiting the foregoing, the software may implement third party           *
+ * technologies for which you must obtain licenses from parties other than AMD.     *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall       *
+ * be responsible for obtaining the rights to use and/or distribute the applicable  *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder.                       *
+ *                                                                                  *
+ * If you use the software (in whole or in part), you shall adhere to all           *
+ * applicable U.S., European, and other export laws, including but not limited to   *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774),  *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009.  Further, pursuant   *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a       *
+ * license granted by the United States Department of Commerce Bureau of Industry   *
+ * and Security or as otherwise permitted pursuant to a License Exception under     *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export,     *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or    *
+ * E:2 any restricted technology, software, or source code you receive hereunder,   *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such       *
+ * technology or software, if such foreign produced direct product is subject to    *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR).  For the most current Country Group   *
+ * listings, or for additional information about the EAR or your obligations under  *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's    *
+ * website at http://www.bis.doc.gov/.                                              *
+ *                                                                                  *
+\************************************************************************************/
+
+#include "hip/hip_runtime.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+//#include <sys/time.h>
+#include <algorithm>
+#include "../graph_parser/parse.h"
+#include "../graph_parser/util.h"
+#include "kernel.h"
+
+#ifdef GEM5_FUSION
+#include <stdint.h>
+#include <gem5/m5ops.h>
+#endif
+
+#define RANGE 2048
+
+void dump2file(int *adjmatrix, int num_nodes);
+void print_vector(int *vector, int num);
+void print_vectorf(float *vector, int num);
+
+int main(int argc, char **argv)
+{
+    char *tmpchar;
+
+    int num_nodes;
+    int num_edges;
+    int file_format = 1;
+    bool directed = 0;
+
+    hipError_t err = hipSuccess;
+
+    // Input arguments
+    if (argc == 3) {
+        tmpchar = argv[1]; // Graph inputfile
+        file_format = atoi(argv[2]); // Choose file format
+    } else {
+        fprintf(stderr, "You did something wrong!\n");
+        exit(1);
+    }
+
+    srand(7);
+
+    // Allocate the csr array
+    csr_array *csr;
+
+    // Parse the graph into the csr structure
+    if (file_format == 1) {
+        csr = parseMetis(tmpchar, &num_nodes, &num_edges, directed);
+    } else if (file_format == 0) {
+        csr = parseCOO(tmpchar, &num_nodes, &num_edges, directed);
+    } else {
+        fprintf(stderr, "reserve for future");
+        exit(1);
+    }
+
+    // Allocate the node value array
+    int *node_value = (int *)malloc(num_nodes * sizeof(int));
+    if (!node_value) fprintf(stderr, "malloc failed node_value\n");
+
+    // Allocate the set array
+    int *s_array = (int *)malloc(num_nodes * sizeof(int));
+    if (!s_array) fprintf(stderr, "malloc failed node_value\n");
+
+    // Randomize the node values
+    for (int i = 0; i < num_nodes; i++) {
+        node_value[i] =  rand() % RANGE;
+    }
+
+    // Create device side buffers
+    int *row_d;
+    int *col_d;
+
+    int *c_array_d;
+    int *c_array_u_d;
+    int *s_array_d;
+    int *node_value_d;
+    int *min_array_d;
+    int *stop_d;
+
+    // Allocate the device-side buffers for the graph
+    err = hipMalloc(&row_d, num_nodes * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc row_d (size:%d) => %s\n",  num_nodes , hipGetErrorString(err));
+        return -1;
+    }
+    err = hipMalloc(&col_d, num_edges * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc col_d (size:%d) => %s\n",  num_edges , hipGetErrorString(err));
+        return -1;
+    }
+
+    // Termination variable
+    err = hipMalloc(&stop_d, sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc stop_d (size:%d) => %s\n", 1, hipGetErrorString(err));
+        return -1;
+    }
+
+    // Allocate the device-side buffers for mis
+    err = hipMalloc(&min_array_d, num_nodes * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc min_array_d (size:%d) => %s\n", num_nodes , hipGetErrorString(err));
+        return -1;
+    }
+    err = hipMalloc(&c_array_d, num_nodes * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc c_array_d (size:%d) => %s\n", num_nodes , hipGetErrorString(err));
+        return -1;
+    }
+    err = hipMalloc(&c_array_u_d, num_nodes * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc c_array_d (size:%d) => %s\n", num_nodes , hipGetErrorString(err));
+        return -1;
+    }
+    err = hipMalloc(&s_array_d, num_nodes * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc s_array_d (size:%d) => %s\n", num_nodes , hipGetErrorString(err));
+        return -1;
+    }
+    err = hipMalloc(&node_value_d, num_nodes * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc node_value_d (size:%d) => %s\n", num_nodes , hipGetErrorString(err));
+        return -1;
+    }
+
+//    double time1 = gettime();
+
+#ifdef GEM5_FUSION
+    m5_work_begin(0, 0);
+#endif
+
+    // Copy data to device-side buffers
+    err = hipMemcpy(row_d, csr->row_array, num_nodes * sizeof(int), hipMemcpyHostToDevice);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMemcpy row_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+
+    err = hipMemcpy(col_d, csr->col_array, num_edges * sizeof(int), hipMemcpyHostToDevice);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMemcpy col_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+
+    err = hipMemcpy(node_value_d, node_value, num_nodes * sizeof(int), hipMemcpyHostToDevice);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMemcpy feature_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+
+    // Work dimensions
+    int block_size = 128;
+    int num_blocks = (num_nodes + block_size - 1) / block_size;
+
+    dim3 threads(block_size,  1, 1);
+    dim3 grid(num_blocks, 1, 1);
+
+    // Launch the initialization kernel
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(init), dim3(grid), dim3(threads), 0, 0, s_array_d, c_array_d, c_array_u_d,
+                             num_nodes, num_edges);
+    hipDeviceSynchronize();
+    err = hipGetLastError();
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: init kernel (%s)\n", hipGetErrorString(err));
+        return -1;
+    }
+
+    // Termination variable
+    int stop = 1;
+    int iterations = 0;
+    while (stop) {
+        stop = 0;
+
+        // Copy the termination variable to the device
+        err = hipMemcpy(stop_d, &stop, sizeof(int), hipMemcpyHostToDevice);
+        if (err != hipSuccess) {
+            fprintf(stderr, "ERROR: write stop_d variable (%s)\n", hipGetErrorString(err));
+            return -1;
+        }
+
+        // Launch mis1
+        hipLaunchKernelGGL(HIP_KERNEL_NAME(mis1), dim3(grid), dim3(threads), 0, 0, row_d, col_d, node_value_d, s_array_d,
+                                 c_array_d, min_array_d, stop_d, num_nodes,
+                                 num_edges);
+
+        // Launch mis2
+        hipLaunchKernelGGL(HIP_KERNEL_NAME(mis2), dim3(grid), dim3(threads), 0, 0, row_d, col_d, node_value_d, s_array_d,
+                                 c_array_d, c_array_u_d, min_array_d, num_nodes,
+                                 num_edges);
+
+        // Launch mis3
+        hipLaunchKernelGGL(HIP_KERNEL_NAME(mis3), dim3(grid), dim3(threads), 0, 0, c_array_u_d, c_array_d, num_nodes);
+
+        // Copy the termination variable back
+        err = hipMemcpy(&stop, stop_d, sizeof(int), hipMemcpyDeviceToHost);
+        if (err != hipSuccess) {
+            fprintf(stderr, "ERROR: read stop_d variable (%s)\n", hipGetErrorString(err));
+            return -1;
+        }
+
+        iterations++;
+    }
+
+    hipDeviceSynchronize();
+
+    err = hipMemcpy(s_array, s_array_d, num_nodes * sizeof(int), hipMemcpyDeviceToHost);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMemcpy s_array_d failed (%s)\n", hipGetErrorString(err));
+        return -1;
+    }
+
+#ifdef GEM5_FUSION
+    m5_work_end(0, 0);
+#endif
+
+//    double time2 = gettime();
+
+    // Print out the timing characterisitics
+    printf("number of iterations: %d\n", iterations);
+//    printf("kernel + memcpy time %f ms\n", (time2 - time1) * 1000);
+
+#if 1
+    // Print the set array
+    print_vector(s_array, num_nodes);
+#endif
+
+    // Clean up the host-side arrays
+    free(node_value);
+    free(s_array);
+    csr->freeArrays();
+    free(csr);
+
+    // Clean up the device-side arrays
+    hipFree(row_d);
+    hipFree(col_d);
+    hipFree(c_array_d);
+    hipFree(s_array_d);
+    hipFree(node_value_d);
+    hipFree(min_array_d);
+    hipFree(stop_d);
+
+    return 0;
+}
+
+void print_vector(int *vector, int num)
+{
+
+    FILE * fp = fopen("result.out", "w");
+    if (!fp) {
+        printf("ERROR: unable to open result.txt\n");
+    }
+
+    for (int i = 0; i < num; i++) {
+        fprintf(fp, "%d\n", vector[i]);
+    }
+
+    fclose(fp);
+
+}
+
+void print_vectorf(float *vector, int num)
+{
+
+    FILE * fp = fopen("result.out", "w");
+    if (!fp) {
+        printf("ERROR: unable to open result.txt\n");
+    }
+
+    for (int i = 0; i < num; i++) {
+        fprintf(fp, "%f\n", vector[i]);
+    }
+
+    fclose(fp);
+
+}

diff --git a/src/gpu/pannotia/pagerank/Makefile b/src/gpu/pannotia/pagerank/Makefile
new file mode 100644
index 0000000..6158bac
--- /dev/null
+++ b/src/gpu/pannotia/pagerank/Makefile

@@ -0,0 +1,11 @@
+default:
+	make -f Makefile.default
+
+clean:
+	make -f Makefile.default clean
+
+gem5-fusion:
+	make -f Makefile.gem5-fusion
+
+clean-gem5-fusion:
+	make -f Makefile.gem5-fusion clean

diff --git a/src/gpu/pannotia/pagerank/Makefile.default b/src/gpu/pannotia/pagerank/Makefile.default
new file mode 100644
index 0000000..db96b44
--- /dev/null
+++ b/src/gpu/pannotia/pagerank/Makefile.default

@@ -0,0 +1,28 @@
+HIP_PATH ?= /opt/rocm/hip
+HIPCC = $(HIP_PATH)/bin/hipcc
+OPTS = -O3
+
+BASEEXE = pagerank
+VARIANT ?= DEFAULT
+ifeq ($(VARIANT),DEFAULT)
+    EXECUTABLE = $(BASEEXE)
+    CPPFILES += pagerank.cpp
+else ifeq ($(VARIANT),SPMV)
+    EXECUTABLE = $(BASEEXE)_spmv
+    CPPFILES += pagerank_spmv.cpp
+endif
+
+BIN_DIR ?= ./bin
+
+all: $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR)/$(EXECUTABLE): $(CPPFILES) ../graph_parser/parse.cpp ../graph_parser/util.cpp $(BIN_DIR)
+	$(HIPCC) $(OPTS) --amdgpu-target=gfx801,gfx803,gfx906 $(CXXFLAGS) ../graph_parser/parse.cpp ../graph_parser/util.cpp $(CPPFILES) -o $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR):
+	mkdir -p $(BIN_DIR)
+
+clean:
+	rm -rf $(BIN_DIR)
+
+.PHONY: square clean

diff --git a/src/gpu/pannotia/pagerank/Makefile.gem5-fusion b/src/gpu/pannotia/pagerank/Makefile.gem5-fusion
new file mode 100644
index 0000000..06bd88b
--- /dev/null
+++ b/src/gpu/pannotia/pagerank/Makefile.gem5-fusion

@@ -0,0 +1,34 @@
+HIP_PATH ?= /opt/rocm/hip
+HIPCC = $(HIP_PATH)/bin/hipcc
+OPTS = -O3
+
+# these are needed for m5ops
+# TODO: Need some sort of explicit PATH?  Read in?
+GEM5_PATH ?= /nobackup/sinclair/gem5
+CFLAGS += -I$(GEM5_PATH)/include -I/../graph_parser
+LDFLAGS += -L$(GEM5_PATH)/util/m5/build/x86/out -lm5
+
+BASEEXE = pagerank
+VARIANT ?= DEFAULT
+ifeq ($(VARIANT),DEFAULT)
+    EXECUTABLE = $(BASEEXE).gem5
+    CPPFILES += pagerank.cpp
+else ifeq ($(VARIANT),SPMV)
+    EXECUTABLE = $(BASEEXE)_spmv.gem5
+    CPPFILES += pagerank_spmv.cpp
+endif
+
+BIN_DIR ?= ./bin
+
+all: $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR)/$(EXECUTABLE): $(CPPFILES) ../graph_parser/parse.cpp ../graph_parser/util.cpp $(BIN_DIR)
+	$(HIPCC) $(OPTS) --amdgpu-target=gfx801,gfx803 $(CXXFLAGS) ../graph_parser/parse.cpp ../graph_parser/util.cpp $(CPPFILES) -DGEM5_FUSION -o $(BIN_DIR)/$(EXECUTABLE) $(CFLAGS) $(LDFLAGS)
+
+$(BIN_DIR):
+	mkdir -p $(BIN_DIR)
+
+clean:
+	rm -rf $(BIN_DIR)
+
+.PHONY: square clean

diff --git a/src/gpu/pannotia/pagerank/README.md b/src/gpu/pannotia/pagerank/README.md
new file mode 100644
index 0000000..726272a
--- /dev/null
+++ b/src/gpu/pannotia/pagerank/README.md

@@ -0,0 +1,66 @@
+---
+title: Pannotia PageRank Test
+tags:
+    - x86
+    - amdgpu
+layout: default
+permalink: resources/pannotia/pagerank
+shortdoc: >
+    Resources to build a disk image with the GCN3 Pannotia PageRank workload.
+---
+
+PageRank (PR) is a graph analytics application that is part of the Pannotia benchmark suite.  It is an algorithm designed to calculate probability distributions representing the likelihood that a person randomly clicking on links arrives at any particular page.  The provided version is for use with the gpu-compute model of gem5.  Thus, it has been ported from the prior CUDA and OpenCL variants to HIP, and validated on a Vega-class AMD GPU.
+
+Compiling both PageRank variants, compiling the GCN3_X86/Vega_X86 versions of gem5, and running both PageRank variants on gem5 is dependent on the gcn-gpu docker image, `util/dockerfiles/gcn-gpu/Dockerfile` on the [gem5 stable branch](https://gem5.googlesource.com/public/gem5/+/refs/heads/stable).
+
+## Compilation and Running
+
+PR has two variants: default and spmv.  To compile the "default" variant:
+
+```
+cd src/gpu/pannotia/pagerank
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu make gem5-fusion
+```
+
+To compile the "spmv" variant:
+
+```
+cd src/gpu/pannotia/pagerank
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu bash -c "export VARIANT=SPMV ; make gem5-fusion"
+```
+
+If you use the Makefile.default file instead, the Makefile will generate code designed to run on the real GPU instead.  Moreover, note that Makefile.gem5-fusion requires you to set the GEM5_ROOT variable (either on the command line or by modifying the Makefile), because the Pannotia applications have been updated to use [m5ops](https://www.gem5.org/documentation/general_docs/m5ops/).  By default, for both variants the Makefile builds for gfx801 and gfx803, and the binaries are placed in the src/gpu/pannotia/pagerank/bin folder.  Moreover, by default the VARIANT variable PageRank's Makefile assumes the csr variant is being used, hence why this variable does not need to be set for compiling it.
+
+## Compiling GCN3_X86/gem5.opt
+
+PageRank is a GPU application, which requires that gem5 is built with the GCN3_X86 (or Vega_X86, although this has been less heavily tested) architecture.  The test is run with the GCN3_X86 gem5 variant, compiled using the gcn-gpu docker image:
+
+```
+git clone https://gem5.googlesource.com/public/gem5
+cd gem5
+docker run -u $UID:$GID --volume $(pwd):$(pwd) -w $(pwd) gcr.io/gem5-test/gcn-gpu:latest scons build/GCN3_X86/gem5.opt -j <num cores>
+```
+
+## Running PageRank on GCN3_X86/gem5.opt
+
+The following command shows how to run the PageRank default version:
+
+# Assuming gem5 and gem5-resources are in your working directory
+```
+wget http://dist.gem5.org/dist/develop/datasets/pannotia/pagerank/coAuthorsDBLP.graph
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu gem5/build/GCN3_X86/gem5.opt gem5/configs/example/apu_se.py -n3 --mem-size=8GB --benchmark-root=gem5-resources/src/gpu/pannotia/pagerank/bin -c pagerank.gem5 --options="coAuthorsDBLP.graph 0"
+```
+
+To run the PageRank spmv version:
+
+# Assuming gem5, pannotia (input graphs, see below), and gem5-resources are in your working directory
+```
+wget http://dist.gem5.org/dist/develop/datasets/pannotia/pagerank/coAuthorsDBLP.graph
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu gem5/build/GCN3_X86/gem5.opt gem5/configs/example/apu_se.py -n3 --mem-size=8GB --benchmark-root=gem5-resources/src/gpu/pannotia/pagerank/bin -c pagerank_spmv.gem5 --options="coAuthorsDBLP.graph 1"
+```
+
+Note that the datasets from the original Pannotia suite have been uploaded to: <http://dist.gem5.org/dist/develop/datasets/pannotia>.  We recommend you start with the coAuthorsDBLP input for PR.
+
+## Pre-built binary
+
+A pre-built binary will be added soon.

diff --git a/src/gpu/pannotia/pagerank/kernel.h b/src/gpu/pannotia/pagerank/kernel.h
new file mode 100644
index 0000000..f2407aa
--- /dev/null
+++ b/src/gpu/pannotia/pagerank/kernel.h

@@ -0,0 +1,145 @@
+/************************************************************************************\
+ *                                                                                  *
+ * Copyright � 2014 Advanced Micro Devices, Inc.                                    *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood                                *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair                           *
+ * All rights reserved.                                                             *
+ *                                                                                  *
+ * Redistribution and use in source and binary forms, with or without               *
+ * modification, are permitted provided that the following are met:                 *
+ *                                                                                  *
+ * You must reproduce the above copyright notice.                                   *
+ *                                                                                  *
+ * Neither the name of the copyright holder nor the names of its contributors       *
+ * may be used to endorse or promote products derived from this software            *
+ * without specific, prior, written permission from at least the copyright holder.  *
+ *                                                                                  *
+ * You must include the following terms in your license and/or other materials      *
+ * provided with the software.                                                      *
+ *                                                                                  *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"      *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE        *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A       *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER        *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,         *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS      *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN          *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING  *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY   *
+ * OF SUCH DAMAGE.                                                                  *
+ *                                                                                  *
+ * Without limiting the foregoing, the software may implement third party           *
+ * technologies for which you must obtain licenses from parties other than AMD.     *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall       *
+ * be responsible for obtaining the rights to use and/or distribute the applicable  *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder.                       *
+ *                                                                                  *
+ * If you use the software (in whole or in part), you shall adhere to all           *
+ * applicable U.S., European, and other export laws, including but not limited to   *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774),  *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009.  Further, pursuant   *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a       *
+ * license granted by the United States Department of Commerce Bureau of Industry   *
+ * and Security or as otherwise permitted pursuant to a License Exception under     *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export,     *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or    *
+ * E:2 any restricted technology, software, or source code you receive hereunder,   *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such       *
+ * technology or software, if such foreign produced direct product is subject to    *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR).  For the most current Country Group   *
+ * listings, or for additional information about the EAR or your obligations under  *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's    *
+ * website at http://www.bis.doc.gov/.                                              *
+ *                                                                                  *
+\************************************************************************************/
+
+#ifndef KERNEL_H
+#define KERNEL_H
+
+#include "hip/hip_runtime.h"
+
+/**
+ * @brief   pagerank 1
+ * @param   row         csr pointer array
+ * @param   col         csr column array
+ * @param   data        weight array
+ * @param   page_rank1  pagerank array 1
+ * @param   page_rank2  pagerank array 2
+ * @param   num_nodes   number of vertices
+ * @param   num_edges   number of edges
+ */
+__global__ void
+pagerank1(int *row, int *col, int *data, float *page_rank1, float *page_rank2,
+          const int num_nodes, const int num_edges)
+{
+    // Get my workitem id
+    int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+
+    if (tid < num_nodes) {
+        // Get the starting and ending pointers of the neighborlist
+        int start = row[tid];
+        int end;
+        if (tid + 1 < num_nodes) {
+            end = row[tid + 1];
+        } else {
+            end = num_edges;
+        }
+
+        int nid;
+        // Navigate the neighbor list
+        for (int edge = start; edge < end; edge++) {
+            nid = col[edge];
+            // Transfer the PageRank value to neighbors
+            atomicAdd(&page_rank2[nid], page_rank1[tid] / (float)(end - start));
+        }
+    }
+}
+
+/**
+ * @brief   pagerank 2
+ * @param   row         csr pointer array
+ * @param   col         csr column array
+ * @param   data        weight array
+ * @param   page_rank1  pagerank array 1
+ * @param   page_rank2  pagerank array 2
+ * @param   num_nodes   number of vertices
+ * @param   num_edges   number of edges
+ */
+__global__ void
+pagerank2(int *row, int *col, int *data, float *page_rank1, float *page_rank2,
+          const int num_nodes, const int num_edges)
+{
+    // Get my workitem id
+    int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+
+    // Update pagerank value with the damping factor
+    if (tid < num_nodes) {
+        page_rank1[tid]	= 0.15 / (float)num_nodes + 0.85 * page_rank2[tid];
+        page_rank2[tid] = 0.0f;
+    }
+}
+
+/**
+ * @brief   inibuffer
+ * @param   row         csr pointer array
+ * @param   page_rank1  pagerank array 1
+ * @param   page_rank2  pagerank array 2
+ * @param   num_nodes   number of vertices
+ */
+__global__ void
+inibuffer(int *row, float *page_rank1, float *page_rank2, const int num_nodes,
+          const int num_edges)
+{
+    // Get my thread id
+    int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+
+    if (tid < num_nodes) {
+        page_rank1[tid] = 1 / (float)num_nodes;
+        page_rank2[tid] = 0.0f;
+    }
+}
+
+#endif // KERNEL_H

diff --git a/src/gpu/pannotia/pagerank/kernel_spmv.h b/src/gpu/pannotia/pagerank/kernel_spmv.h
new file mode 100644
index 0000000..af956c6
--- /dev/null
+++ b/src/gpu/pannotia/pagerank/kernel_spmv.h

@@ -0,0 +1,163 @@
+/************************************************************************************\
+ *                                                                                  *
+ * Copyright � 2014 Advanced Micro Devices, Inc.                                    *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood                                *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair                           *
+ * All rights reserved.                                                             *
+ *                                                                                  *
+ * Redistribution and use in source and binary forms, with or without               *
+ * modification, are permitted provided that the following are met:                 *
+ *                                                                                  *
+ * You must reproduce the above copyright notice.                                   *
+ *                                                                                  *
+ * Neither the name of the copyright holder nor the names of its contributors       *
+ * may be used to endorse or promote products derived from this software            *
+ * without specific, prior, written permission from at least the copyright holder.  *
+ *                                                                                  *
+ * You must include the following terms in your license and/or other materials      *
+ * provided with the software.                                                      *
+ *                                                                                  *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"      *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE        *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A       *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER        *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,         *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS      *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN          *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING  *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY   *
+ * OF SUCH DAMAGE.                                                                  *
+ *                                                                                  *
+ * Without limiting the foregoing, the software may implement third party           *
+ * technologies for which you must obtain licenses from parties other than AMD.     *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall       *
+ * be responsible for obtaining the rights to use and/or distribute the applicable  *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder.                       *
+ *                                                                                  *
+ * If you use the software (in whole or in part), you shall adhere to all           *
+ * applicable U.S., European, and other export laws, including but not limited to   *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774),  *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009.  Further, pursuant   *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a       *
+ * license granted by the United States Department of Commerce Bureau of Industry   *
+ * and Security or as otherwise permitted pursuant to a License Exception under     *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export,     *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or    *
+ * E:2 any restricted technology, software, or source code you receive hereunder,   *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such       *
+ * technology or software, if such foreign produced direct product is subject to    *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR).  For the most current Country Group   *
+ * listings, or for additional information about the EAR or your obligations under  *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's    *
+ * website at http://www.bis.doc.gov/.                                              *
+ *                                                                                  *
+\************************************************************************************/
+
+#ifndef KERNEL_SPMV_H
+#define KERNEL_SPMV_H
+
+#include "hip/hip_runtime.h"
+
+/**
+ * @brief   inibuffer
+ * @param   page_rank1   PageRank array 1
+ * @param   page_rank2   PageRank array 2
+ * @param   num_nodes    number of vertices
+ */
+__global__ void
+inibuffer(float *page_rank1, float *page_rank2, const int num_nodes)
+{
+    // Get my workitem id
+    int tid = blockDim.x * blockIdx.x + threadIdx.x;
+    // Initialize two pagerank arrays
+    if (tid < num_nodes) {
+        page_rank1[tid] = 1 / (float)num_nodes;
+        page_rank2[tid] = 0.0f;
+    }
+}
+
+/**
+ * @brief   inicsr
+ * @param   row        csr pointer array
+ * @param   col        csr col array
+ * @param   data       csr weigh array
+ * @param   col_cnt    array for #. out-going edges
+ * @param   num_nodes  number of vertices
+ * @param   num_edges  number of edges
+ */
+__global__ void
+inicsr(int *row, int *col, float *data, int *col_cnt, int num_nodes,
+       int num_edges)
+{
+    // Get my workitem id
+    int tid = blockDim.x * blockIdx.x + threadIdx.x;
+    if (tid < num_nodes) {
+        // Get the starting and ending pointers
+        int start = row[tid];
+        int end;
+        if (tid + 1 < num_nodes) {
+            end = row[tid + 1] ;
+        } else {
+            end = num_edges;
+        }
+
+        int nid;
+        // Navigate one row of data
+        for (int edge = start; edge < end; edge++) {
+            nid = col[edge];
+            // Each neighbor will get equal amount of pagerank
+            data[edge] = 1.0 / (float)col_cnt[nid];
+        }
+    }
+}
+
+/**
+ * @brief   spmv_csr_scalar_kernel (simple spmv)
+ * @param   num_nodes  number of vertices
+ * @param   row        csr pointer array
+ * @param   col        csr col array
+ * @param   data       csr weigh array
+ * @param   x          input vector
+ * @param   y          output vector
+ */
+__global__ void
+spmv_csr_scalar_kernel(const int num_nodes, int *row, int *col, float *data,
+                       float *x, float *y)
+{
+    // Get my workitem id
+    int tid = blockDim.x * blockIdx.x + threadIdx.x;
+    if (tid < num_nodes) {
+        // Get the start and end pointers
+        int row_start = row[tid];
+        int row_end = row[tid + 1];
+        float sum = 0;
+        //navigate one row and sum all the elements
+        for (int j = row_start; j < row_end; j++) {
+            sum += data[j] * x[col[j]];
+        }
+        y[tid] += sum;
+    }
+}
+
+/**
+ * @brief   pagerank2
+ * @param   page_rank1   PageRank array 1
+ * @param   page_rank2   PageRank array 2
+ * @param   num_nodes    number of vertices
+ */
+__global__ void
+pagerank2(float *page_rank1, float *page_rank2, const int num_nodes)
+{
+    // Get my workitem id
+    int tid = blockDim.x * blockIdx.x + threadIdx.x;
+    // Update pagerank value with damping factor
+    if (tid < num_nodes) {
+        page_rank1[tid]	= 0.15f / (float)num_nodes + 0.85f * page_rank2[tid];
+        page_rank2[tid] = 0.0f;
+    }
+}
+
+#endif // KERNEL_SPMV_H

diff --git a/src/gpu/pannotia/pagerank/pagerank.cpp b/src/gpu/pannotia/pagerank/pagerank.cpp
new file mode 100644
index 0000000..3d88882
--- /dev/null
+++ b/src/gpu/pannotia/pagerank/pagerank.cpp

@@ -0,0 +1,263 @@
+/************************************************************************************\ 
+ *                                                                                  *
+ * Copyright � 2014 Advanced Micro Devices, Inc.                                    *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood                                *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair                           *
+ * All rights reserved.                                                             *
+ *                                                                                  *
+ * Redistribution and use in source and binary forms, with or without               *
+ * modification, are permitted provided that the following are met:                 *
+ *                                                                                  *
+ * You must reproduce the above copyright notice.                                   *
+ *                                                                                  *
+ * Neither the name of the copyright holder nor the names of its contributors       *
+ * may be used to endorse or promote products derived from this software            *
+ * without specific, prior, written permission from at least the copyright holder.  *
+ *                                                                                  *
+ * You must include the following terms in your license and/or other materials      *
+ * provided with the software.                                                      *
+ *                                                                                  *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"      *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE        *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A       *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER        *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,         *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS      *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN          *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING  *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY   *
+ * OF SUCH DAMAGE.                                                                  *
+ *                                                                                  *
+ * Without limiting the foregoing, the software may implement third party           *
+ * technologies for which you must obtain licenses from parties other than AMD.     *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall       *
+ * be responsible for obtaining the rights to use and/or distribute the applicable  *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder.                       *
+ *                                                                                  *
+ * If you use the software (in whole or in part), you shall adhere to all           *
+ * applicable U.S., European, and other export laws, including but not limited to   *
+ * the U.S. Export Administration Regulations ("EAR"�) (15 C.F.R Sections 730-774),  *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009.  Further, pursuant   *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a       *
+ * license granted by the United States Department of Commerce Bureau of Industry   *
+ * and Security or as otherwise permitted pursuant to a License Exception under     *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export,     *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or    *
+ * E:2 any restricted technology, software, or source code you receive hereunder,   *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such       *
+ * technology or software, if such foreign produced direct product is subject to    *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR).  For the most current Country Group   *
+ * listings, or for additional information about the EAR or your obligations under  *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's    *
+ * website at http://www.bis.doc.gov/.                                              *
+ *                                                                                  *
+\************************************************************************************/
+
+#include "hip/hip_runtime.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+//#include <sys/time.h>
+#include "../graph_parser/parse.h"
+#include "../graph_parser/util.h"
+#include "kernel.h"
+
+#ifdef GEM5_FUSION
+#include <stdint.h>
+#include <gem5/m5ops.h>
+#endif
+
+// Iteration count
+#define ITER 20
+
+void print_vectorf(float *vector, int num);
+
+int main(int argc, char **argv)
+{
+    char *tmpchar;
+
+    int num_nodes;
+    int num_edges;
+    int file_format = 1;
+    bool directed = 0;
+
+    hipError_t err = hipSuccess;
+
+    if (argc == 3) {
+        tmpchar = argv[1]; // Graph inputfile
+        file_format = atoi(argv[2]); // File format
+    } else {
+        fprintf(stderr, "You did something wrong!\n");
+        exit(1);
+    }
+
+    // Allocate the csr structure
+    csr_array *csr;
+
+    // Parse graph files into csr structure
+    if (file_format == 1) {
+        // Metis
+        csr = parseMetis(tmpchar, &num_nodes, &num_edges, directed);
+    } else if (file_format == 0) {
+        // Dimacs9
+        csr = parseCOO(tmpchar, &num_nodes, &num_edges, 1);
+    } else if (file_format == 2) {
+        // Matrix market
+        csr = parseMM(tmpchar, &num_nodes, &num_edges, directed, 0);
+    } else {
+        printf("reserve for future");
+        exit(1);
+    }
+
+    // Allocate rank_array
+    float *rank_array = (float *)malloc(num_nodes * sizeof(float));
+    if (!rank_array) {
+        fprintf(stderr, "rank array not allocated successfully\n");
+        return -1;
+    }
+
+    int *row_d;
+    int *col_d;
+    int *data_d;
+
+    float *pagerank1_d;
+    float *pagerank2_d;
+
+    // Create device-side buffers for the graph
+    err = hipMalloc(&row_d, num_nodes * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc row_d (size:%d) => %s\n",  num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+    err = hipMalloc(&col_d, num_edges * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc col_d (size:%d) => %s\n",  num_edges, hipGetErrorString(err));
+        return -1;
+    }
+    err = hipMalloc(&data_d, num_edges * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc data_d (size:%d) => %s\n", num_edges, hipGetErrorString(err));
+        return -1;
+    }
+
+    // Create buffers for pagerank
+    err = hipMalloc(&pagerank1_d, num_nodes * sizeof(float));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc pagerank1_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+    err = hipMalloc(&pagerank2_d, num_nodes * sizeof(float));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc pagerank2_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+
+//    double timer1 = gettime();
+
+#ifdef GEM5_FUSION
+    m5_work_begin(0, 0);
+#endif
+
+    // Copy the data to the device-side buffers
+    err = hipMemcpy(row_d, csr->row_array, num_nodes * sizeof(int), hipMemcpyHostToDevice);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR:#endif hipMemcpy row_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+
+    err = hipMemcpy(col_d, csr->col_array, num_edges * sizeof(int), hipMemcpyHostToDevice);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMemcpy col_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+
+    // Set up work dimensions
+    int block_size  = 256;
+    int num_blocks = (num_nodes + block_size - 1) / block_size;
+
+    dim3 threads(block_size, 1, 1);
+    dim3 grid(num_blocks, 1, 1);
+
+//    double timer3 = gettime();
+
+    // Launch the initialization kernel
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(inibuffer), dim3(grid), dim3(threads), 0, 0, row_d, pagerank1_d, pagerank2_d, num_nodes,
+                                  num_edges);
+    hipDeviceSynchronize();
+    err = hipGetLastError();
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: cudaLaunch failed (%s)\n", hipGetErrorString(err));
+        return -1;
+    }
+
+    // Run PageRank for some iter. TO: convergence determination
+    for (int i = 0; i < ITER; i++) {
+        // Launch pagerank kernel 1
+        hipLaunchKernelGGL(HIP_KERNEL_NAME(pagerank1), dim3(grid), dim3(threads), 0, 0, row_d, col_d, data_d, pagerank1_d,
+                                      pagerank2_d, num_nodes, num_edges);
+
+        // Launch pagerank kernel 2
+        hipLaunchKernelGGL(HIP_KERNEL_NAME(pagerank2), dim3(grid), dim3(threads), 0, 0, row_d, col_d, data_d, pagerank1_d,
+                                      pagerank2_d, num_nodes, num_edges);
+    }
+    hipDeviceSynchronize();
+
+//    double timer4 = gettime();
+
+    // Copy the rank buffer back
+    err = hipMemcpy(rank_array, pagerank1_d, num_nodes * sizeof(float), hipMemcpyDeviceToHost);
+
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMemcpy() failed (%s)\n", hipGetErrorString(err));
+        return -1;
+    }
+
+#ifdef GEM5_FUSION
+    m5_work_end(0, 0);
+#endif
+
+//    double timer2 = gettime();
+
+    // Report timing characteristics
+//    printf("kernel time = %lf ms\n", (timer4 - timer3) * 1000);
+//    printf("kernel + memcpy time = %lf ms\n", (timer2 - timer1) * 1000);
+
+#if 1
+    // Print rank array
+    print_vectorf(rank_array, num_nodes);
+#endif
+
+    // Free the host-side arrays
+    free(rank_array);
+    csr->freeArrays();
+    free(csr);
+
+    // Free the device buffers
+    hipFree(row_d);
+    hipFree(col_d);
+    hipFree(data_d);
+
+    hipFree(pagerank1_d);
+    hipFree(pagerank2_d);
+
+    return 0;
+
+}
+
+void print_vectorf(float *vector, int num)
+{
+    FILE * fp = fopen("result.out", "w");
+    if (!fp) {
+        printf("ERROR: unable to open result.txt\n");
+    }
+
+    for (int i = 0; i < num; i++) {
+        fprintf(fp, "%f\n", vector[i]);
+    }
+
+    fclose(fp);
+}
+

diff --git a/src/gpu/pannotia/pagerank/pagerank_spmv.cpp b/src/gpu/pannotia/pagerank/pagerank_spmv.cpp
new file mode 100644
index 0000000..4f650c6
--- /dev/null
+++ b/src/gpu/pannotia/pagerank/pagerank_spmv.cpp

@@ -0,0 +1,278 @@
+/************************************************************************************\ 
+ *                                                                                  *
+ * Copyright � 2014 Advanced Micro Devices, Inc.                                    *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood                                *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair                           *
+ * All rights reserved.                                                             *
+ *                                                                                  *
+ * Redistribution and use in source and binary forms, with or without               *
+ * modification, are permitted provided that the following are met:                 *
+ *                                                                                  *
+ * You must reproduce the above copyright notice.                                   *
+ *                                                                                  *
+ * Neither the name of the copyright holder nor the names of its contributors       *
+ * may be used to endorse or promote products derived from this software            *
+ * without specific, prior, written permission from at least the copyright holder.  *
+ *                                                                                  *
+ * You must include the following terms in your license and/or other materials      *
+ * provided with the software.                                                      *
+ *                                                                                  *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"      *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE        *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A       *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER        *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,         *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS      *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN          *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING  *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY   *
+ * OF SUCH DAMAGE.                                                                  *
+ *                                                                                  *
+ * Without limiting the foregoing, the software may implement third party           *
+ * technologies for which you must obtain licenses from parties other than AMD.     *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall       *
+ * be responsible for obtaining the rights to use and/or distribute the applicable  *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder.                       *
+ *                                                                                  *
+ * If you use the software (in whole or in part), you shall adhere to all           *
+ * applicable U.S., European, and other export laws, including but not limited to   *
+ * the U.S. Export Administration Regulations ("EAR"�) (15 C.F.R Sections 730-774),  *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009.  Further, pursuant   *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a       *
+ * license granted by the United States Department of Commerce Bureau of Industry   *
+ * and Security or as otherwise permitted pursuant to a License Exception under     *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export,     *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or    *
+ * E:2 any restricted technology, software, or source code you receive hereunder,   *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such       *
+ * technology or software, if such foreign produced direct product is subject to    *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR).  For the most current Country Group   *
+ * listings, or for additional information about the EAR or your obligations under  *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's    *
+ * website at http://www.bis.doc.gov/.                                              *
+ *                                                                                  *
+\************************************************************************************/
+
+#include "hip/hip_runtime.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include "../graph_parser/parse.h"
+#include "../graph_parser/util.h"
+#include "kernel_spmv.h"
+
+#ifdef GEM5_FUSION
+#include <stdint.h>
+#include <gem5/m5ops.h>
+#endif
+
+// Iteration count
+#define ITER 20
+
+void print_vectorf(float *vector, int num);
+
+int main(int argc, char **argv)
+{
+    char *tmpchar;
+
+    int num_nodes;
+    int num_edges;
+    int file_format = 1;
+    bool directed = 0;
+
+    hipError_t err = hipSuccess;
+
+    if (argc == 3) {
+        tmpchar = argv[1]; // Graph inputfile
+        file_format = atoi(argv[2]);
+    } else {
+        fprintf(stderr, "You did something wrong!\n");
+        exit(1);
+    }
+
+    // Allocate the csr structure
+    csr_array *csr;
+
+    // Parse graph files into csr structure
+    if (file_format == 1) {
+       csr = parseMetis_transpose(tmpchar, &num_nodes, &num_edges, directed);
+    } else if (file_format == 0) {
+       csr = parseCOO_transpose(tmpchar, &num_nodes, &num_edges, directed);
+    } else {
+       printf("reserve for future");
+       exit(1);
+    }
+
+    // Allocate rank_arrays
+    float *pagerank_array = (float *)malloc(num_nodes * sizeof(float));
+    if (!pagerank_array) fprintf(stderr, "malloc failed page_rank_array\n");
+    float *pagerank_array2 = (float *)malloc(num_nodes * sizeof(float));
+    if (!pagerank_array2) fprintf(stderr, "malloc failed page_rank_array2\n");
+
+    int *row_d;
+    int *col_d;
+    float *data_d;
+
+    float *pagerank1_d;
+    float *pagerank2_d;
+    int *col_cnt_d;
+
+    // Create device-side buffers for the graph
+    err = hipMalloc(&row_d, (num_nodes + 1) * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc row_d (size:%d) => %s\n",  num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+    err = hipMalloc(&col_d, num_edges * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc col_d (size:%d) => %s\n",  num_edges, hipGetErrorString(err));
+        return -1;
+    }
+    err = hipMalloc(&data_d, num_edges * sizeof(float));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc data_d (size:%d) => %s\n", num_edges, hipGetErrorString(err));
+        return -1;
+    }
+
+    // Create buffers for pagerank
+    err = hipMalloc(&pagerank1_d, num_nodes * sizeof(float));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc pagerank1_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+    err = hipMalloc(&pagerank2_d, num_nodes * sizeof(float));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc pagerank2_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+    err = hipMalloc(&col_cnt_d, num_nodes * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc col_cnt_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+
+    double timer1 = gettime();
+
+#ifdef GEM5_FUSION
+    m5_work_begin(0, 0);
+#endif
+
+    // Copy the data to the device-side buffers
+    err = hipMemcpy(row_d, csr->row_array, (num_nodes + 1) * sizeof(int), hipMemcpyHostToDevice);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR:#endif hipMemcpy row_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+
+    err = hipMemcpy(col_d, csr->col_array, num_edges * sizeof(int), hipMemcpyHostToDevice);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMemcpy col_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+
+    err = hipMemcpy(col_cnt_d, csr->col_cnt, num_nodes * sizeof(int), hipMemcpyHostToDevice);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMemcpy col_cnt_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+
+    // Set up work dimensions
+    int block_size = 64;
+    int num_blocks = (num_nodes + block_size - 1) / block_size;
+
+    dim3 threads(block_size, 1, 1);
+    dim3 grid(num_blocks, 1, 1);
+
+    double timer3 = gettime();
+
+    // Launch the initialization kernel
+    hipLaunchKernelGGL(inibuffer, dim3(grid), dim3(threads), 0, 0, pagerank1_d, pagerank2_d, num_nodes);
+    hipDeviceSynchronize();
+    err = hipGetLastError();
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipLaunchByPtr failed (%s)\n", hipGetErrorString(err));
+        return -1;
+    }
+
+    // Initialize the CSR
+    hipLaunchKernelGGL(inicsr, dim3(grid), dim3(threads), 0, 0, row_d, col_d, data_d, col_cnt_d, num_nodes,
+                               num_edges);
+    hipDeviceSynchronize();
+    err = hipGetLastError();
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipLaunchByPtr failed (%s)\n", hipGetErrorString(err));
+        return -1;
+    }
+
+    // Run PageRank for some iter. TO: convergence determination
+    for (int i = 0; i < ITER; i++) {
+        // Launch pagerank kernel 1
+        hipLaunchKernelGGL(spmv_csr_scalar_kernel, dim3(grid), dim3(threads), 0, 0, num_nodes, row_d, col_d,
+                                                   data_d, pagerank1_d,
+                                                   pagerank2_d);
+
+        // Launch pagerank kernel 2
+        hipLaunchKernelGGL(pagerank2, dim3(grid), dim3(threads), 0, 0, pagerank1_d, pagerank2_d, num_nodes);
+    }
+    hipDeviceSynchronize();
+
+    double timer4 = gettime();
+
+    // Copy the rank buffer back
+    err = hipMemcpy(pagerank_array, pagerank1_d, num_nodes * sizeof(float), hipMemcpyDeviceToHost);
+
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMemcpy() failed (%s)\n", hipGetErrorString(err));
+        return -1;
+    }
+
+#ifdef GEM5_FUSION
+    m5_work_end(0, 0);
+#endif
+
+    double timer2 = gettime();
+
+    // Report timing characteristics
+    printf("kernel time = %lf ms\n", (timer4 - timer3) * 1000);
+    printf("kernel + memcpy time = %lf ms\n", (timer2 - timer1) * 1000);
+
+#if 1
+    // Print rank array
+    print_vectorf(pagerank_array, num_nodes);
+#endif
+
+    // Free the host-side arrays
+    free(pagerank_array);
+    free(pagerank_array2);
+    csr->freeArrays();
+    free(csr);
+
+    // Free the device buffers
+    hipFree(row_d);
+    hipFree(col_d);
+    hipFree(data_d);
+
+    hipFree(pagerank1_d);
+    hipFree(pagerank2_d);
+
+    return 0;
+}
+
+void print_vectorf(float *vector, int num)
+{
+    FILE * fp = fopen("result.out", "w");
+    if (!fp) {
+        printf("ERROR: unable to open result.txt\n");
+    }
+
+    for (int i = 0; i < num; i++) {
+        fprintf(fp, "%f\n", vector[i]);
+    }
+
+    fclose(fp);
+}
+

diff --git a/src/gpu/pannotia/sssp/Makefile b/src/gpu/pannotia/sssp/Makefile
new file mode 100644
index 0000000..6158bac
--- /dev/null
+++ b/src/gpu/pannotia/sssp/Makefile

@@ -0,0 +1,11 @@
+default:
+	make -f Makefile.default
+
+clean:
+	make -f Makefile.default clean
+
+gem5-fusion:
+	make -f Makefile.gem5-fusion
+
+clean-gem5-fusion:
+	make -f Makefile.gem5-fusion clean

diff --git a/src/gpu/pannotia/sssp/Makefile.default b/src/gpu/pannotia/sssp/Makefile.default
new file mode 100644
index 0000000..a0dfab1
--- /dev/null
+++ b/src/gpu/pannotia/sssp/Makefile.default

@@ -0,0 +1,29 @@
+HIP_PATH ?= /opt/rocm/hip
+HIPCC = $(HIP_PATH)/bin/hipcc
+
+BASEEXE = sssp
+VARIANT ?= CSR
+ifeq ($(VARIANT),CSR)
+    EXECUTABLE = $(BASEEXE)
+    CPPFILES += sssp_csr.cpp
+else ifeq ($(VARIANT),ELL)
+    EXECUTABLE = $(BASEEXE)_ell
+    CPPFILES += sssp_ell.cpp
+endif
+
+OPTS = -O3
+
+BIN_DIR ?= ./bin
+
+all: $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR)/$(EXECUTABLE): $(CPPFILES) ../graph_parser/parse.cpp ../graph_parser/util.cpp $(BIN_DIR)
+	$(HIPCC) -O3 --amdgpu-target=gfx801,gfx803,gfx906 $(CXXFLAGS) ../graph_parser/parse.cpp ../graph_parser/util.cpp $(CPPFILES) -o $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR):
+	mkdir -p $(BIN_DIR)
+
+clean:
+	rm -rf $(BIN_DIR)
+
+.PHONY: square clean

diff --git a/src/gpu/pannotia/sssp/Makefile.gem5-fusion b/src/gpu/pannotia/sssp/Makefile.gem5-fusion
new file mode 100644
index 0000000..cb58ff0
--- /dev/null
+++ b/src/gpu/pannotia/sssp/Makefile.gem5-fusion

@@ -0,0 +1,35 @@
+HIP_PATH ?= /opt/rocm/hip
+HIPCC = $(HIP_PATH)/bin/hipcc
+
+# these are needed for m5ops
+# TODO: Need some sort of explicit PATH?  Read in?
+GEM5_PATH ?= /nobackup/sinclair/gem5
+CFLAGS += -I$(GEM5_PATH)/include -I../graph_parser
+LDFLAGS += -L$(GEM5_PATH)/util/m5/build/x86/out -lm5
+
+BASEEXE = sssp
+VARIANT ?= CSR
+ifeq ($(VARIANT),CSR)
+    EXECUTABLE = $(BASEEXE).gem5
+    CPPFILES += sssp_csr.cpp
+else ifeq ($(VARIANT),ELL)
+    EXECUTABLE = $(BASEEXE)_ell.gem5
+    CPPFILES += sssp_ell.cpp
+endif
+
+OPTS = -O3
+
+BIN_DIR ?= ./bin
+
+all: $(BIN_DIR)/$(EXECUTABLE)
+
+$(BIN_DIR)/$(EXECUTABLE): $(CPPFILES) ../graph_parser/parse.cpp ../graph_parser/util.cpp $(BIN_DIR)
+	$(HIPCC) -O3 --amdgpu-target=gfx801,gfx803 $(CXXFLAGS) ../graph_parser/parse.cpp ../graph_parser/util.cpp $(CPPFILES) -DGEM5_FUSION -o $(BIN_DIR)/$(EXECUTABLE) $(CFLAGS) $(LDFLAGS)
+
+$(BIN_DIR):
+	mkdir -p $(BIN_DIR)
+
+clean:
+	rm -rf $(BIN_DIR)
+
+.PHONY: square clean

diff --git a/src/gpu/pannotia/sssp/README.md b/src/gpu/pannotia/sssp/README.md
new file mode 100644
index 0000000..43cfe0a
--- /dev/null
+++ b/src/gpu/pannotia/sssp/README.md

@@ -0,0 +1,66 @@
+---
+title: Pannotia SSSP Test
+tags:
+    - x86
+    - amdgpu
+layout: default
+permalink: resources/pannotia/sssp
+shortdoc: >
+    Resources to build a disk image with the GCN3 Pannotia SSSP workload.
+---
+
+Single-Source Shortest Path (sssp) is a graph analytics application that is part of the Pannotia benchmark suite.  It is designed to calculate the shortest paths between the source vertex and all the other vertices in a graph.  The provided version is for use with the gpu-compute model of gem5.  Thus, it has been ported from the prior CUDA and OpenCL variants to HIP, and validated on a Vega-class AMD GPU.
+
+Compiling both SSSP variants, compiling the GCN3_X86/Vega_X86 versions of gem5, and running both SSSP variants on gem5 is dependent on the gcn-gpu docker image, `util/dockerfiles/gcn-gpu/Dockerfile` on the [gem5 stable branch](https://gem5.googlesource.com/public/gem5/+/refs/heads/stable).
+
+## Compilation and Running
+
+SSSP has two variants: csr and ell.  To compile the "csr" variant:
+
+```
+cd src/gpu/pannotia/sssp
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu make gem5-fusion
+```
+
+To compile the "ell" variant:
+
+```
+cd src/gpu/pannotia/sssp
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu bash -c "export VARIANT=ELL ; make gem5-fusion"
+```
+
+If you use the Makefile.default file instead, the Makefile will generate code designed to run on the real GPU instead.  Moreover, note that Makefile.gem5-fusion requires you to set the GEM5_ROOT variable (either on the command line or by modifying the Makefile), because the Pannotia applications have been updated to use [m5ops](https://www.gem5.org/documentation/general_docs/m5ops/).  By default, for both variants the Makefile builds for gfx801 and gfx803, and the binaries are placed in the src/gpu/pannotia/sssp/bin folder.  Moreover, by default the VARIANT variable SSSP's Makefile assumes the csr variant is being used, hence why this variable does not need to be set for compiling it.
+
+## Compiling GCN3_X86/gem5.opt
+
+SSSP is a GPU application, which requires that gem5 is built with the GCN3_X86 (or Vega_X86, although this has been less heavily tested) architecture.  The test is run with the GCN3_X86 gem5 variant, compiled using the gcn-gpu docker image:
+
+```
+git clone https://gem5.googlesource.com/public/gem5
+cd gem5
+docker run -u $UID:$GID --volume $(pwd):$(pwd) -w $(pwd) gcr.io/gem5-test/gcn-gpu:latest scons build/GCN3_X86/gem5.opt -j <num cores>
+```
+
+## Running SSSP on GCN3_X86/gem5.opt
+
+The following command shows how to run the SSSP csr version:
+
+# Assuming gem5 and gem5-resources are in your working directory
+```
+wget http://dist.gem5.org/dist/develop/datasets/pannotia/bc/1k_128k.gr
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu gem5/build/GCN3_X86/gem5.opt gem5/configs/example/apu_se.py -n3 --mem-size=8GB --benchmark-root=gem5-resources/src/gpu/pannotia/sssp/bin -c sssp_csr.gem5 --options="1k_128k.gr 0"
+```
+
+To run the SSSP ell version:
+
+# Assuming gem5, pannotia (input graphs, see below), and gem5-resources are in your working directory
+```
+wget http://dist.gem5.org/dist/develop/datasets/pannotia/bc/1k_128k.gr
+docker run --rm -v ${PWD}:${PWD} -w ${PWD} -u $UID:$GID gcr.io/gem5-test/gcn-gpu gem5/build/GCN3_X86/gem5.opt gem5/configs/example/apu_se.py -n3 --mem-size=8GB --benchmark-root=gem5-resources/src/gpu/pannotia/sssp/bin -c sssp_ell.gem5 --options="1k_128k.gr 0"
+```
+
+Note that the datasets from the original Pannotia suite have been uploaded to: <http://dist.gem5.org/dist/develop/datasets/pannotia>.  We recommend you start with the 1k_128k.gr input (<http://dist.gem5.org/dist/develop/datasets/pannotia/bc/1k_128k.gr>), as this is the smallest input that can be run with SSSP.  Note that 1k_128k is not designed for SSSP specifically though -- the above link has larger graphs designed to run with SSSP that you should consider using for larger experiments.
+
+## Pre-built binary
+
+A pre-built binary will be added soon.

diff --git a/src/gpu/pannotia/sssp/kernel.h b/src/gpu/pannotia/sssp/kernel.h
new file mode 100644
index 0000000..e56140e
--- /dev/null
+++ b/src/gpu/pannotia/sssp/kernel.h

@@ -0,0 +1,190 @@
+/************************************************************************************\
+ *                                                                                  *
+ * Copyright � 2014 Advanced Micro Devices, Inc.                                    *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood                                *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair                           *
+ * All rights reserved.                                                             *
+ *                                                                                  *
+ * Redistribution and use in source and binary forms, with or without               *
+ * modification, are permitted provided that the following are met:                 *
+ *                                                                                  *
+ * You must reproduce the above copyright notice.                                   *
+ *                                                                                  *
+ * Neither the name of the copyright holder nor the names of its contributors       *
+ * may be used to endorse or promote products derived from this software            *
+ * without specific, prior, written permission from at least the copyright holder.  *
+ *                                                                                  *
+ * You must include the following terms in your license and/or other materials      *
+ * provided with the software.                                                      *
+ *                                                                                  *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"      *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE        *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A       *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER        *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,         *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS      *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN          *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING  *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY   *
+ * OF SUCH DAMAGE.                                                                  *
+ *                                                                                  *
+ * Without limiting the foregoing, the software may implement third party           *
+ * technologies for which you must obtain licenses from parties other than AMD.     *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall       *
+ * be responsible for obtaining the rights to use and/or distribute the applicable  *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder.                       *
+ *                                                                                  *
+ * If you use the software (in whole or in part), you shall adhere to all           *
+ * applicable U.S., European, and other export laws, including but not limited to   *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774),  *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009.  Further, pursuant   *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a       *
+ * license granted by the United States Department of Commerce Bureau of Industry   *
+ * and Security or as otherwise permitted pursuant to a License Exception under     *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export,     *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or    *
+ * E:2 any restricted technology, software, or source code you receive hereunder,   *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such       *
+ * technology or software, if such foreign produced direct product is subject to    *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR).  For the most current Country Group   *
+ * listings, or for additional information about the EAR or your obligations under  *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's    *
+ * website at http://www.bis.doc.gov/.                                              *
+ *                                                                                  *
+\************************************************************************************/
+
+#ifndef KERNEL_H
+#define KERNEL_H
+
+#include "hip/hip_runtime.h"
+#define BIG_NUM 99999999
+
+/**
+ * @brief   min.+
+ * @param   num_nodes  Number of vertices
+ * @param   row        CSR pointer array
+ * @param   col        CSR column array
+ * @param   data       Weight array
+ * @param   x          Input vector
+ * @param   y          Output vector
+ */
+__global__ void
+spmv_min_dot_plus_kernel(const int num_rows, int *row, int *col, int *data,
+                         int *x, int *y)
+{
+    // Get my workitem id
+    int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+
+    if (tid < num_rows) {
+        // Get the start and end pointers
+        int row_start = row[tid];
+        int row_end = row[tid + 1];
+
+        // Perform + for each pair of elements and a reduction with min
+        int min = x[tid];
+        for (int i = row_start; i < row_end; i++) {
+            if (data[i] + x[col[i]] < min) {
+                min = data[i] + x[col[i]];
+            }
+        }
+        y[tid] = min;
+    }
+}
+
+/**
+ * @brief   min.+
+ * @param   num_nodes  number of vertices
+ * @param   height     the height of the adjacency matrix (col-major)
+ * @param   col        the col array
+ * @param   data       the data array
+ * @param   x          the input vector
+ * @param   y          the output vector
+ */
+__global__ void
+ell_min_dot_plus_kernel(const int num_nodes, const int height, int *col,
+                        int *data, int *x, int *y)
+{
+    // Get workitem id
+    int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+
+    if (tid < num_nodes) {
+        int mat_offset = tid;
+        int min = x[tid];
+
+        // The vertices process a row of matrix (col-major)
+        for (int i = 0; i < height; i++) {
+            int mat_elem = data[mat_offset];
+            int vec_elem = x[col[mat_offset]];
+            if (mat_elem + vec_elem < min) {
+                min = mat_elem + vec_elem;
+            }
+            mat_offset += num_nodes;
+        }
+        y[tid] = min;
+    }
+}
+
+/**
+ * @brief   vector_init
+ * @param   vector1      vector1
+ * @param   vector2      vector2
+ * @param   i            source vertex id
+ * @param   num_nodes    number of vertices
+ */
+__global__ void
+vector_init(int *vector1, int *vector2, const int i, const int num_nodes)
+{
+    int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+
+    if (tid < num_nodes) {
+        if (tid == i) {
+            // If it is the source vertex
+            vector1[tid] = 0;
+            vector2[tid] = 0;
+        } else {
+            // If it a non-source vertex
+            vector1[tid] = BIG_NUM;
+            vector2[tid] = BIG_NUM;
+        }
+    }
+}
+
+/**
+ * @brief   vector_assign
+ * @param   vector1      vector1
+ * @param   vector2      vector2
+ * @param   num_nodes    number of vertices
+ */
+__global__ void
+vector_assign(int *vector1, int *vector2, const int num_nodes)
+{
+    int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+
+    if (tid < num_nodes) {
+        vector1[tid] = vector2[tid];
+    }
+}
+
+/**
+ * @brief   vector_diff
+ * @param   vector1      vector1
+ * @param   vector2      vector2
+ * @param   stop         termination variable
+ * @param   num_nodes    number of vertices
+ */
+__global__ void
+vector_diff(int *vector1, int *vector2, int *stop, const int num_nodes)
+{
+    int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+
+    if (tid < num_nodes) {
+        if (vector2[tid] != vector1[tid]) {
+            *stop = 1;
+        }
+    }
+}
+
+#endif // KERNEL_H

diff --git a/src/gpu/pannotia/sssp/sssp_csr.cpp b/src/gpu/pannotia/sssp/sssp_csr.cpp
new file mode 100644
index 0000000..f971d17
--- /dev/null
+++ b/src/gpu/pannotia/sssp/sssp_csr.cpp

@@ -0,0 +1,300 @@
+/************************************************************************************\ 
+ *                                                                                  *
+ * Copyright � 2014 Advanced Micro Devices, Inc.                                    *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood                                *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair                           *
+ * All rights reserved.                                                             *
+ *                                                                                  *
+ * Redistribution and use in source and binary forms, with or without               *
+ * modification, are permitted provided that the following are met:                 *
+ *                                                                                  *
+ * You must reproduce the above copyright notice.                                   *
+ *                                                                                  *
+ * Neither the name of the copyright holder nor the names of its contributors       *
+ * may be used to endorse or promote products derived from this software            *
+ * without specific, prior, written permission from at least the copyright holder.  *
+ *                                                                                  *
+ * You must include the following terms in your license and/or other materials      *
+ * provided with the software.                                                      *
+ *                                                                                  *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"      *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE        *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A       *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER        *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,         *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS      *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN          *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING  *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY   *
+ * OF SUCH DAMAGE.                                                                  *
+ *                                                                                  *
+ * Without limiting the foregoing, the software may implement third party           *
+ * technologies for which you must obtain licenses from parties other than AMD.     *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall       *
+ * be responsible for obtaining the rights to use and/or distribute the applicable  *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder.                       *
+ *                                                                                  *
+ * If you use the software (in whole or in part), you shall adhere to all           *
+ * applicable U.S., European, and other export laws, including but not limited to   *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774),  *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009.  Further, pursuant   *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a       *
+ * license granted by the United States Department of Commerce Bureau of Industry   *
+ * and Security or as otherwise permitted pursuant to a License Exception under     *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export,     *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or    *
+ * E:2 any restricted technology, software, or source code you receive hereunder,   *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such       *
+ * technology or software, if such foreign produced direct product is subject to    *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR).  For the most current Country Group   *
+ * listings, or for additional information about the EAR or your obligations under  *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's    *
+ * website at http://www.bis.doc.gov/.                                              *
+ *                                                                                  *
+\************************************************************************************/
+
+#include "hip/hip_runtime.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+//#include <sys/time.h>
+#include <algorithm>
+#include "../graph_parser/parse.h"
+#include "../graph_parser/util.h"
+#include "kernel.h"
+
+#ifdef GEM5_FUSION
+#include <stdint.h>
+#include <gem5/m5ops.h>
+#endif
+
+void print_vector(int *vector, int num);
+
+int main(int argc, char **argv)
+{
+    char *tmpchar;
+    bool directed = 1;
+
+    int num_nodes;
+    int num_edges;
+    int file_format = 1;
+
+    hipError_t err = hipSuccess;
+
+    if (argc == 3) {
+        tmpchar = argv[1];  // Graph inputfile
+        file_format = atoi(argv[2]);
+    } else {
+        fprintf(stderr, "You did something wrong!\n");
+        exit(1);
+    }
+
+    // Allocate the csr structure
+    csr_array *csr;
+
+    // Parse the graph and store it into the CSR structure
+    if (file_format == 1) {
+        csr = parseMetis_transpose(tmpchar, &num_nodes, &num_edges, directed);
+    } else if (file_format == 0) {
+        csr = parseCOO_transpose(tmpchar, &num_nodes, &num_edges, directed);
+    } else {
+        printf("reserve for future");
+        exit(1);
+    }
+
+    // Allocate the cost array
+    int *cost_array = (int *)malloc(num_nodes * sizeof(int));
+    if (!cost_array) fprintf(stderr, "malloc failed cost_array\n");
+
+    // Set the cost array to zero
+    for (int i = 0; i < num_nodes; i++) {
+        cost_array[i] = 0;
+    }
+
+    // Create device-side buffers
+    int *row_d;
+    int *col_d;
+    int *data_d;
+    int *vector_d1;
+    int *vector_d2;
+    int *stop_d;
+
+    // Create the device-side graph structure
+    err = hipMalloc(&row_d, (num_nodes + 1) * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc row_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+    err = hipMalloc(&col_d, num_edges * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc col_d (size:%d) => %s\n", num_edges, hipGetErrorString(err));
+        return -1;
+    }
+    err = hipMalloc(&data_d, num_edges * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc data_d (size:%d) => %s\n", num_edges, hipGetErrorString(err));
+        return -1;
+    }
+
+    // Termination variable
+    err = hipMalloc(&stop_d, sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc stop_d (size:%d) => %s\n", 1, hipGetErrorString(err));
+        return -1;
+    }
+
+    // Create the device-side buffers for sssp
+    err = hipMalloc(&vector_d1, num_nodes * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc vector_d1 (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+    err = hipMalloc(&vector_d2, num_nodes * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc vector_d2 (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+
+    //double timer1 = gettime();
+
+#ifdef GEM5_FUSION
+    m5_work_begin(0, 0);
+#endif
+
+    // Copy data to device side buffers
+    err = hipMemcpy(row_d, csr->row_array, (num_nodes + 1) * sizeof(int), hipMemcpyHostToDevice);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMemcpy row_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+
+    err = hipMemcpy(col_d, csr->col_array, num_edges * sizeof(int), hipMemcpyHostToDevice);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMemcpy col_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+
+    err = hipMemcpy(data_d, csr->data_array, num_edges * sizeof(int), hipMemcpyHostToDevice);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMemcpy data_d (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+
+    //double timer3 = gettime();
+
+    // Work dimensions
+    int block_size = 64;
+    int num_blocks = (num_nodes + block_size - 1) / block_size;
+
+    dim3 threads(block_size, 1, 1);
+    dim3 grid(num_blocks, 1, 1);
+
+    // Source vertex 0
+    int sourceVertex = 0;
+
+    // Launch the initialization kernel
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(vector_init), dim3(grid), dim3(threads), 0, 0, vector_d1, vector_d2, sourceVertex, num_nodes);
+    hipDeviceSynchronize();
+    err = hipGetLastError();
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: vector_init failed (%s)\n", hipGetErrorString(err));
+        return -1;
+    }
+
+    int stop = 1;
+    int cnt = 0;
+    // Main computation loop
+    for (int i = 1; i < num_nodes; i++) {
+        // Reset the termination variable
+        stop = 0;
+
+        // Copy the termination variable to the device
+        err = hipMemcpy(stop_d, &stop, sizeof(int), hipMemcpyHostToDevice);
+        if (err != hipSuccess) {
+            fprintf(stderr, "ERROR: write stop_d (%s)\n", hipGetErrorString(err));
+            return -1;
+        }
+
+        // Launch the assignment kernel
+        hipLaunchKernelGGL(HIP_KERNEL_NAME(vector_assign), dim3(grid), dim3(threads), 0, 0, vector_d1, vector_d2, num_nodes);
+
+        // Launch the min.+ kernel
+        hipLaunchKernelGGL(HIP_KERNEL_NAME(spmv_min_dot_plus_kernel), dim3(grid), dim3(threads), 0, 0, num_nodes, row_d, col_d,
+                                                     data_d, vector_d1,
+                                                     vector_d2);
+
+        // Launch the check kernel
+        hipLaunchKernelGGL(HIP_KERNEL_NAME(vector_diff), dim3(grid), dim3(threads), 0, 0, vector_d1, vector_d2,
+                                        stop_d, num_nodes);
+
+        // Read the termination variable back
+        err = hipMemcpy(&stop, stop_d, sizeof(int), hipMemcpyDeviceToHost);
+        if (err != hipSuccess) {
+            fprintf(stderr, "ERROR: read stop_d (%s)\n", hipGetErrorString(err));
+            return -1;
+        }
+
+        // Exit the loop
+        if (stop == 0) {
+            break;
+        }
+        cnt++;
+    }
+    hipDeviceSynchronize();
+    //double timer4 = gettime();
+
+    // Read the cost_array back
+    err = hipMemcpy(cost_array, vector_d1, num_nodes * sizeof(int), hipMemcpyDeviceToHost);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: read vector_d1 (%s)\n", hipGetErrorString(err));
+        return -1;
+    }
+
+#ifdef GEM5_FUSION
+    m5_work_end(0, 0);
+#endif
+
+    //double timer2 = gettime();
+
+    // Print the timing statistics
+    //printf("kernel + memcpy time = %lf ms\n", (timer2 - timer1) * 1000);
+    //printf("kernel time = %lf ms\n", (timer4 - timer3) * 1000);
+    printf("number iterations = %d\n", cnt);
+
+#if 1
+    // Print cost_array
+    print_vector(cost_array, num_nodes);
+#endif
+
+    // Clean up the host arrays
+    free(cost_array);
+    csr->freeArrays();
+    free(csr);
+
+    // Clean up the device-side buffers
+    hipFree(row_d);
+    hipFree(col_d);
+    hipFree(data_d);
+    hipFree(stop_d);
+    hipFree(vector_d1);
+    hipFree(vector_d2);
+
+    return 0;
+}
+
+void print_vector(int *vector, int num)
+{
+
+    FILE * fp = fopen("result.out", "w");
+    if (!fp) {
+        printf("ERROR: unable to open result.txt\n");
+    }
+
+    for (int i = 0; i < num; i++)
+        fprintf(fp, "%d: %d\n", i + 1, vector[i]);
+
+    fclose(fp);
+}

diff --git a/src/gpu/pannotia/sssp/sssp_ell.cpp b/src/gpu/pannotia/sssp/sssp_ell.cpp
new file mode 100644
index 0000000..a621b17
--- /dev/null
+++ b/src/gpu/pannotia/sssp/sssp_ell.cpp

@@ -0,0 +1,300 @@
+/************************************************************************************\ 
+ *                                                                                  *
+ * Copyright � 2014 Advanced Micro Devices, Inc.                                    *
+ * Copyright (c) 2015 Mark D. Hill and David A. Wood                                *
+ * Copyright (c) 2021 Gaurav Jain and Matthew D. Sinclair                           *
+ * All rights reserved.                                                             *
+ *                                                                                  *
+ * Redistribution and use in source and binary forms, with or without               *
+ * modification, are permitted provided that the following are met:                 *
+ *                                                                                  *
+ * You must reproduce the above copyright notice.                                   *
+ *                                                                                  *
+ * Neither the name of the copyright holder nor the names of its contributors       *
+ * may be used to endorse or promote products derived from this software            *
+ * without specific, prior, written permission from at least the copyright holder.  *
+ *                                                                                  *
+ * You must include the following terms in your license and/or other materials      *
+ * provided with the software.                                                      *
+ *                                                                                  *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"      *
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE        *
+ * IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A       *
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER        *
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,         *
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  *
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS      *
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN          *
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING  *
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY   *
+ * OF SUCH DAMAGE.                                                                  *
+ *                                                                                  *
+ * Without limiting the foregoing, the software may implement third party           *
+ * technologies for which you must obtain licenses from parties other than AMD.     *
+ * You agree that AMD has not obtained or conveyed to you, and that you shall       *
+ * be responsible for obtaining the rights to use and/or distribute the applicable  *
+ * underlying intellectual property rights related to the third party technologies. *
+ * These third party technologies are not licensed hereunder.                       *
+ *                                                                                  *
+ * If you use the software (in whole or in part), you shall adhere to all           *
+ * applicable U.S., European, and other export laws, including but not limited to   *
+ * the U.S. Export Administration Regulations ("EAR") (15 C.F.R Sections 730-774),  *
+ * and E.U. Council Regulation (EC) No 428/2009 of 5 May 2009.  Further, pursuant   *
+ * to Section 740.6 of the EAR, you hereby certify that, except pursuant to a       *
+ * license granted by the United States Department of Commerce Bureau of Industry   *
+ * and Security or as otherwise permitted pursuant to a License Exception under     *
+ * the U.S. Export Administration Regulations ("EAR"), you will not (1) export,     *
+ * re-export or release to a national of a country in Country Groups D:1, E:1 or    *
+ * E:2 any restricted technology, software, or source code you receive hereunder,   *
+ * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such       *
+ * technology or software, if such foreign produced direct product is subject to    *
+ * national security controls as identified on the Commerce Control List (currently *
+ * found in Supplement 1 to Part 774 of EAR).  For the most current Country Group   *
+ * listings, or for additional information about the EAR or your obligations under  *
+ * those regulations, please refer to the U.S. Bureau of Industry and Security's    *
+ * website at http://www.bis.doc.gov/.                                              *
+ *                                                                                  *
+\************************************************************************************/
+
+#include "hip/hip_runtime.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <algorithm>
+#include "../graph_parser/parse.h"
+#include "../graph_parser/util.h"
+#include "kernel.h"
+
+#ifdef GEM5_FUSION
+#include <stdint.h>
+extern "C" {
+void m5_work_begin(uint64_t workid, uint64_t threadid);
+void m5_work_end(uint64_t workid, uint64_t threadid);
+}
+#endif
+
+#define BIGNUM 99999999
+
+void print_vector(int *vector, int num);
+
+int main(int argc, char **argv)
+{
+    char *tmpchar;
+    bool directed = 1;
+
+    int num_nodes;
+    int num_edges;
+    int file_format = 1;
+
+    hipError_t err = hipSuccess;
+
+    if (argc == 3) {
+        tmpchar = argv[1];  // Graph inputfile
+        file_format = atoi(argv[2]);
+    } else {
+        fprintf(stderr, "You did something wrong!\n");
+        exit(1);
+    }
+
+    // Allocate the csr structure
+    csr_array *csr;
+
+    // Parse the graph and store it into the CSR structure
+    if (file_format == 1) {
+        csr = parseMetis_transpose(tmpchar, &num_nodes, &num_edges, directed);
+    } else if (file_format == 0) {
+        csr = parseCOO_transpose(tmpchar, &num_nodes, &num_edges, directed);
+    } else {
+        printf("reserve for future");
+        exit(1);
+    }
+
+    // Allocate ell and transform from csr
+    ell_array *ell = csr2ell(csr, num_nodes, num_edges, BIGNUM);
+    int height = ell->max_height;
+
+    // Allocate the cost array
+    int *cost_array = (int *)malloc(num_nodes * sizeof(int));
+    if (!cost_array) fprintf(stderr, "malloc failed cost_array\n");
+
+    // Set the cost array to zero
+    for (int i = 0; i < num_nodes; i++) {
+        cost_array[i] = 0;
+    }
+
+    // Create device-side buffers
+    int *ell_col_d;
+    int *ell_data_d;
+    int *vector_d1;
+    int *vector_d2;
+    int *stop_d;
+
+    // Create the device-side graph structure
+    err = hipMalloc(&ell_col_d, height * num_nodes * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc ell_col_d (size:%d) => %s\n", height * num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+    err = hipMalloc(&ell_data_d, height * num_nodes * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc ell_data_d (size:%d) => %s\n", height * num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+
+    // Termination variable
+    err = hipMalloc(&stop_d, sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc stop_d (size:%d) => %s\n", 1, hipGetErrorString(err));
+        return -1;
+    }
+
+    // Create the device-side buffers for sssp
+    err = hipMalloc(&vector_d1, num_nodes * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc vector_d1 (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+    err = hipMalloc(&vector_d2, num_nodes * sizeof(int));
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMalloc vector_d2 (size:%d) => %s\n", num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+
+    double timer1 = gettime();
+
+#ifdef GEM5_FUSION
+    m5_work_begin(0, 0);
+#endif
+
+    // Copy data to device side buffers
+    err = hipMemcpy(ell_col_d, ell->col_array, height * num_nodes * sizeof(int), hipMemcpyHostToDevice);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMemcpy ell_col_d (size:%d) => %s\n", height * num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+
+    err = hipMemcpy(ell_data_d, ell->data_array, height * num_nodes * sizeof(int), hipMemcpyHostToDevice);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: hipMemcpy ell_data_d (size:%d) => %s\n", height * num_nodes, hipGetErrorString(err));
+        return -1;
+    }
+
+    double timer3 = gettime();
+
+    // Work dimensions
+    int block_size = 64;
+    int num_blocks = (num_nodes + block_size - 1) / block_size;
+
+    dim3 threads(block_size, 1, 1);
+    dim3 grid(num_blocks, 1, 1);
+
+    // Source vertex 0
+    int sourceVertex = 0;
+
+    // Launch the initialization kernel
+    hipLaunchKernelGGL(vector_init, dim3(grid), dim3(threads), 0, 0, vector_d1, vector_d2, sourceVertex, num_nodes);
+    hipDeviceSynchronize();
+    err = hipGetLastError();
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: vector_init failed (%s)\n", hipGetErrorString(err));
+        return -1;
+    }
+
+    int stop = 1;
+    int cnt = 0;
+    // Main computation loop
+    for (int i = 1; i < num_nodes; i++) {
+        // Reset the termination variable
+        stop = 0;
+
+        // Copy the termination variable to the device
+        err = hipMemcpy(stop_d, &stop, sizeof(int), hipMemcpyHostToDevice);
+        if (err != hipSuccess) {
+            fprintf(stderr, "ERROR: write stop_d (%s)\n", hipGetErrorString(err));
+            return -1;
+        }
+
+        // Launch the assignment kernel
+        hipLaunchKernelGGL(vector_assign, dim3(grid), dim3(threads), 0, 0, vector_d1, vector_d2, num_nodes);
+
+        // Launch the min.+ kernel
+        hipLaunchKernelGGL(ell_min_dot_plus_kernel, dim3(grid), dim3(threads), 0, 0, num_nodes, height,
+                                                    ell_col_d, ell_data_d,
+                                                    vector_d1, vector_d2);
+
+        // Launch the check kernel
+        hipLaunchKernelGGL(vector_diff, dim3(grid), dim3(threads), 0, 0, vector_d1, vector_d2,
+                                        stop_d, num_nodes);
+
+        // Read the termination variable back
+        err = hipMemcpy(&stop, stop_d, sizeof(int), hipMemcpyDeviceToHost);
+        if (err != hipSuccess) {
+            fprintf(stderr, "ERROR: read stop_d (%s)\n", hipGetErrorString(err));
+            return -1;
+        }
+
+        // Exit the loop
+        if (stop == 0) {
+            break;
+        }
+        cnt++;
+    }
+    hipDeviceSynchronize();
+    double timer4 = gettime();
+
+    // Read the cost_array back
+    err = hipMemcpy(cost_array, vector_d1, num_nodes * sizeof(int), hipMemcpyDeviceToHost);
+    if (err != hipSuccess) {
+        fprintf(stderr, "ERROR: read vector_d1 (%s)\n", hipGetErrorString(err));
+        return -1;
+    }
+
+#ifdef GEM5_FUSION
+    m5_work_end(0, 0);
+#endif
+
+    double timer2 = gettime();
+
+    // Print the timing statistics
+    printf("kernel + memcpy time = %lf ms\n", (timer2 - timer1) * 1000);
+    printf("kernel time = %lf ms\n", (timer4 - timer3) * 1000);
+    printf("number iterations = %d\n", cnt);
+
+#if 1
+    // Print cost_array
+    print_vector(cost_array, num_nodes);
+#endif
+
+    // Clean up the host arrays
+    free(cost_array);
+    csr->freeArrays();
+    free(csr);
+
+    free(ell->col_array);
+    free(ell->data_array);
+    free(ell);
+
+    // Clean up the device-side buffers
+    hipFree(ell_col_d);
+    hipFree(ell_data_d);
+    hipFree(stop_d);
+    hipFree(vector_d1);
+    hipFree(vector_d2);
+
+    return 0;
+}
+
+void print_vector(int *vector, int num)
+{
+
+    FILE * fp = fopen("result.out", "w");
+    if (!fp) {
+        printf("ERROR: unable to open result.txt\n");
+    }
+
+    for (int i = 0; i < num; i++)
+        fprintf(fp, "%d: %d\n", i + 1, vector[i]);
+
+    fclose(fp);
+}
commit	9ace72a49010a45c3ca2ddc80006a96b65cfe0e1	[log] [tgz]
author	Matt Sinclair <mattdsinclair@gmail.com>	Tue Oct 12 21:42:59 2021 -0500
committer	Matt Sinclair <mattdsinclair@gmail.com>	Thu Oct 21 18:55:28 2021 +0000
tree	0793d39aa515edb0476bd0ed000961c59429a8bf
parent	303e5fe39ffb6d44b5ca394a2db71dff2a035468 [diff]