resources: Add square to resources

Change-Id: I3dbcfaa5148796bc2f7d2a810103488b7b6eb104
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5-resources/+/29213
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Bobby R. Bruce <bbruce@ucdavis.edu>
Tested-by: Bobby R. Bruce <bbruce@ucdavis.edu>
diff --git a/Makefile b/Makefile
index 0f6cb5b..d7b57e9 100644
--- a/Makefile
+++ b/Makefile
@@ -45,6 +45,12 @@
 pthreads-a64-out    := $(pthreads-out)/aarch64
 pthreads-ri5-out    := $(pthreads-out)/riscv64
 
+# Square Parameters (source dir, sub-build bin dir, built binary, staging dir)
+square-dir          := $(source-dir)/square
+square-bin          := $(square-dir)/bin
+square-bench        := $(square-bin)/square.o
+square-out          := $(output-dir)/test-progs/square
+
 # RISCV-Tests
 .PHONY: riscv-tests
 riscv-tests: $(riscv-benchmarks)
@@ -144,10 +150,26 @@
 				clean-pthreads-riscv64
 	-rm -rf $(pthreads-out)
 
+# Square
+.PHONY: square
+square: $(square-out)
+	-cp $(square-bench) $(square-out)/
+# Staging directory; listing $(square-bin) first makes the sub-build precede the copy.
+$(square-out): $(square-bin)
+	mkdir -p $(square-out)
+# Runs only while $(square-bin) is missing. $(MAKE) passes -j/-n/overrides to the sub-make.
+$(square-bin):
+	$(MAKE) -C $(square-dir) gfx8-apu
+
+.PHONY: clean-square
+clean-square:
+	-rm -r $(square-out)
+	-$(MAKE) -C $(square-dir) clean
+
 # Global
 .PHONY: all
-all: riscv-tests insttests pthreads
+all: riscv-tests insttests pthreads square
 
 .PHONY: clean
-clean: clean-riscv-tests clean-insttests clean-pthreads
+clean: clean-riscv-tests clean-insttests clean-pthreads clean-square
 	-rm -r $(output-dir)
diff --git a/README.md b/README.md
index 990e2d0..6cd5428 100644
--- a/README.md
+++ b/README.md
@@ -178,6 +178,17 @@
 The output of these compilations can be found in
 `output/test-progs/pthreads`
 
+# Resource: Square
+
+## Compilation
+
+```
+make square
+```
+
+The output of this compilation can be found at
+`output/test-progs/square/`
+
 # Licensing
 
 Each project under the `src` is under a different license. Before using
@@ -189,3 +200,5 @@
 `src/insttests`.
 * **pthreads**: Consult individual copyright notices of source files in
 `src/pthreads`.
+* **square**: Consult individual copyright notices of source files in
+`src/square`.
diff --git a/src/square/Makefile b/src/square/Makefile
new file mode 100644
index 0000000..5d7f226
--- /dev/null
+++ b/src/square/Makefile
@@ -0,0 +1,15 @@
+# HIP toolchain root and output directory; both keep any value the caller already set.
+HIP_PATH ?= /opt/rocm/hip
+HIPCC    := $(HIP_PATH)/bin/hipcc
+BIN_DIR  ?= ./bin
+
+.PHONY: gfx8-apu clean
+# Compile square.cpp for the gfx801 target into $(BIN_DIR)/square.o.
+gfx8-apu: square.cpp $(BIN_DIR)
+	$(HIPCC) --amdgpu-target=gfx801 $(CXXFLAGS) $< -o $(BIN_DIR)/square.o
+
+$(BIN_DIR):
+	mkdir -p $@
+
+clean:
+	rm -rf $(BIN_DIR)
diff --git a/src/square/square.cpp b/src/square/square.cpp
new file mode 100644
index 0000000..87bf597
--- /dev/null
+++ b/src/square/square.cpp
@@ -0,0 +1,108 @@
+/*
+Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <stdio.h>
+#include "hip/hip_runtime.h"
+
+#define CHECK(cmd) \
+{\
+    hipError_t error  = cmd;\
+    if (error != hipSuccess) { \
+      fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error,__FILE__, __LINE__); \
+    exit(EXIT_FAILURE);\
+    }\
+}
+
+/*
+ * Kernel: store A_d[i] * A_d[i] into C_d[i] for every i below N.
+ */
+template <typename T>
+__global__ void
+vector_square(T *C_d, const T *A_d, size_t N)
+{
+    // Each thread starts at its global index and advances by the total thread count.
+    const size_t first = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
+    const size_t step = hipBlockDim_x * hipGridDim_x;
+    for (size_t idx = first; idx < N; idx += step) {
+        const T value = A_d[idx];
+        C_d[idx] = value * value;
+    }
+}
+
+/* Fill A_h, square it on the GPU (device copies if DGPU, else host buffers), verify C_h. */
+int main(int argc, char *argv[])
+{
+#ifdef DGPU
+    float *A_d, *C_d;
+#endif
+    float *A_h, *C_h;
+    size_t N = 1000000;
+    size_t Nbytes = N * sizeof(float);
+
+    hipDeviceProp_t props;
+    CHECK(hipGetDeviceProperties(&props, 0/*deviceID*/));
+    printf ("info: running on device %s\n", props.name);
+    #ifdef __HIP_PLATFORM_HCC__
+      printf ("info: architecture on AMD GPU device is: %d\n",props.gcnArch);
+    #endif
+    printf ("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
+    A_h = (float*)malloc(Nbytes);
+    CHECK(A_h == 0 ? hipErrorMemoryAllocation : hipSuccess );
+    C_h = (float*)malloc(Nbytes);
+    CHECK(C_h == 0 ? hipErrorMemoryAllocation : hipSuccess );
+    // Fill with Phi + i
+    for (size_t i=0; i<N; i++) {
+        A_h[i] = 1.618f + i;
+    }
+
+#ifdef DGPU
+    printf ("info: allocate device mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
+    CHECK(hipMalloc(&A_d, Nbytes));
+    CHECK(hipMalloc(&C_d, Nbytes));
+
+    printf ("info: copy Host2Device\n");
+    CHECK ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
+#endif
+
+    const unsigned blocks = 512;
+    const unsigned threadsPerBlock = 256;
+
+    printf ("info: launch 'vector_square' kernel\n");
+#ifdef DGPU
+    hipLaunchKernelGGL(vector_square, dim3(blocks), dim3(threadsPerBlock), 0, 0, C_d, A_d, N);
+    printf ("info: copy Device2Host\n");
+    CHECK ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
+#else
+    hipLaunchKernelGGL(vector_square, dim3(blocks), dim3(threadsPerBlock), 0, 0, C_h, A_h, N);
+    // The launch is asynchronous: wait for the kernel before the host reads C_h.
+    CHECK(hipDeviceSynchronize());
+#endif
+
+    printf ("info: check result\n");
+    for (size_t i=0; i<N; i++)  {
+        if (C_h[i] != A_h[i] * A_h[i]) {
+            CHECK(hipErrorUnknown);
+        }
+    }
+    printf ("PASSED!\n");
+    return 0;
+}
+