commit cf83db731ccfa464fa59e5bb1f262370a3c1c786 Author: Carl Pearson Date: Tue Dec 3 14:15:37 2024 -0800 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1b2211d --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +build* diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..bd80cba --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,21 @@ +project(MI300A-XNACK LANGUAGES CXX) +add_executable(hipmalloc_hostaccess hipmalloc_hostaccess.cpp) +add_executable(malloc_hostaccess malloc_hostaccess.cpp) +add_executable(malloc_devaccess malloc_devaccess.cpp) +add_executable(hipmalloc_devaccess hipmalloc_devaccess.cpp) +add_executable(malloc_hipmemset malloc_hipmemset.cpp) +add_executable(hipmalloc_hipmemset hipmalloc_hipmemset.cpp) +add_executable(hipmallocmanaged_devaccess hipmallocmanaged_devaccess.cpp) +add_executable(hipmallocmanaged_hostaccess hipmallocmanaged_hostaccess.cpp) +add_executable(hipmallocmanaged_hipmemset hipmallocmanaged_hipmemset.cpp) + +enable_testing() +add_test(NAME hipMalloc+HostAccess COMMAND hipmalloc_hostaccess) +add_test(NAME malloc+HostAccess COMMAND malloc_hostaccess) +add_test(NAME malloc+DevAccess COMMAND malloc_devaccess) +add_test(NAME hipMalloc+DevAccess COMMAND hipmalloc_devaccess) +add_test(NAME malloc+hipMemset COMMAND malloc_hipmemset) +add_test(NAME hipMalloc+hipMemset COMMAND hipmalloc_hipmemset) +add_test(NAME hipMallocManaged+DevAccess COMMAND hipmallocmanaged_devaccess) +add_test(NAME hipMallocManaged+HostAccess COMMAND hipmallocmanaged_hostaccess) +add_test(NAME hipMallocManaged+HipMemset COMMAND hipmallocmanaged_hipmemset) diff --git a/README.md b/README.md new file mode 100644 index 0000000..d891536 --- /dev/null +++ b/README.md @@ -0,0 +1,72 @@ +# mi300a-xnack + +Test HSA\_XNACK behavior on AMD MI300A + +XNACK refers to the GPU's ability to retry memory accesses that failed due a page fault (which normally would lead to a memory access error), and instead retrieve the missing page [(source)](https://rocm.docs.amd.com/en/docs-6.2.1/conceptual/gpu-memory.html#xnack) + +## Quick Start + +```c++ +./run.sh +``` + +## ROCm 6.2.1 Results + +### Device Name + +| `HSA_XNACK=0` (or unset) | `HSA_XNACK=1` | +|-|-| +| `amdgcn-amd-amdhsa--gfx942:sramecc+:xnack-` | `amdgcn-amd-amdhsa--gfx942:sramecc+:xnack+` | + +* `xnack+`: XNACK is available and enabled +* `xnack-`: XNACK is available and disabled [(source)](https://rocm.docs.amd.com/en/docs-6.2.1/conceptual/gpu-memory.html#xnack) + + +### Compiled with `--offload-arch=gfx942` or no `--offload-arch` provided + +*GPU kernels will run regardless of whether XNACK is enabled* + +| Allocator | Access | `HSA_XNACK=0` (or unset) | `HSA_XNACK=1` | +|-|-|-|-| +| hipMalloc | GPU Kernel | yes | yes | +| hipMalloc | Host Loop | yes | yes | +| hipMalloc | hipMemset | yes | yes | +| malloc | GPU Kernel | **segfault** | **yes** | +| malloc | Host Loop | yes | yes | +| malloc | hipMemset | HIP runtime error | HIP runtime error | +| hipMallocManaged | GPU Kernel | yes | yes | +| hipMallocManaged | Host Loop | yes | yes | +| hipMallocManaged | hipMemset | yes | yes | + +### Compiled with `--offload-arch=gfx942:xnack-` + +*GPU kernels will run only if XNACK is disabled* + +| Allocator | Access | `HSA_XNACK=0` (or unset) | `HSA_XNACK=1` | +|-|-|-|-| +| hipMalloc | GPU Kernel | yes | *N/A* | +| hipMalloc | Host Loop | yes | yes | +| hipMalloc | hipMemset | yes | yes | +| malloc | GPU Kernel | **segfault** | **N/A** | +| malloc | Host Loop | yes | yes | +| malloc | hipMemset | HIP runtime error | HIP runtime error | +| hipMallocManaged | GPU Kernel | yes | *N/A* | +| hipMallocManaged | Host Loop | yes | yes | +| hipMallocManaged | hipMemset | yes | yes | + +### Compiled with `--offload-arch=gfx942:xnack+` + +*GPU kernels will run only if XNACK is enabled* + +| Allocator | Access | `HSA_XNACK=0` (or unset) | `HSA_XNACK=1` | +|-|-|-|-| +| hipMalloc | GPU Kernel | *N/A* | yes | +| hipMalloc | Host Loop | yes | yes | +| hipMalloc | hipMemset | yes | yes | +| malloc | GPU Kernel | *N/A* | **yes** | +| malloc | Host Loop | yes | yes | +| malloc | hipMemset | HIP runtime error | HIP runtime error | +| hipMallocManaged | GPU Kernel | *N/A* | yes | +| hipMallocManaged | Host Loop | yes | yes | +| hipMallocManaged | hipMemset | yes | yes | + diff --git a/hipmalloc_devaccess.cpp b/hipmalloc_devaccess.cpp new file mode 100644 index 0000000..3f5612e --- /dev/null +++ b/hipmalloc_devaccess.cpp @@ -0,0 +1,30 @@ +#include +#include + +#define HIP(e) \ +if (hipError_t err = (e); err != hipSuccess) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " " << err << "\n"; \ + exit(1); \ +} + +__global__ void set(double* p, size_t n) { + for (size_t idx = blockIdx.x * blockDim.x + threadIdx.x; idx < n; idx += gridDim.x * blockDim.x) { + p[idx] = idx; + } +} + +int main(void) { + + size_t n = 1024 * 1024; + double *p; + + HIP(hipMalloc(&p, sizeof(double) * n)); + HIP(hipDeviceSynchronize()); + + hipLaunchKernelGGL(set, dim3(128), dim3(128), 0, 0, p, n); + HIP(hipDeviceSynchronize()); + + return 0; +} + + diff --git a/hipmalloc_hipmemset.cpp b/hipmalloc_hipmemset.cpp new file mode 100644 index 0000000..628d1a5 --- /dev/null +++ b/hipmalloc_hipmemset.cpp @@ -0,0 +1,23 @@ +#include +#include + +#define HIP(e) \ +if (hipError_t err = (e); err != hipSuccess) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " " << err << "\n"; \ + exit(1); \ +} + +int main(void) { + + size_t n = 1024 * 1024; + double *p; + HIP(hipMalloc(&p, sizeof(double)*n)); + HIP(hipDeviceSynchronize()); + + HIP(hipMemset(p, 17, n * sizeof(double))); + HIP(hipDeviceSynchronize()); + + return 0; +} + + diff --git a/hipmalloc_hostaccess.cpp b/hipmalloc_hostaccess.cpp new file mode 100644 index 0000000..2e4ab6a --- /dev/null +++ b/hipmalloc_hostaccess.cpp @@ -0,0 +1,25 @@ +#include +#include + +#define HIP(e) \ +if (hipError_t err = (e); err != hipSuccess) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " " << err << "\n"; \ + exit(1); \ +} + +int main(void) { + + size_t n = 1024 * 1024; + double *p; + + HIP(hipMalloc(&p, sizeof(double) * n)); + HIP(hipDeviceSynchronize()); + + for (size_t i = 0; i < n; ++i) { + p[i] = i; + } + + return 0; +} + + diff --git a/hipmallocmanaged_devaccess.cpp b/hipmallocmanaged_devaccess.cpp new file mode 100644 index 0000000..81296b5 --- /dev/null +++ b/hipmallocmanaged_devaccess.cpp @@ -0,0 +1,31 @@ +#include +#include + +#define HIP(e) \ +if (hipError_t err = (e); err != hipSuccess) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " " << err << "\n"; \ + exit(1); \ +} + +__global__ void set(double* p, size_t n) { + for (size_t idx = blockIdx.x * blockDim.x + threadIdx.x; idx < n; idx += gridDim.x * blockDim.x) { + p[idx] = idx; + } +} + +int main(void) { + + size_t n = 1024 * 1024; + double *p; + + HIP(hipMallocManaged(&p, sizeof(double) * n, hipMemAttachGlobal)); + HIP(hipDeviceSynchronize()); + + hipLaunchKernelGGL(set, dim3(128), dim3(128), 0, 0, p, n); + HIP(hipDeviceSynchronize()); + HIP(hipFree(p)); + + return 0; +} + + diff --git a/hipmallocmanaged_hipmemset.cpp b/hipmallocmanaged_hipmemset.cpp new file mode 100644 index 0000000..9e0c3d7 --- /dev/null +++ b/hipmallocmanaged_hipmemset.cpp @@ -0,0 +1,23 @@ +#include +#include + +#define HIP(e) \ +if (hipError_t err = (e); err != hipSuccess) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " " << err << "\n"; \ + exit(1); \ +} + +int main(void) { + + size_t n = 1024 * 1024; + double *p; + + HIP(hipMallocManaged(&p, sizeof(double) * n, hipMemAttachGlobal)); + HIP(hipDeviceSynchronize()); + HIP(hipMemset(p, 17, sizeof(double)*n)); + HIP(hipDeviceSynchronize()); + + return 0; +} + + diff --git a/hipmallocmanaged_hostaccess.cpp b/hipmallocmanaged_hostaccess.cpp new file mode 100644 index 0000000..9a6ce5c --- /dev/null +++ b/hipmallocmanaged_hostaccess.cpp @@ -0,0 +1,27 @@ +#include +#include + +#define HIP(e) \ +if (hipError_t err = (e); err != hipSuccess) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " " << err << "\n"; \ + exit(1); \ +} + +int main(void) { + + size_t n = 1024 * 1024; + double *p; + + HIP(hipMallocManaged(&p, sizeof(double) * n, hipMemAttachGlobal)); + HIP(hipDeviceSynchronize()); + + for (size_t i = 0; i < n; ++i) { + p[i] = i; + } + + HIP(hipDeviceSynchronize()); + + return 0; +} + + diff --git a/malloc_devaccess.cpp b/malloc_devaccess.cpp new file mode 100644 index 0000000..7a9826f --- /dev/null +++ b/malloc_devaccess.cpp @@ -0,0 +1,29 @@ +#include +#include + +#define HIP(e) \ +if (hipError_t err = (e); err != hipSuccess) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " " << err << "\n"; \ + exit(1); \ +} + +__global__ void set(double* p, size_t n) { + for (size_t idx = blockIdx.x * blockDim.x + threadIdx.x; idx < n; idx += gridDim.x * blockDim.x) { + p[idx] = idx; + } +} + +int main(void) { + + size_t n = 1024 * 1024; + double *p; + p = (double*)malloc(sizeof(double)*n); + HIP(hipDeviceSynchronize()); + + hipLaunchKernelGGL(set, dim3(128), dim3(128), 0, 0, p, n); + HIP(hipDeviceSynchronize()); + + return 0; +} + + diff --git a/malloc_hipmemset.cpp b/malloc_hipmemset.cpp new file mode 100644 index 0000000..337ad92 --- /dev/null +++ b/malloc_hipmemset.cpp @@ -0,0 +1,23 @@ +#include +#include + +#define HIP(e) \ +if (hipError_t err = (e); err != hipSuccess) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " " << err << "\n"; \ + exit(1); \ +} + +int main(void) { + + size_t n = 1024 * 1024; + double *p; + p = (double*)malloc(sizeof(double)*n); + HIP(hipDeviceSynchronize()); + + HIP(hipMemset(p, 17, n * sizeof(double))); + HIP(hipDeviceSynchronize()); + + return 0; +} + + diff --git a/malloc_hostaccess.cpp b/malloc_hostaccess.cpp new file mode 100644 index 0000000..2531b82 --- /dev/null +++ b/malloc_hostaccess.cpp @@ -0,0 +1,24 @@ +#include +#include + +#define HIP(e) \ +if (hipError_t err = (e); err != hipSuccess) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " " << err << "\n"; \ + exit(1); \ +} + +int main(void) { + + size_t n = 1024 * 1024; + double *p; + + p = (double*)malloc(sizeof(double) * n); + + for (size_t i = 0; i < n; ++i) { + p[i] = i; + } + + return 0; +} + + diff --git a/run.sh b/run.sh new file mode 100755 index 0000000..5e55097 --- /dev/null +++ b/run.sh @@ -0,0 +1,52 @@ +#! /bin/bash + +set -eou pipefail + +if ! module is-loaded rocm; then + echo module load rocm + exit 1 +fi + +echo "===================" +echo "= HSA_XNACK unset =" +echo "===================" +unset HSA_XNACK +rocminfo | grep xnack + +echo "===================" +echo "= HSA_XNACK=1 =" +echo "===================" +export HSA_XNACK=1 +rocminfo | grep xnack + +echo "===================" +echo "= HSA_XNACK=0 =" +echo "===================" +export HSA_XNACK=0 +rocminfo | grep xnack + +set +eou pipefail + +for arch in "" "--offload-arch=gfx942:xnack+" "--offload-arch=gfx942:xnack-" "--offload-arch=gfx942"; do + +rm -rf "build-$arch" +cmake -S . -B "build-$arch" -DCMAKE_CXX_COMPILER=hipcc -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS="-Wall -Wextra $arch" +VERBOSE=1 make -C "build-$arch" + +echo "================================" +echo "= HSA_XNACK unset $arch =" +echo "================================" +unset HSA_XNACK +ctest --test-dir "build-$arch" +echo "================================" +echo "= HSA_XNACK=1 $arch =" +echo "================================" +export HSA_XNACK=1 +ctest --test-dir "build-$arch" +echo "================================" +echo "= HSA_XNACK=0 $arch =" +echo "================================" +export HSA_XNACK=0 +ctest --test-dir "build-$arch" + +done;