Initial commit

This commit is contained in:
Carl Pearson
2024-12-03 14:15:37 -08:00
commit cf83db731c
13 changed files with 381 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
build*

21
CMakeLists.txt Normal file
View File

@@ -0,0 +1,21 @@
project(MI300A-XNACK LANGUAGES CXX)
add_executable(hipmalloc_hostaccess hipmalloc_hostaccess.cpp)
add_executable(malloc_hostaccess malloc_hostaccess.cpp)
add_executable(malloc_devaccess malloc_devaccess.cpp)
add_executable(hipmalloc_devaccess hipmalloc_devaccess.cpp)
add_executable(malloc_hipmemset malloc_hipmemset.cpp)
add_executable(hipmalloc_hipmemset hipmalloc_hipmemset.cpp)
add_executable(hipmallocmanaged_devaccess hipmallocmanaged_devaccess.cpp)
add_executable(hipmallocmanaged_hostaccess hipmallocmanaged_hostaccess.cpp)
add_executable(hipmallocmanaged_hipmemset hipmallocmanaged_hipmemset.cpp)
enable_testing()
add_test(NAME hipMalloc+HostAccess COMMAND hipmalloc_hostaccess)
add_test(NAME malloc+HostAccess COMMAND malloc_hostaccess)
add_test(NAME malloc+DevAccess COMMAND malloc_devaccess)
add_test(NAME hipMalloc+DevAccess COMMAND hipmalloc_devaccess)
add_test(NAME malloc+hipMemset COMMAND malloc_hipmemset)
add_test(NAME hipMalloc+hipMemset COMMAND hipmalloc_hipmemset)
add_test(NAME hipMallocManaged+DevAccess COMMAND hipmallocmanaged_devaccess)
add_test(NAME hipMallocManaged+HostAccess COMMAND hipmallocmanaged_hostaccess)
add_test(NAME hipMallocManaged+HipMemset COMMAND hipmallocmanaged_hipmemset)

72
README.md Normal file
View File

@@ -0,0 +1,72 @@
# mi300a-xnack
Test HSA\_XNACK behavior on AMD MI300A
XNACK refers to the GPU's ability to retry memory accesses that failed due a page fault (which normally would lead to a memory access error), and instead retrieve the missing page [(source)](https://rocm.docs.amd.com/en/docs-6.2.1/conceptual/gpu-memory.html#xnack)
## Quick Start
```c++
./run.sh
```
## ROCm 6.2.1 Results
### Device Name
| `HSA_XNACK=0` (or unset) | `HSA_XNACK=1` |
|-|-|
| `amdgcn-amd-amdhsa--gfx942:sramecc+:xnack-` | `amdgcn-amd-amdhsa--gfx942:sramecc+:xnack+` |
* `xnack+`: XNACK is available and enabled
* `xnack-`: XNACK is available and disabled [(source)](https://rocm.docs.amd.com/en/docs-6.2.1/conceptual/gpu-memory.html#xnack)
### Compiled with `--offload-arch=gfx942` or no `--offload-arch` provided
*GPU kernels will run regardless of whether XNACK is enabled*
| Allocator | Access | `HSA_XNACK=0` (or unset) | `HSA_XNACK=1` |
|-|-|-|-|
| hipMalloc | GPU Kernel | yes | yes |
| hipMalloc | Host Loop | yes | yes |
| hipMalloc | hipMemset | yes | yes |
| malloc | GPU Kernel | **segfault** | **yes** |
| malloc | Host Loop | yes | yes |
| malloc | hipMemset | HIP runtime error | HIP runtime error |
| hipMallocManaged | GPU Kernel | yes | yes |
| hipMallocManaged | Host Loop | yes | yes |
| hipMallocManaged | hipMemset | yes | yes |
### Compiled with `--offload-arch=gfx942:xnack-`
*GPU kernels will run only if XNACK is disabled*
| Allocator | Access | `HSA_XNACK=0` (or unset) | `HSA_XNACK=1` |
|-|-|-|-|
| hipMalloc | GPU Kernel | yes | *N/A* |
| hipMalloc | Host Loop | yes | yes |
| hipMalloc | hipMemset | yes | yes |
| malloc | GPU Kernel | **segfault** | **N/A** |
| malloc | Host Loop | yes | yes |
| malloc | hipMemset | HIP runtime error | HIP runtime error |
| hipMallocManaged | GPU Kernel | yes | *N/A* |
| hipMallocManaged | Host Loop | yes | yes |
| hipMallocManaged | hipMemset | yes | yes |
### Compiled with `--offload-arch=gfx942:xnack+`
*GPU kernels will run only if XNACK is enabled*
| Allocator | Access | `HSA_XNACK=0` (or unset) | `HSA_XNACK=1` |
|-|-|-|-|
| hipMalloc | GPU Kernel | *N/A* | yes |
| hipMalloc | Host Loop | yes | yes |
| hipMalloc | hipMemset | yes | yes |
| malloc | GPU Kernel | *N/A* | **yes** |
| malloc | Host Loop | yes | yes |
| malloc | hipMemset | HIP runtime error | HIP runtime error |
| hipMallocManaged | GPU Kernel | *N/A* | yes |
| hipMallocManaged | Host Loop | yes | yes |
| hipMallocManaged | hipMemset | yes | yes |

30
hipmalloc_devaccess.cpp Normal file
View File

@@ -0,0 +1,30 @@
#include <iostream>
#include <hip/hip_runtime.h>
#define HIP(e) \
if (hipError_t err = (e); err != hipSuccess) { \
std::cerr << __FILE__ << ":" << __LINE__ << " " << err << "\n"; \
exit(1); \
}
__global__ void set(double* p, size_t n) {
for (size_t idx = blockIdx.x * blockDim.x + threadIdx.x; idx < n; idx += gridDim.x * blockDim.x) {
p[idx] = idx;
}
}
int main(void) {
size_t n = 1024 * 1024;
double *p;
HIP(hipMalloc(&p, sizeof(double) * n));
HIP(hipDeviceSynchronize());
hipLaunchKernelGGL(set, dim3(128), dim3(128), 0, 0, p, n);
HIP(hipDeviceSynchronize());
return 0;
}

23
hipmalloc_hipmemset.cpp Normal file
View File

@@ -0,0 +1,23 @@
#include <iostream>
#include <hip/hip_runtime.h>
#define HIP(e) \
if (hipError_t err = (e); err != hipSuccess) { \
std::cerr << __FILE__ << ":" << __LINE__ << " " << err << "\n"; \
exit(1); \
}
int main(void) {
size_t n = 1024 * 1024;
double *p;
HIP(hipMalloc(&p, sizeof(double)*n));
HIP(hipDeviceSynchronize());
HIP(hipMemset(p, 17, n * sizeof(double)));
HIP(hipDeviceSynchronize());
return 0;
}

25
hipmalloc_hostaccess.cpp Normal file
View File

@@ -0,0 +1,25 @@
#include <iostream>
#include <hip/hip_runtime.h>
#define HIP(e) \
if (hipError_t err = (e); err != hipSuccess) { \
std::cerr << __FILE__ << ":" << __LINE__ << " " << err << "\n"; \
exit(1); \
}
int main(void) {
size_t n = 1024 * 1024;
double *p;
HIP(hipMalloc(&p, sizeof(double) * n));
HIP(hipDeviceSynchronize());
for (size_t i = 0; i < n; ++i) {
p[i] = i;
}
return 0;
}

View File

@@ -0,0 +1,31 @@
#include <iostream>
#include <hip/hip_runtime.h>
#define HIP(e) \
if (hipError_t err = (e); err != hipSuccess) { \
std::cerr << __FILE__ << ":" << __LINE__ << " " << err << "\n"; \
exit(1); \
}
__global__ void set(double* p, size_t n) {
for (size_t idx = blockIdx.x * blockDim.x + threadIdx.x; idx < n; idx += gridDim.x * blockDim.x) {
p[idx] = idx;
}
}
int main(void) {
size_t n = 1024 * 1024;
double *p;
HIP(hipMallocManaged(&p, sizeof(double) * n, hipMemAttachGlobal));
HIP(hipDeviceSynchronize());
hipLaunchKernelGGL(set, dim3(128), dim3(128), 0, 0, p, n);
HIP(hipDeviceSynchronize());
HIP(hipFree(p));
return 0;
}

View File

@@ -0,0 +1,23 @@
#include <iostream>
#include <hip/hip_runtime.h>
#define HIP(e) \
if (hipError_t err = (e); err != hipSuccess) { \
std::cerr << __FILE__ << ":" << __LINE__ << " " << err << "\n"; \
exit(1); \
}
int main(void) {
size_t n = 1024 * 1024;
double *p;
HIP(hipMallocManaged(&p, sizeof(double) * n, hipMemAttachGlobal));
HIP(hipDeviceSynchronize());
HIP(hipMemset(p, 17, sizeof(double)*n));
HIP(hipDeviceSynchronize());
return 0;
}

View File

@@ -0,0 +1,27 @@
#include <iostream>
#include <hip/hip_runtime.h>
#define HIP(e) \
if (hipError_t err = (e); err != hipSuccess) { \
std::cerr << __FILE__ << ":" << __LINE__ << " " << err << "\n"; \
exit(1); \
}
int main(void) {
size_t n = 1024 * 1024;
double *p;
HIP(hipMallocManaged(&p, sizeof(double) * n, hipMemAttachGlobal));
HIP(hipDeviceSynchronize());
for (size_t i = 0; i < n; ++i) {
p[i] = i;
}
HIP(hipDeviceSynchronize());
return 0;
}

29
malloc_devaccess.cpp Normal file
View File

@@ -0,0 +1,29 @@
#include <iostream>
#include <hip/hip_runtime.h>
#define HIP(e) \
if (hipError_t err = (e); err != hipSuccess) { \
std::cerr << __FILE__ << ":" << __LINE__ << " " << err << "\n"; \
exit(1); \
}
__global__ void set(double* p, size_t n) {
for (size_t idx = blockIdx.x * blockDim.x + threadIdx.x; idx < n; idx += gridDim.x * blockDim.x) {
p[idx] = idx;
}
}
int main(void) {
size_t n = 1024 * 1024;
double *p;
p = (double*)malloc(sizeof(double)*n);
HIP(hipDeviceSynchronize());
hipLaunchKernelGGL(set, dim3(128), dim3(128), 0, 0, p, n);
HIP(hipDeviceSynchronize());
return 0;
}

23
malloc_hipmemset.cpp Normal file
View File

@@ -0,0 +1,23 @@
#include <iostream>
#include <hip/hip_runtime.h>
#define HIP(e) \
if (hipError_t err = (e); err != hipSuccess) { \
std::cerr << __FILE__ << ":" << __LINE__ << " " << err << "\n"; \
exit(1); \
}
int main(void) {
size_t n = 1024 * 1024;
double *p;
p = (double*)malloc(sizeof(double)*n);
HIP(hipDeviceSynchronize());
HIP(hipMemset(p, 17, n * sizeof(double)));
HIP(hipDeviceSynchronize());
return 0;
}

24
malloc_hostaccess.cpp Normal file
View File

@@ -0,0 +1,24 @@
#include <iostream>
#include <hip/hip_runtime.h>
#define HIP(e) \
if (hipError_t err = (e); err != hipSuccess) { \
std::cerr << __FILE__ << ":" << __LINE__ << " " << err << "\n"; \
exit(1); \
}
int main(void) {
size_t n = 1024 * 1024;
double *p;
p = (double*)malloc(sizeof(double) * n);
for (size_t i = 0; i < n; ++i) {
p[i] = i;
}
return 0;
}

52
run.sh Executable file
View File

@@ -0,0 +1,52 @@
#! /bin/bash
set -eou pipefail
if ! module is-loaded rocm; then
echo module load rocm
exit 1
fi
echo "==================="
echo "= HSA_XNACK unset ="
echo "==================="
unset HSA_XNACK
rocminfo | grep xnack
echo "==================="
echo "= HSA_XNACK=1 ="
echo "==================="
export HSA_XNACK=1
rocminfo | grep xnack
echo "==================="
echo "= HSA_XNACK=0 ="
echo "==================="
export HSA_XNACK=0
rocminfo | grep xnack
set +eou pipefail
for arch in "" "--offload-arch=gfx942:xnack+" "--offload-arch=gfx942:xnack-" "--offload-arch=gfx942"; do
rm -rf "build-$arch"
cmake -S . -B "build-$arch" -DCMAKE_CXX_COMPILER=hipcc -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS="-Wall -Wextra $arch"
VERBOSE=1 make -C "build-$arch"
echo "================================"
echo "= HSA_XNACK unset $arch ="
echo "================================"
unset HSA_XNACK
ctest --test-dir "build-$arch"
echo "================================"
echo "= HSA_XNACK=1 $arch ="
echo "================================"
export HSA_XNACK=1
ctest --test-dir "build-$arch"
echo "================================"
echo "= HSA_XNACK=0 $arch ="
echo "================================"
export HSA_XNACK=0
ctest --test-dir "build-$arch"
done;