Initial commit
This commit is contained in:
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
build*
|
21
CMakeLists.txt
Normal file
21
CMakeLists.txt
Normal file
@@ -0,0 +1,21 @@
|
||||
project(MI300A-XNACK LANGUAGES CXX)
|
||||
add_executable(hipmalloc_hostaccess hipmalloc_hostaccess.cpp)
|
||||
add_executable(malloc_hostaccess malloc_hostaccess.cpp)
|
||||
add_executable(malloc_devaccess malloc_devaccess.cpp)
|
||||
add_executable(hipmalloc_devaccess hipmalloc_devaccess.cpp)
|
||||
add_executable(malloc_hipmemset malloc_hipmemset.cpp)
|
||||
add_executable(hipmalloc_hipmemset hipmalloc_hipmemset.cpp)
|
||||
add_executable(hipmallocmanaged_devaccess hipmallocmanaged_devaccess.cpp)
|
||||
add_executable(hipmallocmanaged_hostaccess hipmallocmanaged_hostaccess.cpp)
|
||||
add_executable(hipmallocmanaged_hipmemset hipmallocmanaged_hipmemset.cpp)
|
||||
|
||||
enable_testing()
|
||||
add_test(NAME hipMalloc+HostAccess COMMAND hipmalloc_hostaccess)
|
||||
add_test(NAME malloc+HostAccess COMMAND malloc_hostaccess)
|
||||
add_test(NAME malloc+DevAccess COMMAND malloc_devaccess)
|
||||
add_test(NAME hipMalloc+DevAccess COMMAND hipmalloc_devaccess)
|
||||
add_test(NAME malloc+hipMemset COMMAND malloc_hipmemset)
|
||||
add_test(NAME hipMalloc+hipMemset COMMAND hipmalloc_hipmemset)
|
||||
add_test(NAME hipMallocManaged+DevAccess COMMAND hipmallocmanaged_devaccess)
|
||||
add_test(NAME hipMallocManaged+HostAccess COMMAND hipmallocmanaged_hostaccess)
|
||||
add_test(NAME hipMallocManaged+HipMemset COMMAND hipmallocmanaged_hipmemset)
|
72
README.md
Normal file
72
README.md
Normal file
@@ -0,0 +1,72 @@
|
||||
# mi300a-xnack
|
||||
|
||||
Test HSA\_XNACK behavior on AMD MI300A
|
||||
|
||||
XNACK refers to the GPU's ability to retry memory accesses that failed due a page fault (which normally would lead to a memory access error), and instead retrieve the missing page [(source)](https://rocm.docs.amd.com/en/docs-6.2.1/conceptual/gpu-memory.html#xnack)
|
||||
|
||||
## Quick Start
|
||||
|
||||
```c++
|
||||
./run.sh
|
||||
```
|
||||
|
||||
## ROCm 6.2.1 Results
|
||||
|
||||
### Device Name
|
||||
|
||||
| `HSA_XNACK=0` (or unset) | `HSA_XNACK=1` |
|
||||
|-|-|
|
||||
| `amdgcn-amd-amdhsa--gfx942:sramecc+:xnack-` | `amdgcn-amd-amdhsa--gfx942:sramecc+:xnack+` |
|
||||
|
||||
* `xnack+`: XNACK is available and enabled
|
||||
* `xnack-`: XNACK is available and disabled [(source)](https://rocm.docs.amd.com/en/docs-6.2.1/conceptual/gpu-memory.html#xnack)
|
||||
|
||||
|
||||
### Compiled with `--offload-arch=gfx942` or no `--offload-arch` provided
|
||||
|
||||
*GPU kernels will run regardless of whether XNACK is enabled*
|
||||
|
||||
| Allocator | Access | `HSA_XNACK=0` (or unset) | `HSA_XNACK=1` |
|
||||
|-|-|-|-|
|
||||
| hipMalloc | GPU Kernel | yes | yes |
|
||||
| hipMalloc | Host Loop | yes | yes |
|
||||
| hipMalloc | hipMemset | yes | yes |
|
||||
| malloc | GPU Kernel | **segfault** | **yes** |
|
||||
| malloc | Host Loop | yes | yes |
|
||||
| malloc | hipMemset | HIP runtime error | HIP runtime error |
|
||||
| hipMallocManaged | GPU Kernel | yes | yes |
|
||||
| hipMallocManaged | Host Loop | yes | yes |
|
||||
| hipMallocManaged | hipMemset | yes | yes |
|
||||
|
||||
### Compiled with `--offload-arch=gfx942:xnack-`
|
||||
|
||||
*GPU kernels will run only if XNACK is disabled*
|
||||
|
||||
| Allocator | Access | `HSA_XNACK=0` (or unset) | `HSA_XNACK=1` |
|
||||
|-|-|-|-|
|
||||
| hipMalloc | GPU Kernel | yes | *N/A* |
|
||||
| hipMalloc | Host Loop | yes | yes |
|
||||
| hipMalloc | hipMemset | yes | yes |
|
||||
| malloc | GPU Kernel | **segfault** | **N/A** |
|
||||
| malloc | Host Loop | yes | yes |
|
||||
| malloc | hipMemset | HIP runtime error | HIP runtime error |
|
||||
| hipMallocManaged | GPU Kernel | yes | *N/A* |
|
||||
| hipMallocManaged | Host Loop | yes | yes |
|
||||
| hipMallocManaged | hipMemset | yes | yes |
|
||||
|
||||
### Compiled with `--offload-arch=gfx942:xnack+`
|
||||
|
||||
*GPU kernels will run only if XNACK is enabled*
|
||||
|
||||
| Allocator | Access | `HSA_XNACK=0` (or unset) | `HSA_XNACK=1` |
|
||||
|-|-|-|-|
|
||||
| hipMalloc | GPU Kernel | *N/A* | yes |
|
||||
| hipMalloc | Host Loop | yes | yes |
|
||||
| hipMalloc | hipMemset | yes | yes |
|
||||
| malloc | GPU Kernel | *N/A* | **yes** |
|
||||
| malloc | Host Loop | yes | yes |
|
||||
| malloc | hipMemset | HIP runtime error | HIP runtime error |
|
||||
| hipMallocManaged | GPU Kernel | *N/A* | yes |
|
||||
| hipMallocManaged | Host Loop | yes | yes |
|
||||
| hipMallocManaged | hipMemset | yes | yes |
|
||||
|
30
hipmalloc_devaccess.cpp
Normal file
30
hipmalloc_devaccess.cpp
Normal file
@@ -0,0 +1,30 @@
|
||||
#include <iostream>
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#define HIP(e) \
|
||||
if (hipError_t err = (e); err != hipSuccess) { \
|
||||
std::cerr << __FILE__ << ":" << __LINE__ << " " << err << "\n"; \
|
||||
exit(1); \
|
||||
}
|
||||
|
||||
__global__ void set(double* p, size_t n) {
|
||||
for (size_t idx = blockIdx.x * blockDim.x + threadIdx.x; idx < n; idx += gridDim.x * blockDim.x) {
|
||||
p[idx] = idx;
|
||||
}
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
|
||||
size_t n = 1024 * 1024;
|
||||
double *p;
|
||||
|
||||
HIP(hipMalloc(&p, sizeof(double) * n));
|
||||
HIP(hipDeviceSynchronize());
|
||||
|
||||
hipLaunchKernelGGL(set, dim3(128), dim3(128), 0, 0, p, n);
|
||||
HIP(hipDeviceSynchronize());
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
23
hipmalloc_hipmemset.cpp
Normal file
23
hipmalloc_hipmemset.cpp
Normal file
@@ -0,0 +1,23 @@
|
||||
#include <iostream>
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#define HIP(e) \
|
||||
if (hipError_t err = (e); err != hipSuccess) { \
|
||||
std::cerr << __FILE__ << ":" << __LINE__ << " " << err << "\n"; \
|
||||
exit(1); \
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
|
||||
size_t n = 1024 * 1024;
|
||||
double *p;
|
||||
HIP(hipMalloc(&p, sizeof(double)*n));
|
||||
HIP(hipDeviceSynchronize());
|
||||
|
||||
HIP(hipMemset(p, 17, n * sizeof(double)));
|
||||
HIP(hipDeviceSynchronize());
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
25
hipmalloc_hostaccess.cpp
Normal file
25
hipmalloc_hostaccess.cpp
Normal file
@@ -0,0 +1,25 @@
|
||||
#include <iostream>
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#define HIP(e) \
|
||||
if (hipError_t err = (e); err != hipSuccess) { \
|
||||
std::cerr << __FILE__ << ":" << __LINE__ << " " << err << "\n"; \
|
||||
exit(1); \
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
|
||||
size_t n = 1024 * 1024;
|
||||
double *p;
|
||||
|
||||
HIP(hipMalloc(&p, sizeof(double) * n));
|
||||
HIP(hipDeviceSynchronize());
|
||||
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
p[i] = i;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
31
hipmallocmanaged_devaccess.cpp
Normal file
31
hipmallocmanaged_devaccess.cpp
Normal file
@@ -0,0 +1,31 @@
|
||||
#include <iostream>
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#define HIP(e) \
|
||||
if (hipError_t err = (e); err != hipSuccess) { \
|
||||
std::cerr << __FILE__ << ":" << __LINE__ << " " << err << "\n"; \
|
||||
exit(1); \
|
||||
}
|
||||
|
||||
__global__ void set(double* p, size_t n) {
|
||||
for (size_t idx = blockIdx.x * blockDim.x + threadIdx.x; idx < n; idx += gridDim.x * blockDim.x) {
|
||||
p[idx] = idx;
|
||||
}
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
|
||||
size_t n = 1024 * 1024;
|
||||
double *p;
|
||||
|
||||
HIP(hipMallocManaged(&p, sizeof(double) * n, hipMemAttachGlobal));
|
||||
HIP(hipDeviceSynchronize());
|
||||
|
||||
hipLaunchKernelGGL(set, dim3(128), dim3(128), 0, 0, p, n);
|
||||
HIP(hipDeviceSynchronize());
|
||||
HIP(hipFree(p));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
23
hipmallocmanaged_hipmemset.cpp
Normal file
23
hipmallocmanaged_hipmemset.cpp
Normal file
@@ -0,0 +1,23 @@
|
||||
#include <iostream>
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#define HIP(e) \
|
||||
if (hipError_t err = (e); err != hipSuccess) { \
|
||||
std::cerr << __FILE__ << ":" << __LINE__ << " " << err << "\n"; \
|
||||
exit(1); \
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
|
||||
size_t n = 1024 * 1024;
|
||||
double *p;
|
||||
|
||||
HIP(hipMallocManaged(&p, sizeof(double) * n, hipMemAttachGlobal));
|
||||
HIP(hipDeviceSynchronize());
|
||||
HIP(hipMemset(p, 17, sizeof(double)*n));
|
||||
HIP(hipDeviceSynchronize());
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
27
hipmallocmanaged_hostaccess.cpp
Normal file
27
hipmallocmanaged_hostaccess.cpp
Normal file
@@ -0,0 +1,27 @@
|
||||
#include <iostream>
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#define HIP(e) \
|
||||
if (hipError_t err = (e); err != hipSuccess) { \
|
||||
std::cerr << __FILE__ << ":" << __LINE__ << " " << err << "\n"; \
|
||||
exit(1); \
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
|
||||
size_t n = 1024 * 1024;
|
||||
double *p;
|
||||
|
||||
HIP(hipMallocManaged(&p, sizeof(double) * n, hipMemAttachGlobal));
|
||||
HIP(hipDeviceSynchronize());
|
||||
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
p[i] = i;
|
||||
}
|
||||
|
||||
HIP(hipDeviceSynchronize());
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
29
malloc_devaccess.cpp
Normal file
29
malloc_devaccess.cpp
Normal file
@@ -0,0 +1,29 @@
|
||||
#include <iostream>
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#define HIP(e) \
|
||||
if (hipError_t err = (e); err != hipSuccess) { \
|
||||
std::cerr << __FILE__ << ":" << __LINE__ << " " << err << "\n"; \
|
||||
exit(1); \
|
||||
}
|
||||
|
||||
__global__ void set(double* p, size_t n) {
|
||||
for (size_t idx = blockIdx.x * blockDim.x + threadIdx.x; idx < n; idx += gridDim.x * blockDim.x) {
|
||||
p[idx] = idx;
|
||||
}
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
|
||||
size_t n = 1024 * 1024;
|
||||
double *p;
|
||||
p = (double*)malloc(sizeof(double)*n);
|
||||
HIP(hipDeviceSynchronize());
|
||||
|
||||
hipLaunchKernelGGL(set, dim3(128), dim3(128), 0, 0, p, n);
|
||||
HIP(hipDeviceSynchronize());
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
23
malloc_hipmemset.cpp
Normal file
23
malloc_hipmemset.cpp
Normal file
@@ -0,0 +1,23 @@
|
||||
#include <iostream>
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#define HIP(e) \
|
||||
if (hipError_t err = (e); err != hipSuccess) { \
|
||||
std::cerr << __FILE__ << ":" << __LINE__ << " " << err << "\n"; \
|
||||
exit(1); \
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
|
||||
size_t n = 1024 * 1024;
|
||||
double *p;
|
||||
p = (double*)malloc(sizeof(double)*n);
|
||||
HIP(hipDeviceSynchronize());
|
||||
|
||||
HIP(hipMemset(p, 17, n * sizeof(double)));
|
||||
HIP(hipDeviceSynchronize());
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
24
malloc_hostaccess.cpp
Normal file
24
malloc_hostaccess.cpp
Normal file
@@ -0,0 +1,24 @@
|
||||
#include <iostream>
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#define HIP(e) \
|
||||
if (hipError_t err = (e); err != hipSuccess) { \
|
||||
std::cerr << __FILE__ << ":" << __LINE__ << " " << err << "\n"; \
|
||||
exit(1); \
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
|
||||
size_t n = 1024 * 1024;
|
||||
double *p;
|
||||
|
||||
p = (double*)malloc(sizeof(double) * n);
|
||||
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
p[i] = i;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
52
run.sh
Executable file
52
run.sh
Executable file
@@ -0,0 +1,52 @@
|
||||
#! /bin/bash
|
||||
|
||||
set -eou pipefail
|
||||
|
||||
if ! module is-loaded rocm; then
|
||||
echo module load rocm
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "==================="
|
||||
echo "= HSA_XNACK unset ="
|
||||
echo "==================="
|
||||
unset HSA_XNACK
|
||||
rocminfo | grep xnack
|
||||
|
||||
echo "==================="
|
||||
echo "= HSA_XNACK=1 ="
|
||||
echo "==================="
|
||||
export HSA_XNACK=1
|
||||
rocminfo | grep xnack
|
||||
|
||||
echo "==================="
|
||||
echo "= HSA_XNACK=0 ="
|
||||
echo "==================="
|
||||
export HSA_XNACK=0
|
||||
rocminfo | grep xnack
|
||||
|
||||
set +eou pipefail
|
||||
|
||||
for arch in "" "--offload-arch=gfx942:xnack+" "--offload-arch=gfx942:xnack-" "--offload-arch=gfx942"; do
|
||||
|
||||
rm -rf "build-$arch"
|
||||
cmake -S . -B "build-$arch" -DCMAKE_CXX_COMPILER=hipcc -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS="-Wall -Wextra $arch"
|
||||
VERBOSE=1 make -C "build-$arch"
|
||||
|
||||
echo "================================"
|
||||
echo "= HSA_XNACK unset $arch ="
|
||||
echo "================================"
|
||||
unset HSA_XNACK
|
||||
ctest --test-dir "build-$arch"
|
||||
echo "================================"
|
||||
echo "= HSA_XNACK=1 $arch ="
|
||||
echo "================================"
|
||||
export HSA_XNACK=1
|
||||
ctest --test-dir "build-$arch"
|
||||
echo "================================"
|
||||
echo "= HSA_XNACK=0 $arch ="
|
||||
echo "================================"
|
||||
export HSA_XNACK=0
|
||||
ctest --test-dir "build-$arch"
|
||||
|
||||
done;
|
Reference in New Issue
Block a user