Initial commit
This commit is contained in:
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
|||||||
|
build*
|
21
CMakeLists.txt
Normal file
21
CMakeLists.txt
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
# Declaring the minimum version pins policy defaults and must come before
# project(). 3.20 is the oldest release providing `ctest --test-dir`, which
# run.sh uses to drive these tests.
cmake_minimum_required(VERSION 3.20)
project(MI300A-XNACK LANGUAGES CXX)

# One executable per (allocator, access method) combination. The sources use
# the HIP runtime, so the CXX compiler must be HIP-capable (run.sh configures
# with -DCMAKE_CXX_COMPILER=hipcc).
add_executable(hipmalloc_hostaccess hipmalloc_hostaccess.cpp)
add_executable(malloc_hostaccess malloc_hostaccess.cpp)
add_executable(malloc_devaccess malloc_devaccess.cpp)
add_executable(hipmalloc_devaccess hipmalloc_devaccess.cpp)
add_executable(malloc_hipmemset malloc_hipmemset.cpp)
add_executable(hipmalloc_hipmemset hipmalloc_hipmemset.cpp)
add_executable(hipmallocmanaged_devaccess hipmallocmanaged_devaccess.cpp)
add_executable(hipmallocmanaged_hostaccess hipmallocmanaged_hostaccess.cpp)
add_executable(hipmallocmanaged_hipmemset hipmallocmanaged_hipmemset.cpp)

enable_testing()

# Each test simply runs the matching executable; CTest reports a pass when it
# exits with status 0.
add_test(NAME hipMalloc+HostAccess COMMAND hipmalloc_hostaccess)
add_test(NAME malloc+HostAccess COMMAND malloc_hostaccess)
add_test(NAME malloc+DevAccess COMMAND malloc_devaccess)
add_test(NAME hipMalloc+DevAccess COMMAND hipmalloc_devaccess)
add_test(NAME malloc+hipMemset COMMAND malloc_hipmemset)
add_test(NAME hipMalloc+hipMemset COMMAND hipmalloc_hipmemset)
add_test(NAME hipMallocManaged+DevAccess COMMAND hipmallocmanaged_devaccess)
add_test(NAME hipMallocManaged+HostAccess COMMAND hipmallocmanaged_hostaccess)
add_test(NAME hipMallocManaged+HipMemset COMMAND hipmallocmanaged_hipmemset)
|
72
README.md
Normal file
72
README.md
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
# mi300a-xnack
|
||||||
|
|
||||||
|
Test HSA\_XNACK behavior on AMD MI300A
|
||||||
|
|
||||||
|
XNACK refers to the GPU's ability to retry memory accesses that failed due to a page fault (which normally would lead to a memory access error), and instead retrieve the missing page [(source)](https://rocm.docs.amd.com/en/docs-6.2.1/conceptual/gpu-memory.html#xnack).
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./run.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
## ROCm 6.2.1 Results
|
||||||
|
|
||||||
|
### Device Name
|
||||||
|
|
||||||
|
| `HSA_XNACK=0` (or unset) | `HSA_XNACK=1` |
|
||||||
|
|-|-|
|
||||||
|
| `amdgcn-amd-amdhsa--gfx942:sramecc+:xnack-` | `amdgcn-amd-amdhsa--gfx942:sramecc+:xnack+` |
|
||||||
|
|
||||||
|
* `xnack+`: XNACK is available and enabled
|
||||||
|
* `xnack-`: XNACK is available and disabled [(source)](https://rocm.docs.amd.com/en/docs-6.2.1/conceptual/gpu-memory.html#xnack)
|
||||||
|
|
||||||
|
|
||||||
|
### Compiled with `--offload-arch=gfx942` or no `--offload-arch` provided
|
||||||
|
|
||||||
|
*GPU kernels will run regardless of whether XNACK is enabled*
|
||||||
|
|
||||||
|
| Allocator | Access | `HSA_XNACK=0` (or unset) | `HSA_XNACK=1` |
|
||||||
|
|-|-|-|-|
|
||||||
|
| hipMalloc | GPU Kernel | yes | yes |
|
||||||
|
| hipMalloc | Host Loop | yes | yes |
|
||||||
|
| hipMalloc | hipMemset | yes | yes |
|
||||||
|
| malloc | GPU Kernel | **segfault** | **yes** |
|
||||||
|
| malloc | Host Loop | yes | yes |
|
||||||
|
| malloc | hipMemset | HIP runtime error | HIP runtime error |
|
||||||
|
| hipMallocManaged | GPU Kernel | yes | yes |
|
||||||
|
| hipMallocManaged | Host Loop | yes | yes |
|
||||||
|
| hipMallocManaged | hipMemset | yes | yes |
|
||||||
|
|
||||||
|
### Compiled with `--offload-arch=gfx942:xnack-`
|
||||||
|
|
||||||
|
*GPU kernels will run only if XNACK is disabled*
|
||||||
|
|
||||||
|
| Allocator | Access | `HSA_XNACK=0` (or unset) | `HSA_XNACK=1` |
|
||||||
|
|-|-|-|-|
|
||||||
|
| hipMalloc | GPU Kernel | yes | *N/A* |
|
||||||
|
| hipMalloc | Host Loop | yes | yes |
|
||||||
|
| hipMalloc | hipMemset | yes | yes |
|
||||||
|
| malloc | GPU Kernel | **segfault** | **N/A** |
|
||||||
|
| malloc | Host Loop | yes | yes |
|
||||||
|
| malloc | hipMemset | HIP runtime error | HIP runtime error |
|
||||||
|
| hipMallocManaged | GPU Kernel | yes | *N/A* |
|
||||||
|
| hipMallocManaged | Host Loop | yes | yes |
|
||||||
|
| hipMallocManaged | hipMemset | yes | yes |
|
||||||
|
|
||||||
|
### Compiled with `--offload-arch=gfx942:xnack+`
|
||||||
|
|
||||||
|
*GPU kernels will run only if XNACK is enabled*
|
||||||
|
|
||||||
|
| Allocator | Access | `HSA_XNACK=0` (or unset) | `HSA_XNACK=1` |
|
||||||
|
|-|-|-|-|
|
||||||
|
| hipMalloc | GPU Kernel | *N/A* | yes |
|
||||||
|
| hipMalloc | Host Loop | yes | yes |
|
||||||
|
| hipMalloc | hipMemset | yes | yes |
|
||||||
|
| malloc | GPU Kernel | *N/A* | **yes** |
|
||||||
|
| malloc | Host Loop | yes | yes |
|
||||||
|
| malloc | hipMemset | HIP runtime error | HIP runtime error |
|
||||||
|
| hipMallocManaged | GPU Kernel | *N/A* | yes |
|
||||||
|
| hipMallocManaged | Host Loop | yes | yes |
|
||||||
|
| hipMallocManaged | hipMemset | yes | yes |
|
||||||
|
|
30
hipmalloc_devaccess.cpp
Normal file
30
hipmalloc_devaccess.cpp
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
#include <iostream>
|
||||||
|
#include <hip/hip_runtime.h>
|
||||||
|
|
||||||
|
// Evaluates the HIP runtime call `e`. On any result other than hipSuccess it
// prints "<file>:<line> <numeric code> (<description>)" to stderr and
// terminates the process with exit status 1.
// The do { } while (0) wrapper makes `HIP(...);` a single statement, so the
// macro cannot capture an `else` that follows it at the call site.
#define HIP(e)                                                              \
  do {                                                                      \
    if (hipError_t err = (e); err != hipSuccess) {                          \
      std::cerr << __FILE__ << ":" << __LINE__ << " " << err                \
                << " (" << hipGetErrorString(err) << ")\n";                 \
      exit(1);                                                              \
    }                                                                       \
  } while (0)
|
||||||
|
|
||||||
|
// Grid-stride kernel: stores each element's own index into p (p[i] = i) for
// all i < n. Every thread starts at its global thread index and advances by
// the total number of threads in the grid.
// The start index and stride are computed in size_t so the products cannot
// wrap around in 32-bit arithmetic for very large launches.
__global__ void set(double* p, size_t n) {
  const size_t first = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
  const size_t stride = static_cast<size_t>(gridDim.x) * blockDim.x;
  for (size_t idx = first; idx < n; idx += stride) {
    p[idx] = static_cast<double>(idx);
  }
}
|
||||||
|
|
||||||
|
int main(void) {
|
||||||
|
|
||||||
|
size_t n = 1024 * 1024;
|
||||||
|
double *p;
|
||||||
|
|
||||||
|
HIP(hipMalloc(&p, sizeof(double) * n));
|
||||||
|
HIP(hipDeviceSynchronize());
|
||||||
|
|
||||||
|
hipLaunchKernelGGL(set, dim3(128), dim3(128), 0, 0, p, n);
|
||||||
|
HIP(hipDeviceSynchronize());
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
23
hipmalloc_hipmemset.cpp
Normal file
23
hipmalloc_hipmemset.cpp
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
#include <iostream>
|
||||||
|
#include <hip/hip_runtime.h>
|
||||||
|
|
||||||
|
// Evaluates the HIP runtime call `e`. On any result other than hipSuccess it
// prints "<file>:<line> <numeric code> (<description>)" to stderr and
// terminates the process with exit status 1.
// The do { } while (0) wrapper makes `HIP(...);` a single statement, so the
// macro cannot capture an `else` that follows it at the call site.
#define HIP(e)                                                              \
  do {                                                                      \
    if (hipError_t err = (e); err != hipSuccess) {                          \
      std::cerr << __FILE__ << ":" << __LINE__ << " " << err                \
                << " (" << hipGetErrorString(err) << ")\n";                 \
      exit(1);                                                              \
    }                                                                       \
  } while (0)
|
||||||
|
|
||||||
|
int main(void) {
|
||||||
|
|
||||||
|
size_t n = 1024 * 1024;
|
||||||
|
double *p;
|
||||||
|
HIP(hipMalloc(&p, sizeof(double)*n));
|
||||||
|
HIP(hipDeviceSynchronize());
|
||||||
|
|
||||||
|
HIP(hipMemset(p, 17, n * sizeof(double)));
|
||||||
|
HIP(hipDeviceSynchronize());
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
25
hipmalloc_hostaccess.cpp
Normal file
25
hipmalloc_hostaccess.cpp
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
#include <iostream>
|
||||||
|
#include <hip/hip_runtime.h>
|
||||||
|
|
||||||
|
// Evaluates the HIP runtime call `e`. On any result other than hipSuccess it
// prints "<file>:<line> <numeric code> (<description>)" to stderr and
// terminates the process with exit status 1.
// The do { } while (0) wrapper makes `HIP(...);` a single statement, so the
// macro cannot capture an `else` that follows it at the call site.
#define HIP(e)                                                              \
  do {                                                                      \
    if (hipError_t err = (e); err != hipSuccess) {                          \
      std::cerr << __FILE__ << ":" << __LINE__ << " " << err                \
                << " (" << hipGetErrorString(err) << ")\n";                 \
      exit(1);                                                              \
    }                                                                       \
  } while (0)
|
||||||
|
|
||||||
|
int main(void) {
|
||||||
|
|
||||||
|
size_t n = 1024 * 1024;
|
||||||
|
double *p;
|
||||||
|
|
||||||
|
HIP(hipMalloc(&p, sizeof(double) * n));
|
||||||
|
HIP(hipDeviceSynchronize());
|
||||||
|
|
||||||
|
for (size_t i = 0; i < n; ++i) {
|
||||||
|
p[i] = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
31
hipmallocmanaged_devaccess.cpp
Normal file
31
hipmallocmanaged_devaccess.cpp
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
#include <iostream>
|
||||||
|
#include <hip/hip_runtime.h>
|
||||||
|
|
||||||
|
// Evaluates the HIP runtime call `e`. On any result other than hipSuccess it
// prints "<file>:<line> <numeric code> (<description>)" to stderr and
// terminates the process with exit status 1.
// The do { } while (0) wrapper makes `HIP(...);` a single statement, so the
// macro cannot capture an `else` that follows it at the call site.
#define HIP(e)                                                              \
  do {                                                                      \
    if (hipError_t err = (e); err != hipSuccess) {                          \
      std::cerr << __FILE__ << ":" << __LINE__ << " " << err                \
                << " (" << hipGetErrorString(err) << ")\n";                 \
      exit(1);                                                              \
    }                                                                       \
  } while (0)
|
||||||
|
|
||||||
|
// Grid-stride kernel: stores each element's own index into p (p[i] = i) for
// all i < n. Every thread starts at its global thread index and advances by
// the total number of threads in the grid.
// The start index and stride are computed in size_t so the products cannot
// wrap around in 32-bit arithmetic for very large launches.
__global__ void set(double* p, size_t n) {
  const size_t first = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
  const size_t stride = static_cast<size_t>(gridDim.x) * blockDim.x;
  for (size_t idx = first; idx < n; idx += stride) {
    p[idx] = static_cast<double>(idx);
  }
}
|
||||||
|
|
||||||
|
int main(void) {
|
||||||
|
|
||||||
|
size_t n = 1024 * 1024;
|
||||||
|
double *p;
|
||||||
|
|
||||||
|
HIP(hipMallocManaged(&p, sizeof(double) * n, hipMemAttachGlobal));
|
||||||
|
HIP(hipDeviceSynchronize());
|
||||||
|
|
||||||
|
hipLaunchKernelGGL(set, dim3(128), dim3(128), 0, 0, p, n);
|
||||||
|
HIP(hipDeviceSynchronize());
|
||||||
|
HIP(hipFree(p));
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
23
hipmallocmanaged_hipmemset.cpp
Normal file
23
hipmallocmanaged_hipmemset.cpp
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
#include <iostream>
|
||||||
|
#include <hip/hip_runtime.h>
|
||||||
|
|
||||||
|
// Evaluates the HIP runtime call `e`. On any result other than hipSuccess it
// prints "<file>:<line> <numeric code> (<description>)" to stderr and
// terminates the process with exit status 1.
// The do { } while (0) wrapper makes `HIP(...);` a single statement, so the
// macro cannot capture an `else` that follows it at the call site.
#define HIP(e)                                                              \
  do {                                                                      \
    if (hipError_t err = (e); err != hipSuccess) {                          \
      std::cerr << __FILE__ << ":" << __LINE__ << " " << err                \
                << " (" << hipGetErrorString(err) << ")\n";                 \
      exit(1);                                                              \
    }                                                                       \
  } while (0)
|
||||||
|
|
||||||
|
int main(void) {
|
||||||
|
|
||||||
|
size_t n = 1024 * 1024;
|
||||||
|
double *p;
|
||||||
|
|
||||||
|
HIP(hipMallocManaged(&p, sizeof(double) * n, hipMemAttachGlobal));
|
||||||
|
HIP(hipDeviceSynchronize());
|
||||||
|
HIP(hipMemset(p, 17, sizeof(double)*n));
|
||||||
|
HIP(hipDeviceSynchronize());
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
27
hipmallocmanaged_hostaccess.cpp
Normal file
27
hipmallocmanaged_hostaccess.cpp
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
#include <iostream>
|
||||||
|
#include <hip/hip_runtime.h>
|
||||||
|
|
||||||
|
// Evaluates the HIP runtime call `e`. On any result other than hipSuccess it
// prints "<file>:<line> <numeric code> (<description>)" to stderr and
// terminates the process with exit status 1.
// The do { } while (0) wrapper makes `HIP(...);` a single statement, so the
// macro cannot capture an `else` that follows it at the call site.
#define HIP(e)                                                              \
  do {                                                                      \
    if (hipError_t err = (e); err != hipSuccess) {                          \
      std::cerr << __FILE__ << ":" << __LINE__ << " " << err                \
                << " (" << hipGetErrorString(err) << ")\n";                 \
      exit(1);                                                              \
    }                                                                       \
  } while (0)
|
||||||
|
|
||||||
|
int main(void) {
|
||||||
|
|
||||||
|
size_t n = 1024 * 1024;
|
||||||
|
double *p;
|
||||||
|
|
||||||
|
HIP(hipMallocManaged(&p, sizeof(double) * n, hipMemAttachGlobal));
|
||||||
|
HIP(hipDeviceSynchronize());
|
||||||
|
|
||||||
|
for (size_t i = 0; i < n; ++i) {
|
||||||
|
p[i] = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
HIP(hipDeviceSynchronize());
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
29
malloc_devaccess.cpp
Normal file
29
malloc_devaccess.cpp
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
#include <iostream>
|
||||||
|
#include <hip/hip_runtime.h>
|
||||||
|
|
||||||
|
// Evaluates the HIP runtime call `e`. On any result other than hipSuccess it
// prints "<file>:<line> <numeric code> (<description>)" to stderr and
// terminates the process with exit status 1.
// The do { } while (0) wrapper makes `HIP(...);` a single statement, so the
// macro cannot capture an `else` that follows it at the call site.
#define HIP(e)                                                              \
  do {                                                                      \
    if (hipError_t err = (e); err != hipSuccess) {                          \
      std::cerr << __FILE__ << ":" << __LINE__ << " " << err                \
                << " (" << hipGetErrorString(err) << ")\n";                 \
      exit(1);                                                              \
    }                                                                       \
  } while (0)
|
||||||
|
|
||||||
|
// Grid-stride kernel: stores each element's own index into p (p[i] = i) for
// all i < n. Every thread starts at its global thread index and advances by
// the total number of threads in the grid.
// The start index and stride are computed in size_t so the products cannot
// wrap around in 32-bit arithmetic for very large launches.
__global__ void set(double* p, size_t n) {
  const size_t first = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
  const size_t stride = static_cast<size_t>(gridDim.x) * blockDim.x;
  for (size_t idx = first; idx < n; idx += stride) {
    p[idx] = static_cast<double>(idx);
  }
}
|
||||||
|
|
||||||
|
int main(void) {
|
||||||
|
|
||||||
|
size_t n = 1024 * 1024;
|
||||||
|
double *p;
|
||||||
|
p = (double*)malloc(sizeof(double)*n);
|
||||||
|
HIP(hipDeviceSynchronize());
|
||||||
|
|
||||||
|
hipLaunchKernelGGL(set, dim3(128), dim3(128), 0, 0, p, n);
|
||||||
|
HIP(hipDeviceSynchronize());
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
23
malloc_hipmemset.cpp
Normal file
23
malloc_hipmemset.cpp
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
#include <iostream>
|
||||||
|
#include <hip/hip_runtime.h>
|
||||||
|
|
||||||
|
// Evaluates the HIP runtime call `e`. On any result other than hipSuccess it
// prints "<file>:<line> <numeric code> (<description>)" to stderr and
// terminates the process with exit status 1.
// The do { } while (0) wrapper makes `HIP(...);` a single statement, so the
// macro cannot capture an `else` that follows it at the call site.
#define HIP(e)                                                              \
  do {                                                                      \
    if (hipError_t err = (e); err != hipSuccess) {                          \
      std::cerr << __FILE__ << ":" << __LINE__ << " " << err                \
                << " (" << hipGetErrorString(err) << ")\n";                 \
      exit(1);                                                              \
    }                                                                       \
  } while (0)
|
||||||
|
|
||||||
|
// Allocates n doubles with plain malloc and passes the host pointer to
// hipMemset (byte value 17). Returns 0 when the allocation and every HIP call
// succeed; otherwise the process exits with status 1.
int main(void) {
  size_t n = 1024 * 1024;
  double *p;
  p = (double*)malloc(sizeof(double) * n);
  // A failed host allocation would otherwise be indistinguishable from the
  // hipMemset failure this program is meant to detect.
  if (p == nullptr) {
    std::cerr << __FILE__ << ":" << __LINE__ << " malloc failed\n";
    exit(1);
  }
  HIP(hipDeviceSynchronize());

  HIP(hipMemset(p, 17, n * sizeof(double)));
  HIP(hipDeviceSynchronize());

  free(p);

  return 0;
}
|
||||||
|
|
||||||
|
|
24
malloc_hostaccess.cpp
Normal file
24
malloc_hostaccess.cpp
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
#include <iostream>
|
||||||
|
#include <hip/hip_runtime.h>
|
||||||
|
|
||||||
|
// Evaluates the HIP runtime call `e`. On any result other than hipSuccess it
// prints "<file>:<line> <numeric code> (<description>)" to stderr and
// terminates the process with exit status 1.
// The do { } while (0) wrapper makes `HIP(...);` a single statement, so the
// macro cannot capture an `else` that follows it at the call site.
#define HIP(e)                                                              \
  do {                                                                      \
    if (hipError_t err = (e); err != hipSuccess) {                          \
      std::cerr << __FILE__ << ":" << __LINE__ << " " << err                \
                << " (" << hipGetErrorString(err) << ")\n";                 \
      exit(1);                                                              \
    }                                                                       \
  } while (0)
|
||||||
|
|
||||||
|
// Baseline case: allocates n doubles with plain malloc and writes p[i] = i
// from a host loop, with no HIP calls involved. Returns 0 on success and
// exits with status 1 if the allocation fails.
int main(void) {
  size_t n = 1024 * 1024;
  double *p;

  p = (double*)malloc(sizeof(double) * n);
  // Writing through a null pointer would crash instead of reporting the
  // allocation failure.
  if (p == nullptr) {
    std::cerr << __FILE__ << ":" << __LINE__ << " malloc failed\n";
    exit(1);
  }

  for (size_t i = 0; i < n; ++i) {
    p[i] = i;
  }

  free(p);

  return 0;
}
|
||||||
|
|
||||||
|
|
52
run.sh
Executable file
52
run.sh
Executable file
@@ -0,0 +1,52 @@
|
|||||||
|
#! /bin/bash
#
# Prints the xnack setting that rocminfo reports with HSA_XNACK unset, =1 and
# =0, then, for every --offload-arch variant, does a clean build and runs the
# CTest suite under the same three HSA_XNACK settings.

set -euo pipefail

# The rocm environment module must be loaded; print the command to run and stop.
if ! module is-loaded rocm; then
    echo module load rocm
    exit 1
fi

echo "==================="
echo "= HSA_XNACK unset ="
echo "==================="
unset HSA_XNACK
rocminfo | grep xnack

echo "==================="
echo "= HSA_XNACK=1     ="
echo "==================="
export HSA_XNACK=1
rocminfo | grep xnack

echo "==================="
echo "= HSA_XNACK=0     ="
echo "==================="
export HSA_XNACK=0
rocminfo | grep xnack

# From here on a failing command must not stop the script: a ctest run that
# reports failing tests still has to be followed by the remaining runs.
set +euo pipefail

for arch in "" "--offload-arch=gfx942:xnack+" "--offload-arch=gfx942:xnack-" "--offload-arch=gfx942"; do

    build_dir="build-$arch"

    rm -rf "$build_dir"

    # With errexit off, a broken configure or build would otherwise go
    # unnoticed and ctest would report results for missing executables.
    if ! cmake -S . -B "$build_dir" -DCMAKE_CXX_COMPILER=hipcc -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS="-Wall -Wextra $arch"; then
        echo "cmake configure failed for arch '$arch'" >&2
        exit 1
    fi
    if ! VERBOSE=1 make -C "$build_dir"; then
        echo "build failed for arch '$arch'" >&2
        exit 1
    fi

    echo "================================"
    echo "= HSA_XNACK unset $arch ="
    echo "================================"
    unset HSA_XNACK
    ctest --test-dir "$build_dir"

    echo "================================"
    echo "= HSA_XNACK=1 $arch ="
    echo "================================"
    export HSA_XNACK=1
    ctest --test-dir "$build_dir"

    echo "================================"
    echo "= HSA_XNACK=0 $arch ="
    echo "================================"
    export HSA_XNACK=0
    ctest --test-dir "$build_dir"

done
|
Reference in New Issue
Block a user