add CUDA12 H100 build

This commit is contained in:
Carl Pearson
2023-12-07 10:44:44 -07:00
parent 60e2f6e819
commit aa25898ebd
3 changed files with 93 additions and 0 deletions

57
blake-h100-cuda12/download.sh Executable file
View File

@@ -0,0 +1,57 @@
#! /bin/bash
# Fetch Kokkos and Kokkos Kernels at the SHAs pinned in env.sh, then configure
# and build them, teeing the output of every step into a timestamped log
# directory created in the current working directory.
#
# Reads from env.sh: KOKKOS_SRC, KOKKOS_BUILD, KOKKOS_INSTALL, KOKKOS_SHA,
#                    KERNELS_SRC, KERNELS_BUILD, KERNELS_SHA
set -euo pipefail
# globstar makes ** in the unit_test loop below match at any depth.
shopt -s globstar

# env.sh sits next to this script; resolve the script's directory so the
# script can be launched from anywhere.
source "$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"/env.sh

LOG_DIR="$(date +"%Y%m%d_%H%M%S")_build"
export LOG_DIR
mkdir -p "$LOG_DIR"

# intel blows up SSH for some reason?
# The clones are best-effort so that a failed clone (for example because the
# source directory already exists) does not abort the script.
git clone git@github.com:kokkos/kokkos.git "$KOKKOS_SRC" || true
# A failed checkout is still non-fatal, but say so: otherwise the build would
# silently use whatever revision happens to be checked out.
(cd "$KOKKOS_SRC" && git checkout "$KOKKOS_SHA") \
  || echo "warning: could not check out Kokkos $KOKKOS_SHA in $KOKKOS_SRC" >&2
git clone git@github.com:kokkos/kokkos-kernels.git "$KERNELS_SRC" || true
(cd "$KERNELS_SRC" && git checkout "$KERNELS_SHA") \
  || echo "warning: could not check out Kokkos Kernels $KERNELS_SHA in $KERNELS_SRC" >&2

# Record the build environment alongside the build logs.
module list |& tee "$LOG_DIR/module-list.log"
lscpu |& tee "$LOG_DIR/lscpu.log"
hostname |& tee "$LOG_DIR/hostname.log"
env |& tee "$LOG_DIR/env.log" || true

## Configure Kokkos
cmake -S "$KOKKOS_SRC" -B "$KOKKOS_BUILD" \
  -DCMAKE_INSTALL_PREFIX="$KOKKOS_INSTALL" \
  -DCMAKE_CXX_STANDARD=17 \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_CXX_COMPILER="$KOKKOS_SRC"/bin/nvcc_wrapper \
  -DKokkos_ENABLE_CUDA=ON \
  -DKokkos_ARCH_NATIVE=ON \
  -DKokkos_ARCH_HOPPER90=ON \
  |& tee "$LOG_DIR/kokkos-config.log"

## Build & Install Kokkos
cmake --build "$KOKKOS_BUILD" -j "$(nproc)" -t install \
  |& tee "$LOG_DIR/kokkos-build.log"

## Configure Kernels
# The cuBLAS option is spelled ..._TPL_CUBLAS; the previous ..._TPL_CUBLASE
# spelling did not name the intended option.
cmake -S "$KERNELS_SRC" -B "$KERNELS_BUILD" \
  -DKokkos_DIR="$KOKKOS_INSTALL/lib64/cmake/Kokkos" \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_CXX_COMPILER="$KOKKOS_SRC"/bin/nvcc_wrapper \
  -DKokkosKernels_ENABLE_TPL_CUSPARSE=ON \
  -DKokkosKernels_ENABLE_TPL_CUBLAS=ON \
  -DKokkosKernels_ENABLE_TESTS=ON \
  -DKokkosKernels_ENABLE_PERFTESTS=ON \
  -DKokkosKernels_ENABLE_BENCHMARK=ON \
  |& tee "$LOG_DIR/kernels-config.log"

## Build Kernels
# Build every unit_test directory found anywhere under the Kernels build tree,
# appending to a single build log.
for unit_test_dir in "$KERNELS_BUILD"/**/unit_test; do
  VERBOSE=1 make -C "$unit_test_dir" -j "$(nproc)" |& tee -a "$LOG_DIR/kernels-build.log"
done
# Then build only the two benchmark targets that run.sh executes.
VERBOSE=1 make -C "$KERNELS_BUILD" -j "$(nproc)" \
  KokkosKernels_Blas3_gemm_benchmark \
  KokkosKernels_sparse_spmv_benchmark \
  |& tee -a "$LOG_DIR/kernels-build.log"

14
blake-h100-cuda12/env.sh Normal file
View File

@@ -0,0 +1,14 @@
# Shared environment for the blake-h100-cuda12 scripts: pinned SHAs, the
# source/build/install directories derived from them, and the module setup.
# Meant to be sourced, not executed.

ROOT_DIR=/projects/cwpears/kug-2023/blake-h100-cuda12

# Kokkos: directory names carry the first 8 characters of the SHA.
KOKKOS_SHA=f8788ef2ae1940b627cc6ebc6abeef2c34e7e8dc # 2023 11 30
KOKKOS_SRC="${ROOT_DIR}/kokkos-${KOKKOS_SHA:0:8}"
KOKKOS_BUILD="${ROOT_DIR}/kokkos-build-${KOKKOS_SHA:0:8}"
KOKKOS_INSTALL="${ROOT_DIR}/kokkos-install-${KOKKOS_SHA:0:8}"

# Kokkos Kernels: same naming scheme (no separate install directory).
KERNELS_SHA=a80eb9114ddda2d9454e4f3cc8a3dd5143ecdfc8 # 2023 11 30
KERNELS_SRC="${ROOT_DIR}/kernels-${KERNELS_SHA:0:8}"
KERNELS_BUILD="${ROOT_DIR}/kernels-build-${KERNELS_SHA:0:8}"

export ROOT_DIR
export KOKKOS_SHA KOKKOS_SRC KOKKOS_BUILD KOKKOS_INSTALL
export KERNELS_SHA KERNELS_SRC KERNELS_BUILD

# Site module environment, then the toolchain used for this build.
source /projects/x86-64-icelake-rocky8/spack-config/blake-setup-user-module-env.sh
module load gcc/11.3.0 cuda/12.0.0
module load cmake

22
blake-h100-cuda12/run.sh Executable file
View File

@@ -0,0 +1,22 @@
#! /bin/bash
#SBATCH -N 1
#SBATCH -p H100
# Run the Kokkos Kernels tests and the GEMM / SpMV benchmarks through srun,
# teeing each command's output into a timestamped log directory created in
# the current working directory. Paths come from env.sh next to this script.
set -euo pipefail

source "$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"/env.sh

LOG_DIR="$(date +"%Y%m%d_%H%M%S")_run"
export LOG_DIR
mkdir -p "$LOG_DIR"

# probe NAME CMD [ARGS...]
# Run CMD through a 1-task, 1-minute srun and log it to $LOG_DIR/NAME.log.
# Best-effort: a failure here never aborts the script.
probe() {
  local log_name=$1
  shift
  srun -n1 -t 1 "$@" |& tee "$LOG_DIR/$log_name.log" || true
}

probe lscpu lscpu
probe hostname hostname
probe cpuinfo cat /proc/cpuinfo
probe env env

# Launcher shared by the test and benchmark runs: one node in the H100
# partition, one task, 60-minute limit. Unlike the probes, a failure in any
# of these aborts the script (set -e with pipefail).
h100_launch=(srun -N 1 -p H100 -n 1 -t 60)

"${h100_launch[@]}" ctest --test-dir "$KERNELS_BUILD" \
  |& tee "$LOG_DIR/ctest.log"

"${h100_launch[@]}" \
  "$KERNELS_BUILD"/perf_test/blas/blas3/KokkosKernels_Blas3_gemm_benchmark --cuda 0 \
  |& tee "$LOG_DIR/gemm.log"

"${h100_launch[@]}" \
  "$KERNELS_BUILD"/perf_test/sparse/KokkosKernels_sparse_spmv_benchmark \
  -f /projects/cwpears/sparc_gpu_problems/single_gpu/matrix.mm \
  |& tee "$LOG_DIR/spmv.log"