diff --git a/blake-h100-cuda12/download.sh b/blake-h100-cuda12/download.sh
new file mode 100755
index 0000000..d85f2fb
--- /dev/null
+++ b/blake-h100-cuda12/download.sh
@@ -0,0 +1,76 @@
+#! /bin/bash
+#
+# Clone, configure, and build Kokkos and Kokkos Kernels using the paths and
+# revisions exported by env.sh. Output of each step is tee'd into a
+# timestamped log directory created under the current working directory.
+
+set -euo pipefail
+shopt -s globstar # enables the recursive ** glob used for the unit_test dirs
+
+# Resolve the directory holding this script so env.sh is found from any cwd.
+# '&&' (not ';') so a failed cd aborts instead of silently falling back to cwd.
+SCRIPT_DIR="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 && pwd -P)"
+source "$SCRIPT_DIR/env.sh"
+
+LOG_DIR="$(date +"%Y%m%d_%H%M%S")_build"
+export LOG_DIR
+mkdir -p "$LOG_DIR"
+
+# intel blows up SSH for some reason?
+# Clone and checkout remain best-effort so the script can continue (e.g. on a
+# re-run where the clone target already exists), but failures are reported on
+# stderr rather than silently discarded.
+git clone git@github.com:kokkos/kokkos.git "$KOKKOS_SRC" \
+  || echo "warning: could not clone kokkos into $KOKKOS_SRC" >&2
+(cd "$KOKKOS_SRC" && git checkout "$KOKKOS_SHA") \
+  || echo "warning: could not check out kokkos $KOKKOS_SHA" >&2
+git clone git@github.com:kokkos/kokkos-kernels.git "$KERNELS_SRC" \
+  || echo "warning: could not clone kokkos-kernels into $KERNELS_SRC" >&2
+(cd "$KERNELS_SRC" && git checkout "$KERNELS_SHA") \
+  || echo "warning: could not check out kokkos-kernels $KERNELS_SHA" >&2
+
+# Record the build environment alongside the build logs.
+module list |& tee "$LOG_DIR/module-list.log"
+lscpu |& tee "$LOG_DIR/lscpu.log"
+hostname |& tee "$LOG_DIR/hostname.log"
+env |& tee "$LOG_DIR/env.log" || true
+
+## Configure Kokkos
+cmake -S "$KOKKOS_SRC" -B "$KOKKOS_BUILD" \
+  -DCMAKE_INSTALL_PREFIX="$KOKKOS_INSTALL" \
+  -DCMAKE_CXX_STANDARD=17 \
+  -DCMAKE_BUILD_TYPE=Release \
+  -DCMAKE_CXX_COMPILER="$KOKKOS_SRC"/bin/nvcc_wrapper \
+  -DKokkos_ENABLE_CUDA=ON \
+  -DKokkos_ARCH_NATIVE=ON \
+  -DKokkos_ARCH_HOPPER90=ON \
+  |& tee "$LOG_DIR/kokkos-config.log"
+
+## Build & Install Kokkos
+cmake --build "$KOKKOS_BUILD" -j "$(nproc)" -t install \
+  |& tee "$LOG_DIR/kokkos-build.log"
+
+## Configure Kernels
+# The cuBLAS TPL option is spelled ..._TPL_CUBLAS; the earlier ..._TPL_CUBLASE
+# spelling did not match that option name.
+cmake -S "$KERNELS_SRC" -B "$KERNELS_BUILD" \
+  -DKokkos_DIR="$KOKKOS_INSTALL/lib64/cmake/Kokkos" \
+  -DCMAKE_BUILD_TYPE=Release \
+  -DCMAKE_CXX_COMPILER="$KOKKOS_SRC"/bin/nvcc_wrapper \
+  -DKokkosKernels_ENABLE_TPL_CUSPARSE=ON \
+  -DKokkosKernels_ENABLE_TPL_CUBLAS=ON \
+  -DKokkosKernels_ENABLE_TESTS=ON \
+  -DKokkosKernels_ENABLE_PERFTESTS=ON \
+  -DKokkosKernels_ENABLE_BENCHMARK=ON \
+  |& tee "$LOG_DIR/kernels-config.log"
+
+## Build Kernels
+# Build every unit_test directory found anywhere under the build tree, then
+# the two benchmark targets; all output is appended to one log.
+for unit_test_dir in "$KERNELS_BUILD"/**/unit_test; do
+  VERBOSE=1 make -C "$unit_test_dir" -j "$(nproc)" |& tee -a "$LOG_DIR/kernels-build.log"
+done
+VERBOSE=1 make -C "$KERNELS_BUILD" -j "$(nproc)" \
+  KokkosKernels_Blas3_gemm_benchmark \
+  KokkosKernels_sparse_spmv_benchmark \
+  |& tee -a "$LOG_DIR/kernels-build.log"
diff --git a/blake-h100-cuda12/env.sh b/blake-h100-cuda12/env.sh
new file mode 100644
index 0000000..3d3ded6
--- /dev/null
+++ b/blake-h100-cuda12/env.sh
@@ -0,0 +1,19 @@
+# Shared settings for the blake-h100-cuda12 scripts; meant to be sourced from
+# bash (uses the ${var:offset:length} substring expansion).
+# Exports source/build/install locations keyed by the first 8 characters of
+# each pinned commit, then loads the compiler, CUDA, and CMake modules.
+
+export ROOT_DIR=/projects/cwpears/kug-2023/blake-h100-cuda12
+
+export KOKKOS_SHA=f8788ef2ae1940b627cc6ebc6abeef2c34e7e8dc # 2023 11 30
+export KOKKOS_SRC="$ROOT_DIR/kokkos-${KOKKOS_SHA:0:8}"
+export KOKKOS_BUILD="$ROOT_DIR/kokkos-build-${KOKKOS_SHA:0:8}"
+export KOKKOS_INSTALL="$ROOT_DIR/kokkos-install-${KOKKOS_SHA:0:8}"
+
+export KERNELS_SHA=a80eb9114ddda2d9454e4f3cc8a3dd5143ecdfc8 # 2023 11 30
+export KERNELS_SRC="$ROOT_DIR/kernels-${KERNELS_SHA:0:8}"
+export KERNELS_BUILD="$ROOT_DIR/kernels-build-${KERNELS_SHA:0:8}"
+
+source /projects/x86-64-icelake-rocky8/spack-config/blake-setup-user-module-env.sh
+module load gcc/11.3.0 cuda/12.0.0
+module load cmake
diff --git a/blake-h100-cuda12/run.sh b/blake-h100-cuda12/run.sh
new file mode 100755
index 0000000..e5d046d
--- /dev/null
+++ b/blake-h100-cuda12/run.sh
@@ -0,0 +1,32 @@
+#! /bin/bash
+#SBATCH -N 1
+#SBATCH -p H100
+#
+# Run the Kokkos Kernels tests and the GEMM / SpMV benchmarks through srun,
+# tee'ing each command's output into a timestamped log directory created
+# under the current working directory.
+#
+# NOTE(review): env.sh is located via dirname "$0". If this script is
+# submitted with sbatch, Slurm may execute a spooled copy, in which case "$0"
+# would not point at this directory - confirm how the script is launched.
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 && pwd -P)"
+source "$SCRIPT_DIR/env.sh"
+
+LOG_DIR="$(date +"%Y%m%d_%H%M%S")_run"
+export LOG_DIR
+mkdir -p "$LOG_DIR"
+
+# Best-effort capture of node information; failures here do not stop the run.
+srun -n1 -t 1 lscpu |& tee "$LOG_DIR/lscpu.log" || true
+srun -n1 -t 1 hostname |& tee "$LOG_DIR/hostname.log" || true
+srun -n1 -t 1 cat /proc/cpuinfo |& tee "$LOG_DIR/cpuinfo.log" || true
+srun -n1 -t 1 env |& tee "$LOG_DIR/env.log" || true
+
+# Tests first, then benchmarks. With errexit + pipefail, a failing step stops
+# the script before the later steps run.
+srun -N 1 -p H100 -n 1 -t 60 ctest --test-dir "$KERNELS_BUILD" |& tee "$LOG_DIR/ctest.log"
+srun -N 1 -p H100 -n 1 -t 60 "$KERNELS_BUILD"/perf_test/blas/blas3/KokkosKernels_Blas3_gemm_benchmark --cuda 0 |& tee "$LOG_DIR/gemm.log"
+srun -N 1 -p H100 -n 1 -t 60 "$KERNELS_BUILD"/perf_test/sparse/KokkosKernels_sparse_spmv_benchmark -f /projects/cwpears/sparc_gpu_problems/single_gpu/matrix.mm |& tee "$LOG_DIR/spmv.log"