diff --git a/samples/genbenchmarkscripts/CMakeLists.txt b/samples/genbenchmarkscripts/CMakeLists.txt
new file mode 100644
index 0000000..6115fde
--- /dev/null
+++ b/samples/genbenchmarkscripts/CMakeLists.txt
@@ -0,0 +1,11 @@
+## Benchmark script generator
+add_executable(genbenchmarkscripts main.c)
+
+# Run the generator every time it is (re)built; the benchmark_<nprocs>.sh
+# files are written to ${PROJECT_BINARY_DIR}
+add_custom_command(
+    TARGET genbenchmarkscripts POST_BUILD
+    COMMAND genbenchmarkscripts
+    WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
+    COMMENT "Generating benchmark scripts"
+)
diff --git a/samples/genbenchmarkscripts/main.c b/samples/genbenchmarkscripts/main.c
new file mode 100644
index 0000000..f9d5506
--- /dev/null
+++ b/samples/genbenchmarkscripts/main.c
@@ -0,0 +1,57 @@
+/*
+ * Generates one Slurm batch script, benchmark_<nprocs>.sh, for every power-of-two
+ * process count from 1 to 128. The files are written to the current working directory.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+
+int
+main(void)
+{
+    const int max_nprocs = 128;
+    for (int nprocs = 1; nprocs <= max_nprocs; nprocs *= 2) {
+        char filename[4096];
+        snprintf(filename, sizeof(filename), "benchmark_%d.sh", nprocs);
+
+        FILE* fp = fopen(filename, "w");
+        if (!fp) {
+            // Checked explicitly: assert() is compiled out when NDEBUG is defined
+            perror(filename);
+            return EXIT_FAILURE;
+        }
+
+        // Boilerplate
+        fprintf(fp, "#!/bin/bash\n");
+        fprintf(fp, "#SBATCH --job-name=astaroth\n");
+        fprintf(fp, "#SBATCH --account=project_2000403\n");
+        fprintf(fp, "#SBATCH --time=00:14:59\n");
+        fprintf(fp, "#SBATCH --mem=24000\n");
+        fprintf(fp, "#SBATCH --partition=gputest\n");
+
+        // nprocs, nodes, gpus
+        const int max_gpus_per_node = 4;
+        const int gpus_per_node = nprocs < max_gpus_per_node ? nprocs : max_gpus_per_node;
+        // Ceiling division in integer arithmetic, so no libm (ceil) is needed at link time
+        const int nodes = (nprocs + max_gpus_per_node - 1) / max_gpus_per_node;
+        fprintf(fp, "#SBATCH --gres=gpu:v100:%d\n", gpus_per_node);
+        fprintf(fp, "#SBATCH -n %d\n", nprocs);
+        fprintf(fp, "#SBATCH -N %d\n", nodes);
+
+        // Modules
+        fprintf(fp, "module load gcc/8.3.0 cuda/10.1.168 cmake hpcx-mpi/2.5.0-cuda nccl\n");
+        fprintf(fp, "export UCX_MEMTYPE_CACHE=n\n");
+
+        // Profile and run
+        fprintf(fp, "mkdir -p profile_%d\n", nprocs);
+        fprintf(fp, "srun nvprof --annotate-mpi openmpi -o profile_%d/%%p.nvprof ./benchmark\n",
+                nprocs);
+
+        // fclose() flushes the buffered output; a failure here means the script is incomplete
+        if (fclose(fp) != 0) {
+            perror(filename);
+            return EXIT_FAILURE;
+        }
+    }
+
+    return EXIT_SUCCESS;
+}
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 152f811..e1d25a1 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -1,11 +1,14 @@
+find_package(CUDAToolkit REQUIRED)
+
 ## Astaroth Core
 add_library(astaroth_core STATIC device.cc node.cc astaroth.cc)
-target_link_libraries(astaroth_core astaroth_utils astaroth_kernels cudart)
+target_link_libraries(astaroth_core astaroth_utils astaroth_kernels CUDA::cudart_static)
 
 ## Options
 if (MPI_ENABLED)
     find_package(MPI)
-    target_link_libraries(astaroth_core MPI::MPI_CXX)
+    find_package(OpenMP REQUIRED)
+    target_link_libraries(astaroth_core MPI::MPI_CXX OpenMP::OpenMP_CXX)
 endif()
 
 if (MULTIGPU_ENABLED)