diff --git a/samples/genbenchmarkscripts/genbenchmarkscripts/CMakeLists.txt b/samples/genbenchmarkscripts/genbenchmarkscripts/CMakeLists.txt deleted file mode 100644 index 6115fde..0000000 --- a/samples/genbenchmarkscripts/genbenchmarkscripts/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -add_executable(genbenchmarkscripts main.c) - -add_custom_command( - TARGET genbenchmarkscripts POST_BUILD - COMMAND genbenchmarkscripts - WORKING_DIRECTORY ${PROJECT_BINARY_DIR} - COMMENT "Generating benchmark scripts" -) diff --git a/samples/genbenchmarkscripts/genbenchmarkscripts/main.c b/samples/genbenchmarkscripts/genbenchmarkscripts/main.c deleted file mode 100644 index d7b953b..0000000 --- a/samples/genbenchmarkscripts/genbenchmarkscripts/main.c +++ /dev/null @@ -1,120 +0,0 @@ -#include -#include -#include -#include -#include - -int -main(void) -{ - const int max_nprocs = 64; - for (int nprocs = 1; nprocs <= max_nprocs; nprocs *= 2) { - char filename[4096]; - sprintf(filename, "benchmark_%d.sh", nprocs); - - FILE* fp = fopen(filename, "w"); - assert(fp); - - // Boilerplate - fprintf(fp, "#!/bin/bash\n"); - fprintf(fp, "#BATCH --job-name=astaroth\n"); // OK - fprintf(fp, "#SBATCH --account=project_2000403\n"); // OK - fprintf(fp, "#SBATCH --time=04:00:00\n"); // OK - fprintf(fp, "#SBATCH --mem=0\n"); // OK - fprintf(fp, "#SBATCH --partition=gpu\n"); // OK - fprintf(fp, "#SBATCH --exclusive\n"); // OK - fprintf(fp, "#SBATCH --cpus-per-task=10\n"); // OK - fprintf(fp, "#SBATCH --output=benchmark-%d-%%j.out\n", nprocs); - // HACK: exclude misconfigured nodes on Puhti - fprintf(fp, "#SBATCH -x " - "r04g[05-06],r02g02,r14g04,r04g07,r16g07,r18g[02-03],r15g08,r17g06,r13g04\n"); - // fprintf(fp, "#SBATCH --cpus-per-task=10\n"); - - // nprocs, nodes, gpus - const int max_gpus_per_node = 4; - const int gpus_per_node = nprocs < max_gpus_per_node ? nprocs : max_gpus_per_node; - const int nodes = (int)ceil((double)nprocs / max_gpus_per_node); - fprintf(fp, "#SBATCH --gres=gpu:v100:%d\n", gpus_per_node); // OK - fprintf(fp, "#SBATCH -n %d\n", nprocs); // OK - fprintf(fp, "#SBATCH -N %d\n", nodes); // OK - // fprintf(fp, "#SBATCH --exclusive\n"); - // if (nprocs >= 4) - // fprintf(fp, "#SBATCH --ntasks-per-socket=2\n"); - - // Modules - // OpenMPI - fprintf(fp, "module load gcc/8.3.0 cuda/10.1.168 cmake openmpi/4.0.3-cuda nccl\n"); - // fprintf(fp, "export UCX_TLS=rc,sm,cuda_copy,gdr_copy,cuda_ipc\n"); // - // https://www.open-mpi.org/fa fprintf(fp, "export PSM2_CUDA=1\nexport PSM2_GPUDIRECT=1\n"); - // if (nprocs >= 32) - // fprintf(fp, "export UCX_TLS=ud_x,cuda_copy,gdr_copy,cuda_ipc\n"); // - // https://www.open-mpi.org/fa - - // HPCX - // fprintf(fp, "module load gcc/8.3.0 cuda/10.1.168 cmake hpcx-mpi/2.5.0-cuda nccl\n"); - // fprintf(fp, "export UCX_MEMTYPE_CACHE=n\n"); // Workaround for bug in hpcx-mpi/2.5.0 - - // Profile and run - // fprintf(fp, "mkdir -p profile_%d\n", nprocs); - - /* - const int nx = 256; // max size 2048; - const int ny = nx; - const int nz = nx; - - fprintf(fp, - //"srun nvprof --annotate-mpi openmpi -o profile_%d/%%p.nvprof ./benchmark %d %d " - //"%d\n", - "srun ./benchmark %d %d %d\n", nx, ny, nz); - */ - // fprintf(fp, "srun ./benchmark %d %d %d\n", nx, ny, nz); - - const char* files[] = { - "benchmark_decomp_1D", "benchmark_decomp_2D", "benchmark_decomp_3D", - "benchmark_decomp_1D_comm", "benchmark_decomp_2D_comm", "benchmark_decomp_3D_comm", - "benchmark_meshsize_256", "benchmark_meshsize_512", "benchmark_meshsize_1024", - "benchmark_meshsize_2048", "benchmark_stencilord_2", "benchmark_stencilord_4", - "benchmark_stencilord_6", "benchmark_stencilord_8", "benchmark_timings_control", - "benchmark_timings_comp", "benchmark_timings_comm", "benchmark_timings_default", - "benchmark_timings_corners", "benchmark_weak_128", "benchmark_weak_256", - "benchmark_weak_512", - }; - for (size_t i = 0; i < sizeof(files) / sizeof(files[0]); ++i) { - int nn = 256; - if (strcmp(files[i], "benchmark_meshsize_512") == 0) - nn = 512; - else if (strcmp(files[i], "benchmark_meshsize_1024") == 0) - nn = 1024; - else if (strcmp(files[i], "benchmark_meshsize_2048") == 0) - nn = 2048; - else if (strcmp(files[i], "benchmark_weak_128") == 0) - nn = 128; - else if (strcmp(files[i], "benchmark_weak_512") == 0) - nn = 512; - - // W/ Fredriks tunings - // (may cause Assertion `status == UCS_OK' failed errors) - // fprintf(fp, - // "$(cd %s && UCX_RNDV_THRESH=16384 UCX_RNDV_SCHEME=get_zcopy " - // "UCX_MAX_RNDV_RAILS=1 srun ./benchmark %d %d %d && cd ..)\n", - // files[i], nn, nn, nn); - if (nodes >= 2) { - fprintf(fp, - "$(cd %s && UCX_RNDV_THRESH=16384 UCX_RNDV_SCHEME=get_zcopy " - "UCX_MAX_RNDV_RAILS=1 srun --kill-on-bad-exit=0 ./benchmark %d %d %d && rm " - "-f core.* && cd ..)\n", - files[i], nn, nn, nn); - } - else { - fprintf(fp, - "$(cd %s && srun --kill-on-bad-exit=0 ./benchmark %d %d %d && rm -f core.* " - "&& cd ..)\n", - files[i], nn, nn, nn); - } - } - - fclose(fp); - } - - return EXIT_SUCCESS; -}