From 666f01a23da7f10ae8131dd48da14400b6f5f131 Mon Sep 17 00:00:00 2001 From: Oskar Lappi Date: Fri, 5 Jun 2020 19:48:40 +0300 Subject: [PATCH] Benchmarking program for scalar mpi reductions, and nonbatch script for running benchmarks - New program mpi_reduce_bench - runs testcases defined in source - writes all benchmark results to a csv file, tags the testcase and benchmark run - takes optional argument for benchmark tag, default benchmark tag is a timestamp - New script mpibench.sh - runs the mpi_reduce_bench with defined parameters: - number of tasks - number of nodes - the benchmark tag for mpi_reduce_bench, default tag is the current git HEAD short hash --- CMakeLists.txt | 5 + samples/mpi_reduce_bench/CMakeLists.txt | 3 + samples/mpi_reduce_bench/main.cc | 135 ++++++++++++++++++++++++ samples/mpi_reduce_bench/mpibench.sh | 63 +++++++++++ 4 files changed, 206 insertions(+) create mode 100644 samples/mpi_reduce_bench/CMakeLists.txt create mode 100644 samples/mpi_reduce_bench/main.cc create mode 100755 samples/mpi_reduce_bench/mpibench.sh diff --git a/CMakeLists.txt b/CMakeLists.txt index a5a514b..e1e762f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -96,6 +96,11 @@ if (BUILD_SAMPLES) add_subdirectory(samples/cpptest) add_subdirectory(samples/mpitest) add_subdirectory(samples/benchmark) + add_subdirectory(samples/mpi_reduce_bench) + add_custom_target(copy-mpibench-script ALL + COMMAND ${CMAKE_COMMAND} -E copy + ${CMAKE_SOURCE_DIR}/samples/mpi_reduce_bench/mpibench.sh + ${CMAKE_CURRENT_BINARY_DIR}/mpibench.sh) add_subdirectory(samples/bwtest) add_subdirectory(samples/genbenchmarkscripts) endif() diff --git a/samples/mpi_reduce_bench/CMakeLists.txt b/samples/mpi_reduce_bench/CMakeLists.txt new file mode 100644 index 0000000..b04f80a --- /dev/null +++ b/samples/mpi_reduce_bench/CMakeLists.txt @@ -0,0 +1,3 @@ +## benchmark +add_executable(mpi_reduce_bench main.cc) +target_link_libraries(mpi_reduce_bench astaroth_core astaroth_utils) diff --git a/samples/mpi_reduce_bench/main.cc b/samples/mpi_reduce_bench/main.cc new file mode 100644 index 0000000..7c279d3 --- /dev/null +++ b/samples/mpi_reduce_bench/main.cc @@ -0,0 +1,135 @@ +/* + Copyright (C) 2014-2020, Johannes Pekkila, Miikka Vaisala. + + This file is part of Astaroth. + + Astaroth is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Astaroth is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Astaroth. If not, see . +*/ +/** + Running: benchmark -np +*/ +#include "astaroth.h" +#include "astaroth_utils.h" + +#include "errchk.h" +#include "timer_hires.h" + +#if AC_MPI_ENABLED + +#include + +#include +#include + +#include +#include +#include +#include + +int +main(int argc, char** argv) +{ + MPI_Init(NULL, NULL); + int nprocs, pid; + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &pid); + + // CPU alloc + AcMeshInfo info; + acLoadConfig(AC_DEFAULT_CONFIG, &info); + + char* benchmark_label; + + if (argc > 1){ + benchmark_label = argv[1]; + } else { + auto timestamp = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); + benchmark_label = std::ctime(×tamp); + benchmark_label[strcspn(benchmark_label, "\n")] = '\0'; + } + + //clang-format off + std::vector scalarReductionTests { + acCreateScalReductionTestCase("Scalar MAX" , VTXBUF_UUX, RTYPE_MAX), + acCreateScalReductionTestCase("Scalar MIN" , VTXBUF_UUX, RTYPE_MIN), + acCreateScalReductionTestCase("Scalar RMS" , VTXBUF_UUX, RTYPE_RMS), + acCreateScalReductionTestCase("Scalar RMS_EXP", VTXBUF_UUX, RTYPE_RMS_EXP), + acCreateScalReductionTestCase("Scalar SUM" , VTXBUF_UUX, RTYPE_SUM) + }; + + std::vector vectorReductionTests { + acCreateVecReductionTestCase("Vector MAX" , VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ, RTYPE_MAX), + acCreateVecReductionTestCase("Vector MIN" , VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ, RTYPE_MIN), + acCreateVecReductionTestCase("Vector RMS" , VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ, RTYPE_RMS), + acCreateVecReductionTestCase("Vector RMS_EXP", VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ, RTYPE_RMS_EXP), + acCreateVecReductionTestCase("Vector SUM" , VTXBUF_UUX, VTXBUF_UUY, VTXBUF_UUZ, RTYPE_SUM) + }; + //clang-format on + + // GPU alloc & compute + acGridInit(info); + + for (auto& testCase : scalarReductionTests) { + // Percentiles + const size_t num_iters = 100; + const double nth_percentile = 0.90; + std::vector results; // ms + results.reserve(num_iters); + + // Benchmark + Timer t; + + for (size_t i = 0; i < num_iters; ++i) { + acGridSynchronizeStream(STREAM_ALL); + timer_reset(&t); + acGridSynchronizeStream(STREAM_ALL); + acGridReduceScal(STREAM_DEFAULT, testCase.rtype, testCase.vtxbuf, &testCase.candidate); + acGridSynchronizeStream(STREAM_ALL); + results.push_back(timer_diff_nsec(t) / 1e6); + acGridSynchronizeStream(STREAM_ALL); + } + + if (!pid) { + std::sort(results.begin(), results.end(), + [](const double& a, const double& b) { return a < b; }); + fprintf(stdout, + "Reduction time %g ms (%gth " + "percentile)--------------------------------------\n", + results[nth_percentile * num_iters], 100 * nth_percentile); + + char path[4096] = "mpi_reduction_benchmark.csv"; + + FILE* fp = fopen(path, "a"); + ERRCHK_ALWAYS(fp); + + // Format + // benchmark label, test label, nprocs, measured (ms) + fprintf(fp, "\"%s\",\"%s\", %d, %g\n", benchmark_label, testCase.label, nprocs, results[nth_percentile * num_iters]); + fclose(fp); + } + } + acGridQuit(); + MPI_Finalize(); + return EXIT_SUCCESS; +} + +#else +int +main(void) +{ + printf("The library was built without MPI support, cannot run mpitest. Rebuild Astaroth with " + "cmake -DMPI_ENABLED=ON .. to enable.\n"); + return EXIT_FAILURE; +} +#endif // AC_MPI_ENABLES diff --git a/samples/mpi_reduce_bench/mpibench.sh b/samples/mpi_reduce_bench/mpibench.sh new file mode 100755 index 0000000..6517bf7 --- /dev/null +++ b/samples/mpi_reduce_bench/mpibench.sh @@ -0,0 +1,63 @@ +#!/bin/bash + +#defaults +default_num_procs=8 +default_num_nodes=2 + +num_procs=$default_num_procs +num_nodes=$default_num_nodes + +script_name=$0 + +print_usage(){ + echo "Usage: $script_name [Options]" + echo "\tRuns mpi_reduce_bench, which will write benchmark results" + echo "Options:" + echo "\t -n " + echo "\t\t-n option to slurm, default=$default_num_procs" + echo "\t -N " + echo "\t\t-N option to slurm, default=$default_num_nodes" + echo "\t -t " + echo "\t\tA benchmark tag that will be added to the mpi_reduction_benchmark.csv file" + echo "\t\tBy default the current git HEAD short hash will be used as a tag" +} + +while getopts n:N:t: opt +do + case "$opt" in + n) + if [ $OPTARG ] + then + num_procs=$OPTARG + else + print_usage + exit 1 + fi + ;; + N) + if [ $OPTARG ] + then + num_nodes=$OPTARG + else + print_usage + exit 1 + fi + ;; + t) + if [ $OPTARG ] + then + benchmark_label=$OPTARG + else + print_usage + exit 1 + fi + ;; + esac +done + +if [ -z "$benchmark_label" ] +then + benchmark_label=$(git rev-parse --short HEAD) +fi +set -x +srun --account=project_2000403 --gres=gpu:v100:4 --mem=48000 -t 00:14:59 -p gpu -n ${num_procs} -N ${num_nodes} ./mpi_reduce_bench ${benchmark_label}