MPI_Allreduce -> MPI_Reduce for MPI reductions + benchmark batch script

A bit ugly, because this also changes the benchmark behaviour slightly.
However, we now have a way to run batch benchmarks from one script, with no need to generate new ones.
This commit is contained in:
Oskar Lappi
2020-06-06 22:53:08 +03:00
parent eb05e02793
commit 53b48bb8ce
2 changed files with 53 additions and 39 deletions

View File

@@ -11,47 +11,45 @@ script_name=$0
 print_usage(){
     echo "Usage: $script_name [Options]"
-    echo "\tRuns mpi_reduce_bench, which will write benchmark results"
+    echo " Runs ./mpi_reduce_bench, which will write benchmark results to a csv file"
+    echo " Remember to run this script from your build directory"
+    echo " The benchmarks are submitted with sbatch, unless the -i option is passed"
     echo "Options:"
-    echo "\t -n <num_procs>"
-    echo "\t\t-n option to slurm, default=$default_num_procs"
-    echo "\t -N <num_nodes>"
-    echo "\t\t-N option to slurm, default=$default_num_nodes"
-    echo "\t -t <tag>"
-    echo "\t\tA benchmark tag that will be added to the mpi_reduction_benchmark.csv file"
-    echo "\t\tBy default the current git HEAD short hash will be used as a tag"
+    echo " -n <num_procs>"
+    echo " number of tasks for slurm, default=$default_num_procs"
+    echo " -N <num_nodes>"
+    echo " number of nodes for slurm, default=$default_num_nodes"
+    echo " -t <tag>"
+    echo " A benchmark tag that will be added to the mpi_reduction_benchmark.csv file"
+    echo " By default the current git HEAD short hash will be used as a tag"
+    echo " -i"
+    echo " Run the benchmark interactively with srun instead of sbatch"
+    echo " -h"
+    echo " Print this message"
 }
-while getopts n:N:t: opt
+while getopts :n:N:t:ih opt
 do
     case "$opt" in
         n)
-            if [ $OPTARG ]
-            then
-                num_procs=$OPTARG
-            else
-                print_usage
-                exit 1
-            fi
+            num_procs=$OPTARG
             ;;
         N)
-            if [ $OPTARG ]
-            then
-                num_nodes=$OPTARG
-            else
-                print_usage
-                exit 1
-            fi
+            num_nodes=$OPTARG
             ;;
         t)
-            if [ $OPTARG ]
-            then
-                benchmark_label=$OPTARG
-            else
-                print_usage
-                exit 1
-            fi
+            benchmark_label=$OPTARG
+            ;;
+        i)
+            interactively=1
+            ;;
+        h)
+            print_usage
+            exit 0
+            ;;
+        ?)
+            print_usage
+            exit 1
             ;;
     esac
 done
@@ -60,4 +58,21 @@ then
     benchmark_label=$(git rev-parse --short HEAD)
 fi
 set -x
-srun --account=project_2000403 --gres=gpu:v100:4 --mem=48000 -t 00:14:59 -p gpu -n ${num_procs} -N ${num_nodes} ./mpi_reduce_bench ${benchmark_label}
+if [ -z "$interactively" ]
+then
+    sbatch <<EOF
+#!/bin/sh
+#SBATCH --job-name=astaroth
+#SBATCH --account=project_2000403
+#SBATCH --time=00:14:59
+#SBATCH --mem=48000
+#SBATCH --partition=gpu
+#SBATCH --gres=gpu:v100:4
+#SBATCH -n ${num_procs}
+#SBATCH -N ${num_nodes}
+srun ./mpi_reduce_bench ${benchmark_label}
+EOF
+else
+    srun --account=project_2000403 --gres=gpu:v100:4 --mem=48000 -t 00:14:59 -p gpu -n ${num_procs} -N ${num_nodes} ./mpi_reduce_bench ${benchmark_label}
+fi
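
The submission above relies on a standard here-document detail: because the EOF delimiter is unquoted, ${num_procs}, ${num_nodes}, and ${benchmark_label} are expanded by the submitting shell, so sbatch receives a job script with the concrete values already filled in. A minimal standalone sketch of that behaviour (illustrative only, not part of the commit; cat stands in for sbatch and the values are placeholders):

#!/bin/sh
# Unquoted delimiter: ${num_procs} is expanded now, by the submitting shell.
num_procs=4
cat <<EOF
#SBATCH -n ${num_procs}
EOF
# prints: #SBATCH -n 4

# Quoted delimiter: the body is passed through literally, with no expansion.
cat <<'EOF'
#SBATCH -n ${num_procs}
EOF
# prints: #SBATCH -n ${num_procs}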

View File

@@ -1645,23 +1645,22 @@ acMPIReduceScal(const AcReal local_result, const ReductionType rtype, AcReal* re
     MPI_Datatype datatype = MPI_FLOAT;
 #endif
 
-    /*
     int rank;
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    */
 
     int world_size;
     MPI_Comm_size(MPI_COMM_WORLD, &world_size);
 
     AcReal mpi_res;
-    MPI_Allreduce(&local_result, &mpi_res, 1, datatype, op, MPI_COMM_WORLD);
+    MPI_Reduce(&local_result, &mpi_res, 1, datatype, op, 0, MPI_COMM_WORLD);
+    if (rank == 0){
     if (rtype == RTYPE_RMS || rtype == RTYPE_RMS_EXP) {
         const AcReal inv_n = AcReal(1.) / (grid.nn.x * grid.decomposition.x * grid.nn.y *
                                            grid.decomposition.y * grid.nn.z * grid.decomposition.z);
         mpi_res = sqrt(inv_n * mpi_res);
     }
     *result = mpi_res;
+    }
     return AC_SUCCESS;
 }
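
For reference, the behaviour change mentioned in the commit message comes from the collective's semantics: MPI_Allreduce leaves the reduced value on every rank, whereas MPI_Reduce delivers it only on the root rank (rank 0 here), which is why the RMS post-processing and the write to *result are now guarded by rank == 0. A minimal standalone MPI sketch of that semantics (illustrative only, not Astaroth code; the per-rank values and the MPI_SUM operation are placeholders):

#include <mpi.h>
#include <stdio.h>

int main(int argc, char** argv)
{
    MPI_Init(&argc, &argv);

    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    double local_result = (double)(rank + 1); /* placeholder per-rank value */
    double mpi_res      = -1.0;               /* left untouched on non-root ranks */

    /* Root-only reduction: only rank 0 receives the sum. */
    MPI_Reduce(&local_result, &mpi_res, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);

    if (rank == 0) {
        /* Post-processing and the final write must happen only where the
           reduced value actually exists. */
        printf("reduced value on root: %f\n", mpi_res);
    }

    MPI_Finalize();
    return 0;
}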