MPI_Allreduce -> MPI_Reduce for MPI reductions + benchmark batch script
Slightly ugly, because this changes the benchmark behaviour a bit. However, we now have a way to run batch benchmarks from a single script, with no need to generate new ones.
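For context: MPI_Allreduce leaves the reduced value on every rank, whereas MPI_Reduce delivers it only to the root rank, which is why the new code guards the RMS post-processing and the final write-back with rank == 0. Below is a minimal, self-contained sketch in plain MPI C (generic MPI calls and illustrative variable names, not the Astaroth API) showing the difference between the two collectives.

/* Minimal sketch: contrast MPI_Allreduce (result on every rank) with
 * MPI_Reduce (result only on the root rank). */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char** argv)
{
    MPI_Init(&argc, &argv);

    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    double local    = (double)(rank + 1); /* stand-in for a per-rank partial result */
    double sum_all  = 0.0;                /* valid on every rank after MPI_Allreduce */
    double sum_root = 0.0;                /* valid only on rank 0 after MPI_Reduce */

    MPI_Allreduce(&local, &sum_all, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    MPI_Reduce(&local, &sum_root, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);

    if (rank == 0)
        printf("allreduce = %g, reduce at root = %g\n", sum_all, sum_root);

    MPI_Finalize();
    return 0;
}

On non-root ranks the MPI_Reduce receive buffer is left untouched, so callers that need the value everywhere must broadcast it afterwards or keep using MPI_Allreduce.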
@@ -11,47 +11,45 @@ script_name=$0
 print_usage(){
     echo "Usage: $script_name [Options]"
-    echo "\tRuns mpi_reduce_bench, which will write benchmark results"
+    echo " Runs ./mpi_reduce_bench, which will write benchmark results to a csv file"
+    echo " Remember to run this script from your build directory"
+    echo " The benchmarks are submitted with sbatch, unless the -i option is passed"
     echo "Options:"
-    echo "\t -n <num_procs>"
-    echo "\t\t-n option to slurm, default=$default_num_procs"
-    echo "\t -N <num_nodes>"
-    echo "\t\t-N option to slurm, default=$default_num_nodes"
-    echo "\t -t <tag>"
-    echo "\t\tA benchmark tag that will be added to the mpi_reduction_benchmark.csv file"
-    echo "\t\tBy default the current git HEAD short hash will be used as a tag"
+    echo " -n <num_procs>"
+    echo " number of tasks for slurm, default=$default_num_procs"
+    echo " -N <num_nodes>"
+    echo " number of nodes for slurm, default=$default_num_nodes"
+    echo " -t <tag>"
+    echo " A benchmark tag that will be added to the mpi_reduction_benchmark.csv file"
+    echo " By default the current git HEAD short hash will be used as a tag"
+    echo " -i"
+    echo " Run the benchmark interactively with srun instead of sbatch"
+    echo " -h"
+    echo " Print this message"
 }

-while getopts n:N:t: opt
+while getopts :n:N:t:ih opt
 do
 case "$opt" in
 n)
-    if [ $OPTARG ]
-    then
-        num_procs=$OPTARG
-    else
-        print_usage
-        exit 1
-    fi
+    num_procs=$OPTARG
     ;;
 N)
-    if [ $OPTARG ]
-    then
-        num_nodes=$OPTARG
-    else
-        print_usage
-        exit 1
-    fi
+    num_nodes=$OPTARG
     ;;
 t)
-    if [ $OPTARG ]
-    then
-        benchmark_label=$OPTARG
-    else
-        print_usage
-        exit 1
-    fi
+    benchmark_label=$OPTARG
     ;;
+i)
+    interactively=1
+    ;;
+h)
+    print_usage
+    exit 0
+    ;;
 ?)
     print_usage
     exit 1
 esac
 done
@@ -60,4 +58,21 @@ then
 benchmark_label=$(git rev-parse --short HEAD)
 fi
-set -x
-srun --account=project_2000403 --gres=gpu:v100:4 --mem=48000 -t 00:14:59 -p gpu -n ${num_procs} -N ${num_nodes} ./mpi_reduce_bench ${benchmark_label}
+
+if [ -z "$interactively" ]
+then
+sbatch <<EOF
+#!/bin/sh
+#SBATCH --job-name=astaroth
+#SBATCH --account=project_2000403
+#SBATCH --time=00:14:59
+#SBATCH --mem=48000
+#SBATCH --partition=gpu
+#SBATCH --gres=gpu:v100:4
+#SBATCH -n ${num_procs}
+#SBATCH -N ${num_nodes}
+srun ./mpi_reduce_bench ${benchmark_label}
+EOF
+else
+srun --account=project_2000403 --gres=gpu:v100:4 --mem=48000 -t 00:14:59 -p gpu -n ${num_procs} -N ${num_nodes} ./mpi_reduce_bench ${benchmark_label}
+fi
@@ -1645,23 +1645,22 @@ acMPIReduceScal(const AcReal local_result, const ReductionType rtype, AcReal* re
     MPI_Datatype datatype = MPI_FLOAT;
 #endif

-    /*
     int rank;
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    */

     int world_size;
     MPI_Comm_size(MPI_COMM_WORLD, &world_size);

     AcReal mpi_res;
-    MPI_Allreduce(&local_result, &mpi_res, 1, datatype, op, MPI_COMM_WORLD);
-
-    if (rtype == RTYPE_RMS || rtype == RTYPE_RMS_EXP) {
-        const AcReal inv_n = AcReal(1.) / (grid.nn.x * grid.decomposition.x * grid.nn.y *
-                                           grid.decomposition.y * grid.nn.z * grid.decomposition.z);
-        mpi_res = sqrt(inv_n * mpi_res);
-    }
-    *result = mpi_res;
+    MPI_Reduce(&local_result, &mpi_res, 1, datatype, op, 0, MPI_COMM_WORLD);
+    if (rank == 0){
+        if (rtype == RTYPE_RMS || rtype == RTYPE_RMS_EXP) {
+            const AcReal inv_n = AcReal(1.) / (grid.nn.x * grid.decomposition.x * grid.nn.y *
+                                               grid.decomposition.y * grid.nn.z * grid.decomposition.z);
+            mpi_res = sqrt(inv_n * mpi_res);
+        }
+        *result = mpi_res;
+    }
     return AC_SUCCESS;
 }