MPI_Allreduce -> MPI_Reduce for MPI reductions + benchmark batch script

Slightly ugly because this changes the benchmark behaviour slightly. However, we now have a way to run batch benchmarks from one script, so there is no need to generate new ones.
2020-06-06 22:53:08 +03:00
parent eb05e02793
commit 53b48bb8ce
2 changed files with 53 additions and 39 deletions
--- a/src/core/device.cc
+++ b/src/core/device.cc
@@ -1645,23 +1645,22 @@ acMPIReduceScal(const AcReal local_result, const ReductionType rtype, AcReal* re
    MPI_Datatype datatype = MPI_FLOAT;
 #endif

-    /*
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    */

    int world_size;
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);

    AcReal mpi_res;
-    MPI_Allreduce(&local_result, &mpi_res, 1, datatype, op, MPI_COMM_WORLD);
-
-    if (rtype == RTYPE_RMS || rtype == RTYPE_RMS_EXP) {
-        const AcReal inv_n = AcReal(1.) / (grid.nn.x * grid.decomposition.x * grid.nn.y *
+    MPI_Reduce(&local_result, &mpi_res, 1, datatype, op, 0, MPI_COMM_WORLD);
+    if (rank == 0){
+        if (rtype == RTYPE_RMS || rtype == RTYPE_RMS_EXP) {
+            const AcReal inv_n = AcReal(1.) / (grid.nn.x * grid.decomposition.x * grid.nn.y *
                                           grid.decomposition.y * grid.nn.z * grid.decomposition.z);
-        mpi_res            = sqrt(inv_n * mpi_res);
+            mpi_res            = sqrt(inv_n * mpi_res);
+        }
+        *result = mpi_res;
    }
-    *result = mpi_res;
    return AC_SUCCESS;
 }