MPI benchmark now writes out the 95th percentile instead of the average running time

commit 420f8b9e06
parent 2ab605e125
Author: jpekkila
Date:   2019-12-08 23:12:23 +02:00


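Instead of dividing the total wall time by the iteration count, the benchmark now times each integration step separately, sorts the samples, and reports the one at index int(0.95 * num_iters). A minimal sketch of that selection, assuming the per-step timings have already been collected in milliseconds (percentile() and the sample values are illustrative, not part of the project's API):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    // Selects the p-th percentile with the same index formula as the commit:
    // sort ascending, then take the element at floor(p * N).
    static double
    percentile(std::vector<double> samples, const double p)
    {
        std::sort(samples.begin(), samples.end());
        size_t idx = (size_t)(p * samples.size());
        if (idx >= samples.size())
            idx = samples.size() - 1; // clamp for p == 1.0
        return samples[idx];
    }

    int
    main(void)
    {
        // Nine ordinary steps and one straggler
        std::vector<double> step_ms = {1.9, 2.0, 2.1, 2.0, 9.7, 2.0, 2.1, 1.8, 2.0, 2.2};
        std::printf("95th percentile: %.1f ms\n", percentile(step_ms, 0.95)); // prints 9.7
        return 0;
    }

With num_iters = 10 as set below, nth_index = int(0.95 * 10) = 9, so the reported value is simply the slowest of the ten timed steps; the mean here (~2.8 ms) would hide the straggler entirely.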
@@ -978,7 +978,13 @@ acDeviceIntegrateStepMPI(const Device device, const AcReal dt)
 #include "src/utils/memory.h"
 #include "src/utils/timer_hires.h"
 #include "src/utils/verification.h"
+#include <vector>
+#include <algorithm>
 
 // --smpiargs="-gpu"
 AcResult
 acDeviceRunMPITest(void)
 {
@@ -1015,10 +1021,15 @@ acDeviceRunMPITest(void)
     acLoadConfig(AC_DEFAULT_CONFIG, &info);
 
     // Large mesh dim
-    const int nn = 512;
+    const int nn        = 128;
+    const int num_iters = 10;
     info.int_params[AC_nx] = info.int_params[AC_ny] = info.int_params[AC_nz] = nn;
     acUpdateConfig(&info);
 
+#define VERIFY (0)
+#if VERIFY
     AcMesh model, candidate;
 
     // Master CPU
@@ -1029,6 +1040,7 @@ acDeviceRunMPITest(void)
         acMeshRandomize(&model);
         acMeshRandomize(&candidate);
     }
+#endif
 
     ERRCHK_ALWAYS(info.int_params[AC_nz] % num_processes == 0);
 
     /// DECOMPOSITION
@@ -1047,7 +1059,9 @@ acDeviceRunMPITest(void)
     AcMesh submesh;
     acMeshCreate(submesh_info, &submesh);
     acMeshRandomize(&submesh);
+#if VERIFY
     acDeviceDistributeMeshMPI(model, &submesh);
+#endif
 
     ////////////////////////////////////////////////////////////////////////////////////////////////
     int devices_per_node = -1;
@@ -1058,54 +1072,70 @@ acDeviceRunMPITest(void)
     acDeviceLoadMesh(device, STREAM_DEFAULT, submesh);
 
     // Warmup
-    for (int i = 0; i < 5; ++i) {
-        acDeviceIntegrateStepMPI(device, FLT_EPSILON);
+    for (int i = 0; i < 10; ++i) {
+        acDeviceIntegrateStepMPI(device, 0);
     }
     acDeviceSynchronizeStream(device, STREAM_ALL);
     MPI_Barrier(MPI_COMM_WORLD);
 
     // Benchmark
-    const int num_iters = 100;
+    std::vector<double> results;
+    results.reserve(num_iters);
 
     Timer total_time;
     timer_reset(&total_time);
+    Timer step_time;
 
     for (int i = 0; i < num_iters; ++i) {
-        // acDeviceBoundStepMPI(device);
-        acDeviceIntegrateStepMPI(device, FLT_EPSILON);
+        const AcReal dt = FLT_EPSILON; // TODO recheck
+        timer_reset(&step_time);
+        acDeviceIntegrateStepMPI(device, dt);
+        acDeviceSynchronizeStream(device, STREAM_ALL);
+        MPI_Barrier(MPI_COMM_WORLD);
+        results.push_back(timer_diff_nsec(step_time) / 1e6);
     }
-    acDeviceSynchronizeStream(device, STREAM_ALL);
-    MPI_Barrier(MPI_COMM_WORLD);
+    const double ms_elapsed = timer_diff_nsec(total_time) / 1e6;
+
+    const double nth_percentile = 0.95;
+    std::sort(results.begin(), results.end(), [](const double& a, const double& b) { return a < b; });
 
     if (pid == 0) {
-        const double ms_elapsed = timer_diff_nsec(total_time) / 1e6;
         printf("vertices: %d^3, iterations: %d\n", nn, num_iters);
         printf("Total time: %f ms\n", ms_elapsed);
         printf("Time per step: %f ms\n", ms_elapsed / num_iters);
+        const size_t nth_index = int(nth_percentile * num_iters);
+        printf("%dth percentile per step: %f ms\n", int(100 * nth_percentile), results[nth_index]);
 
         char buf[256];
-        sprintf(buf, "procs_%d.bench", num_processes);
+        sprintf(buf, "procs_%d_%dth_perc.bench", num_processes, int(100 * nth_percentile));
         FILE* fp = fopen(buf, "w");
         ERRCHK_ALWAYS(fp);
-        fprintf(fp, "%d, %g", num_processes, ms_elapsed / num_iters);
+        fprintf(fp, "%d, %g", num_processes, results[nth_index]);
         fclose(fp);
     }
 
     ////////////////////////////// Timer end
 
     acDeviceBoundStepMPI(device);
     acDeviceStoreMesh(device, STREAM_DEFAULT, &submesh);
     acDeviceDestroy(device);
     ////////////////////////////////////////////////////////////////////////////////////////////////
 
+#if VERIFY
     acDeviceGatherMeshMPI(submesh, &candidate);
+#endif
     acMeshDestroy(&submesh);
 
-#define VERIFY (1)
+#if VERIFY
     // Master CPU
     if (pid == 0) {
-#if VERIFY
         acMeshApplyPeriodicBounds(&model);
         acVerifyMesh(model, candidate);
-#endif
 
         acMeshDestroy(&model);
         acMeshDestroy(&candidate);
     }
+#endif
 
     MPI_Finalize();
     return AC_FAILURE;
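One detail worth noting in the new loop: each sample is bracketed by acDeviceSynchronizeStream and MPI_Barrier, so a step is only as fast as the slowest rank, and stragglers show up in the upper percentiles rather than being averaged away. A stripped-down sketch of the same pattern in plain MPI (MPI_Wtime and usleep stand in for the project's hires timer and the integration step):

    #include <cstdio>
    #include <mpi.h>
    #include <unistd.h>

    // One benchmark sample, barrier-bracketed so all ranks time the same interval.
    static double
    timed_step_ms(void (*work)(void))
    {
        MPI_Barrier(MPI_COMM_WORLD); // align the start across ranks
        const double t0 = MPI_Wtime();
        work();
        MPI_Barrier(MPI_COMM_WORLD); // wait for the slowest rank to finish
        return (MPI_Wtime() - t0) * 1e3;
    }

    static void
    fake_step(void)
    {
        usleep(2000); // stand-in for acDeviceIntegrateStepMPI + stream sync
    }

    int
    main(int argc, char** argv)
    {
        MPI_Init(&argc, &argv);
        int pid;
        MPI_Comm_rank(MPI_COMM_WORLD, &pid);
        const double ms = timed_step_ms(fake_step);
        if (pid == 0)
            std::printf("step: %f ms\n", ms);
        MPI_Finalize();
        return 0;
    }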