Added timing to the MPI benchmark
@@ -975,6 +975,7 @@ acHostCommunicateHalosMPI(AcMesh* submesh)
 // From Astaroth Utils
 #include "src/utils/config_loader.h"
 #include "src/utils/memory.h"
+#include "src/utils/timer_hires.h"
 #include "src/utils/verification.h"
 // --smpiargs="-gpu"
 AcResult
@@ -1017,6 +1018,10 @@ acDeviceRunMPITest(void)
     AcMeshInfo info;
     acLoadConfig(AC_DEFAULT_CONFIG, &info);

+    const int nn = 256;
+    info.int_params[AC_nx] = info.int_params[AC_ny] = info.int_params[AC_nz] = nn;
+    acUpdateConfig(&info);
+
     AcMesh model, candidate;

     // Master CPU
@@ -1060,23 +1065,36 @@ acDeviceRunMPITest(void)
     acDeviceCreate(0, submesh_info, &device);
     acDeviceLoadMesh(device, STREAM_DEFAULT, submesh);

-    ///// Communication start
-    {
-        const int3 start = (int3){0, 0, NGHOST};
-        const int3 end = (int3){subgrid_m.x, subgrid_m.y, subgrid_m.z - NGHOST};
-        acDevicePeriodicBoundconds(device, STREAM_DEFAULT, start, end);
-    }
+    ////////////////////////////// Timer start
+    const int num_iters = 100;
+    Timer total_time;
+    timer_reset(&total_time);
+    for (int i = 0; i < num_iters; ++i) {
+        ///// Communication start
+        {
+            const int3 start = (int3){0, 0, NGHOST};
+            const int3 end = (int3){subgrid_m.x, subgrid_m.y, subgrid_m.z - NGHOST};
+            acDevicePeriodicBoundconds(device, STREAM_DEFAULT, start, end);
+        }
 #if 1 // GPU-GPU if CUDA-aware MPI, otherwise managed CPU-GPU-GPU-CPU
         acDeviceSynchronizeStream(device, STREAM_DEFAULT);
         MPI_Barrier(MPI_COMM_WORLD);
-        acDeviceCommunicateHalosMPI(device); // Includes periodic bounds at first and last ghost zone
+        acDeviceCommunicateHalosMPI(
+            device); // Includes periodic bounds at first and last ghost zone
         MPI_Barrier(MPI_COMM_WORLD);
 #else // Explicit GPU-CPU-CPU-GPU
         acDeviceStoreMesh(device, STREAM_DEFAULT, &submesh);
         acHostCommunicateHalosMPI(&submesh);
         acDeviceLoadMesh(device, STREAM_DEFAULT, submesh);
 #endif
         ///// Communication end
+    }
+    if (pid == 0) {
+        const double ms_elapsed = timer_diff_nsec(total_time) / 1e6;
+        printf("vertices: %d^3, iterations: %d\n", nn, num_iters);
+        printf("Total time: %f ms\n", ms_elapsed);
+    }
+    ////////////////////////////// Timer end

     acDeviceStoreMesh(device, STREAM_DEFAULT, &submesh);
     acDeviceDestroy(device);
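
For reference, here is the measurement pattern introduced by this commit, factored into a standalone sketch. It assumes only what is visible in the diff above: the Timer / timer_reset / timer_diff_nsec interface from src/utils/timer_hires.h (with timer_diff_nsec returning nanoseconds) and plain MPI. The helper name benchmark_halo_exchange and the run_one_exchange callback are hypothetical placeholders for the boundconds + acDeviceCommunicateHalosMPI + barrier sequence timed above; they are not part of the Astaroth API.

// Sketch only: same timing pattern as the commit, under the assumptions stated above.
#include <mpi.h>
#include <stdio.h>

#include "src/utils/timer_hires.h"

static void
benchmark_halo_exchange(const int nn, const int num_iters, void (*run_one_exchange)(void))
{
    int pid;
    MPI_Comm_rank(MPI_COMM_WORLD, &pid);

    // Start all ranks from the same point so the timing window is comparable across processes
    MPI_Barrier(MPI_COMM_WORLD);

    Timer total_time;
    timer_reset(&total_time);

    for (int i = 0; i < num_iters; ++i)
        run_one_exchange(); // placeholder for the periodic boundconds + halo exchange + barriers

    // timer_diff_nsec returns nanoseconds (as used in the commit); convert to milliseconds
    const double ms_elapsed = timer_diff_nsec(total_time) / 1e6;
    if (pid == 0) {
        printf("vertices: %d^3, iterations: %d\n", nn, num_iters);
        printf("Total time: %f ms (%f ms per iteration)\n", ms_elapsed, ms_elapsed / num_iters);
    }
}

As committed, only the total over num_iters iterations is printed; dividing that total by the iteration count, as in the sketch, gives the per-exchange latency directly.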